mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-30 12:44:07 +00:00
Merge branch 'deblock_fix' into 'master'
[Deblock] Fix deblock when using inter. See merge request cs/ultravideo/vvc/uvg266!8
This commit is contained in:
commit
25a8a40de0
67
src/filter.c
67
src/filter.c
|
@ -374,6 +374,21 @@ static INLINE void gather_deblock_pixels(
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Gather pixels from src to dst using a custom stride and step for src
*
* Copies numel pixels so that dst[i] = src[i * step + stride] for every
* i in [0, numel). Note that stride is not multiplied by i: it is a constant
* offset applied to every read, while step is the spacing between reads.
*
* \param src     base pointer of the source pixels
* \param step    distance, in elements of src, between consecutive gathered pixels
* \param stride  constant offset, in elements of src, added to every source index
* \param numel   number of pixels to gather
* \param dst     destination buffer; elements dst[0] .. dst[numel - 1] are written
|
||||
*/
|
||||
static INLINE void gather_pixels(
|
||||
const kvz_pixel *src,
|
||||
int step,
|
||||
int stride,
|
||||
int numel,
|
||||
kvz_pixel *dst)
|
||||
{
|
||||
for (int i = 0; i < numel; ++i) {
|
||||
// step selects the i-th pixel, stride shifts the whole run by a fixed offset
dst[i] = src[i * step + stride];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Scatter pixels
|
||||
*/
|
||||
|
@ -693,8 +708,8 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
int32_t tc_index;
|
||||
int32_t tc;
|
||||
|
||||
//Deblock adapted to halve pixel mvd. TODO: Tie into actual number of fractional mv bits
|
||||
const int16_t mvdThreashold = 2; //(1 << (MV_INTERNAL_FRACTIONAL_BITS - 1))
|
||||
//Deblock adapted to half-pixel mvd.
|
||||
const int16_t mvdThreashold = 1 << (INTERNAL_MV_PREC - 1);
|
||||
|
||||
uint32_t num_4px_parts = length / 4;
|
||||
|
||||
|
@ -722,7 +737,7 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
} else {
|
||||
x_coord = x + 4 * block_idx;
|
||||
cu_p = kvz_cu_array_at(frame->cu_array, x_coord, y - 1);
|
||||
cu_q = kvz_cu_array_at(frame->cu_array, x_coord, y );
|
||||
cu_q = kvz_cu_array_at(frame->cu_array, x_coord, y);
|
||||
}
|
||||
|
||||
bool nonzero_coeffs = cbf_is_set(cu_q->cbf, cu_q->tr_depth, COLOR_Y)
|
||||
|
@ -730,24 +745,15 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
|
||||
// Filter strength
|
||||
strength = 0;
|
||||
if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) {
|
||||
if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { // Intra is used
|
||||
strength = 2;
|
||||
} else if (tu_boundary && nonzero_coeffs) {
|
||||
}
|
||||
else if (tu_boundary && nonzero_coeffs) {
|
||||
// Non-zero residual/coeffs and transform boundary
|
||||
// Neither CU is intra so tr_depth <= MAX_DEPTH.
|
||||
strength = 1;
|
||||
} else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 &&
|
||||
((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= mvdThreashold) ||
|
||||
(abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= mvdThreashold))) {
|
||||
// Absolute motion vector diff between blocks >= 0.5 (Integer pixel)
|
||||
strength = 1;
|
||||
} else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 &&
|
||||
cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) {
|
||||
strength = 1;
|
||||
}
|
||||
|
||||
// B-slice related checks
|
||||
if(!strength && state->frame->slicetype == KVZ_SLICE_B) {
|
||||
else if(cu_p->inter.mv_dir == 3 || cu_q->inter.mv_dir == 3 || state->frame->slicetype == KVZ_SLICE_B) { // B-slice related checks. TODO: Need to account for cu_p being in another slice?
|
||||
|
||||
// Zero all undefined motion vectors for easier usage
|
||||
if(!(cu_q->inter.mv_dir & 1)) {
|
||||
|
@ -807,6 +813,17 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
strength = 1;
|
||||
}
|
||||
}
|
||||
else /*if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3)*/ { //is P-slice
|
||||
if (cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) {
|
||||
// Reference pictures are different
|
||||
strength = 1;
|
||||
} else if (
|
||||
((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= mvdThreashold) ||
|
||||
(abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= mvdThreashold))) {
|
||||
// Absolute motion vector diff between blocks >= 0.5 (in units of integer pixels)
|
||||
strength = 1;
|
||||
}
|
||||
}
|
||||
|
||||
tc_index = CLIP(0, MAX_QP + 2, (int32_t)(qp + 2*(strength - 1) + (tc_offset_div2 << 1)));
|
||||
tc = lumaBitdepth < 10 ? ((kvz_g_tc_table_8x8[tc_index] + (1 << (9 - lumaBitdepth))) >> (10 - lumaBitdepth))
|
||||
|
@ -867,19 +884,20 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
int_fast32_t dp3L = dp3;
|
||||
int_fast32_t dq3L = dq3;
|
||||
|
||||
//In case of large blocks, need to gather extra pixels
|
||||
//bL:
|
||||
//line0 p7 p6 p5 p4 q4 q5 q6 q7
|
||||
kvz_pixel bL[4][8];
|
||||
|
||||
if (is_side_P_large) {
|
||||
gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 0 * y_stride, 2, &bL[0][0]/* - 2 */);
|
||||
gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 3 * y_stride, 2, &bL[3][0]/* - 2 */);
|
||||
gather_pixels(edge_src - 8 * x_stride, x_stride, 0 * y_stride, 4, &bL[0][0]);
|
||||
gather_pixels(edge_src - 8 * x_stride, x_stride, 3 * y_stride, 4, &bL[3][0]);
|
||||
dp0L = (dp0L + abs(bL[0][2] - 2 * bL[0][3] + b[0][0]) + 1) >> 1;
|
||||
dp3L = (dp3L + abs(bL[3][2] - 2 * bL[3][3] + b[3][0]) + 1) >> 1;
|
||||
}
|
||||
if (is_side_Q_large) {
|
||||
gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 0 * y_stride, 2, &bL[0][2]);
|
||||
gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 3 * y_stride, 2, &bL[3][2]);
|
||||
gather_pixels(edge_src + 4 * x_stride, x_stride, 0 * y_stride, 4, &bL[0][4]);
|
||||
gather_pixels(edge_src + 4 * x_stride, x_stride, 3 * y_stride, 4, &bL[3][4]);
|
||||
dq0L = (dq0L + abs(b[0][7] - 2 * bL[0][4] + bL[0][5]) + 1) >> 1;
|
||||
dq3L = (dq3L + abs(b[3][7] - 2 * bL[3][4] + bL[3][5]) + 1) >> 1;
|
||||
}
|
||||
|
@ -897,13 +915,13 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
|
|||
gather_deblock_pixels(edge_src, x_stride, 2 * y_stride, 4, &b[2][0]);
|
||||
if (is_side_P_large)
|
||||
{
|
||||
gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 1 * y_stride, 2, &bL[1][0] - 2);
|
||||
gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 2 * y_stride, 2, &bL[2][0] - 2);
|
||||
gather_pixels(edge_src - 8 * x_stride, x_stride, 1 * y_stride, 4, &bL[1][0]);
|
||||
gather_pixels(edge_src - 8 * x_stride, x_stride, 2 * y_stride, 4, &bL[2][0]);
|
||||
}
|
||||
if (is_side_Q_large)
|
||||
{
|
||||
gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 1 * y_stride, 2, &bL[1][2]);
|
||||
gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 2 * y_stride, 2, &bL[2][2]);
|
||||
gather_pixels(edge_src + 4 * x_stride, x_stride, 1 * y_stride, 4, &bL[1][4]);
|
||||
gather_pixels(edge_src + 4 * x_stride, x_stride, 2 * y_stride, 4, &bL[2][4]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
|
@ -1302,7 +1320,6 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
|
|||
// - Strength calculation to include average Luma level (Luma Adaptive Deblocking Filter, LADF) (optional)
|
||||
// - Deblocking strength for CIIP and IBC modes (CIIP/IBC not currently used)
|
||||
// - Handle new prediction modes (i.e. PLT) (PLT not currently used)
|
||||
// - Luma deblocking on a 4x4 grid
|
||||
// - Deblocking filter for subblock boundaries
|
||||
// - Allow loop filtering across slice/tile boundaries?
|
||||
void kvz_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px)
|
||||
|
|
|
@ -200,8 +200,10 @@ static void lcu_fill_cbf(lcu_t *lcu, int x_local, int y_local, int width, cu_inf
|
|||
cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask);
|
||||
cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y);
|
||||
if (cu_from != cu_to) {
|
||||
// Chroma coeff data is not used, luma is needed for deblocking
|
||||
// Chroma and luma coeff data is needed for deblocking
|
||||
cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_Y);
|
||||
cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_U);
|
||||
cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_V);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,6 @@
|
|||
set -eu
|
||||
. "${0%/*}/util.sh"
|
||||
|
||||
valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --tiles=2x2 --no-deblock
|
||||
valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --tiles=2x2
|
||||
#valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --slices=wpp
|
||||
#if [ ! -z ${GITLAB_CI+x} ];then valgrind_test 264x130 20 --threads=2 --owf=1 --preset=fast --slices=wpp --no-open-gop; fi
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
set -eu
|
||||
. "${0%/*}/util.sh"
|
||||
|
||||
common_args='264x128 10 yuv420p -p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --pu-depth-inter 0-3 --no-bipred --no-tmvp --no-deblock --gop=0'
|
||||
common_args='264x128 10 yuv420p -p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --pu-depth-inter 0-3 --no-bipred --no-tmvp --gop=0'
|
||||
|
||||
valgrind_test $common_args --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3
|
||||
valgrind_test $common_args --no-rdoq --no-signhide --subme=0 --bipred
|
||||
|
|
Loading…
Reference in a new issue