From 9e95b16368d9a46e8195451a4c3fe255025a8514 Mon Sep 17 00:00:00 2001 From: Jaakko Laitinen Date: Thu, 9 Dec 2021 18:04:58 +0200 Subject: [PATCH 1/6] [Deblock] Fix chroma deblocking issues when inter is enabled Added cbf_copy() calls for chroma components in the lcu_fill_cbf() function since deblocking now uses that information for chroma as well. --- src/filter.c | 1 + src/search.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/filter.c b/src/filter.c index aa42c22b..bdaab081 100644 --- a/src/filter.c +++ b/src/filter.c @@ -1305,6 +1305,7 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state, // - Luma deblocking on a 4x4 grid // - Deblocking filter for subblock boundaries // - Allow loop filtering across slice/tile boundaries? +// - Account for bi-pred and multi ref P frames void kvz_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px) { assert(!state->encoder_control->cfg.lossless); diff --git a/src/search.c b/src/search.c index 13fa66a7..a06628d9 100644 --- a/src/search.c +++ b/src/search.c @@ -200,8 +200,10 @@ static void lcu_fill_cbf(lcu_t *lcu, int x_local, int y_local, int width, cu_inf cu_info_t *cu_from = LCU_GET_CU_AT_PX(lcu, x & mask, y & mask); cu_info_t *cu_to = LCU_GET_CU_AT_PX(lcu, x, y); if (cu_from != cu_to) { - // Chroma coeff data is not used, luma is needed for deblocking + // Chroma and luma coeff data is needed for deblocking cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_Y); + cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_U); + cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_V); } } } From 9a67e622f53f83b410372812f6f5c8b01bbb3a47 Mon Sep 17 00:00:00 2001 From: Jaakko Laitinen Date: Fri, 10 Dec 2021 15:18:39 +0200 Subject: [PATCH 2/6] [CI] Enable Deblock in tests --- tests/test_tools.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_tools.sh b/tests/test_tools.sh index 053ad47d..fbcd1856 100755 --- a/tests/test_tools.sh +++ b/tests/test_tools.sh @@ -5,7 +5,7 @@ 
set -eu . "${0%/*}/util.sh" -common_args='264x128 10 yuv420p -p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --pu-depth-inter 0-3 --no-bipred --no-tmvp --no-deblock --gop=0' +common_args='264x128 10 yuv420p -p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --pu-depth-inter 0-3 --no-bipred --no-tmvp --gop=0' valgrind_test $common_args --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3 valgrind_test $common_args --no-rdoq --no-signhide --subme=0 --bipred From 60119ca3fa964daaa91899ee9c0961430680e3ba Mon Sep 17 00:00:00 2001 From: Jaakko Laitinen Date: Fri, 10 Dec 2021 15:40:39 +0200 Subject: [PATCH 3/6] [CI] Enable Deblock in slice tests --- tests/test_slices.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_slices.sh b/tests/test_slices.sh index 6ad9a5f6..512888b0 100755 --- a/tests/test_slices.sh +++ b/tests/test_slices.sh @@ -3,6 +3,6 @@ set -eu . "${0%/*}/util.sh" -valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --tiles=2x2 --no-deblock +valgrind_test 512x256 10 yuv420p --threads=2 --owf=1 --preset=ultrafast --tiles=2x2 #valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --slices=wpp #if [ ! -z ${GITLAB_CI+x} ];then valgrind_test 264x130 20 --threads=2 --owf=1 --preset=fast --slices=wpp --no-open-gop; fi From 77457391b4bec62e717810b231bce6033c35ed69 Mon Sep 17 00:00:00 2001 From: Jaakko Laitinen Date: Fri, 10 Dec 2021 19:05:43 +0200 Subject: [PATCH 4/6] [Deblock] Refactor deblock pixel gather Refactor deblock pixel gather for large blocks to be less confusing. 
--- src/filter.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/filter.c b/src/filter.c index bdaab081..83683254 100644 --- a/src/filter.c +++ b/src/filter.c @@ -374,6 +374,21 @@ static INLINE void gather_deblock_pixels( } } +/** +* \brief Gather numel pixels from src to dst: dst[i] = src[i * step + stride], i.e. step is the distance between gathered pixels and stride is a fixed offset added to every index +*/ +static INLINE void gather_pixels( + const kvz_pixel *src, + int step, + int stride, + int numel, + kvz_pixel *dst) +{ + for (int i = 0; i < numel; ++i) { + dst[i] = src[i * step + stride]; + } +} + /** * \brief Scatter pixels */ @@ -867,19 +882,20 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, int_fast32_t dp3L = dp3; int_fast32_t dq3L = dq3; + //In case of large blocks, need to gather extra pixels //bL: //line0 p7 p6 p5 p4 q4 q5 q6 q7 kvz_pixel bL[4][8]; if (is_side_P_large) { - gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 0 * y_stride, 2, &bL[0][0]/* - 2 */); - gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 3 * y_stride, 2, &bL[3][0]/* - 2 */); + gather_pixels(edge_src - 8 * x_stride, x_stride, 0 * y_stride, 4, &bL[0][0]); + gather_pixels(edge_src - 8 * x_stride, x_stride, 3 * y_stride, 4, &bL[3][0]); dp0L = (dp0L + abs(bL[0][2] - 2 * bL[0][3] + b[0][0]) + 1) >> 1; dp3L = (dp3L + abs(bL[3][2] - 2 * bL[3][3] + b[3][0]) + 1) >> 1; } if (is_side_Q_large) { - gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 0 * y_stride, 2, &bL[0][2]); - gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 3 * y_stride, 2, &bL[3][2]); + gather_pixels(edge_src + 4 * x_stride, x_stride, 0 * y_stride, 4, &bL[0][4]); + gather_pixels(edge_src + 4 * x_stride, x_stride, 3 * y_stride, 4, &bL[3][4]); dq0L = (dq0L + abs(b[0][7] - 2 * bL[0][4] + bL[0][5]) + 1) >> 1; dq3L = (dq3L + abs(b[3][7] - 2 * bL[3][4] + bL[3][5]) + 1) >> 1; } @@ -897,13 +913,13 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, gather_deblock_pixels(edge_src, x_stride, 2 * 
y_stride, 4, &b[2][0]); if (is_side_P_large) { - gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 1 * y_stride, 2, &bL[1][0] - 2); - gather_deblock_pixels(edge_src - 6 * x_stride, x_stride, 2 * y_stride, 2, &bL[2][0] - 2); + gather_pixels(edge_src - 8 * x_stride, x_stride, 1 * y_stride, 4, &bL[1][0]); + gather_pixels(edge_src - 8 * x_stride, x_stride, 2 * y_stride, 4, &bL[2][0]); } if (is_side_Q_large) { - gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 1 * y_stride, 2, &bL[1][2]); - gather_deblock_pixels(edge_src + 6 * x_stride, x_stride, 2 * y_stride, 2, &bL[2][2]); + gather_pixels(edge_src + 4 * x_stride, x_stride, 1 * y_stride, 4, &bL[1][4]); + gather_pixels(edge_src + 4 * x_stride, x_stride, 2 * y_stride, 4, &bL[2][4]); } for (int i = 0; i < 4; ++i) { From fe9d673b2af00a97d7975f586aa2192ab85f020d Mon Sep 17 00:00:00 2001 From: Jaakko Laitinen Date: Wed, 29 Dec 2021 17:26:00 +0200 Subject: [PATCH 5/6] [Deblock] Fix Luma issue: mvdThreashold set incorrectly --- src/filter.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/filter.c b/src/filter.c index 83683254..16473b00 100644 --- a/src/filter.c +++ b/src/filter.c @@ -708,8 +708,8 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, int32_t tc_index; int32_t tc; - //Deblock adapted to halve pixel mvd. TODO: Tie into actual number of fractional mv bits - const int16_t mvdThreashold = 2; //(1 << (MV_INTERNAL_FRACTIONAL_BITS - 1)) + //Deblock adapted to half-pixel mvd. 
+ const int16_t mvdThreashold = 1 << (INTERNAL_MV_PREC - 1); uint32_t num_4px_parts = length / 4; @@ -732,37 +732,28 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, if (dir == EDGE_VER) { y_coord = y + 4 * block_idx; cu_p = kvz_cu_array_at(frame->cu_array, x - 1, y_coord); - cu_q = kvz_cu_array_at(frame->cu_array, x, y_coord); + cu_q = kvz_cu_array_at(frame->cu_array, x, y_coord); } else { x_coord = x + 4 * block_idx; cu_p = kvz_cu_array_at(frame->cu_array, x_coord, y - 1); - cu_q = kvz_cu_array_at(frame->cu_array, x_coord, y ); + cu_q = kvz_cu_array_at(frame->cu_array, x_coord, y); } - + bool nonzero_coeffs = cbf_is_set(cu_q->cbf, cu_q->tr_depth, COLOR_Y) - || cbf_is_set(cu_p->cbf, cu_p->tr_depth, COLOR_Y); + || cbf_is_set(cu_p->cbf, cu_p->tr_depth, COLOR_Y); // Filter strength strength = 0; - if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { + if (cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { // Intra is used strength = 2; - } else if (tu_boundary && nonzero_coeffs) { + } + else if (tu_boundary && nonzero_coeffs) { // Non-zero residual/coeffs and transform boundary // Neither CU is intra so tr_depth <= MAX_DEPTH. 
- strength = 1; - } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && - ((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= mvdThreashold) || - (abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= mvdThreashold))) { - // Absolute motion vector diff between blocks >= 0.5 (Integer pixel) - strength = 1; - } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && - cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) { strength = 1; } - - // B-slice related checks - if(!strength && state->frame->slicetype == KVZ_SLICE_B) { + else if(cu_p->inter.mv_dir == 3 || cu_q->inter.mv_dir == 3/*state->frame->slicetype == KVZ_SLICE_B*/) { // B-slice related checks // Zero all undefined motion vectors for easier usage if(!(cu_q->inter.mv_dir & 1)) { @@ -822,7 +813,18 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, strength = 1; } } - + else /*if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3)*/ { //is P-slice + if (cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) { + // Reference pictures are different + strength = 1; + } else if ( + ((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= mvdThreashold) || + (abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= mvdThreashold))) { + // Absolute motion vector diff between blocks >= 0.5 (Integer pixel) + strength = 1; + } + } + tc_index = CLIP(0, MAX_QP + 2, (int32_t)(qp + 2*(strength - 1) + (tc_offset_div2 << 1))); tc = lumaBitdepth < 10 ? 
((kvz_g_tc_table_8x8[tc_index] + (1 << (9 - lumaBitdepth))) >> (10 - lumaBitdepth)) : ((kvz_g_tc_table_8x8[tc_index] << (lumaBitdepth - 10))); From 57d862393154245a5ac0d4c57c419d2f5219cd2c Mon Sep 17 00:00:00 2001 From: Jaakko Laitinen Date: Thu, 30 Dec 2021 16:11:46 +0200 Subject: [PATCH 6/6] [Deblock] Fix B-frame detection --- src/filter.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/filter.c b/src/filter.c index 16473b00..aad84dbc 100644 --- a/src/filter.c +++ b/src/filter.c @@ -753,7 +753,7 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, // Neither CU is intra so tr_depth <= MAX_DEPTH. strength = 1; } - else if(cu_p->inter.mv_dir == 3 || cu_q->inter.mv_dir == 3/*state->frame->slicetype == KVZ_SLICE_B*/) { // B-slice related checks + else if(cu_p->inter.mv_dir == 3 || cu_q->inter.mv_dir == 3 || state->frame->slicetype == KVZ_SLICE_B) { // B-slice related checks. TODO: Need to account for cu_p being in another slice? // Zero all undefined motion vectors for easier usage if(!(cu_q->inter.mv_dir & 1)) { @@ -1320,10 +1320,8 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state, // - Strength calculation to include average Luma level (Luma Adaptive Deblocing Filter LADF) (optional) // - Deblocking strength for CIIP and IBC modes (CIIP/IBC not currently used) // - Handle new prediction modes (i.e. PLT) (PLT not currently used) -// - Luma deblocking on a 4x4 grid // - Deblocking filter for subblock boundaries // - Allow loop filtering across slice/tile boundaries? -// - Account for bi-pred and multi ref P frames void kvz_filter_deblock_lcu(encoder_state_t * const state, int x_px, int y_px) { assert(!state->encoder_control->cfg.lossless);