From 544b59b7de490d4a24c18fc78ccd199c022000fe Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 8 Jul 2024 10:17:52 +0300 Subject: [PATCH 1/7] [api] Add new parameter "ref_wraparound" for motion vector wrapping --- src/cfg.c | 5 +++++ src/cli.c | 2 ++ src/encoder_state-bitstream.c | 2 +- src/uvg266.h | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/cfg.c b/src/cfg.c index 5dfe3a83..5885e241 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -243,6 +243,9 @@ int uvg_config_init(uvg_config *cfg) cfg->ibc = 0; cfg->dep_quant = 0; + + cfg->ref_wraparound = 0; + return 1; } @@ -1624,6 +1627,8 @@ int uvg_config_parse(uvg_config *cfg, const char *name, const char *value) } else if OPT("dep-quant") { cfg->dep_quant = (bool)atobool(value); + } else if OPT ("ref-wraparound") { + cfg->ref_wraparound = (bool)atobool(value); } else { return 0; diff --git a/src/cli.c b/src/cli.c index 6e66f77e..00237503 100644 --- a/src/cli.c +++ b/src/cli.c @@ -201,6 +201,8 @@ static const struct option long_options[] = { { "ibc", required_argument, NULL, 0 }, { "dep-quant", no_argument, NULL, 0 }, { "no-dep-quant", no_argument, NULL, 0 }, + { "ref-wraparound", no_argument, NULL, 0 }, + { "no-ref-wraparound", no_argument, NULL, 0 }, {0, 0, 0, 0} }; diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 2083e36f..59488197 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -612,7 +612,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, WRITE_UE(stream, 0, "num_ref_pic_lists_in_sps[0]"); WRITE_UE(stream, 0, "num_ref_pic_lists_in_sps[0]"); - WRITE_U(stream, 0, 1, "sps_ref_wraparound_enabled_flag"); + WRITE_U(stream, encoder->cfg.ref_wraparound, 1, "sps_ref_wraparound_enabled_flag"); diff --git a/src/uvg266.h b/src/uvg266.h index c71a835a..b252bc73 100644 --- a/src/uvg266.h +++ b/src/uvg266.h @@ -553,6 +553,9 @@ typedef struct uvg_config uint8_t ibc; /* \brief Intra Block Copy parameter 
*/ uint8_t dep_quant; + + uint8_t ref_wraparound; /* \brief MV reference wraparound */ + } uvg_config; /** From a4f43cc576618485285006fde244b500cf2576ab Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Thu, 11 Jul 2024 10:59:26 +0300 Subject: [PATCH 2/7] [refwrap] Implement a block extension function for the wraparound --- src/encoder_state-bitstream.c | 7 +++- src/image.c | 9 ++++- src/image.h | 3 +- src/inter.c | 49 +++++++++++++++++------ src/search_inter.c | 9 ++++- src/strategies/generic/ipol-generic.c | 57 +++++++++++++++++++++++++++ src/strategies/strategies-ipol.c | 1 + src/strategies/strategies-ipol.h | 2 + 8 files changed, 119 insertions(+), 18 deletions(-) diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 59488197..ee9ddf28 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -801,7 +801,12 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream, WRITE_U(stream, 0, 1, "pps_weighted_pred_flag"); // Use of Weighting Prediction (P_SLICE) WRITE_U(stream, 0, 1, "pps_weighted_bipred_flag"); // Use of Weighting Bi-Prediction (B_SLICE) - WRITE_U(stream, 0, 1, "pps_ref_wraparound_enabled_flag"); + WRITE_U(stream, encoder->cfg.ref_wraparound, 1, "pps_ref_wraparound_enabled_flag"); + + if (encoder->cfg.ref_wraparound) { + // ToDo: Add wraparound offset + WRITE_UE(stream, 0, "pps_pic_width_minus_wraparound_offset"); + } WRITE_SE(stream, ((int8_t)encoder->cfg.qp) - 26, "pps_init_qp_minus26"); WRITE_U(stream, state->frame->max_qp_delta_depth >= 0 ? 
1:0, 1, "pps_cu_qp_delta_enabled_flag"); diff --git a/src/image.c b/src/image.c index ff960f26..85f88c6b 100644 --- a/src/image.c +++ b/src/image.c @@ -486,7 +486,8 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic, int ref_x, int ref_y, int block_width, - int block_height) + int block_height, + uint8_t ref_wraparound) { assert(pic_x >= 0 && pic_x <= pic->width - block_width); assert(pic_y >= 0 && pic_y <= pic->height - block_height); @@ -536,7 +537,11 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic, epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; - uvg_get_extended_block(&epol_args); + if (ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + uvg_get_extended_block(&epol_args); + } const uvg_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x]; diff --git a/src/image.h b/src/image.h index a7f19bc9..3750c12c 100644 --- a/src/image.h +++ b/src/image.h @@ -112,7 +112,8 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic, int ref_x, int ref_y, int block_width, - int block_height); + int block_height, + uint8_t ref_wraparound); void uvg_pixels_blit(const uvg_pixel* orig, uvg_pixel *dst, diff --git a/src/inter.c b/src/inter.c index d275f4ea..3fd62a92 100644 --- a/src/inter.c +++ b/src/inter.c @@ -99,7 +99,11 @@ static void inter_recon_frac_luma(const encoder_state_t * const state, epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; - uvg_get_extended_block(&epol_args); + if (state->encoder_control->cfg.ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + uvg_get_extended_block(&epol_args); + } uvg_sample_quarterpel_luma(state->encoder_control, ext_origin, ext_s, @@ -155,7 +159,11 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state, epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; - uvg_get_extended_block(&epol_args); + if (state->encoder_control->cfg.ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + 
uvg_get_extended_block(&epol_args); + } uvg_sample_quarterpel_luma_hi(state->encoder_control, ext_origin, ext_s, @@ -218,7 +226,11 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state, epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; - uvg_get_extended_block(&epol_args); + if (state->encoder_control->cfg.ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + uvg_get_extended_block(&epol_args); + } uvg_sample_octpel_chroma(state->encoder_control, ext_origin, ext_s, @@ -232,7 +244,11 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state, // Chroma V epol_args.src = ref->v; - uvg_get_extended_block(&epol_args); + if (state->encoder_control->cfg.ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + uvg_get_extended_block(&epol_args); + } uvg_sample_octpel_chroma(state->encoder_control, ext_origin, ext_s, @@ -295,7 +311,11 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state, epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; - uvg_get_extended_block(&epol_args); + if (state->encoder_control->cfg.ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + uvg_get_extended_block(&epol_args); + } uvg_sample_octpel_chroma_hi(state->encoder_control, ext_origin, ext_s, @@ -309,7 +329,11 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state, // Chroma V epol_args.src = ref->v; - uvg_get_extended_block(&epol_args); + if (state->encoder_control->cfg.ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + uvg_get_extended_block(&epol_args); + } uvg_sample_octpel_chroma_hi(state->encoder_control, ext_origin, ext_s, @@ -340,13 +364,14 @@ static void inter_cp_with_ext_border(const uvg_pixel *ref_buf, int ref_stride, int ref_width, int ref_height, uvg_pixel *rec_buf, int rec_stride, int width, int height, - const vector2d_t *mv_in_frame) + const vector2d_t *mv_in_frame, + const 
int mv_wrap) { for (int y = mv_in_frame->y; y < mv_in_frame->y + height; ++y) { for (int x = mv_in_frame->x; x < mv_in_frame->x + width; ++x) { vector2d_t in_frame = { - CLIP(0, ref_width - 1, x), - CLIP(0, ref_height - 1, y), + mv_wrap?((x<0)?x+ref_width:x%ref_width):CLIP(0, ref_width - 1, x), + mv_wrap?((y<0)?y+ref_height:y%ref_height):CLIP(0, ref_height - 1, y), }; vector2d_t in_pu = { x - mv_in_frame->x, @@ -432,7 +457,7 @@ static unsigned inter_recon_unipred( ref->width, ref->height, yuv_px->y, out_stride_luma, pu_w, pu_h, - &int_mv_in_frame); + &int_mv_in_frame, state->encoder_control->cfg.ref_wraparound); } else { const int frame_mv_index = int_mv_in_frame.y * ref->stride + int_mv_in_frame.x; @@ -473,12 +498,12 @@ static unsigned inter_recon_unipred( ref->width / 2, ref->height / 2, yuv_px->u, out_stride_c, pu_w / 2, pu_h / 2, - &int_mv_in_frame_c); + &int_mv_in_frame_c, state->encoder_control->cfg.ref_wraparound); inter_cp_with_ext_border(ref->v, ref->stride / 2, ref->width / 2, ref->height / 2, yuv_px->v, out_stride_c, pu_w / 2, pu_h / 2, - &int_mv_in_frame_c); + &int_mv_in_frame_c, state->encoder_control->cfg.ref_wraparound); } else { const int frame_mv_index = int_mv_in_frame_c.y * ref->stride / 2 + int_mv_in_frame_c.x; diff --git a/src/search_inter.c b/src/search_inter.c index 76c7fc36..cce5aa5a 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -1102,7 +1102,11 @@ static void search_frac(inter_search_info_t *info, epol_args.ext_origin = &ext_origin; epol_args.ext_s = &ext_s; - uvg_get_extended_block(&epol_args); + if (state->encoder_control->cfg.ref_wraparound) { + uvg_get_extended_block_wraparound(&epol_args); + } else { + uvg_get_extended_block(&epol_args); + } uvg_pixel *tmp_pic = pic->y + orig.y * pic->stride + orig.x; int tmp_stride = pic->stride; @@ -1451,7 +1455,8 @@ static void search_pu_inter_ref( info->state->tile->offset_x + info->origin.x + (best_mv.x >> INTERNAL_MV_PREC), info->state->tile->offset_y + info->origin.y + (best_mv.y 
>> INTERNAL_MV_PREC), info->width, - info->height); + info->height, + cfg->ref_wraparound); best_cost += best_bits * info->state->lambda_sqrt; } diff --git a/src/strategies/generic/ipol-generic.c b/src/strategies/generic/ipol-generic.c index a720bfdb..b6581a9c 100644 --- a/src/strategies/generic/ipol-generic.c +++ b/src/strategies/generic/ipol-generic.c @@ -809,6 +809,61 @@ void uvg_get_extended_block_generic(uvg_epol_args *args) { } } + +void uvg_get_extended_block_wraparound_generic(uvg_epol_args *args) +{ + + int min_y = args->blk_y - args->pad_t; + int max_y = args->blk_y + args->blk_h + args->pad_b + args->pad_b_simd - 1; + bool out_of_bounds_y = (min_y < 0) || (max_y >= args->src_h); + + int min_x = args->blk_x - args->pad_l; + int max_x = args->blk_x + args->blk_w + args->pad_r - 1; + bool out_of_bounds_x = (min_x < 0) || (max_x >= args->src_w); + + if (out_of_bounds_y || out_of_bounds_x) { + + *args->ext = args->buf; + *args->ext_s = args->pad_l + args->blk_w + args->pad_r; + *args->ext_origin = args->buf + args->pad_t * (*args->ext_s) + args->pad_l; + + // Note that stride equals width here. + int cnt_l = CLIP(0, *args->ext_s, -min_x); + int cnt_r = CLIP(0, *args->ext_s, max_x - (args->src_w - 1)); + int cnt_m = CLIP(0, *args->ext_s, *args->ext_s - cnt_l - cnt_r); + + // For each row including real padding. + // Don't read "don't care" values (SIMD padding). Zero them out. 
+ int y; + for (y = -args->pad_t; y < args->blk_h + args->pad_b; ++y) { + int absolute_y = args->blk_y + y; + int wrapped_y = absolute_y<0?args->src_h+absolute_y:((absolute_y > args->src_h - 1)?absolute_y-args->src_h:absolute_y); + uvg_pixel *sample_l = args->src + wrapped_y * args->src_s; + uvg_pixel *sample_r = + args->src + wrapped_y * args->src_s + args->src_w - 1; + uvg_pixel *src_m = args->src + wrapped_y * args->src_s + MAX(min_x, 0); + uvg_pixel *dst_l = args->buf + (y + args->pad_t) * (*args->ext_s); + uvg_pixel *dst_m = dst_l + cnt_l; + uvg_pixel *dst_r = dst_m + cnt_m; + for (int i = 0; i < cnt_l; ++i) *(dst_l + i) = *(sample_r - (cnt_l-i)); + for (int i = 0; i < cnt_m; ++i) *(dst_m + i) = *(src_m + i); + for (int i = 0; i < cnt_r; ++i) *(dst_r + i) = *(sample_l + i); + } + + for (int y_simd = 0; y_simd < args->pad_b_simd; ++y_simd) { + uvg_pixel *dst = args->buf + (y + args->pad_t + y_simd) * (*args->ext_s); + FILL_ARRAY(dst, 0, *args->ext_s); + } + + } else { + + *args->ext = args->src + (args->blk_y - args->pad_t) * args->src_s + + (args->blk_x - args->pad_l); + *args->ext_origin = args->src + args->blk_y * args->src_s + args->blk_x; + *args->ext_s = args->src_s; + } +} + int uvg_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth) { bool success = true; @@ -822,6 +877,8 @@ int uvg_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth) success &= uvg_strategyselector_register(opaque, "sample_quarterpel_luma_hi", "generic", 0, &uvg_sample_quarterpel_luma_hi_generic); success &= uvg_strategyselector_register(opaque, "sample_octpel_chroma_hi", "generic", 0, &uvg_sample_octpel_chroma_hi_generic); success &= uvg_strategyselector_register(opaque, "get_extended_block", "generic", 0, &uvg_get_extended_block_generic); + success &= uvg_strategyselector_register(opaque, "get_extended_block_wraparound", "generic", 0, &uvg_get_extended_block_wraparound_generic); + return success; } diff --git a/src/strategies/strategies-ipol.c 
b/src/strategies/strategies-ipol.c index 38794e8f..7e648bc3 100644 --- a/src/strategies/strategies-ipol.c +++ b/src/strategies/strategies-ipol.c @@ -43,6 +43,7 @@ ipol_blocks_func * uvg_filter_hpel_blocks_diag_luma; ipol_blocks_func * uvg_filter_qpel_blocks_hor_ver_luma; ipol_blocks_func * uvg_filter_qpel_blocks_diag_luma; epol_func *uvg_get_extended_block; +epol_func *uvg_get_extended_block_wraparound; uvg_sample_quarterpel_luma_func * uvg_sample_quarterpel_luma; uvg_sample_octpel_chroma_func * uvg_sample_octpel_chroma; uvg_sample_quarterpel_luma_hi_func * uvg_sample_quarterpel_luma_hi; diff --git a/src/strategies/strategies-ipol.h b/src/strategies/strategies-ipol.h index 4fc873c5..02ab7daf 100644 --- a/src/strategies/strategies-ipol.h +++ b/src/strategies/strategies-ipol.h @@ -128,6 +128,7 @@ extern ipol_blocks_func * uvg_filter_hpel_blocks_diag_luma; extern ipol_blocks_func * uvg_filter_qpel_blocks_hor_ver_luma; extern ipol_blocks_func * uvg_filter_qpel_blocks_diag_luma; extern epol_func * uvg_get_extended_block; +extern epol_func * uvg_get_extended_block_wraparound; extern uvg_sample_quarterpel_luma_func * uvg_sample_quarterpel_luma; extern uvg_sample_octpel_chroma_func * uvg_sample_octpel_chroma; extern uvg_sample_quarterpel_luma_hi_func * uvg_sample_quarterpel_luma_hi; @@ -147,6 +148,7 @@ int uvg_strategy_register_ipol(void* opaque, uint8_t bitdepth); {"sample_quarterpel_luma_hi", (void**) &uvg_sample_quarterpel_luma_hi}, \ {"sample_octpel_chroma_hi", (void**) &uvg_sample_octpel_chroma_hi}, \ {"get_extended_block", (void**) &uvg_get_extended_block}, \ + {"get_extended_block_wraparound", (void**) &uvg_get_extended_block_wraparound}, \ From 505d0f8c16f7d1c7956c6ca1626a385bed01ce63 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Thu, 11 Jul 2024 12:44:44 +0300 Subject: [PATCH 3/7] [debug] Fix the YUView debug printing --- src/encode_coding_tree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/encode_coding_tree.c 
b/src/encode_coding_tree.c index 801e9943..28c692c9 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -796,10 +796,10 @@ int uvg_encode_inter_prediction_unit( } } #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV - int abs_x = x + state->tile->offset_x; - int abs_y = y + state->tile->offset_y; - if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L0, abs_x, abs_y, width, height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); - if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, width, height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); + int abs_x = cu_loc->x + state->tile->offset_x; + int abs_y = cu_loc->y + state->tile->offset_y; + if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L0, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); + if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); #endif } else { if (state->frame->slicetype == UVG_SLICE_B && cur_cu->type != CU_IBC) { @@ -821,9 +821,9 @@ int uvg_encode_inter_prediction_unit( continue; } #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV - int abs_x = x + state->tile->offset_x; - int abs_y = y + state->tile->offset_y; - DBG_YUVIEW_MV(state->frame->poc, ref_list_idx ? DBG_YUVIEW_MVINTER_L1 : DBG_YUVIEW_MVINTER_L0, abs_x, abs_y, width, height, cur_cu->inter.mv[ref_list_idx][0], cur_cu->inter.mv[ref_list_idx][1]); + int abs_x = cu_loc->x + state->tile->offset_x; + int abs_y = cu_loc->y + state->tile->offset_y; + DBG_YUVIEW_MV(state->frame->poc, ref_list_idx ? 
DBG_YUVIEW_MVINTER_L1 : DBG_YUVIEW_MVINTER_L0, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[ref_list_idx][0], cur_cu->inter.mv[ref_list_idx][1]); #endif // size of the current reference index list (L0/L1) uint8_t ref_LX_size = state->frame->ref_LX_size[ref_list_idx]; From 96c6dee6ab6141b582d228bf25c23c07f3985366 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 29 Jul 2024 12:05:27 +0300 Subject: [PATCH 4/7] [refwrap] Fix the MV wrapping by only wrapping the x-axis and rename some variables --- src/inter.c | 2 +- src/strategies/generic/ipol-generic.c | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/inter.c b/src/inter.c index 3fd62a92..3f71c866 100644 --- a/src/inter.c +++ b/src/inter.c @@ -371,7 +371,7 @@ static void inter_cp_with_ext_border(const uvg_pixel *ref_buf, int ref_stride, for (int x = mv_in_frame->x; x < mv_in_frame->x + width; ++x) { vector2d_t in_frame = { mv_wrap?((x<0)?x+ref_width:x%ref_width):CLIP(0, ref_width - 1, x), - mv_wrap?((y<0)?y+ref_height:y%ref_height):CLIP(0, ref_height - 1, y), + CLIP(0, ref_height - 1, y), }; vector2d_t in_pu = { x - mv_in_frame->x, diff --git a/src/strategies/generic/ipol-generic.c b/src/strategies/generic/ipol-generic.c index b6581a9c..9777d02e 100644 --- a/src/strategies/generic/ipol-generic.c +++ b/src/strategies/generic/ipol-generic.c @@ -828,26 +828,25 @@ void uvg_get_extended_block_wraparound_generic(uvg_epol_args *args) *args->ext_origin = args->buf + args->pad_t * (*args->ext_s) + args->pad_l; // Note that stride equals width here. 
- int cnt_l = CLIP(0, *args->ext_s, -min_x); - int cnt_r = CLIP(0, *args->ext_s, max_x - (args->src_w - 1)); - int cnt_m = CLIP(0, *args->ext_s, *args->ext_s - cnt_l - cnt_r); + int count_left = CLIP(0, *args->ext_s, -min_x); + int count_right = CLIP(0, *args->ext_s, max_x - (args->src_w - 1)); + int count_middle = CLIP(0, *args->ext_s, *args->ext_s - count_left - count_right); // For each row including real padding. // Don't read "don't care" values (SIMD padding). Zero them out. int y; for (y = -args->pad_t; y < args->blk_h + args->pad_b; ++y) { - int absolute_y = args->blk_y + y; - int wrapped_y = absolute_y<0?args->src_h+absolute_y:((absolute_y > args->src_h - 1)?absolute_y-args->src_h:absolute_y); - uvg_pixel *sample_l = args->src + wrapped_y * args->src_s; + int clipped_y = CLIP(0, args->src_h - 1, args->blk_y + y); + uvg_pixel *sample_l = args->src + clipped_y * args->src_s; uvg_pixel *sample_r = - args->src + wrapped_y * args->src_s + args->src_w - 1; - uvg_pixel *src_m = args->src + wrapped_y * args->src_s + MAX(min_x, 0); + args->src + clipped_y * args->src_s + args->src_w - 1; + uvg_pixel *src_m = args->src + clipped_y * args->src_s + MAX(min_x, 0); uvg_pixel *dst_l = args->buf + (y + args->pad_t) * (*args->ext_s); - uvg_pixel *dst_m = dst_l + cnt_l; - uvg_pixel *dst_r = dst_m + cnt_m; - for (int i = 0; i < cnt_l; ++i) *(dst_l + i) = *(sample_r - (cnt_l-i)); - for (int i = 0; i < cnt_m; ++i) *(dst_m + i) = *(src_m + i); - for (int i = 0; i < cnt_r; ++i) *(dst_r + i) = *(sample_l + i); + uvg_pixel *dst_m = dst_l + count_left; + uvg_pixel *dst_r = dst_m + count_middle; + for (int i = 0; i < count_left; ++i) *(dst_l + i) = *((sample_r - (count_left-1)) + i); + for (int i = 0; i < count_middle; ++i) *(dst_m + i) = *(src_m + i); + for (int i = 0; i < count_right; ++i) *(dst_r + i) = *(sample_l + i); } for (int y_simd = 0; y_simd < args->pad_b_simd; ++y_simd) { From e8633dab1daccec764b7999352d52f792d32fd09 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: 
Thu, 1 Aug 2024 15:58:36 +0300 Subject: [PATCH 5/7] [refwrap] Fix the uvg_get_extended_block_wraparound() and use memcpy --- src/strategies/generic/ipol-generic.c | 51 ++++++++++++++++++--------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/src/strategies/generic/ipol-generic.c b/src/strategies/generic/ipol-generic.c index 9777d02e..2723f430 100644 --- a/src/strategies/generic/ipol-generic.c +++ b/src/strategies/generic/ipol-generic.c @@ -818,35 +818,54 @@ void uvg_get_extended_block_wraparound_generic(uvg_epol_args *args) bool out_of_bounds_y = (min_y < 0) || (max_y >= args->src_h); int min_x = args->blk_x - args->pad_l; - int max_x = args->blk_x + args->blk_w + args->pad_r - 1; + int max_x = args->blk_x + args->blk_w + args->pad_r; bool out_of_bounds_x = (min_x < 0) || (max_x >= args->src_w); if (out_of_bounds_y || out_of_bounds_x) { + int first_x_start = 0; + int first_x_count = 0; + + int second_x_start = 0; + int second_x_count = 0; + + + if (out_of_bounds_x) { + if (min_x < 0) { // The block goes over the left edge of the frame. + first_x_start = args->src_w + min_x; + first_x_count = -min_x; + if (max_x >= 0) { // Right side of the block doesn't wrap around the frame so we need to copy it separately. + second_x_count = max_x; + } + } else { // The block wraps around the right edge of the frame. + if (min_x >= args->src_w) { // The whole block wraps around the frame so copy in one go. + first_x_start = min_x - args->src_w; + first_x_count = max_x-min_x; + } else { // The left side of the block doesn't wrap around the frame so we need to copy it separately. 
+ first_x_start = min_x; + first_x_count = args->src_w - min_x; + second_x_count = max_x - args->src_w; + } + } + } else { // The block is not over the horizontal edges of the frame, just copy directly + first_x_count = max_x-min_x; + first_x_start = min_x; + } + *args->ext = args->buf; *args->ext_s = args->pad_l + args->blk_w + args->pad_r; *args->ext_origin = args->buf + args->pad_t * (*args->ext_s) + args->pad_l; - // Note that stride equals width here. - int count_left = CLIP(0, *args->ext_s, -min_x); - int count_right = CLIP(0, *args->ext_s, max_x - (args->src_w - 1)); - int count_middle = CLIP(0, *args->ext_s, *args->ext_s - count_left - count_right); - // For each row including real padding. // Don't read "don't care" values (SIMD padding). Zero them out. int y; for (y = -args->pad_t; y < args->blk_h + args->pad_b; ++y) { int clipped_y = CLIP(0, args->src_h - 1, args->blk_y + y); - uvg_pixel *sample_l = args->src + clipped_y * args->src_s; - uvg_pixel *sample_r = - args->src + clipped_y * args->src_s + args->src_w - 1; - uvg_pixel *src_m = args->src + clipped_y * args->src_s + MAX(min_x, 0); - uvg_pixel *dst_l = args->buf + (y + args->pad_t) * (*args->ext_s); - uvg_pixel *dst_m = dst_l + count_left; - uvg_pixel *dst_r = dst_m + count_middle; - for (int i = 0; i < count_left; ++i) *(dst_l + i) = *((sample_r - (count_left-1)) + i); - for (int i = 0; i < count_middle; ++i) *(dst_m + i) = *(src_m + i); - for (int i = 0; i < count_right; ++i) *(dst_r + i) = *(sample_l + i); + uvg_pixel *samples = args->src + clipped_y * args->src_s; + uvg_pixel *dst = args->buf + (y + args->pad_t) * (*args->ext_s); + + if (first_x_count) memcpy(dst, samples + first_x_start, first_x_count*sizeof(uvg_pixel)); + if (second_x_count) memcpy(dst + first_x_count, samples + second_x_start, second_x_count*sizeof(uvg_pixel)); } for (int y_simd = 0; y_simd < args->pad_b_simd; ++y_simd) { From f212ce136912beb83e106cce2febeb33bb66dfd2 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 2 
Aug 2024 14:45:20 +0300 Subject: [PATCH 6/7] [refwrap] Add ref_lcu dependency to the rightmost lcu since refwrap will wrap around the frame --- src/encoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/encoder.c b/src/encoder.c index 4b6b42d0..915bab42 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -244,6 +244,8 @@ encoder_control_t* uvg_encoder_control_init(const uvg_config *const cfg) encoder->max_inter_ref_lcu.right = 1; encoder->max_inter_ref_lcu.down = 1; + if (encoder->cfg.ref_wraparound) encoder->max_inter_ref_lcu.right = (encoder->cfg.width+LCU_LUMA_SIZE-1)>>LOG2_LCU_WIDTH; + int max_threads = encoder->cfg.threads; if (max_threads < 0) { max_threads = cfg_num_threads(); From a42d79eade158ebd3c8d8fd41323e80eee5506fa Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 2 Aug 2024 14:46:21 +0300 Subject: [PATCH 7/7] [debug] Output yuview mv data only when cabac->only_count is not used --- src/encode_coding_tree.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 28c692c9..9c51f377 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -796,10 +796,18 @@ int uvg_encode_inter_prediction_unit( } } #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV - int abs_x = cu_loc->x + state->tile->offset_x; - int abs_y = cu_loc->y + state->tile->offset_y; - if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L0, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); - if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); + if (!cabac->only_count) { + int abs_x = cu_loc->x + state->tile->offset_x; + int abs_y = cu_loc->y + state->tile->offset_y; + if (cur_cu->inter.mv_dir & 1) { + DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L0, abs_x, abs_y, 
cu_loc->width, cu_loc->height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_REFIDX_MERGE_L0, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv_ref[0]); + } + if (cur_cu->inter.mv_dir & 2) { + DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVMERGE_L1, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_REFIDX_MERGE_L1, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv_ref[1]); + } + } #endif } else { if (state->frame->slicetype == UVG_SLICE_B && cur_cu->type != CU_IBC) { @@ -821,9 +829,12 @@ int uvg_encode_inter_prediction_unit( continue; } #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV - int abs_x = cu_loc->x + state->tile->offset_x; - int abs_y = cu_loc->y + state->tile->offset_y; - DBG_YUVIEW_MV(state->frame->poc, ref_list_idx ? DBG_YUVIEW_MVINTER_L1 : DBG_YUVIEW_MVINTER_L0, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[ref_list_idx][0], cur_cu->inter.mv[ref_list_idx][1]); + if (!cabac->only_count) { + int abs_x = cu_loc->x + state->tile->offset_x; + int abs_y = cu_loc->y + state->tile->offset_y; + DBG_YUVIEW_MV(state->frame->poc, ref_list_idx ? DBG_YUVIEW_MVINTER_L1 : DBG_YUVIEW_MVINTER_L0, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv[ref_list_idx][0], cur_cu->inter.mv[ref_list_idx][1]); + DBG_YUVIEW_VALUE(state->frame->poc, ref_list_idx ? 
DBG_YUVIEW_REFIDX_INTER_L1 : DBG_YUVIEW_REFIDX_INTER_L0, abs_x, abs_y, cu_loc->width, cu_loc->height, cur_cu->inter.mv_ref[ref_list_idx]); + } #endif // size of the current reference index list (L0/L1) uint8_t ref_LX_size = state->frame->ref_LX_size[ref_list_idx]; @@ -1474,8 +1485,16 @@ void uvg_encode_coding_tree( } } #ifdef UVG_DEBUG_PRINT_YUVIEW_CSV - if (cur_cu->inter.mv_dir & 1) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); - if (cur_cu->inter.mv_dir & 2) DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); + if (!cabac->only_count) { + if (cur_cu->inter.mv_dir & 1) { + DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L0, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[0][0], cur_cu->inter.mv[0][1]); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_REFIDX_SKIP_L0, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv_ref[0]); + } + if (cur_cu->inter.mv_dir & 2) { + DBG_YUVIEW_MV(state->frame->poc, DBG_YUVIEW_MVSKIP_L1, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv[1][0], cur_cu->inter.mv[1][1]); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_REFIDX_SKIP_L1, abs_x, abs_y, cu_width, cu_height, cur_cu->inter.mv_ref[1]); + } + } #endif goto end;