[refwrap] Implement a block extension function for the wraparound

This commit is contained in:
Marko Viitanen 2024-07-11 10:59:26 +03:00
parent 544b59b7de
commit a4f43cc576
8 changed files with 119 additions and 18 deletions

View file

@ -801,7 +801,12 @@ static void encoder_state_write_bitstream_pic_parameter_set(bitstream_t* stream,
WRITE_U(stream, 0, 1, "pps_weighted_pred_flag"); // Use of Weighting Prediction (P_SLICE) WRITE_U(stream, 0, 1, "pps_weighted_pred_flag"); // Use of Weighting Prediction (P_SLICE)
WRITE_U(stream, 0, 1, "pps_weighted_bipred_flag"); // Use of Weighting Bi-Prediction (B_SLICE) WRITE_U(stream, 0, 1, "pps_weighted_bipred_flag"); // Use of Weighting Bi-Prediction (B_SLICE)
WRITE_U(stream, 0, 1, "pps_ref_wraparound_enabled_flag"); WRITE_U(stream, encoder->cfg.ref_wraparound, 1, "pps_ref_wraparound_enabled_flag");
if (encoder->cfg.ref_wraparound) {
// ToDo: Add wraparound offset
WRITE_UE(stream, 0, "pps_pic_width_minus_wraparound_offset");
}
WRITE_SE(stream, ((int8_t)encoder->cfg.qp) - 26, "pps_init_qp_minus26"); WRITE_SE(stream, ((int8_t)encoder->cfg.qp) - 26, "pps_init_qp_minus26");
WRITE_U(stream, state->frame->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag"); WRITE_U(stream, state->frame->max_qp_delta_depth >= 0 ? 1:0, 1, "pps_cu_qp_delta_enabled_flag");

View file

@ -486,7 +486,8 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic,
int ref_x, int ref_x,
int ref_y, int ref_y,
int block_width, int block_width,
int block_height) int block_height,
uint8_t ref_wraparound)
{ {
assert(pic_x >= 0 && pic_x <= pic->width - block_width); assert(pic_x >= 0 && pic_x <= pic->width - block_width);
assert(pic_y >= 0 && pic_y <= pic->height - block_height); assert(pic_y >= 0 && pic_y <= pic->height - block_height);
@ -536,7 +537,11 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic,
epol_args.ext_origin = &ext_origin; epol_args.ext_origin = &ext_origin;
epol_args.ext_s = &ext_s; epol_args.ext_s = &ext_s;
if (ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
const uvg_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x]; const uvg_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x];

View file

@ -112,7 +112,8 @@ unsigned uvg_image_calc_satd(const uvg_picture *pic,
int ref_x, int ref_x,
int ref_y, int ref_y,
int block_width, int block_width,
int block_height); int block_height,
uint8_t ref_wraparound);
void uvg_pixels_blit(const uvg_pixel* orig, uvg_pixel *dst, void uvg_pixels_blit(const uvg_pixel* orig, uvg_pixel *dst,

View file

@ -99,7 +99,11 @@ static void inter_recon_frac_luma(const encoder_state_t * const state,
epol_args.ext_origin = &ext_origin; epol_args.ext_origin = &ext_origin;
epol_args.ext_s = &ext_s; epol_args.ext_s = &ext_s;
if (state->encoder_control->cfg.ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
uvg_sample_quarterpel_luma(state->encoder_control, uvg_sample_quarterpel_luma(state->encoder_control,
ext_origin, ext_origin,
ext_s, ext_s,
@ -155,7 +159,11 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state,
epol_args.ext_origin = &ext_origin; epol_args.ext_origin = &ext_origin;
epol_args.ext_s = &ext_s; epol_args.ext_s = &ext_s;
if (state->encoder_control->cfg.ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
uvg_sample_quarterpel_luma_hi(state->encoder_control, uvg_sample_quarterpel_luma_hi(state->encoder_control,
ext_origin, ext_origin,
ext_s, ext_s,
@ -218,7 +226,11 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state,
epol_args.ext_origin = &ext_origin; epol_args.ext_origin = &ext_origin;
epol_args.ext_s = &ext_s; epol_args.ext_s = &ext_s;
if (state->encoder_control->cfg.ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
uvg_sample_octpel_chroma(state->encoder_control, uvg_sample_octpel_chroma(state->encoder_control,
ext_origin, ext_origin,
ext_s, ext_s,
@ -232,7 +244,11 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state,
// Chroma V // Chroma V
epol_args.src = ref->v; epol_args.src = ref->v;
if (state->encoder_control->cfg.ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
uvg_sample_octpel_chroma(state->encoder_control, uvg_sample_octpel_chroma(state->encoder_control,
ext_origin, ext_origin,
ext_s, ext_s,
@ -295,7 +311,11 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state,
epol_args.ext_origin = &ext_origin; epol_args.ext_origin = &ext_origin;
epol_args.ext_s = &ext_s; epol_args.ext_s = &ext_s;
if (state->encoder_control->cfg.ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
uvg_sample_octpel_chroma_hi(state->encoder_control, uvg_sample_octpel_chroma_hi(state->encoder_control,
ext_origin, ext_origin,
ext_s, ext_s,
@ -309,7 +329,11 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state,
// Chroma V // Chroma V
epol_args.src = ref->v; epol_args.src = ref->v;
if (state->encoder_control->cfg.ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
uvg_sample_octpel_chroma_hi(state->encoder_control, uvg_sample_octpel_chroma_hi(state->encoder_control,
ext_origin, ext_origin,
ext_s, ext_s,
@ -340,13 +364,14 @@ static void inter_cp_with_ext_border(const uvg_pixel *ref_buf, int ref_stride,
int ref_width, int ref_height, int ref_width, int ref_height,
uvg_pixel *rec_buf, int rec_stride, uvg_pixel *rec_buf, int rec_stride,
int width, int height, int width, int height,
const vector2d_t *mv_in_frame) const vector2d_t *mv_in_frame,
const int mv_wrap)
{ {
for (int y = mv_in_frame->y; y < mv_in_frame->y + height; ++y) { for (int y = mv_in_frame->y; y < mv_in_frame->y + height; ++y) {
for (int x = mv_in_frame->x; x < mv_in_frame->x + width; ++x) { for (int x = mv_in_frame->x; x < mv_in_frame->x + width; ++x) {
vector2d_t in_frame = { vector2d_t in_frame = {
CLIP(0, ref_width - 1, x), mv_wrap?((x<0)?x+ref_width:x%ref_width):CLIP(0, ref_width - 1, x),
CLIP(0, ref_height - 1, y), mv_wrap?((y<0)?y+ref_height:y%ref_height):CLIP(0, ref_height - 1, y),
}; };
vector2d_t in_pu = { vector2d_t in_pu = {
x - mv_in_frame->x, x - mv_in_frame->x,
@ -432,7 +457,7 @@ static unsigned inter_recon_unipred(
ref->width, ref->height, ref->width, ref->height,
yuv_px->y, out_stride_luma, yuv_px->y, out_stride_luma,
pu_w, pu_h, pu_w, pu_h,
&int_mv_in_frame); &int_mv_in_frame, state->encoder_control->cfg.ref_wraparound);
} }
else { else {
const int frame_mv_index = int_mv_in_frame.y * ref->stride + int_mv_in_frame.x; const int frame_mv_index = int_mv_in_frame.y * ref->stride + int_mv_in_frame.x;
@ -473,12 +498,12 @@ static unsigned inter_recon_unipred(
ref->width / 2, ref->height / 2, ref->width / 2, ref->height / 2,
yuv_px->u, out_stride_c, yuv_px->u, out_stride_c,
pu_w / 2, pu_h / 2, pu_w / 2, pu_h / 2,
&int_mv_in_frame_c); &int_mv_in_frame_c, state->encoder_control->cfg.ref_wraparound);
inter_cp_with_ext_border(ref->v, ref->stride / 2, inter_cp_with_ext_border(ref->v, ref->stride / 2,
ref->width / 2, ref->height / 2, ref->width / 2, ref->height / 2,
yuv_px->v, out_stride_c, yuv_px->v, out_stride_c,
pu_w / 2, pu_h / 2, pu_w / 2, pu_h / 2,
&int_mv_in_frame_c); &int_mv_in_frame_c, state->encoder_control->cfg.ref_wraparound);
} else { } else {
const int frame_mv_index = int_mv_in_frame_c.y * ref->stride / 2 + int_mv_in_frame_c.x; const int frame_mv_index = int_mv_in_frame_c.y * ref->stride / 2 + int_mv_in_frame_c.x;

View file

@ -1102,7 +1102,11 @@ static void search_frac(inter_search_info_t *info,
epol_args.ext_origin = &ext_origin; epol_args.ext_origin = &ext_origin;
epol_args.ext_s = &ext_s; epol_args.ext_s = &ext_s;
if (state->encoder_control->cfg.ref_wraparound) {
uvg_get_extended_block_wraparound(&epol_args);
} else {
uvg_get_extended_block(&epol_args); uvg_get_extended_block(&epol_args);
}
uvg_pixel *tmp_pic = pic->y + orig.y * pic->stride + orig.x; uvg_pixel *tmp_pic = pic->y + orig.y * pic->stride + orig.x;
int tmp_stride = pic->stride; int tmp_stride = pic->stride;
@ -1451,7 +1455,8 @@ static void search_pu_inter_ref(
info->state->tile->offset_x + info->origin.x + (best_mv.x >> INTERNAL_MV_PREC), info->state->tile->offset_x + info->origin.x + (best_mv.x >> INTERNAL_MV_PREC),
info->state->tile->offset_y + info->origin.y + (best_mv.y >> INTERNAL_MV_PREC), info->state->tile->offset_y + info->origin.y + (best_mv.y >> INTERNAL_MV_PREC),
info->width, info->width,
info->height); info->height,
cfg->ref_wraparound);
best_cost += best_bits * info->state->lambda_sqrt; best_cost += best_bits * info->state->lambda_sqrt;
} }

View file

@ -809,6 +809,61 @@ void uvg_get_extended_block_generic(uvg_epol_args *args) {
} }
} }
/**
 * \brief Map a coordinate onto the range [0, size) with wraparound.
 *
 * C's % operator truncates toward zero, so a negative coordinate yields a
 * negative remainder that has to be shifted back into range.
 *
 * \param coord  Coordinate, may be negative or >= size.
 * \param size   Period of the wrap; assumed to be > 0.
 * \return       coord wrapped into [0, size).
 */
static int epol_wrap_coord(int coord, int size)
{
  int rem = coord % size;
  return rem < 0 ? rem + size : rem;
}

/**
 * \brief Get a padded block, wrapping around the picture edges when the
 *        requested area does not fit inside the source.
 *
 * The requested area is the block (blk_x, blk_y, blk_w, blk_h) grown by
 * pad_l / pad_r / pad_t / pad_b (+ pad_b_simd extra rows at the bottom).
 *
 * If the whole area is inside the source, no copying is done: *ext,
 * *ext_origin and *ext_s are set to point directly into args->src with the
 * source stride.
 *
 * Otherwise the area is copied into args->buf with a stride equal to the
 * padded width. Samples outside the source are fetched from the opposite
 * side of the picture, so that column -1 maps to column src_w - 1 and
 * column src_w maps to column 0 (likewise for rows). The pad_b_simd rows
 * are never read from the source; they are zero-filled.
 *
 * NOTE(review): rows are wrapped as well as columns, matching what the
 * original version of this function did. Reference wraparound in VVC is
 * presumably horizontal-only -- confirm whether rows should be clamped.
 *
 * \param args  Source description, requested block/padding, scratch buffer
 *              and output pointers. args->buf must be able to hold
 *              (pad_l + blk_w + pad_r) * (pad_t + blk_h + pad_b + pad_b_simd)
 *              samples. src_w and src_h are assumed to be > 0.
 */
void uvg_get_extended_block_wraparound_generic(uvg_epol_args *args)
{
  int min_y = args->blk_y - args->pad_t;
  int max_y = args->blk_y + args->blk_h + args->pad_b + args->pad_b_simd - 1;
  bool out_of_bounds_y = (min_y < 0) || (max_y >= args->src_h);

  int min_x = args->blk_x - args->pad_l;
  int max_x = args->blk_x + args->blk_w + args->pad_r - 1;
  bool out_of_bounds_x = (min_x < 0) || (max_x >= args->src_w);

  if (out_of_bounds_y || out_of_bounds_x) {
    // Stride of the scratch buffer equals the padded block width.
    const int ext_w = args->pad_l + args->blk_w + args->pad_r;

    *args->ext = args->buf;
    *args->ext_s = ext_w;
    *args->ext_origin = args->buf + args->pad_t * ext_w + args->pad_l;

    // Source column of the leftmost padded sample. Every row starts here,
    // so it is computed once outside the row loop.
    const int first_x = epol_wrap_coord(min_x, args->src_w);

    // For each row including real padding.
    // Don't read "don't care" values (SIMD padding). Zero them out.
    int y;
    for (y = -args->pad_t; y < args->blk_h + args->pad_b; ++y) {
      const int wrapped_y = epol_wrap_coord(args->blk_y + y, args->src_h);
      const uvg_pixel *src_row = args->src + wrapped_y * args->src_s;
      uvg_pixel *dst_row = args->buf + (y + args->pad_t) * ext_w;

      // Walk the source column alongside the destination column and jump
      // back to column 0 after the last picture column. This covers the
      // left wrap, the in-picture middle part and the right wrap with one
      // loop, and stays correct when the block lies entirely outside the
      // picture.
      int src_x = first_x;
      for (int i = 0; i < ext_w; ++i) {
        dst_row[i] = src_row[src_x];
        if (++src_x == args->src_w) {
          src_x = 0;
        }
      }
    }

    // y now indexes the first row past the real padding.
    for (int y_simd = 0; y_simd < args->pad_b_simd; ++y_simd) {
      uvg_pixel *dst = args->buf + (y + args->pad_t + y_simd) * ext_w;
      FILL_ARRAY(dst, 0, ext_w);
    }

  } else {
    // Whole area is inside the source: reference it in place.
    *args->ext = args->src + (args->blk_y - args->pad_t) * args->src_s +
                 (args->blk_x - args->pad_l);
    *args->ext_origin = args->src + args->blk_y * args->src_s + args->blk_x;
    *args->ext_s = args->src_s;
  }
}
int uvg_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth) int uvg_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth)
{ {
bool success = true; bool success = true;
@ -822,6 +877,8 @@ int uvg_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth)
success &= uvg_strategyselector_register(opaque, "sample_quarterpel_luma_hi", "generic", 0, &uvg_sample_quarterpel_luma_hi_generic); success &= uvg_strategyselector_register(opaque, "sample_quarterpel_luma_hi", "generic", 0, &uvg_sample_quarterpel_luma_hi_generic);
success &= uvg_strategyselector_register(opaque, "sample_octpel_chroma_hi", "generic", 0, &uvg_sample_octpel_chroma_hi_generic); success &= uvg_strategyselector_register(opaque, "sample_octpel_chroma_hi", "generic", 0, &uvg_sample_octpel_chroma_hi_generic);
success &= uvg_strategyselector_register(opaque, "get_extended_block", "generic", 0, &uvg_get_extended_block_generic); success &= uvg_strategyselector_register(opaque, "get_extended_block", "generic", 0, &uvg_get_extended_block_generic);
success &= uvg_strategyselector_register(opaque, "get_extended_block_wraparound", "generic", 0, &uvg_get_extended_block_wraparound_generic);
return success; return success;
} }

View file

@ -43,6 +43,7 @@ ipol_blocks_func * uvg_filter_hpel_blocks_diag_luma;
ipol_blocks_func * uvg_filter_qpel_blocks_hor_ver_luma; ipol_blocks_func * uvg_filter_qpel_blocks_hor_ver_luma;
ipol_blocks_func * uvg_filter_qpel_blocks_diag_luma; ipol_blocks_func * uvg_filter_qpel_blocks_diag_luma;
epol_func *uvg_get_extended_block; epol_func *uvg_get_extended_block;
epol_func *uvg_get_extended_block_wraparound;
uvg_sample_quarterpel_luma_func * uvg_sample_quarterpel_luma; uvg_sample_quarterpel_luma_func * uvg_sample_quarterpel_luma;
uvg_sample_octpel_chroma_func * uvg_sample_octpel_chroma; uvg_sample_octpel_chroma_func * uvg_sample_octpel_chroma;
uvg_sample_quarterpel_luma_hi_func * uvg_sample_quarterpel_luma_hi; uvg_sample_quarterpel_luma_hi_func * uvg_sample_quarterpel_luma_hi;

View file

@ -128,6 +128,7 @@ extern ipol_blocks_func * uvg_filter_hpel_blocks_diag_luma;
extern ipol_blocks_func * uvg_filter_qpel_blocks_hor_ver_luma; extern ipol_blocks_func * uvg_filter_qpel_blocks_hor_ver_luma;
extern ipol_blocks_func * uvg_filter_qpel_blocks_diag_luma; extern ipol_blocks_func * uvg_filter_qpel_blocks_diag_luma;
extern epol_func * uvg_get_extended_block; extern epol_func * uvg_get_extended_block;
extern epol_func * uvg_get_extended_block_wraparound;
extern uvg_sample_quarterpel_luma_func * uvg_sample_quarterpel_luma; extern uvg_sample_quarterpel_luma_func * uvg_sample_quarterpel_luma;
extern uvg_sample_octpel_chroma_func * uvg_sample_octpel_chroma; extern uvg_sample_octpel_chroma_func * uvg_sample_octpel_chroma;
extern uvg_sample_quarterpel_luma_hi_func * uvg_sample_quarterpel_luma_hi; extern uvg_sample_quarterpel_luma_hi_func * uvg_sample_quarterpel_luma_hi;
@ -147,6 +148,7 @@ int uvg_strategy_register_ipol(void* opaque, uint8_t bitdepth);
{"sample_quarterpel_luma_hi", (void**) &uvg_sample_quarterpel_luma_hi}, \ {"sample_quarterpel_luma_hi", (void**) &uvg_sample_quarterpel_luma_hi}, \
{"sample_octpel_chroma_hi", (void**) &uvg_sample_octpel_chroma_hi}, \ {"sample_octpel_chroma_hi", (void**) &uvg_sample_octpel_chroma_hi}, \
{"get_extended_block", (void**) &uvg_get_extended_block}, \ {"get_extended_block", (void**) &uvg_get_extended_block}, \
{"get_extended_block_wraparound", (void**) &uvg_get_extended_block_wraparound}, \