From b52a930bed2f4aea1009542b2f6dcdf08961e0cd Mon Sep 17 00:00:00 2001 From: Ari Lemmetti Date: Sat, 4 Apr 2020 22:14:10 +0300 Subject: [PATCH] About working with generics --- src/image.c | 21 -- src/image.h | 16 +- src/inter.c | 256 ++++++++++++----------- src/kvazaar.h | 2 + src/strategies/generic/ipol-generic.c | 22 +- src/strategies/generic/picture-generic.c | 59 +++--- src/strategies/strategies-ipol.h | 22 ++ src/strategies/strategies-picture.h | 29 ++- 8 files changed, 226 insertions(+), 201 deletions(-) diff --git a/src/image.c b/src/image.c index a48e5a4f..ddd58d47 100644 --- a/src/image.c +++ b/src/image.c @@ -218,27 +218,6 @@ void kvz_yuv_t_free(yuv_t *yuv) FREE_POINTER(yuv); } -hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size) -{ - // Get buffers with separate mallocs in order to take advantage of - // automatic buffer overrun checks. - hi_prec_buf_t *yuv = (hi_prec_buf_t *)malloc(sizeof(*yuv)); - yuv->y = (int16_t *)malloc(luma_size * sizeof(*yuv->y)); - yuv->u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u)); - yuv->v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v)); - yuv->size = luma_size; - - return yuv; -} - -void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv) -{ - free(yuv->y); - free(yuv->u); - free(yuv->v); - free(yuv); -} - static INLINE uint32_t reg_sad_maybe_optimized(const kvz_pixel * const data1, const kvz_pixel * const data2, const int32_t width, const int32_t height, const uint32_t stride1, const uint32_t stride2, optimized_sad_func_ptr_t optimized_sad) diff --git a/src/image.h b/src/image.h index ccac4553..a6a08b0c 100644 --- a/src/image.h +++ b/src/image.h @@ -51,13 +51,6 @@ typedef struct { enum kvz_chroma_format chroma_format; } lcu_yuv_t; -typedef struct { - int size; - int16_t *y; - int16_t *u; - int16_t *v; -} hi_prec_buf_t; - typedef struct { int size; kvz_pixel *y; @@ -65,6 +58,12 @@ typedef struct { kvz_pixel *v; } yuv_t; +typedef struct { + int size; + kvz_pixel_ip *y; + kvz_pixel_ip *u; + kvz_pixel_ip *v; +} 
yuv_ip_t; kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height); kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height); @@ -82,9 +81,6 @@ kvz_picture *kvz_image_make_subimage(kvz_picture *const orig_image, yuv_t * kvz_yuv_t_alloc(int luma_size, int chroma_size); void kvz_yuv_t_free(yuv_t * yuv); -hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size); -void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv); - //Algorithms unsigned kvz_image_calc_sad(const kvz_picture *pic, diff --git a/src/inter.c b/src/inter.c index 65a981dc..a311aecf 100644 --- a/src/inter.c +++ b/src/inter.c @@ -52,14 +52,15 @@ typedef struct { } merge_candidates_t; -static void inter_recon_frac_luma(const encoder_state_t *const state, - const kvz_picture *const ref, - int32_t xpos, - int32_t ypos, - int32_t block_width, - int32_t block_height, - const int16_t mv_param[2], - lcu_t *lcu) +static void inter_recon_frac_luma(const encoder_state_t * const state, + const kvz_picture * const ref, + int32_t xpos, + int32_t ypos, + int32_t block_width, + int32_t block_height, + const int16_t mv_param[2], + yuv_t *out, + unsigned out_stride) { int mv_frac_x = (mv_param[0] & 3); int mv_frac_y = (mv_param[1] & 3); @@ -349,130 +350,130 @@ static void inter_cp_with_ext_border(const kvz_pixel *ref_buf, int ref_stride, * * \param state encoder state * \param ref picture to copy the data from - * \param xpos PU x position - * \param ypos PU y position + * \param pu_x PU x position + * \param pu_y PU y position * \param width PU width * \param height PU height * \param mv_param motion vector - * \param lcu destination lcu - * \param hi_prec_out destination of high precision output, or NULL if not needed + * \param yuv_px destination of pixel precision output + * \param yuv_ip destination of high precision output, or NULL if not needed * \param predict_luma Enable or disable luma prediction for this call. 
* \param predict_chroma Enable or disable chroma prediction for this call. */ -static void inter_recon_unipred(const encoder_state_t * const state, - const kvz_picture * const ref, - int32_t xpos, - int32_t ypos, - int32_t width, - int32_t height, - const int16_t mv_param[2], - lcu_t *lcu, - hi_prec_buf_t *hi_prec_out, - bool predict_luma, - bool predict_chroma) +static unsigned inter_recon_unipred(const encoder_state_t * const state, + const kvz_picture * const ref, + int32_t pu_x, + int32_t pu_y, + int32_t pu_w, + int32_t pu_h, + int32_t out_stride_luma, + const int16_t mv_param[2], + yuv_t *yuv_px, + yuv_ip_t *yuv_ip, + bool predict_luma, + bool predict_chroma) { - const vector2d_t pu_in_tile = { xpos, ypos }; - const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH }; - - const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 }; - const vector2d_t mv_in_frame = { - mv_in_pu.x + pu_in_tile.x + state->tile->offset_x, - mv_in_pu.y + pu_in_tile.y + state->tile->offset_y + const vector2d_t int_mv = { mv_param[0] >> 2, mv_param[1] >> 2 }; + const vector2d_t int_mv_in_frame = { + int_mv.x + pu_x + state->tile->offset_x, + int_mv.y + pu_y + state->tile->offset_y }; - const bool mv_is_outside_frame = mv_in_frame.x < 0 || - mv_in_frame.y < 0 || - mv_in_frame.x + width > ref->width || - mv_in_frame.y + height > ref->height; + const bool int_mv_outside_frame = int_mv_in_frame.x < 0 || + int_mv_in_frame.y < 0 || + int_mv_in_frame.x + pu_w > ref->width || + int_mv_in_frame.y + pu_h > ref->height; // With 420, odd coordinates need interpolation. - const int8_t fractional_chroma = (mv_in_pu.x & 1) || (mv_in_pu.y & 1); - const int8_t fractional_luma = ((mv_param[0] & 3) || (mv_param[1] & 3)); + const bool fractional_chroma = (int_mv.x & 1) || (int_mv.y & 1); + const bool fractional_luma = (mv_param[0] & 3) || (mv_param[1] & 3); // Generate prediction for luma. if (predict_luma) { if (fractional_luma) { // With a fractional MV, do interpolation. 
- if (state->encoder_control->cfg.bipred && hi_prec_out) { + if (state->encoder_control->cfg.bipred && yuv_ip) { inter_recon_frac_luma_hi(state, ref, - pu_in_tile.x, pu_in_tile.y, - width, height, - mv_param, hi_prec_out); + pu_x, pu_y, + pu_w, pu_h, + mv_param, yuv_ip, out_stride_luma); } else { inter_recon_frac_luma(state, ref, - pu_in_tile.x, pu_in_tile.y, - width, height, - mv_param, lcu); + pu_x, pu_y, + pu_w, pu_h, + mv_param, yuv_px, out_stride_luma); } } else { // With an integer MV, copy pixels directly from the reference. - const int lcu_pu_index = pu_in_lcu.y * LCU_WIDTH + pu_in_lcu.x; - if (mv_is_outside_frame) { + if (int_mv_outside_frame) { inter_cp_with_ext_border(ref->y, ref->width, ref->width, ref->height, - &lcu->rec.y[lcu_pu_index], LCU_WIDTH, - width, height, - &mv_in_frame); + yuv_px->y, out_stride_luma, + pu_w, pu_h, + &int_mv_in_frame); } else { - const int frame_mv_index = mv_in_frame.y * ref->width + mv_in_frame.x; + const int frame_mv_index = int_mv_in_frame.y * ref->width + int_mv_in_frame.x; kvz_pixels_blit(&ref->y[frame_mv_index], - &lcu->rec.y[lcu_pu_index], - width, height, - ref->width, LCU_WIDTH); + yuv_px->y, + pu_w, pu_h, + ref->width, out_stride_luma); } } } if (!predict_chroma) { - return; + return fractional_luma; } + const unsigned out_stride_c = out_stride_luma / 2; + // Generate prediction for chroma. if (fractional_luma || fractional_chroma) { // With a fractional MV, do interpolation. - if (state->encoder_control->cfg.bipred && hi_prec_out) { + if (state->encoder_control->cfg.bipred && yuv_ip) { inter_recon_frac_chroma_hi(state, ref, - pu_in_tile.x, pu_in_tile.y, - width, height, - mv_param, hi_prec_out); + pu_x, pu_y, + pu_w, pu_h, + mv_param, yuv_ip, out_stride_c); } else { inter_recon_frac_chroma(state, ref, - pu_in_tile.x, pu_in_tile.y, - width, height, - mv_param, lcu); + pu_x, pu_y, + pu_w, pu_h, + mv_param, yuv_px, out_stride_c); } } else { // With an integer MV, copy pixels directly from the reference. 
- const int lcu_pu_index_c = pu_in_lcu.y / 2 * LCU_WIDTH_C + pu_in_lcu.x / 2; - const vector2d_t mv_in_frame_c = { mv_in_frame.x / 2, mv_in_frame.y / 2 }; + const vector2d_t int_mv_in_frame_c = { int_mv_in_frame.x / 2, int_mv_in_frame.y / 2 }; - if (mv_is_outside_frame) { + if (int_mv_outside_frame) { inter_cp_with_ext_border(ref->u, ref->width / 2, ref->width / 2, ref->height / 2, - &lcu->rec.u[lcu_pu_index_c], LCU_WIDTH_C, - width / 2, height / 2, - &mv_in_frame_c); + yuv_px->u, out_stride_c, + pu_w / 2, pu_h / 2, + &int_mv_in_frame_c); inter_cp_with_ext_border(ref->v, ref->width / 2, ref->width / 2, ref->height / 2, - &lcu->rec.v[lcu_pu_index_c], LCU_WIDTH_C, - width / 2, height / 2, - &mv_in_frame_c); + yuv_px->v, out_stride_c, + pu_w / 2, pu_h / 2, + &int_mv_in_frame_c); } else { - const int frame_mv_index = mv_in_frame_c.y * ref->width / 2 + mv_in_frame_c.x; + const int frame_mv_index = int_mv_in_frame_c.y * ref->width / 2 + int_mv_in_frame_c.x; kvz_pixels_blit(&ref->u[frame_mv_index], - &lcu->rec.u[lcu_pu_index_c], - width / 2, height / 2, - ref->width / 2, LCU_WIDTH_C); + yuv_px->u, + pu_w / 2, pu_h / 2, + ref->width / 2, out_stride_c); kvz_pixels_blit(&ref->v[frame_mv_index], - &lcu->rec.v[lcu_pu_index_c], - width / 2, height / 2, - ref->width / 2, LCU_WIDTH_C); + yuv_px->v, + pu_w / 2, pu_h / 2, + ref->width / 2, out_stride_c); } } + + return fractional_luma | ((fractional_luma || fractional_chroma) << 1); } /** * \brief Reconstruct bi-pred inter PU @@ -480,8 +481,8 @@ static void inter_recon_unipred(const encoder_state_t * const state, * \param state encoder state * \param ref1 reference picture to copy the data from * \param ref2 other reference picture to copy the data from - * \param xpos PU x position - * \param ypos PU y position + * \param pu_x PU x position + * \param pu_y PU y position * \param width PU width * \param height PU height * \param mv_param motion vectors @@ -489,56 +490,60 @@ static void inter_recon_unipred(const encoder_state_t * 
const state, * \param predict_luma Enable or disable luma prediction for this call. * \param predict_chroma Enable or disable chroma prediction for this call. */ -void kvz_inter_recon_bipred(const encoder_state_t * const state, - const kvz_picture * ref1, - const kvz_picture * ref2, - int32_t xpos, - int32_t ypos, - int32_t width, - int32_t height, - int16_t mv_param[2][2], - lcu_t* lcu, - bool predict_luma, - bool predict_chroma) +void kvz_inter_recon_bipred(const encoder_state_t *const state, + const kvz_picture *ref1, + const kvz_picture *ref2, + int32_t pu_x, + int32_t pu_y, + int32_t pu_w, + int32_t pu_h, + int16_t mv_param[2][2], + lcu_t *lcu, + bool predict_luma, + bool predict_chroma) { - kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH]; - kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C]; - kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C]; + // Allocate maximum size arrays for interpolated and copied samples + ALIGNED(64) kvz_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE]; + ALIGNED(64) kvz_pixel px_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE]; + ALIGNED(64) kvz_pixel_ip ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE]; + ALIGNED(64) kvz_pixel_ip ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE]; - const int hi_prec_luma_rec0 = mv_param[0][0] & 3 || mv_param[0][1] & 3; - const int hi_prec_luma_rec1 = mv_param[1][0] & 3 || mv_param[1][1] & 3; + yuv_t px_L0; + px_L0.size = pu_w * pu_h; + px_L0.y = &px_buf_L0[0]; + px_L0.u = &px_buf_L0[LCU_LUMA_SIZE]; + px_L0.v = &px_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE]; - const int hi_prec_chroma_rec0 = mv_param[0][0] & 7 || mv_param[0][1] & 7; - const int hi_prec_chroma_rec1 = mv_param[1][0] & 7 || mv_param[1][1] & 7; + yuv_t px_L1; + px_L1.size = pu_w * pu_h; + px_L1.y = &px_buf_L1[0]; + px_L1.u = &px_buf_L1[LCU_LUMA_SIZE]; + px_L1.v = &px_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE]; - hi_prec_buf_t* high_precision_rec0 = 0; - hi_prec_buf_t* high_precision_rec1 = 0; - if (hi_prec_chroma_rec0) high_precision_rec0 = 
kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH); - if (hi_prec_chroma_rec1) high_precision_rec1 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH); + yuv_ip_t ip_L0; + ip_L0.size = pu_w * pu_h; + ip_L0.y = &ip_buf_L0[0]; + ip_L0.u = &ip_buf_L0[LCU_LUMA_SIZE]; + ip_L0.v = &ip_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE]; + yuv_ip_t ip_L1; + ip_L1.size = pu_w * pu_h; + ip_L1.y = &ip_buf_L1[0]; + ip_L1.u = &ip_buf_L1[LCU_LUMA_SIZE]; + ip_L1.v = &ip_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE]; - //Reconstruct both predictors - inter_recon_unipred(state, ref1, xpos, ypos, width, height, mv_param[0], lcu, high_precision_rec0, - predict_luma, predict_chroma); - if (!hi_prec_luma_rec0){ - memcpy(temp_lcu_y, lcu->rec.y, sizeof(kvz_pixel) * 64 * 64); // copy to temp_lcu_y - } - if (!hi_prec_chroma_rec0){ - memcpy(temp_lcu_u, lcu->rec.u, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_u - memcpy(temp_lcu_v, lcu->rec.v, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_v - } - inter_recon_unipred(state, ref2, xpos, ypos, width, height, mv_param[1], lcu, high_precision_rec1, - predict_luma, predict_chroma); + // Sample blocks from both reference picture lists. + // Flags state if the outputs were written to high-precision / interpolated sample buffers. 
+ unsigned ip_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0], + &px_L0, &ip_L0, predict_luma, predict_chroma); + unsigned ip_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1], + &px_L1, &ip_L1, predict_luma, predict_chroma); // After reconstruction, merge the predictors by taking an average of each pixel - kvz_inter_recon_bipred_blend(hi_prec_luma_rec0, hi_prec_luma_rec1, - hi_prec_chroma_rec0, hi_prec_chroma_rec1, - height, width, ypos, xpos, - high_precision_rec0, high_precision_rec1, - lcu, temp_lcu_y, temp_lcu_u, temp_lcu_v, predict_luma, predict_chroma); - - if (high_precision_rec0 != 0) kvz_hi_prec_buf_t_free(high_precision_rec0); - if (high_precision_rec1 != 0) kvz_hi_prec_buf_t_free(high_precision_rec1); + kvz_inter_recon_bipred_blend(lcu, &px_L0, &px_L1, &ip_L0, &ip_L1, + pu_x, pu_y, pu_w, pu_h, + ip_flags_L0, ip_flags_L1, + predict_luma, predict_chroma); } @@ -626,12 +631,21 @@ void kvz_inter_pred_pu(const encoder_state_t * const state, state->frame->ref_LX[mv_idx][ pu->inter.mv_ref[mv_idx]]]; + const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x); + const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2; + yuv_t lcu_adapter; + lcu_adapter.size = pu_w * pu_h; + lcu_adapter.y = lcu->rec.y + offset_luma, + lcu_adapter.u = lcu->rec.u + offset_chroma, + lcu_adapter.v = lcu->rec.v + offset_chroma, + inter_recon_unipred(state, ref, pu_x, pu_y, pu_w, pu_h, + LCU_WIDTH, pu->inter.mv[mv_idx], - lcu, + &lcu_adapter, NULL, predict_luma, predict_chroma); } diff --git a/src/kvazaar.h b/src/kvazaar.h index 856ea6e8..9cd97188 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -97,6 +97,8 @@ typedef uint8_t kvz_pixel; typedef uint16_t kvz_pixel; #endif +typedef int16_t kvz_pixel_ip; + /** * \brief Opaque data structure representing one instance of the encoder. 
*/ diff --git a/src/strategies/generic/ipol-generic.c b/src/strategies/generic/ipol-generic.c index 67db7db9..cdd8c040 100644 --- a/src/strategies/generic/ipol-generic.c +++ b/src/strategies/generic/ipol-generic.c @@ -131,7 +131,16 @@ int32_t kvz_four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int return temp; } -void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]) +void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, + kvz_pixel *src, + int16_t src_stride, + int width, + int height, + kvz_pixel *dst, + int16_t dst_stride, + int8_t hor_flag, + int8_t ver_flag, + const int16_t mv[2]) { //TODO: horizontal and vertical only filtering int32_t x, y; @@ -669,7 +678,16 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder, } } -void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height,kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]) +void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, + kvz_pixel *src, + int16_t src_stride, + int width, + int height, + kvz_pixel *dst, + int16_t dst_stride, + int8_t hor_flag, + int8_t ver_flag, + const int16_t mv[2]) { //TODO: horizontal and vertical only filtering int32_t x, y; diff --git a/src/strategies/generic/picture-generic.c b/src/strategies/generic/picture-generic.c index 16dde988..f5fcb033 100644 --- a/src/strategies/generic/picture-generic.c +++ b/src/strategies/generic/picture-generic.c @@ -547,55 +547,52 @@ static unsigned pixels_calc_ssd_generic(const kvz_pixel *const ref, const kvz_pi return ssd >> (2*(KVZ_BIT_DEPTH-8)); } -static void inter_recon_bipred_generic(const int hi_prec_luma_rec0, - const int hi_prec_luma_rec1, - const int 
hi_prec_chroma_rec0, - const int hi_prec_chroma_rec1, - int32_t height, - int32_t width, - int32_t ypos, - int32_t xpos, - const hi_prec_buf_t*high_precision_rec0, - const hi_prec_buf_t*high_precision_rec1, - lcu_t* lcu, - kvz_pixel* temp_lcu_y, - kvz_pixel* temp_lcu_u, - kvz_pixel* temp_lcu_v, - bool predict_luma, - bool predict_chroma) { +static void inter_recon_bipred_generic(lcu_t *const lcu, + const yuv_t *const px_L0, + const yuv_t *const px_L1, + const yuv_ip_t *const ip_L0, + const yuv_ip_t *const ip_L1, + const unsigned pu_x, + const unsigned pu_y, + const unsigned pu_w, + const unsigned pu_h, + const unsigned ip_flags_L0, + const unsigned ip_flags_L1, + const bool predict_luma, + const bool predict_chroma) { int shift = 15 - KVZ_BIT_DEPTH; int offset = 1 << (shift - 1); + const unsigned pu_w_c = pu_w >> 1; + int y_in_lcu; int x_in_lcu; //After reconstruction, merge the predictors by taking an average of each pixel - for (int temp_y = 0; temp_y < height; ++temp_y) { - - - for (int temp_x = 0; temp_x < width; ++temp_x) { - y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1)); - x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1)); + for (int y = 0; y < pu_h; ++y) { + for (int x = 0; x < pu_w; ++x) { + y_in_lcu = (pu_y + y) & (LCU_WIDTH-1); + x_in_lcu = (pu_x + x) & (LCU_WIDTH-1); if (predict_luma) { - int16_t sample0_y = (hi_prec_luma_rec0 ? high_precision_rec0->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); - int16_t sample1_y = (hi_prec_luma_rec1 ? high_precision_rec1->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); + int16_t sample0_y = ((ip_flags_L0 & 1) ? ip_L0->y[y * pu_w + x] : (px_L0->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH))); + int16_t sample1_y = ((ip_flags_L1 & 1) ? 
ip_L1->y[y * pu_w + x] : (px_L1->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH))); lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_y + sample1_y + offset) >> shift); } - if (predict_chroma && (temp_x < width >> 1 && temp_y < height >> 1)) { + if (predict_chroma && (x < (pu_w >> 1) && y < (pu_h >> 1))) { - y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1)); - x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1)); + y_in_lcu = SUB_SCU(pu_y) / 2 + y; + x_in_lcu = SUB_SCU(pu_x) / 2 + x; - int16_t sample0_u = (hi_prec_chroma_rec0 ? high_precision_rec0->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); - int16_t sample1_u = (hi_prec_chroma_rec1 ? high_precision_rec1->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); + int16_t sample0_u = ((ip_flags_L0 & 2) ? ip_L0->u[y * pu_w_c + x] : (px_L0->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH))); + int16_t sample1_u = ((ip_flags_L1 & 2) ? ip_L1->u[y * pu_w_c + x] : (px_L1->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH))); lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_u + sample1_u + offset) >> shift); - int16_t sample0_v = (hi_prec_chroma_rec0 ? high_precision_rec0->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); - int16_t sample1_v = (hi_prec_chroma_rec1 ? high_precision_rec1->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); + int16_t sample0_v = ((ip_flags_L0 & 2) ? ip_L0->v[y * pu_w_c + x] : (px_L0->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH))); + int16_t sample1_v = ((ip_flags_L1 & 2) ? 
ip_L1->v[y * pu_w_c + x] : (px_L1->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH))); lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_v + sample1_v + offset) >> shift); } } diff --git a/src/strategies/strategies-ipol.h b/src/strategies/strategies-ipol.h index 84d66820..7e02e73a 100644 --- a/src/strategies/strategies-ipol.h +++ b/src/strategies/strategies-ipol.h @@ -101,6 +101,28 @@ typedef void(kvz_sample_octpel_chroma_func)(const encoder_control_t * const enco typedef void(kvz_sample_quarterpel_luma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]); typedef void(kvz_sample_octpel_chroma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]); +typedef void(kvz_sample_14bit_quarterpel_luma_func)(const encoder_control_t * const encoder, + kvz_pixel *src, + int16_t src_stride, + int width, + int height, + int16_t *dst, + int16_t dst_stride, + int8_t hor_flag, + int8_t ver_flag, + const int16_t mv[2]); + +typedef void(kvz_sample_14bit_octpel_chroma_func)(const encoder_control_t *const encoder, + kvz_pixel *src, + int16_t src_stride, + int width, + int height, + int16_t *dst, + int16_t dst_stride, + int8_t hor_flag, + int8_t ver_flag, + const int16_t mv[2]); + // Declare function pointers. 
extern ipol_blocks_func * kvz_filter_hpel_blocks_hor_ver_luma; extern ipol_blocks_func * kvz_filter_hpel_blocks_diag_luma; diff --git a/src/strategies/strategies-picture.h b/src/strategies/strategies-picture.h index 7b2b509c..85b16bfd 100644 --- a/src/strategies/strategies-picture.h +++ b/src/strategies/strategies-picture.h @@ -133,22 +133,19 @@ typedef uint32_t (hor_sad_func)(const kvz_pixel *pic_data, const kvz_pixel *ref_ int32_t width, int32_t height, uint32_t pic_stride, uint32_t ref_stride, uint32_t left, uint32_t right); -typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0, - const int hi_prec_luma_rec1, - const int hi_prec_chroma_rec0, - const int hi_prec_chroma_rec1, - int height, - int width, - int ypos, - int xpos, - const hi_prec_buf_t*high_precision_rec0, - const hi_prec_buf_t*high_precision_rec1, - lcu_t* lcu, - kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH], - kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C], - kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C], - bool predict_luma, - bool predict_chroma); +typedef void (inter_recon_bipred_func)(lcu_t * const lcu, + const yuv_t *const px_L0, + const yuv_t *const px_L1, + const yuv_ip_t *const ip_L0, + const yuv_ip_t *const ip_L1, + const unsigned pu_x, + const unsigned pu_y, + const unsigned pu_w, + const unsigned pu_h, + const unsigned ip_flags_L0, + const unsigned ip_flags_L1, + const bool predict_luma, + const bool predict_chroma); typedef double (pixel_var_func)(const kvz_pixel *buf, const uint32_t len);