About working with generics

This commit is contained in:
Ari Lemmetti 2020-04-04 22:14:10 +03:00
parent e7857cbb24
commit b52a930bed
8 changed files with 226 additions and 201 deletions

View file

@@ -218,27 +218,6 @@ void kvz_yuv_t_free(yuv_t *yuv)
FREE_POINTER(yuv); FREE_POINTER(yuv);
} }
/**
 * \brief Allocate a high-precision (int16_t sample) YUV buffer.
 *
 * Planes get separate mallocs in order to take advantage of automatic
 * buffer overrun checks.
 *
 * \param luma_size  number of luma samples; each chroma plane is
 *                   allocated at half that size
 * \return the new buffer, or NULL if any allocation failed.
 *         Caller owns the result and releases it with
 *         kvz_hi_prec_buf_t_free().
 */
hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size)
{
  hi_prec_buf_t *yuv = malloc(sizeof(*yuv));
  if (!yuv) return NULL;

  yuv->y = malloc(luma_size * sizeof(*yuv->y));
  yuv->u = malloc(luma_size / 2 * sizeof(*yuv->u));
  yuv->v = malloc(luma_size / 2 * sizeof(*yuv->v));
  yuv->size = luma_size;

  // On partial failure release everything; free(NULL) is a no-op.
  if (!yuv->y || !yuv->u || !yuv->v) {
    free(yuv->y);
    free(yuv->u);
    free(yuv->v);
    free(yuv);
    return NULL;
  }
  return yuv;
}
/**
 * \brief Release a buffer obtained from kvz_hi_prec_buf_t_alloc().
 *
 * \param yuv  buffer to free; NULL is accepted and ignored, matching the
 *             convention of the standard free().
 */
void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv)
{
  if (!yuv) return;
  free(yuv->y);
  free(yuv->u);
  free(yuv->v);
  free(yuv);
}
static INLINE uint32_t reg_sad_maybe_optimized(const kvz_pixel * const data1, const kvz_pixel * const data2, static INLINE uint32_t reg_sad_maybe_optimized(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t width, const int32_t height, const uint32_t stride1, const int32_t width, const int32_t height, const uint32_t stride1,
const uint32_t stride2, optimized_sad_func_ptr_t optimized_sad) const uint32_t stride2, optimized_sad_func_ptr_t optimized_sad)

View file

@@ -51,13 +51,6 @@ typedef struct {
enum kvz_chroma_format chroma_format; enum kvz_chroma_format chroma_format;
} lcu_yuv_t; } lcu_yuv_t;
// High-precision sample buffer holding int16_t intermediate values
// (used for bi-prediction blending before rounding back to pixel depth).
// NOTE(review): chroma planes are allocated at size/2 by
// kvz_hi_prec_buf_t_alloc, i.e. 4:2:0 layout — confirm for other formats.
typedef struct {
  int size;    // Number of luma samples.
  int16_t *y;  // Luma plane.
  int16_t *u;  // Chroma U plane.
  int16_t *v;  // Chroma V plane.
} hi_prec_buf_t;
typedef struct { typedef struct {
int size; int size;
kvz_pixel *y; kvz_pixel *y;
@@ -65,6 +58,12 @@ typedef struct {
kvz_pixel *v; kvz_pixel *v;
} yuv_t; } yuv_t;
// YUV buffer of intermediate-precision samples (kvz_pixel_ip is int16_t),
// used for interpolated prediction output prior to final rounding.
typedef struct {
  int size;          // Number of luma samples.
  kvz_pixel_ip *y;   // Luma plane.
  kvz_pixel_ip *u;   // Chroma U plane.
  kvz_pixel_ip *v;   // Chroma V plane.
} yuv_ip_t;
kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height); kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height);
kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height); kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height);
@@ -82,9 +81,6 @@ kvz_picture *kvz_image_make_subimage(kvz_picture *const orig_image,
yuv_t * kvz_yuv_t_alloc(int luma_size, int chroma_size); yuv_t * kvz_yuv_t_alloc(int luma_size, int chroma_size);
void kvz_yuv_t_free(yuv_t * yuv); void kvz_yuv_t_free(yuv_t * yuv);
hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size);
void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv);
//Algorithms //Algorithms
unsigned kvz_image_calc_sad(const kvz_picture *pic, unsigned kvz_image_calc_sad(const kvz_picture *pic,

View file

@@ -52,14 +52,15 @@ typedef struct {
} merge_candidates_t; } merge_candidates_t;
static void inter_recon_frac_luma(const encoder_state_t *const state, static void inter_recon_frac_luma(const encoder_state_t * const state,
const kvz_picture *const ref, const kvz_picture * const ref,
int32_t xpos, int32_t xpos,
int32_t ypos, int32_t ypos,
int32_t block_width, int32_t block_width,
int32_t block_height, int32_t block_height,
const int16_t mv_param[2], const int16_t mv_param[2],
lcu_t *lcu) yuv_t *out,
unsigned out_stride)
{ {
int mv_frac_x = (mv_param[0] & 3); int mv_frac_x = (mv_param[0] & 3);
int mv_frac_y = (mv_param[1] & 3); int mv_frac_y = (mv_param[1] & 3);
@@ -349,130 +350,130 @@ static void inter_cp_with_ext_border(const kvz_pixel *ref_buf, int ref_stride,
* *
* \param state encoder state * \param state encoder state
* \param ref picture to copy the data from * \param ref picture to copy the data from
* \param xpos PU x position * \param pu_x PU x position
* \param ypos PU y position * \param pu_y PU y position
* \param width PU width * \param width PU width
* \param height PU height * \param height PU height
* \param mv_param motion vector * \param mv_param motion vector
* \param lcu destination lcu * \param lcu_px destination lcu
* \param hi_prec_out destination of high precision output, or NULL if not needed * \param lcu_ip destination of high precision output, or NULL if not needed
* \param predict_luma Enable or disable luma prediction for this call. * \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call. * \param predict_chroma Enable or disable chroma prediction for this call.
*/ */
static void inter_recon_unipred(const encoder_state_t * const state, static unsigned inter_recon_unipred(const encoder_state_t * const state,
const kvz_picture * const ref, const kvz_picture * const ref,
int32_t xpos, int32_t pu_x,
int32_t ypos, int32_t pu_y,
int32_t width, int32_t pu_w,
int32_t height, int32_t pu_h,
int32_t out_stride_luma,
const int16_t mv_param[2], const int16_t mv_param[2],
lcu_t *lcu, yuv_t *yuv_px,
hi_prec_buf_t *hi_prec_out, yuv_ip_t *yuv_ip,
bool predict_luma, bool predict_luma,
bool predict_chroma) bool predict_chroma)
{ {
const vector2d_t pu_in_tile = { xpos, ypos }; const vector2d_t int_mv = { mv_param[0] >> 2, mv_param[1] >> 2 };
const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH }; const vector2d_t int_mv_in_frame = {
int_mv.x + pu_x + state->tile->offset_x,
const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 }; int_mv.y + pu_y + state->tile->offset_y
const vector2d_t mv_in_frame = {
mv_in_pu.x + pu_in_tile.x + state->tile->offset_x,
mv_in_pu.y + pu_in_tile.y + state->tile->offset_y
}; };
const bool mv_is_outside_frame = mv_in_frame.x < 0 || const bool int_mv_outside_frame = int_mv_in_frame.x < 0 ||
mv_in_frame.y < 0 || int_mv_in_frame.y < 0 ||
mv_in_frame.x + width > ref->width || int_mv_in_frame.x + pu_w > ref->width ||
mv_in_frame.y + height > ref->height; int_mv_in_frame.y + pu_h > ref->height;
// With 420, odd coordinates need interpolation. // With 420, odd coordinates need interpolation.
const int8_t fractional_chroma = (mv_in_pu.x & 1) || (mv_in_pu.y & 1); const bool fractional_chroma = (int_mv.x & 1) || (int_mv.y & 1);
const int8_t fractional_luma = ((mv_param[0] & 3) || (mv_param[1] & 3)); const bool fractional_luma = (mv_param[0] & 3) || (mv_param[1] & 3);
// Generate prediction for luma. // Generate prediction for luma.
if (predict_luma) { if (predict_luma) {
if (fractional_luma) { if (fractional_luma) {
// With a fractional MV, do interpolation. // With a fractional MV, do interpolation.
if (state->encoder_control->cfg.bipred && hi_prec_out) { if (state->encoder_control->cfg.bipred && yuv_ip) {
inter_recon_frac_luma_hi(state, ref, inter_recon_frac_luma_hi(state, ref,
pu_in_tile.x, pu_in_tile.y, pu_x, pu_y,
width, height, pu_w, pu_h,
mv_param, hi_prec_out); mv_param, yuv_ip, out_stride_luma);
} }
else { else {
inter_recon_frac_luma(state, ref, inter_recon_frac_luma(state, ref,
pu_in_tile.x, pu_in_tile.y, pu_x, pu_y,
width, height, pu_w, pu_h,
mv_param, lcu); mv_param, yuv_px, out_stride_luma);
} }
} }
else { else {
// With an integer MV, copy pixels directly from the reference. // With an integer MV, copy pixels directly from the reference.
const int lcu_pu_index = pu_in_lcu.y * LCU_WIDTH + pu_in_lcu.x; if (int_mv_outside_frame) {
if (mv_is_outside_frame) {
inter_cp_with_ext_border(ref->y, ref->width, inter_cp_with_ext_border(ref->y, ref->width,
ref->width, ref->height, ref->width, ref->height,
&lcu->rec.y[lcu_pu_index], LCU_WIDTH, yuv_px->y, out_stride_luma,
width, height, pu_w, pu_h,
&mv_in_frame); &int_mv_in_frame);
} }
else { else {
const int frame_mv_index = mv_in_frame.y * ref->width + mv_in_frame.x; const int frame_mv_index = int_mv_in_frame.y * ref->width + int_mv_in_frame.x;
kvz_pixels_blit(&ref->y[frame_mv_index], kvz_pixels_blit(&ref->y[frame_mv_index],
&lcu->rec.y[lcu_pu_index], yuv_px->y,
width, height, pu_w, pu_h,
ref->width, LCU_WIDTH); ref->width, out_stride_luma);
} }
} }
} }
if (!predict_chroma) { if (!predict_chroma) {
return; return fractional_luma;
} }
const unsigned out_stride_c = out_stride_luma / 2;
// Generate prediction for chroma. // Generate prediction for chroma.
if (fractional_luma || fractional_chroma) { if (fractional_luma || fractional_chroma) {
// With a fractional MV, do interpolation. // With a fractional MV, do interpolation.
if (state->encoder_control->cfg.bipred && hi_prec_out) { if (state->encoder_control->cfg.bipred && yuv_ip) {
inter_recon_frac_chroma_hi(state, ref, inter_recon_frac_chroma_hi(state, ref,
pu_in_tile.x, pu_in_tile.y, pu_x, pu_y,
width, height, pu_w, pu_h,
mv_param, hi_prec_out); mv_param, yuv_ip, out_stride_c);
} else { } else {
inter_recon_frac_chroma(state, ref, inter_recon_frac_chroma(state, ref,
pu_in_tile.x, pu_in_tile.y, pu_x, pu_y,
width, height, pu_w, pu_h,
mv_param, lcu); mv_param, yuv_px, out_stride_c);
} }
} else { } else {
// With an integer MV, copy pixels directly from the reference. // With an integer MV, copy pixels directly from the reference.
const int lcu_pu_index_c = pu_in_lcu.y / 2 * LCU_WIDTH_C + pu_in_lcu.x / 2; const vector2d_t int_mv_in_frame_c = { int_mv_in_frame.x / 2, int_mv_in_frame.y / 2 };
const vector2d_t mv_in_frame_c = { mv_in_frame.x / 2, mv_in_frame.y / 2 };
if (mv_is_outside_frame) { if (int_mv_outside_frame) {
inter_cp_with_ext_border(ref->u, ref->width / 2, inter_cp_with_ext_border(ref->u, ref->width / 2,
ref->width / 2, ref->height / 2, ref->width / 2, ref->height / 2,
&lcu->rec.u[lcu_pu_index_c], LCU_WIDTH_C, yuv_px->u, out_stride_c,
width / 2, height / 2, pu_w / 2, pu_h / 2,
&mv_in_frame_c); &int_mv_in_frame_c);
inter_cp_with_ext_border(ref->v, ref->width / 2, inter_cp_with_ext_border(ref->v, ref->width / 2,
ref->width / 2, ref->height / 2, ref->width / 2, ref->height / 2,
&lcu->rec.v[lcu_pu_index_c], LCU_WIDTH_C, yuv_px->v, out_stride_c,
width / 2, height / 2, pu_w / 2, pu_h / 2,
&mv_in_frame_c); &int_mv_in_frame_c);
} else { } else {
const int frame_mv_index = mv_in_frame_c.y * ref->width / 2 + mv_in_frame_c.x; const int frame_mv_index = int_mv_in_frame_c.y * ref->width / 2 + int_mv_in_frame_c.x;
kvz_pixels_blit(&ref->u[frame_mv_index], kvz_pixels_blit(&ref->u[frame_mv_index],
&lcu->rec.u[lcu_pu_index_c], yuv_px->u,
width / 2, height / 2, pu_w / 2, pu_h / 2,
ref->width / 2, LCU_WIDTH_C); ref->width / 2, out_stride_c);
kvz_pixels_blit(&ref->v[frame_mv_index], kvz_pixels_blit(&ref->v[frame_mv_index],
&lcu->rec.v[lcu_pu_index_c], yuv_px->v,
width / 2, height / 2, pu_w / 2, pu_h / 2,
ref->width / 2, LCU_WIDTH_C); ref->width / 2, out_stride_c);
} }
} }
return fractional_luma | ((fractional_luma || fractional_chroma) << 1);
} }
/** /**
* \brief Reconstruct bi-pred inter PU * \brief Reconstruct bi-pred inter PU
@@ -480,8 +481,8 @@ static void inter_recon_unipred(const encoder_state_t * const state,
* \param state encoder state * \param state encoder state
* \param ref1 reference picture to copy the data from * \param ref1 reference picture to copy the data from
* \param ref2 other reference picture to copy the data from * \param ref2 other reference picture to copy the data from
* \param xpos PU x position * \param pu_x PU x position
* \param ypos PU y position * \param pu_y PU y position
* \param width PU width * \param width PU width
* \param height PU height * \param height PU height
* \param mv_param motion vectors * \param mv_param motion vectors
@@ -489,56 +490,60 @@ static void inter_recon_unipred(const encoder_state_t * const state,
* \param predict_luma Enable or disable luma prediction for this call. * \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call. * \param predict_chroma Enable or disable chroma prediction for this call.
*/ */
void kvz_inter_recon_bipred(const encoder_state_t * const state, void kvz_inter_recon_bipred(const encoder_state_t *const state,
const kvz_picture * ref1, const kvz_picture *ref1,
const kvz_picture * ref2, const kvz_picture *ref2,
int32_t xpos, int32_t pu_x,
int32_t ypos, int32_t pu_y,
int32_t width, int32_t pu_w,
int32_t height, int32_t pu_h,
int16_t mv_param[2][2], int16_t mv_param[2][2],
lcu_t* lcu, lcu_t *lcu,
bool predict_luma, bool predict_luma,
bool predict_chroma) bool predict_chroma)
{ {
kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH]; // Allocate maximum size arrays for interpolated and copied samples
kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C]; ALIGNED(64) kvz_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C]; ALIGNED(64) kvz_pixel px_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_ip ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_ip ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
const int hi_prec_luma_rec0 = mv_param[0][0] & 3 || mv_param[0][1] & 3; yuv_t px_L0;
const int hi_prec_luma_rec1 = mv_param[1][0] & 3 || mv_param[1][1] & 3; px_L0.size = pu_w * pu_h;
px_L0.y = &px_buf_L0[0];
px_L0.u = &px_buf_L0[LCU_LUMA_SIZE];
px_L0.v = &px_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
const int hi_prec_chroma_rec0 = mv_param[0][0] & 7 || mv_param[0][1] & 7; yuv_t px_L1;
const int hi_prec_chroma_rec1 = mv_param[1][0] & 7 || mv_param[1][1] & 7; px_L1.size = pu_w * pu_h;
px_L1.y = &px_buf_L1[0];
px_L1.u = &px_buf_L1[LCU_LUMA_SIZE];
px_L1.v = &px_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
hi_prec_buf_t* high_precision_rec0 = 0; yuv_ip_t ip_L0;
hi_prec_buf_t* high_precision_rec1 = 0; ip_L0.size = pu_w * pu_h;
if (hi_prec_chroma_rec0) high_precision_rec0 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH); ip_L0.y = &ip_buf_L0[0];
if (hi_prec_chroma_rec1) high_precision_rec1 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH); ip_L0.u = &ip_buf_L0[LCU_LUMA_SIZE];
ip_L0.v = &ip_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
yuv_ip_t ip_L1;
ip_L1.size = pu_w * pu_h;
ip_L1.y = &ip_buf_L1[0];
ip_L1.u = &ip_buf_L1[LCU_LUMA_SIZE];
ip_L1.v = &ip_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
//Reconstruct both predictors // Sample blocks from both reference picture lists.
inter_recon_unipred(state, ref1, xpos, ypos, width, height, mv_param[0], lcu, high_precision_rec0, // Flags state if the outputs were written to high-precision / interpolated sample buffers.
predict_luma, predict_chroma); unsigned ip_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0],
if (!hi_prec_luma_rec0){ &px_L0, &ip_L0, predict_luma, predict_chroma);
memcpy(temp_lcu_y, lcu->rec.y, sizeof(kvz_pixel) * 64 * 64); // copy to temp_lcu_y unsigned ip_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1],
} &px_L1, &ip_L1, predict_luma, predict_chroma);
if (!hi_prec_chroma_rec0){
memcpy(temp_lcu_u, lcu->rec.u, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_u
memcpy(temp_lcu_v, lcu->rec.v, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_v
}
inter_recon_unipred(state, ref2, xpos, ypos, width, height, mv_param[1], lcu, high_precision_rec1,
predict_luma, predict_chroma);
// After reconstruction, merge the predictors by taking an average of each pixel // After reconstruction, merge the predictors by taking an average of each pixel
kvz_inter_recon_bipred_blend(hi_prec_luma_rec0, hi_prec_luma_rec1, kvz_inter_recon_bipred_blend(lcu, &px_L0, &px_L1, &ip_L0, &ip_L1,
hi_prec_chroma_rec0, hi_prec_chroma_rec1, pu_x, pu_y, pu_w, pu_h,
height, width, ypos, xpos, ip_flags_L0, ip_flags_L1,
high_precision_rec0, high_precision_rec1, predict_luma, predict_chroma);
lcu, temp_lcu_y, temp_lcu_u, temp_lcu_v, predict_luma, predict_chroma);
if (high_precision_rec0 != 0) kvz_hi_prec_buf_t_free(high_precision_rec0);
if (high_precision_rec1 != 0) kvz_hi_prec_buf_t_free(high_precision_rec1);
} }
@@ -626,12 +631,21 @@ void kvz_inter_pred_pu(const encoder_state_t * const state,
state->frame->ref_LX[mv_idx][ state->frame->ref_LX[mv_idx][
pu->inter.mv_ref[mv_idx]]]; pu->inter.mv_ref[mv_idx]]];
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
yuv_t lcu_adapter;
lcu_adapter.size = pu_w * pu_h;
lcu_adapter.y = lcu->rec.y + offset_luma,
lcu_adapter.u = lcu->rec.u + offset_chroma,
lcu_adapter.v = lcu->rec.v + offset_chroma,
inter_recon_unipred(state, inter_recon_unipred(state,
ref, ref,
pu_x, pu_y, pu_x, pu_y,
pu_w, pu_h, pu_w, pu_h,
LCU_WIDTH,
pu->inter.mv[mv_idx], pu->inter.mv[mv_idx],
lcu, &lcu_adapter,
NULL, NULL,
predict_luma, predict_chroma); predict_luma, predict_chroma);
} }

View file

@@ -97,6 +97,8 @@ typedef uint8_t kvz_pixel;
typedef uint16_t kvz_pixel; typedef uint16_t kvz_pixel;
#endif #endif
// Intermediate-precision pixel sample type (always int16_t, independent of
// the kvz_pixel bit depth) — presumably holds 14-bit interpolation results
// for bi-prediction; confirm against the *_hi interpolation functions.
typedef int16_t kvz_pixel_ip;
/** /**
* \brief Opaque data structure representing one instance of the encoder. * \brief Opaque data structure representing one instance of the encoder.
*/ */

View file

@@ -131,7 +131,16 @@ int32_t kvz_four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int
return temp; return temp;
} }
void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]) void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder,
kvz_pixel *src,
int16_t src_stride,
int width,
int height,
kvz_pixel *dst,
int16_t dst_stride,
int8_t hor_flag,
int8_t ver_flag,
const int16_t mv[2])
{ {
//TODO: horizontal and vertical only filtering //TODO: horizontal and vertical only filtering
int32_t x, y; int32_t x, y;
@@ -669,7 +678,16 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
} }
} }
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height,kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]) void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder,
kvz_pixel *src,
int16_t src_stride,
int width,
int height,
kvz_pixel *dst,
int16_t dst_stride,
int8_t hor_flag,
int8_t ver_flag,
const int16_t mv[2])
{ {
//TODO: horizontal and vertical only filtering //TODO: horizontal and vertical only filtering
int32_t x, y; int32_t x, y;

View file

@@ -547,55 +547,52 @@ static unsigned pixels_calc_ssd_generic(const kvz_pixel *const ref, const kvz_pi
return ssd >> (2*(KVZ_BIT_DEPTH-8)); return ssd >> (2*(KVZ_BIT_DEPTH-8));
} }
static void inter_recon_bipred_generic(const int hi_prec_luma_rec0, static void inter_recon_bipred_generic(lcu_t *const lcu,
const int hi_prec_luma_rec1, const yuv_t *const px_L0,
const int hi_prec_chroma_rec0, const yuv_t *const px_L1,
const int hi_prec_chroma_rec1, const yuv_ip_t *const ip_L0,
int32_t height, const yuv_ip_t *const ip_L1,
int32_t width, const unsigned pu_x,
int32_t ypos, const unsigned pu_y,
int32_t xpos, const unsigned pu_w,
const hi_prec_buf_t*high_precision_rec0, const unsigned pu_h,
const hi_prec_buf_t*high_precision_rec1, const unsigned ip_flags_L0,
lcu_t* lcu, const unsigned ip_flags_L1,
kvz_pixel* temp_lcu_y, const bool predict_luma,
kvz_pixel* temp_lcu_u, const bool predict_chroma) {
kvz_pixel* temp_lcu_v,
bool predict_luma,
bool predict_chroma) {
int shift = 15 - KVZ_BIT_DEPTH; int shift = 15 - KVZ_BIT_DEPTH;
int offset = 1 << (shift - 1); int offset = 1 << (shift - 1);
const unsigned pu_w_c = pu_w >> 1;
int y_in_lcu; int y_in_lcu;
int x_in_lcu; int x_in_lcu;
//After reconstruction, merge the predictors by taking an average of each pixel //After reconstruction, merge the predictors by taking an average of each pixel
for (int temp_y = 0; temp_y < height; ++temp_y) { for (int y = 0; y < pu_h; ++y) {
for (int x = 0; x < pu_w; ++x) {
y_in_lcu = (pu_y + y) & (LCU_WIDTH-1);
for (int temp_x = 0; temp_x < width; ++temp_x) { x_in_lcu = (pu_x + x) & (LCU_WIDTH-1);
y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1));
if (predict_luma) { if (predict_luma) {
int16_t sample0_y = (hi_prec_luma_rec0 ? high_precision_rec0->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); int16_t sample0_y = ((ip_flags_L0 & 1) ? ip_L0->y[y * pu_w + x] : (px_L0->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_y = (hi_prec_luma_rec1 ? high_precision_rec1->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); int16_t sample1_y = ((ip_flags_L1 & 1) ? ip_L1->y[y * pu_w + x] : (px_L1->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_y + sample1_y + offset) >> shift); lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_y + sample1_y + offset) >> shift);
} }
if (predict_chroma && (temp_x < width >> 1 && temp_y < height >> 1)) { if (predict_chroma && (x < (pu_w >> 1) && y < (pu_h >> 1))) {
y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1)); y_in_lcu = SUB_SCU(pu_y) / 2 + y;
x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1)); x_in_lcu = SUB_SCU(pu_x) / 2 + x;
int16_t sample0_u = (hi_prec_chroma_rec0 ? high_precision_rec0->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); int16_t sample0_u = ((ip_flags_L0 & 2) ? ip_L0->u[y * pu_w_c + x] : (px_L0->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_u = (hi_prec_chroma_rec1 ? high_precision_rec1->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); int16_t sample1_u = ((ip_flags_L1 & 2) ? ip_L1->u[y * pu_w_c + x] : (px_L1->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_u + sample1_u + offset) >> shift); lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_u + sample1_u + offset) >> shift);
int16_t sample0_v = (hi_prec_chroma_rec0 ? high_precision_rec0->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); int16_t sample0_v = ((ip_flags_L0 & 2) ? ip_L0->v[y * pu_w_c + x] : (px_L0->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_v = (hi_prec_chroma_rec1 ? high_precision_rec1->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH))); int16_t sample1_v = ((ip_flags_L1 & 2) ? ip_L1->v[y * pu_w_c + x] : (px_L1->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_v + sample1_v + offset) >> shift); lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_v + sample1_v + offset) >> shift);
} }
} }

View file

@@ -101,6 +101,28 @@ typedef void(kvz_sample_octpel_chroma_func)(const encoder_control_t * const enco
typedef void(kvz_sample_quarterpel_luma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]); typedef void(kvz_sample_quarterpel_luma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
typedef void(kvz_sample_octpel_chroma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]); typedef void(kvz_sample_octpel_chroma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
// Function type: quarter-pel luma interpolation writing int16_t output
// (name suggests 14-bit intermediate precision — confirm against the
// generic implementations). hor_flag/ver_flag select the filtering
// directions; mv is the motion vector used to pick the sub-pel phase.
typedef void(kvz_sample_14bit_quarterpel_luma_func)(const encoder_control_t * const encoder,
                                                    kvz_pixel *src,
                                                    int16_t src_stride,
                                                    int width,
                                                    int height,
                                                    int16_t *dst,
                                                    int16_t dst_stride,
                                                    int8_t hor_flag,
                                                    int8_t ver_flag,
                                                    const int16_t mv[2]);
// Function type: eighth-pel chroma interpolation writing int16_t output;
// same parameter conventions as the luma variant above.
typedef void(kvz_sample_14bit_octpel_chroma_func)(const encoder_control_t *const encoder,
                                                  kvz_pixel *src,
                                                  int16_t src_stride,
                                                  int width,
                                                  int height,
                                                  int16_t *dst,
                                                  int16_t dst_stride,
                                                  int8_t hor_flag,
                                                  int8_t ver_flag,
                                                  const int16_t mv[2]);
// Declare function pointers. // Declare function pointers.
extern ipol_blocks_func * kvz_filter_hpel_blocks_hor_ver_luma; extern ipol_blocks_func * kvz_filter_hpel_blocks_hor_ver_luma;
extern ipol_blocks_func * kvz_filter_hpel_blocks_diag_luma; extern ipol_blocks_func * kvz_filter_hpel_blocks_diag_luma;

View file

@@ -133,22 +133,19 @@ typedef uint32_t (hor_sad_func)(const kvz_pixel *pic_data, const kvz_pixel *ref_
int32_t width, int32_t height, uint32_t pic_stride, int32_t width, int32_t height, uint32_t pic_stride,
uint32_t ref_stride, uint32_t left, uint32_t right); uint32_t ref_stride, uint32_t left, uint32_t right);
typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0, typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
const int hi_prec_luma_rec1, const yuv_t *const px_L0,
const int hi_prec_chroma_rec0, const yuv_t *const px_L1,
const int hi_prec_chroma_rec1, const yuv_ip_t *const ip_L0,
int height, const yuv_ip_t *const ip_L1,
int width, const unsigned pu_x,
int ypos, const unsigned pu_y,
int xpos, const unsigned pu_w,
const hi_prec_buf_t*high_precision_rec0, const unsigned pu_h,
const hi_prec_buf_t*high_precision_rec1, const unsigned ip_flags_L0,
lcu_t* lcu, const unsigned ip_flags_L1,
kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH], const bool predict_luma,
kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C], const bool predict_chroma);
kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C],
bool predict_luma,
bool predict_chroma);
typedef double (pixel_var_func)(const kvz_pixel *buf, const uint32_t len); typedef double (pixel_var_func)(const kvz_pixel *buf, const uint32_t len);