
Ari Lemmetti 2020-04-04 22:14:10 +03:00
parent e7857cbb24
commit b52a930bed
8 changed files with 226 additions and 201 deletions

View file

@@ -218,27 +218,6 @@ void kvz_yuv_t_free(yuv_t *yuv)
FREE_POINTER(yuv);
}
hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size)
{
// Get buffers with separate mallocs in order to take advantage of
// automatic buffer overrun checks.
hi_prec_buf_t *yuv = (hi_prec_buf_t *)malloc(sizeof(*yuv));
yuv->y = (int16_t *)malloc(luma_size * sizeof(*yuv->y));
yuv->u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
yuv->v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
yuv->size = luma_size;
return yuv;
}
void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv)
{
free(yuv->y);
free(yuv->u);
free(yuv->v);
free(yuv);
}
static INLINE uint32_t reg_sad_maybe_optimized(const kvz_pixel * const data1, const kvz_pixel * const data2,
const int32_t width, const int32_t height, const uint32_t stride1,
const uint32_t stride2, optimized_sad_func_ptr_t optimized_sad)

View file

@@ -51,13 +51,6 @@ typedef struct {
enum kvz_chroma_format chroma_format;
} lcu_yuv_t;
typedef struct {
int size;
int16_t *y;
int16_t *u;
int16_t *v;
} hi_prec_buf_t;
typedef struct {
int size;
kvz_pixel *y;
@@ -65,6 +58,12 @@ typedef struct {
kvz_pixel *v;
} yuv_t;
typedef struct {
int size;
kvz_pixel_ip *y;
kvz_pixel_ip *u;
kvz_pixel_ip *v;
} yuv_ip_t;
kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height);
kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height);
@@ -82,9 +81,6 @@ kvz_picture *kvz_image_make_subimage(kvz_picture *const orig_image,
yuv_t * kvz_yuv_t_alloc(int luma_size, int chroma_size);
void kvz_yuv_t_free(yuv_t * yuv);
hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size);
void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv);
//Algorithms
unsigned kvz_image_calc_sad(const kvz_picture *pic,

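The hi_prec_buf_t type and its kvz_hi_prec_buf_t_alloc()/kvz_hi_prec_buf_t_free() helpers removed above are superseded by yuv_ip_t views that the caller lays over its own storage, as kvz_inter_recon_bipred does later in this commit. A minimal sketch of that pattern, assuming caller-owned storage; the helper name and sizes are illustrative, not part of the commit:

#include <stdint.h>

typedef int16_t kvz_pixel_ip;   /* intermediate-precision sample type added by this commit */

typedef struct {
  int size;
  kvz_pixel_ip *y;
  kvz_pixel_ip *u;
  kvz_pixel_ip *v;
} yuv_ip_t;

/* Illustrative helper: point a yuv_ip_t at one caller-owned array instead of
 * calling the removed kvz_hi_prec_buf_t_alloc(). Y occupies the first
 * luma_size samples, followed by U and V of chroma_size samples each. */
static void yuv_ip_init(yuv_ip_t *ip, kvz_pixel_ip *buf, int luma_size, int chroma_size)
{
  ip->size = luma_size;
  ip->y = &buf[0];
  ip->u = &buf[luma_size];
  ip->v = &buf[luma_size + chroma_size];
}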
View file

@@ -52,14 +52,15 @@ typedef struct {
} merge_candidates_t;
static void inter_recon_frac_luma(const encoder_state_t *const state,
const kvz_picture *const ref,
int32_t xpos,
int32_t ypos,
int32_t block_width,
int32_t block_height,
const int16_t mv_param[2],
lcu_t *lcu)
static void inter_recon_frac_luma(const encoder_state_t * const state,
const kvz_picture * const ref,
int32_t xpos,
int32_t ypos,
int32_t block_width,
int32_t block_height,
const int16_t mv_param[2],
yuv_t *out,
unsigned out_stride)
{
int mv_frac_x = (mv_param[0] & 3);
int mv_frac_y = (mv_param[1] & 3);
@@ -349,130 +350,130 @@ static void inter_cp_with_ext_border(const kvz_pixel *ref_buf, int ref_stride,
*
* \param state encoder state
* \param ref picture to copy the data from
* \param xpos PU x position
* \param ypos PU y position
* \param pu_x PU x position
* \param pu_y PU y position
* \param width PU width
* \param height PU height
* \param mv_param motion vector
* \param lcu destination lcu
* \param hi_prec_out destination of high precision output, or NULL if not needed
* \param yuv_px destination buffer for the pixel-precision output
* \param yuv_ip destination for the high-precision output, or NULL if not needed
* \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call.
*/
static void inter_recon_unipred(const encoder_state_t * const state,
const kvz_picture * const ref,
int32_t xpos,
int32_t ypos,
int32_t width,
int32_t height,
const int16_t mv_param[2],
lcu_t *lcu,
hi_prec_buf_t *hi_prec_out,
bool predict_luma,
bool predict_chroma)
static unsigned inter_recon_unipred(const encoder_state_t * const state,
const kvz_picture * const ref,
int32_t pu_x,
int32_t pu_y,
int32_t pu_w,
int32_t pu_h,
int32_t out_stride_luma,
const int16_t mv_param[2],
yuv_t *yuv_px,
yuv_ip_t *yuv_ip,
bool predict_luma,
bool predict_chroma)
{
const vector2d_t pu_in_tile = { xpos, ypos };
const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH };
const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 };
const vector2d_t mv_in_frame = {
mv_in_pu.x + pu_in_tile.x + state->tile->offset_x,
mv_in_pu.y + pu_in_tile.y + state->tile->offset_y
const vector2d_t int_mv = { mv_param[0] >> 2, mv_param[1] >> 2 };
const vector2d_t int_mv_in_frame = {
int_mv.x + pu_x + state->tile->offset_x,
int_mv.y + pu_y + state->tile->offset_y
};
const bool mv_is_outside_frame = mv_in_frame.x < 0 ||
mv_in_frame.y < 0 ||
mv_in_frame.x + width > ref->width ||
mv_in_frame.y + height > ref->height;
const bool int_mv_outside_frame = int_mv_in_frame.x < 0 ||
int_mv_in_frame.y < 0 ||
int_mv_in_frame.x + pu_w > ref->width ||
int_mv_in_frame.y + pu_h > ref->height;
// With 420, odd coordinates need interpolation.
const int8_t fractional_chroma = (mv_in_pu.x & 1) || (mv_in_pu.y & 1);
const int8_t fractional_luma = ((mv_param[0] & 3) || (mv_param[1] & 3));
const bool fractional_chroma = (int_mv.x & 1) || (int_mv.y & 1);
const bool fractional_luma = (mv_param[0] & 3) || (mv_param[1] & 3);
// Generate prediction for luma.
if (predict_luma) {
if (fractional_luma) {
// With a fractional MV, do interpolation.
if (state->encoder_control->cfg.bipred && hi_prec_out) {
if (state->encoder_control->cfg.bipred && yuv_ip) {
inter_recon_frac_luma_hi(state, ref,
pu_in_tile.x, pu_in_tile.y,
width, height,
mv_param, hi_prec_out);
pu_x, pu_y,
pu_w, pu_h,
mv_param, yuv_ip, out_stride_luma);
}
else {
inter_recon_frac_luma(state, ref,
pu_in_tile.x, pu_in_tile.y,
width, height,
mv_param, lcu);
pu_x, pu_y,
pu_w, pu_h,
mv_param, yuv_px, out_stride_luma);
}
}
else {
// With an integer MV, copy pixels directly from the reference.
const int lcu_pu_index = pu_in_lcu.y * LCU_WIDTH + pu_in_lcu.x;
if (mv_is_outside_frame) {
if (int_mv_outside_frame) {
inter_cp_with_ext_border(ref->y, ref->width,
ref->width, ref->height,
&lcu->rec.y[lcu_pu_index], LCU_WIDTH,
width, height,
&mv_in_frame);
yuv_px->y, out_stride_luma,
pu_w, pu_h,
&int_mv_in_frame);
}
else {
const int frame_mv_index = mv_in_frame.y * ref->width + mv_in_frame.x;
const int frame_mv_index = int_mv_in_frame.y * ref->width + int_mv_in_frame.x;
kvz_pixels_blit(&ref->y[frame_mv_index],
&lcu->rec.y[lcu_pu_index],
width, height,
ref->width, LCU_WIDTH);
yuv_px->y,
pu_w, pu_h,
ref->width, out_stride_luma);
}
}
}
if (!predict_chroma) {
return;
return fractional_luma;
}
const unsigned out_stride_c = out_stride_luma / 2;
// Generate prediction for chroma.
if (fractional_luma || fractional_chroma) {
// With a fractional MV, do interpolation.
if (state->encoder_control->cfg.bipred && hi_prec_out) {
if (state->encoder_control->cfg.bipred && yuv_ip) {
inter_recon_frac_chroma_hi(state, ref,
pu_in_tile.x, pu_in_tile.y,
width, height,
mv_param, hi_prec_out);
pu_x, pu_y,
pu_w, pu_h,
mv_param, yuv_ip, out_stride_c);
} else {
inter_recon_frac_chroma(state, ref,
pu_in_tile.x, pu_in_tile.y,
width, height,
mv_param, lcu);
pu_x, pu_y,
pu_w, pu_h,
mv_param, yuv_px, out_stride_c);
}
} else {
// With an integer MV, copy pixels directly from the reference.
const int lcu_pu_index_c = pu_in_lcu.y / 2 * LCU_WIDTH_C + pu_in_lcu.x / 2;
const vector2d_t mv_in_frame_c = { mv_in_frame.x / 2, mv_in_frame.y / 2 };
const vector2d_t int_mv_in_frame_c = { int_mv_in_frame.x / 2, int_mv_in_frame.y / 2 };
if (mv_is_outside_frame) {
if (int_mv_outside_frame) {
inter_cp_with_ext_border(ref->u, ref->width / 2,
ref->width / 2, ref->height / 2,
&lcu->rec.u[lcu_pu_index_c], LCU_WIDTH_C,
width / 2, height / 2,
&mv_in_frame_c);
yuv_px->u, out_stride_c,
pu_w / 2, pu_h / 2,
&int_mv_in_frame_c);
inter_cp_with_ext_border(ref->v, ref->width / 2,
ref->width / 2, ref->height / 2,
&lcu->rec.v[lcu_pu_index_c], LCU_WIDTH_C,
width / 2, height / 2,
&mv_in_frame_c);
yuv_px->v, out_stride_c,
pu_w / 2, pu_h / 2,
&int_mv_in_frame_c);
} else {
const int frame_mv_index = mv_in_frame_c.y * ref->width / 2 + mv_in_frame_c.x;
const int frame_mv_index = int_mv_in_frame_c.y * ref->width / 2 + int_mv_in_frame_c.x;
kvz_pixels_blit(&ref->u[frame_mv_index],
&lcu->rec.u[lcu_pu_index_c],
width / 2, height / 2,
ref->width / 2, LCU_WIDTH_C);
yuv_px->u,
pu_w / 2, pu_h / 2,
ref->width / 2, out_stride_c);
kvz_pixels_blit(&ref->v[frame_mv_index],
&lcu->rec.v[lcu_pu_index_c],
width / 2, height / 2,
ref->width / 2, LCU_WIDTH_C);
yuv_px->v,
pu_w / 2, pu_h / 2,
ref->width / 2, out_stride_c);
}
}
return fractional_luma | ((fractional_luma || fractional_chroma) << 1);
}
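The reworked inter_recon_unipred now returns a small flag word instead of void. Judging from how the blend code later tests these flags, bit 0 is set when the luma MV was fractional (so, in the bipred path, the luma prediction was interpolated into the yuv_ip buffer), and bit 1 when the chroma prediction required interpolation. A minimal decoding sketch; the helper names are illustrative, not part of the kvazaar API:

/* Illustrative decoding of the inter_recon_unipred return value. */
static inline int pred_luma_is_interpolated(unsigned ip_flags)   { return ip_flags & 1; }
static inline int pred_chroma_is_interpolated(unsigned ip_flags) { return (ip_flags & 2) != 0; }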
/**
* \brief Reconstruct bi-pred inter PU
@@ -480,8 +481,8 @@ static void inter_recon_unipred(const encoder_state_t * const state,
* \param state encoder state
* \param ref1 reference picture to copy the data from
* \param ref2 other reference picture to copy the data from
* \param xpos PU x position
* \param ypos PU y position
* \param pu_x PU x position
* \param pu_y PU y position
* \param width PU width
* \param height PU height
* \param mv_param motion vectors
@@ -489,56 +490,60 @@ static void inter_recon_unipred(const encoder_state_t * const state,
* \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call.
*/
void kvz_inter_recon_bipred(const encoder_state_t * const state,
const kvz_picture * ref1,
const kvz_picture * ref2,
int32_t xpos,
int32_t ypos,
int32_t width,
int32_t height,
int16_t mv_param[2][2],
lcu_t* lcu,
bool predict_luma,
bool predict_chroma)
void kvz_inter_recon_bipred(const encoder_state_t *const state,
const kvz_picture *ref1,
const kvz_picture *ref2,
int32_t pu_x,
int32_t pu_y,
int32_t pu_w,
int32_t pu_h,
int16_t mv_param[2][2],
lcu_t *lcu,
bool predict_luma,
bool predict_chroma)
{
kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH];
kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C];
kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C];
// Allocate maximum size arrays for interpolated and copied samples
ALIGNED(64) kvz_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel px_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_ip ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_ip ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
const int hi_prec_luma_rec0 = mv_param[0][0] & 3 || mv_param[0][1] & 3;
const int hi_prec_luma_rec1 = mv_param[1][0] & 3 || mv_param[1][1] & 3;
yuv_t px_L0;
px_L0.size = pu_w * pu_h;
px_L0.y = &px_buf_L0[0];
px_L0.u = &px_buf_L0[LCU_LUMA_SIZE];
px_L0.v = &px_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
const int hi_prec_chroma_rec0 = mv_param[0][0] & 7 || mv_param[0][1] & 7;
const int hi_prec_chroma_rec1 = mv_param[1][0] & 7 || mv_param[1][1] & 7;
yuv_t px_L1;
px_L1.size = pu_w * pu_h;
px_L1.y = &px_buf_L1[0];
px_L1.u = &px_buf_L1[LCU_LUMA_SIZE];
px_L1.v = &px_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
hi_prec_buf_t* high_precision_rec0 = 0;
hi_prec_buf_t* high_precision_rec1 = 0;
if (hi_prec_chroma_rec0) high_precision_rec0 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH);
if (hi_prec_chroma_rec1) high_precision_rec1 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH);
yuv_ip_t ip_L0;
ip_L0.size = pu_w * pu_h;
ip_L0.y = &ip_buf_L0[0];
ip_L0.u = &ip_buf_L0[LCU_LUMA_SIZE];
ip_L0.v = &ip_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
yuv_ip_t ip_L1;
ip_L1.size = pu_w * pu_h;
ip_L1.y = &ip_buf_L1[0];
ip_L1.u = &ip_buf_L1[LCU_LUMA_SIZE];
ip_L1.v = &ip_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
//Reconstruct both predictors
inter_recon_unipred(state, ref1, xpos, ypos, width, height, mv_param[0], lcu, high_precision_rec0,
predict_luma, predict_chroma);
if (!hi_prec_luma_rec0){
memcpy(temp_lcu_y, lcu->rec.y, sizeof(kvz_pixel) * 64 * 64); // copy to temp_lcu_y
}
if (!hi_prec_chroma_rec0){
memcpy(temp_lcu_u, lcu->rec.u, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_u
memcpy(temp_lcu_v, lcu->rec.v, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_v
}
inter_recon_unipred(state, ref2, xpos, ypos, width, height, mv_param[1], lcu, high_precision_rec1,
predict_luma, predict_chroma);
// Sample blocks from both reference picture lists.
// The flags indicate whether the outputs were written to the high-precision / interpolated sample buffers.
unsigned ip_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0],
&px_L0, &ip_L0, predict_luma, predict_chroma);
unsigned ip_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1],
&px_L1, &ip_L1, predict_luma, predict_chroma);
// After reconstruction, merge the predictors by taking an average of each pixel
kvz_inter_recon_bipred_blend(hi_prec_luma_rec0, hi_prec_luma_rec1,
hi_prec_chroma_rec0, hi_prec_chroma_rec1,
height, width, ypos, xpos,
high_precision_rec0, high_precision_rec1,
lcu, temp_lcu_y, temp_lcu_u, temp_lcu_v, predict_luma, predict_chroma);
if (high_precision_rec0 != 0) kvz_hi_prec_buf_t_free(high_precision_rec0);
if (high_precision_rec1 != 0) kvz_hi_prec_buf_t_free(high_precision_rec1);
kvz_inter_recon_bipred_blend(lcu, &px_L0, &px_L1, &ip_L0, &ip_L1,
pu_x, pu_y, pu_w, pu_h,
ip_flags_L0, ip_flags_L1,
predict_luma, predict_chroma);
}
@@ -626,12 +631,21 @@ void kvz_inter_pred_pu(const encoder_state_t * const state,
state->frame->ref_LX[mv_idx][
pu->inter.mv_ref[mv_idx]]];
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
yuv_t lcu_adapter;
lcu_adapter.size = pu_w * pu_h;
lcu_adapter.y = lcu->rec.y + offset_luma,
lcu_adapter.u = lcu->rec.u + offset_chroma,
lcu_adapter.v = lcu->rec.v + offset_chroma,
inter_recon_unipred(state,
ref,
pu_x, pu_y,
pu_w, pu_h,
LCU_WIDTH,
pu->inter.mv[mv_idx],
lcu,
&lcu_adapter,
NULL,
predict_luma, predict_chroma);
}

View file

@@ -97,6 +97,8 @@ typedef uint8_t kvz_pixel;
typedef uint16_t kvz_pixel;
#endif
typedef int16_t kvz_pixel_ip;
/**
* \brief Opaque data structure representing one instance of the encoder.
*/

View file

@@ -131,7 +131,16 @@ int32_t kvz_four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int
return temp;
}
void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder,
kvz_pixel *src,
int16_t src_stride,
int width,
int height,
kvz_pixel *dst,
int16_t dst_stride,
int8_t hor_flag,
int8_t ver_flag,
const int16_t mv[2])
{
//TODO: horizontal and vertical only filtering
int32_t x, y;
@@ -669,7 +678,16 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
}
}
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height,kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder,
kvz_pixel *src,
int16_t src_stride,
int width,
int height,
kvz_pixel *dst,
int16_t dst_stride,
int8_t hor_flag,
int8_t ver_flag,
const int16_t mv[2])
{
//TODO: horizontal and vertical only filtering
int32_t x, y;

View file

@@ -547,55 +547,52 @@ static unsigned pixels_calc_ssd_generic(const kvz_pixel *const ref, const kvz_pi
return ssd >> (2*(KVZ_BIT_DEPTH-8));
}
static void inter_recon_bipred_generic(const int hi_prec_luma_rec0,
const int hi_prec_luma_rec1,
const int hi_prec_chroma_rec0,
const int hi_prec_chroma_rec1,
int32_t height,
int32_t width,
int32_t ypos,
int32_t xpos,
const hi_prec_buf_t*high_precision_rec0,
const hi_prec_buf_t*high_precision_rec1,
lcu_t* lcu,
kvz_pixel* temp_lcu_y,
kvz_pixel* temp_lcu_u,
kvz_pixel* temp_lcu_v,
bool predict_luma,
bool predict_chroma) {
static void inter_recon_bipred_generic(lcu_t *const lcu,
const yuv_t *const px_L0,
const yuv_t *const px_L1,
const yuv_ip_t *const ip_L0,
const yuv_ip_t *const ip_L1,
const unsigned pu_x,
const unsigned pu_y,
const unsigned pu_w,
const unsigned pu_h,
const unsigned ip_flags_L0,
const unsigned ip_flags_L1,
const bool predict_luma,
const bool predict_chroma) {
int shift = 15 - KVZ_BIT_DEPTH;
int offset = 1 << (shift - 1);
const unsigned pu_w_c = pu_w >> 1;
int y_in_lcu;
int x_in_lcu;
//After reconstruction, merge the predictors by taking an average of each pixel
for (int temp_y = 0; temp_y < height; ++temp_y) {
for (int temp_x = 0; temp_x < width; ++temp_x) {
y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1));
for (int y = 0; y < pu_h; ++y) {
for (int x = 0; x < pu_w; ++x) {
y_in_lcu = (pu_y + y) & (LCU_WIDTH-1);
x_in_lcu = (pu_x + x) & (LCU_WIDTH-1);
if (predict_luma) {
int16_t sample0_y = (hi_prec_luma_rec0 ? high_precision_rec0->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_y = (hi_prec_luma_rec1 ? high_precision_rec1->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
int16_t sample0_y = ((ip_flags_L0 & 1) ? ip_L0->y[y * pu_w + x] : (px_L0->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_y = ((ip_flags_L1 & 1) ? ip_L1->y[y * pu_w + x] : (px_L1->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_y + sample1_y + offset) >> shift);
}
if (predict_chroma && (temp_x < width >> 1 && temp_y < height >> 1)) {
if (predict_chroma && (x < (pu_w >> 1) && y < (pu_h >> 1))) {
y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1));
x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1));
y_in_lcu = SUB_SCU(pu_y) / 2 + y;
x_in_lcu = SUB_SCU(pu_x) / 2 + x;
int16_t sample0_u = (hi_prec_chroma_rec0 ? high_precision_rec0->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_u = (hi_prec_chroma_rec1 ? high_precision_rec1->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
int16_t sample0_u = ((ip_flags_L0 & 2) ? ip_L0->u[y * pu_w_c + x] : (px_L0->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_u = ((ip_flags_L1 & 2) ? ip_L1->u[y * pu_w_c + x] : (px_L1->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_u + sample1_u + offset) >> shift);
int16_t sample0_v = (hi_prec_chroma_rec0 ? high_precision_rec0->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_v = (hi_prec_chroma_rec1 ? high_precision_rec1->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
int16_t sample0_v = ((ip_flags_L0 & 2) ? ip_L0->v[y * pu_w_c + x] : (px_L0->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
int16_t sample1_v = ((ip_flags_L1 & 2) ? ip_L1->v[y * pu_w_c + x] : (px_L1->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_v + sample1_v + offset) >> shift);
}
}
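The blend keeps both predictors on a common 14-bit intermediate scale: interpolated samples are already there, while plain pixel samples are upshifted by 14 - KVZ_BIT_DEPTH, and the sum is rounded back down with shift = 15 - KVZ_BIT_DEPTH. A standalone sketch of that arithmetic, assuming KVZ_BIT_DEPTH == 8, with illustrative sample values and the final clip to pixel range omitted:

#include <stdint.h>
#include <stdio.h>

/* Average two predictor samples that sit on the 14-bit intermediate scale. */
static int blend(int16_t s0, int16_t s1, int bit_depth)
{
  const int shift  = 15 - bit_depth;    /* 7 for 8-bit content */
  const int offset = 1 << (shift - 1);  /* rounding term */
  return (s0 + s1 + offset) >> shift;
}

int main(void)
{
  const int bit_depth = 8;
  int16_t s_px = (int16_t)(200 << (14 - bit_depth)); /* pixel value 200 upshifted to the 14-bit scale */
  int16_t s_ip = 12800;                              /* interpolated sample, illustrative value */
  printf("%d\n", blend(s_px, s_ip, bit_depth));      /* prints 200 */
  return 0;
}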

View file

@@ -101,6 +101,28 @@ typedef void(kvz_sample_octpel_chroma_func)(const encoder_control_t * const enco
typedef void(kvz_sample_quarterpel_luma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
typedef void(kvz_sample_octpel_chroma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
typedef void(kvz_sample_14bit_quarterpel_luma_func)(const encoder_control_t * const encoder,
kvz_pixel *src,
int16_t src_stride,
int width,
int height,
int16_t *dst,
int16_t dst_stride,
int8_t hor_flag,
int8_t ver_flag,
const int16_t mv[2]);
typedef void(kvz_sample_14bit_octpel_chroma_func)(const encoder_control_t *const encoder,
kvz_pixel *src,
int16_t src_stride,
int width,
int height,
int16_t *dst,
int16_t dst_stride,
int8_t hor_flag,
int8_t ver_flag,
const int16_t mv[2]);
// Declare function pointers.
extern ipol_blocks_func * kvz_filter_hpel_blocks_hor_ver_luma;
extern ipol_blocks_func * kvz_filter_hpel_blocks_diag_luma;

View file

@@ -133,22 +133,19 @@ typedef uint32_t (hor_sad_func)(const kvz_pixel *pic_data, const kvz_pixel *ref_
int32_t width, int32_t height, uint32_t pic_stride,
uint32_t ref_stride, uint32_t left, uint32_t right);
typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0,
const int hi_prec_luma_rec1,
const int hi_prec_chroma_rec0,
const int hi_prec_chroma_rec1,
int height,
int width,
int ypos,
int xpos,
const hi_prec_buf_t*high_precision_rec0,
const hi_prec_buf_t*high_precision_rec1,
lcu_t* lcu,
kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH],
kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C],
kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C],
bool predict_luma,
bool predict_chroma);
typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
const yuv_t *const px_L0,
const yuv_t *const px_L1,
const yuv_ip_t *const ip_L0,
const yuv_ip_t *const ip_L1,
const unsigned pu_x,
const unsigned pu_y,
const unsigned pu_w,
const unsigned pu_h,
const unsigned ip_flags_L0,
const unsigned ip_flags_L1,
const bool predict_luma,
const bool predict_chroma);
typedef double (pixel_var_func)(const kvz_pixel *buf, const uint32_t len);
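The updated inter_recon_bipred_func typedef matches the call site in inter.c (kvz_inter_recon_bipred_blend). A minimal sketch of how such a typedef is typically used to keep the strategy pointer and an implementation in sync; the extern pointer declaration below is assumed to exist elsewhere in this header and is not part of this hunk:

/* Assumed declarations, not shown in this diff. */
extern inter_recon_bipred_func *kvz_inter_recon_bipred_blend;

/* Declaring the generic implementation through the typedef turns any future
 * signature drift into a compile error rather than a silent mismatch. */
static inter_recon_bipred_func inter_recon_bipred_generic;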