mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
About working with generics
This commit is contained in:
parent
e7857cbb24
commit
b52a930bed
21
src/image.c
21
src/image.c
|
@ -218,27 +218,6 @@ void kvz_yuv_t_free(yuv_t *yuv)
|
|||
FREE_POINTER(yuv);
|
||||
}
|
||||
|
||||
hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size)
|
||||
{
|
||||
// Get buffers with separate mallocs in order to take advantage of
|
||||
// automatic buffer overrun checks.
|
||||
hi_prec_buf_t *yuv = (hi_prec_buf_t *)malloc(sizeof(*yuv));
|
||||
yuv->y = (int16_t *)malloc(luma_size * sizeof(*yuv->y));
|
||||
yuv->u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
|
||||
yuv->v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
|
||||
yuv->size = luma_size;
|
||||
|
||||
return yuv;
|
||||
}
|
||||
|
||||
void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv)
|
||||
{
|
||||
free(yuv->y);
|
||||
free(yuv->u);
|
||||
free(yuv->v);
|
||||
free(yuv);
|
||||
}
|
||||
|
||||
static INLINE uint32_t reg_sad_maybe_optimized(const kvz_pixel * const data1, const kvz_pixel * const data2,
|
||||
const int32_t width, const int32_t height, const uint32_t stride1,
|
||||
const uint32_t stride2, optimized_sad_func_ptr_t optimized_sad)
|
||||
|
|
16
src/image.h
16
src/image.h
|
@ -51,13 +51,6 @@ typedef struct {
|
|||
enum kvz_chroma_format chroma_format;
|
||||
} lcu_yuv_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
int16_t *y;
|
||||
int16_t *u;
|
||||
int16_t *v;
|
||||
} hi_prec_buf_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
kvz_pixel *y;
|
||||
|
@ -65,6 +58,12 @@ typedef struct {
|
|||
kvz_pixel *v;
|
||||
} yuv_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
kvz_pixel_ip *y;
|
||||
kvz_pixel_ip *u;
|
||||
kvz_pixel_ip *v;
|
||||
} yuv_ip_t;
|
||||
|
||||
kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height);
|
||||
kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height);
|
||||
|
@ -82,9 +81,6 @@ kvz_picture *kvz_image_make_subimage(kvz_picture *const orig_image,
|
|||
yuv_t * kvz_yuv_t_alloc(int luma_size, int chroma_size);
|
||||
void kvz_yuv_t_free(yuv_t * yuv);
|
||||
|
||||
hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size);
|
||||
void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv);
|
||||
|
||||
|
||||
//Algorithms
|
||||
unsigned kvz_image_calc_sad(const kvz_picture *pic,
|
||||
|
|
256
src/inter.c
256
src/inter.c
|
@ -52,14 +52,15 @@ typedef struct {
|
|||
} merge_candidates_t;
|
||||
|
||||
|
||||
static void inter_recon_frac_luma(const encoder_state_t *const state,
|
||||
const kvz_picture *const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
lcu_t *lcu)
|
||||
static void inter_recon_frac_luma(const encoder_state_t * const state,
|
||||
const kvz_picture * const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
yuv_t *out,
|
||||
unsigned out_stride)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 3);
|
||||
int mv_frac_y = (mv_param[1] & 3);
|
||||
|
@ -349,130 +350,130 @@ static void inter_cp_with_ext_border(const kvz_pixel *ref_buf, int ref_stride,
|
|||
*
|
||||
* \param state encoder state
|
||||
* \param ref picture to copy the data from
|
||||
* \param xpos PU x position
|
||||
* \param ypos PU y position
|
||||
* \param pu_x PU x position
|
||||
* \param pu_y PU y position
|
||||
* \param pu_w PU width
|
||||
* \param pu_h PU height
|
||||
* \param mv_param motion vector
|
||||
* \param lcu destination lcu
|
||||
* \param hi_prec_out destination of high precision output, or NULL if not needed
|
||||
* \param yuv_px destination of pixel-precision output
|
||||
* \param yuv_ip destination of high precision output, or NULL if not needed
|
||||
* \param predict_luma Enable or disable luma prediction for this call.
|
||||
* \param predict_chroma Enable or disable chroma prediction for this call.
|
||||
*/
|
||||
static void inter_recon_unipred(const encoder_state_t * const state,
|
||||
const kvz_picture * const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
const int16_t mv_param[2],
|
||||
lcu_t *lcu,
|
||||
hi_prec_buf_t *hi_prec_out,
|
||||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
static unsigned inter_recon_unipred(const encoder_state_t * const state,
|
||||
const kvz_picture * const ref,
|
||||
int32_t pu_x,
|
||||
int32_t pu_y,
|
||||
int32_t pu_w,
|
||||
int32_t pu_h,
|
||||
int32_t out_stride_luma,
|
||||
const int16_t mv_param[2],
|
||||
yuv_t *yuv_px,
|
||||
yuv_ip_t *yuv_ip,
|
||||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
{
|
||||
const vector2d_t pu_in_tile = { xpos, ypos };
|
||||
const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH };
|
||||
|
||||
const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 };
|
||||
const vector2d_t mv_in_frame = {
|
||||
mv_in_pu.x + pu_in_tile.x + state->tile->offset_x,
|
||||
mv_in_pu.y + pu_in_tile.y + state->tile->offset_y
|
||||
const vector2d_t int_mv = { mv_param[0] >> 2, mv_param[1] >> 2 };
|
||||
const vector2d_t int_mv_in_frame = {
|
||||
int_mv.x + pu_x + state->tile->offset_x,
|
||||
int_mv.y + pu_y + state->tile->offset_y
|
||||
};
|
||||
|
||||
const bool mv_is_outside_frame = mv_in_frame.x < 0 ||
|
||||
mv_in_frame.y < 0 ||
|
||||
mv_in_frame.x + width > ref->width ||
|
||||
mv_in_frame.y + height > ref->height;
|
||||
const bool int_mv_outside_frame = int_mv_in_frame.x < 0 ||
|
||||
int_mv_in_frame.y < 0 ||
|
||||
int_mv_in_frame.x + pu_w > ref->width ||
|
||||
int_mv_in_frame.y + pu_h > ref->height;
|
||||
|
||||
// With 420, odd coordinates need interpolation.
|
||||
const int8_t fractional_chroma = (mv_in_pu.x & 1) || (mv_in_pu.y & 1);
|
||||
const int8_t fractional_luma = ((mv_param[0] & 3) || (mv_param[1] & 3));
|
||||
const bool fractional_chroma = (int_mv.x & 1) || (int_mv.y & 1);
|
||||
const bool fractional_luma = (mv_param[0] & 3) || (mv_param[1] & 3);
|
||||
|
||||
// Generate prediction for luma.
|
||||
if (predict_luma) {
|
||||
if (fractional_luma) {
|
||||
// With a fractional MV, do interpolation.
|
||||
if (state->encoder_control->cfg.bipred && hi_prec_out) {
|
||||
if (state->encoder_control->cfg.bipred && yuv_ip) {
|
||||
inter_recon_frac_luma_hi(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param, hi_prec_out);
|
||||
pu_x, pu_y,
|
||||
pu_w, pu_h,
|
||||
mv_param, yuv_ip, out_stride_luma);
|
||||
}
|
||||
else {
|
||||
inter_recon_frac_luma(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param, lcu);
|
||||
pu_x, pu_y,
|
||||
pu_w, pu_h,
|
||||
mv_param, yuv_px, out_stride_luma);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// With an integer MV, copy pixels directly from the reference.
|
||||
const int lcu_pu_index = pu_in_lcu.y * LCU_WIDTH + pu_in_lcu.x;
|
||||
if (mv_is_outside_frame) {
|
||||
if (int_mv_outside_frame) {
|
||||
inter_cp_with_ext_border(ref->y, ref->width,
|
||||
ref->width, ref->height,
|
||||
&lcu->rec.y[lcu_pu_index], LCU_WIDTH,
|
||||
width, height,
|
||||
&mv_in_frame);
|
||||
yuv_px->y, out_stride_luma,
|
||||
pu_w, pu_h,
|
||||
&int_mv_in_frame);
|
||||
}
|
||||
else {
|
||||
const int frame_mv_index = mv_in_frame.y * ref->width + mv_in_frame.x;
|
||||
const int frame_mv_index = int_mv_in_frame.y * ref->width + int_mv_in_frame.x;
|
||||
kvz_pixels_blit(&ref->y[frame_mv_index],
|
||||
&lcu->rec.y[lcu_pu_index],
|
||||
width, height,
|
||||
ref->width, LCU_WIDTH);
|
||||
yuv_px->y,
|
||||
pu_w, pu_h,
|
||||
ref->width, out_stride_luma);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!predict_chroma) {
|
||||
return;
|
||||
return fractional_luma;
|
||||
}
|
||||
|
||||
const unsigned out_stride_c = out_stride_luma / 2;
|
||||
|
||||
// Generate prediction for chroma.
|
||||
if (fractional_luma || fractional_chroma) {
|
||||
// With a fractional MV, do interpolation.
|
||||
if (state->encoder_control->cfg.bipred && hi_prec_out) {
|
||||
if (state->encoder_control->cfg.bipred && yuv_ip) {
|
||||
inter_recon_frac_chroma_hi(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param, hi_prec_out);
|
||||
pu_x, pu_y,
|
||||
pu_w, pu_h,
|
||||
mv_param, yuv_ip, out_stride_c);
|
||||
} else {
|
||||
inter_recon_frac_chroma(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param, lcu);
|
||||
pu_x, pu_y,
|
||||
pu_w, pu_h,
|
||||
mv_param, yuv_px, out_stride_c);
|
||||
}
|
||||
} else {
|
||||
// With an integer MV, copy pixels directly from the reference.
|
||||
const int lcu_pu_index_c = pu_in_lcu.y / 2 * LCU_WIDTH_C + pu_in_lcu.x / 2;
|
||||
const vector2d_t mv_in_frame_c = { mv_in_frame.x / 2, mv_in_frame.y / 2 };
|
||||
const vector2d_t int_mv_in_frame_c = { int_mv_in_frame.x / 2, int_mv_in_frame.y / 2 };
|
||||
|
||||
if (mv_is_outside_frame) {
|
||||
if (int_mv_outside_frame) {
|
||||
inter_cp_with_ext_border(ref->u, ref->width / 2,
|
||||
ref->width / 2, ref->height / 2,
|
||||
&lcu->rec.u[lcu_pu_index_c], LCU_WIDTH_C,
|
||||
width / 2, height / 2,
|
||||
&mv_in_frame_c);
|
||||
yuv_px->u, out_stride_c,
|
||||
pu_w / 2, pu_h / 2,
|
||||
&int_mv_in_frame_c);
|
||||
inter_cp_with_ext_border(ref->v, ref->width / 2,
|
||||
ref->width / 2, ref->height / 2,
|
||||
&lcu->rec.v[lcu_pu_index_c], LCU_WIDTH_C,
|
||||
width / 2, height / 2,
|
||||
&mv_in_frame_c);
|
||||
yuv_px->v, out_stride_c,
|
||||
pu_w / 2, pu_h / 2,
|
||||
&int_mv_in_frame_c);
|
||||
} else {
|
||||
const int frame_mv_index = mv_in_frame_c.y * ref->width / 2 + mv_in_frame_c.x;
|
||||
const int frame_mv_index = int_mv_in_frame_c.y * ref->width / 2 + int_mv_in_frame_c.x;
|
||||
|
||||
kvz_pixels_blit(&ref->u[frame_mv_index],
|
||||
&lcu->rec.u[lcu_pu_index_c],
|
||||
width / 2, height / 2,
|
||||
ref->width / 2, LCU_WIDTH_C);
|
||||
yuv_px->u,
|
||||
pu_w / 2, pu_h / 2,
|
||||
ref->width / 2, out_stride_c);
|
||||
kvz_pixels_blit(&ref->v[frame_mv_index],
|
||||
&lcu->rec.v[lcu_pu_index_c],
|
||||
width / 2, height / 2,
|
||||
ref->width / 2, LCU_WIDTH_C);
|
||||
yuv_px->v,
|
||||
pu_w / 2, pu_h / 2,
|
||||
ref->width / 2, out_stride_c);
|
||||
}
|
||||
}
|
||||
|
||||
return fractional_luma | ((fractional_luma || fractional_chroma) << 1);
|
||||
}
|
||||
/**
|
||||
* \brief Reconstruct bi-pred inter PU
|
||||
|
@ -480,8 +481,8 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
* \param state encoder state
|
||||
* \param ref1 reference picture to copy the data from
|
||||
* \param ref2 other reference picture to copy the data from
|
||||
* \param xpos PU x position
|
||||
* \param ypos PU y position
|
||||
* \param pu_x PU x position
|
||||
* \param pu_y PU y position
|
||||
* \param pu_w PU width
|
||||
* \param pu_h PU height
|
||||
* \param mv_param motion vectors
|
||||
|
@ -489,56 +490,60 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
* \param predict_luma Enable or disable luma prediction for this call.
|
||||
* \param predict_chroma Enable or disable chroma prediction for this call.
|
||||
*/
|
||||
void kvz_inter_recon_bipred(const encoder_state_t * const state,
|
||||
const kvz_picture * ref1,
|
||||
const kvz_picture * ref2,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t width,
|
||||
int32_t height,
|
||||
int16_t mv_param[2][2],
|
||||
lcu_t* lcu,
|
||||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
void kvz_inter_recon_bipred(const encoder_state_t *const state,
|
||||
const kvz_picture *ref1,
|
||||
const kvz_picture *ref2,
|
||||
int32_t pu_x,
|
||||
int32_t pu_y,
|
||||
int32_t pu_w,
|
||||
int32_t pu_h,
|
||||
int16_t mv_param[2][2],
|
||||
lcu_t *lcu,
|
||||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
{
|
||||
kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH];
|
||||
kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C];
|
||||
kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C];
|
||||
// Allocate maximum size arrays for interpolated and copied samples
|
||||
ALIGNED(64) kvz_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
|
||||
ALIGNED(64) kvz_pixel px_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
|
||||
ALIGNED(64) kvz_pixel_ip ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
|
||||
ALIGNED(64) kvz_pixel_ip ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
|
||||
|
||||
const int hi_prec_luma_rec0 = mv_param[0][0] & 3 || mv_param[0][1] & 3;
|
||||
const int hi_prec_luma_rec1 = mv_param[1][0] & 3 || mv_param[1][1] & 3;
|
||||
yuv_t px_L0;
|
||||
px_L0.size = pu_w * pu_h;
|
||||
px_L0.y = &px_buf_L0[0];
|
||||
px_L0.u = &px_buf_L0[LCU_LUMA_SIZE];
|
||||
px_L0.v = &px_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
|
||||
|
||||
const int hi_prec_chroma_rec0 = mv_param[0][0] & 7 || mv_param[0][1] & 7;
|
||||
const int hi_prec_chroma_rec1 = mv_param[1][0] & 7 || mv_param[1][1] & 7;
|
||||
yuv_t px_L1;
|
||||
px_L1.size = pu_w * pu_h;
|
||||
px_L1.y = &px_buf_L1[0];
|
||||
px_L1.u = &px_buf_L1[LCU_LUMA_SIZE];
|
||||
px_L1.v = &px_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
|
||||
|
||||
hi_prec_buf_t* high_precision_rec0 = 0;
|
||||
hi_prec_buf_t* high_precision_rec1 = 0;
|
||||
if (hi_prec_chroma_rec0) high_precision_rec0 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH);
|
||||
if (hi_prec_chroma_rec1) high_precision_rec1 = kvz_hi_prec_buf_t_alloc(LCU_WIDTH*LCU_WIDTH);
|
||||
yuv_ip_t ip_L0;
|
||||
ip_L0.size = pu_w * pu_h;
|
||||
ip_L0.y = &ip_buf_L0[0];
|
||||
ip_L0.u = &ip_buf_L0[LCU_LUMA_SIZE];
|
||||
ip_L0.v = &ip_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
|
||||
|
||||
yuv_ip_t ip_L1;
|
||||
ip_L1.size = pu_w * pu_h;
|
||||
ip_L1.y = &ip_buf_L1[0];
|
||||
ip_L1.u = &ip_buf_L1[LCU_LUMA_SIZE];
|
||||
ip_L1.v = &ip_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
|
||||
|
||||
//Reconstruct both predictors
|
||||
inter_recon_unipred(state, ref1, xpos, ypos, width, height, mv_param[0], lcu, high_precision_rec0,
|
||||
predict_luma, predict_chroma);
|
||||
if (!hi_prec_luma_rec0){
|
||||
memcpy(temp_lcu_y, lcu->rec.y, sizeof(kvz_pixel) * 64 * 64); // copy to temp_lcu_y
|
||||
}
|
||||
if (!hi_prec_chroma_rec0){
|
||||
memcpy(temp_lcu_u, lcu->rec.u, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_u
|
||||
memcpy(temp_lcu_v, lcu->rec.v, sizeof(kvz_pixel) * 32 * 32); // copy to temp_lcu_v
|
||||
}
|
||||
inter_recon_unipred(state, ref2, xpos, ypos, width, height, mv_param[1], lcu, high_precision_rec1,
|
||||
predict_luma, predict_chroma);
|
||||
// Sample blocks from both reference picture lists.
|
||||
// The returned flags tell whether the outputs were written to the high-precision / interpolated sample buffers.
|
||||
unsigned ip_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0],
|
||||
&px_L0, &ip_L0, predict_luma, predict_chroma);
|
||||
unsigned ip_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1],
|
||||
&px_L1, &ip_L1, predict_luma, predict_chroma);
|
||||
|
||||
// After reconstruction, merge the predictors by taking an average of each pixel
|
||||
kvz_inter_recon_bipred_blend(hi_prec_luma_rec0, hi_prec_luma_rec1,
|
||||
hi_prec_chroma_rec0, hi_prec_chroma_rec1,
|
||||
height, width, ypos, xpos,
|
||||
high_precision_rec0, high_precision_rec1,
|
||||
lcu, temp_lcu_y, temp_lcu_u, temp_lcu_v, predict_luma, predict_chroma);
|
||||
|
||||
if (high_precision_rec0 != 0) kvz_hi_prec_buf_t_free(high_precision_rec0);
|
||||
if (high_precision_rec1 != 0) kvz_hi_prec_buf_t_free(high_precision_rec1);
|
||||
kvz_inter_recon_bipred_blend(lcu, &px_L0, &px_L1, &ip_L0, &ip_L1,
|
||||
pu_x, pu_y, pu_w, pu_h,
|
||||
ip_flags_L0, ip_flags_L1,
|
||||
predict_luma, predict_chroma);
|
||||
}
|
||||
|
||||
|
||||
|
@ -626,12 +631,21 @@ void kvz_inter_pred_pu(const encoder_state_t * const state,
|
|||
state->frame->ref_LX[mv_idx][
|
||||
pu->inter.mv_ref[mv_idx]]];
|
||||
|
||||
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
|
||||
const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
|
||||
yuv_t lcu_adapter;
|
||||
lcu_adapter.size = pu_w * pu_h;
|
||||
lcu_adapter.y = lcu->rec.y + offset_luma,
|
||||
lcu_adapter.u = lcu->rec.u + offset_chroma,
|
||||
lcu_adapter.v = lcu->rec.v + offset_chroma,
|
||||
|
||||
inter_recon_unipred(state,
|
||||
ref,
|
||||
pu_x, pu_y,
|
||||
pu_w, pu_h,
|
||||
LCU_WIDTH,
|
||||
pu->inter.mv[mv_idx],
|
||||
lcu,
|
||||
&lcu_adapter,
|
||||
NULL,
|
||||
predict_luma, predict_chroma);
|
||||
}
|
||||
|
|
|
@ -97,6 +97,8 @@ typedef uint8_t kvz_pixel;
|
|||
typedef uint16_t kvz_pixel;
|
||||
#endif
|
||||
|
||||
typedef int16_t kvz_pixel_ip;
|
||||
|
||||
/**
|
||||
* \brief Opaque data structure representing one instance of the encoder.
|
||||
*/
|
||||
|
|
|
@ -131,7 +131,16 @@ int32_t kvz_four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int
|
|||
return temp;
|
||||
}
|
||||
|
||||
void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
|
||||
void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder,
|
||||
kvz_pixel *src,
|
||||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
kvz_pixel *dst,
|
||||
int16_t dst_stride,
|
||||
int8_t hor_flag,
|
||||
int8_t ver_flag,
|
||||
const int16_t mv[2])
|
||||
{
|
||||
//TODO: horizontal and vertical only filtering
|
||||
int32_t x, y;
|
||||
|
@ -669,7 +678,16 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
|
|||
}
|
||||
}
|
||||
|
||||
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height,kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
|
||||
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder,
|
||||
kvz_pixel *src,
|
||||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
kvz_pixel *dst,
|
||||
int16_t dst_stride,
|
||||
int8_t hor_flag,
|
||||
int8_t ver_flag,
|
||||
const int16_t mv[2])
|
||||
{
|
||||
//TODO: horizontal and vertical only filtering
|
||||
int32_t x, y;
|
||||
|
|
|
@ -547,55 +547,52 @@ static unsigned pixels_calc_ssd_generic(const kvz_pixel *const ref, const kvz_pi
|
|||
return ssd >> (2*(KVZ_BIT_DEPTH-8));
|
||||
}
|
||||
|
||||
static void inter_recon_bipred_generic(const int hi_prec_luma_rec0,
|
||||
const int hi_prec_luma_rec1,
|
||||
const int hi_prec_chroma_rec0,
|
||||
const int hi_prec_chroma_rec1,
|
||||
int32_t height,
|
||||
int32_t width,
|
||||
int32_t ypos,
|
||||
int32_t xpos,
|
||||
const hi_prec_buf_t*high_precision_rec0,
|
||||
const hi_prec_buf_t*high_precision_rec1,
|
||||
lcu_t* lcu,
|
||||
kvz_pixel* temp_lcu_y,
|
||||
kvz_pixel* temp_lcu_u,
|
||||
kvz_pixel* temp_lcu_v,
|
||||
bool predict_luma,
|
||||
bool predict_chroma) {
|
||||
static void inter_recon_bipred_generic(lcu_t *const lcu,
|
||||
const yuv_t *const px_L0,
|
||||
const yuv_t *const px_L1,
|
||||
const yuv_ip_t *const ip_L0,
|
||||
const yuv_ip_t *const ip_L1,
|
||||
const unsigned pu_x,
|
||||
const unsigned pu_y,
|
||||
const unsigned pu_w,
|
||||
const unsigned pu_h,
|
||||
const unsigned ip_flags_L0,
|
||||
const unsigned ip_flags_L1,
|
||||
const bool predict_luma,
|
||||
const bool predict_chroma) {
|
||||
|
||||
int shift = 15 - KVZ_BIT_DEPTH;
|
||||
int offset = 1 << (shift - 1);
|
||||
|
||||
const unsigned pu_w_c = pu_w >> 1;
|
||||
|
||||
int y_in_lcu;
|
||||
int x_in_lcu;
|
||||
|
||||
//After reconstruction, merge the predictors by taking an average of each pixel
|
||||
for (int temp_y = 0; temp_y < height; ++temp_y) {
|
||||
|
||||
|
||||
for (int temp_x = 0; temp_x < width; ++temp_x) {
|
||||
y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
|
||||
x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1));
|
||||
for (int y = 0; y < pu_h; ++y) {
|
||||
for (int x = 0; x < pu_w; ++x) {
|
||||
y_in_lcu = (pu_y + y) & (LCU_WIDTH-1);
|
||||
x_in_lcu = (pu_x + x) & (LCU_WIDTH-1);
|
||||
|
||||
if (predict_luma) {
|
||||
int16_t sample0_y = (hi_prec_luma_rec0 ? high_precision_rec0->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample1_y = (hi_prec_luma_rec1 ? high_precision_rec1->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample0_y = ((ip_flags_L0 & 1) ? ip_L0->y[y * pu_w + x] : (px_L0->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample1_y = ((ip_flags_L1 & 1) ? ip_L1->y[y * pu_w + x] : (px_L1->y[y * pu_w + x] << (14 - KVZ_BIT_DEPTH)));
|
||||
|
||||
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_y + sample1_y + offset) >> shift);
|
||||
}
|
||||
|
||||
if (predict_chroma && (temp_x < width >> 1 && temp_y < height >> 1)) {
|
||||
if (predict_chroma && (x < (pu_w >> 1) && y < (pu_h >> 1))) {
|
||||
|
||||
y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1));
|
||||
x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1));
|
||||
y_in_lcu = SUB_SCU(pu_y) / 2 + y;
|
||||
x_in_lcu = SUB_SCU(pu_x) / 2 + x;
|
||||
|
||||
int16_t sample0_u = (hi_prec_chroma_rec0 ? high_precision_rec0->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample1_u = (hi_prec_chroma_rec1 ? high_precision_rec1->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample0_u = ((ip_flags_L0 & 2) ? ip_L0->u[y * pu_w_c + x] : (px_L0->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample1_u = ((ip_flags_L1 & 2) ? ip_L1->u[y * pu_w_c + x] : (px_L1->u[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
|
||||
lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_u + sample1_u + offset) >> shift);
|
||||
|
||||
int16_t sample0_v = (hi_prec_chroma_rec0 ? high_precision_rec0->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample1_v = (hi_prec_chroma_rec1 ? high_precision_rec1->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample0_v = ((ip_flags_L0 & 2) ? ip_L0->v[y * pu_w_c + x] : (px_L0->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
|
||||
int16_t sample1_v = ((ip_flags_L1 & 2) ? ip_L1->v[y * pu_w_c + x] : (px_L1->v[y * pu_w_c + x] << (14 - KVZ_BIT_DEPTH)));
|
||||
lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_v + sample1_v + offset) >> shift);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -101,6 +101,28 @@ typedef void(kvz_sample_octpel_chroma_func)(const encoder_control_t * const enco
|
|||
typedef void(kvz_sample_quarterpel_luma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
typedef void(kvz_sample_octpel_chroma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
|
||||
typedef void(kvz_sample_14bit_quarterpel_luma_func)(const encoder_control_t * const encoder,
|
||||
kvz_pixel *src,
|
||||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
int16_t *dst,
|
||||
int16_t dst_stride,
|
||||
int8_t hor_flag,
|
||||
int8_t ver_flag,
|
||||
const int16_t mv[2]);
|
||||
|
||||
typedef void(kvz_sample_14bit_octpel_chroma_func)(const encoder_control_t *const encoder,
|
||||
kvz_pixel *src,
|
||||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
int16_t *dst,
|
||||
int16_t dst_stride,
|
||||
int8_t hor_flag,
|
||||
int8_t ver_flag,
|
||||
const int16_t mv[2]);
|
||||
|
||||
// Declare function pointers.
|
||||
extern ipol_blocks_func * kvz_filter_hpel_blocks_hor_ver_luma;
|
||||
extern ipol_blocks_func * kvz_filter_hpel_blocks_diag_luma;
|
||||
|
|
|
@ -133,22 +133,19 @@ typedef uint32_t (hor_sad_func)(const kvz_pixel *pic_data, const kvz_pixel *ref_
|
|||
int32_t width, int32_t height, uint32_t pic_stride,
|
||||
uint32_t ref_stride, uint32_t left, uint32_t right);
|
||||
|
||||
typedef void (inter_recon_bipred_func)(const int hi_prec_luma_rec0,
|
||||
const int hi_prec_luma_rec1,
|
||||
const int hi_prec_chroma_rec0,
|
||||
const int hi_prec_chroma_rec1,
|
||||
int height,
|
||||
int width,
|
||||
int ypos,
|
||||
int xpos,
|
||||
const hi_prec_buf_t*high_precision_rec0,
|
||||
const hi_prec_buf_t*high_precision_rec1,
|
||||
lcu_t* lcu,
|
||||
kvz_pixel temp_lcu_y[LCU_WIDTH*LCU_WIDTH],
|
||||
kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C],
|
||||
kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C],
|
||||
bool predict_luma,
|
||||
bool predict_chroma);
|
||||
typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
|
||||
const yuv_t *const px_L0,
|
||||
const yuv_t *const px_L1,
|
||||
const yuv_ip_t *const ip_L0,
|
||||
const yuv_ip_t *const ip_L1,
|
||||
const unsigned pu_x,
|
||||
const unsigned pu_y,
|
||||
const unsigned pu_w,
|
||||
const unsigned pu_h,
|
||||
const unsigned ip_flags_L0,
|
||||
const unsigned ip_flags_L1,
|
||||
const bool predict_luma,
|
||||
const bool predict_chroma);
|
||||
|
||||
typedef double (pixel_var_func)(const kvz_pixel *buf, const uint32_t len);
|
||||
|
||||
|
|
Loading…
Reference in a new issue