mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
Merge branch 'interpolation-2021'
This commit is contained in:
commit
c36d423a8c
59
src/image.c
59
src/image.c
|
@ -477,33 +477,46 @@ unsigned kvz_image_calc_satd(const kvz_picture *pic,
|
|||
ref->stride) >> (KVZ_BIT_DEPTH - 8);
|
||||
} else {
|
||||
// Extrapolate pixels from outside the frame.
|
||||
kvz_extended_block block;
|
||||
kvz_get_extended_block(pic_x,
|
||||
pic_y,
|
||||
ref_x - pic_x,
|
||||
ref_y - pic_y,
|
||||
0,
|
||||
0,
|
||||
ref->y,
|
||||
ref->width,
|
||||
ref->height,
|
||||
0,
|
||||
block_width,
|
||||
block_height,
|
||||
&block);
|
||||
|
||||
// Space for extrapolated pixels and the part from the picture
|
||||
// The extrapolation function will set the pointers and stride.
|
||||
kvz_pixel ext_buffer[LCU_LUMA_SIZE];
|
||||
kvz_pixel *ext = NULL;
|
||||
kvz_pixel *ext_origin = NULL;
|
||||
int ext_s = 0;
|
||||
kvz_epol_args epol_args = {
|
||||
.src = ref->y,
|
||||
.src_w = ref->width,
|
||||
.src_h = ref->height,
|
||||
.src_s = ref->stride,
|
||||
.blk_x = ref_x,
|
||||
.blk_y = ref_y,
|
||||
.blk_w = block_width,
|
||||
.blk_h = block_height,
|
||||
.pad_l = 0,
|
||||
.pad_r = 0,
|
||||
.pad_t = 0,
|
||||
.pad_b = 0,
|
||||
.pad_b_simd = 0,
|
||||
};
|
||||
|
||||
// Initialize separately. Gets rid of warning
|
||||
// about using nonstandard extension.
|
||||
epol_args.buf = ext_buffer;
|
||||
epol_args.ext = &ext;
|
||||
epol_args.ext_origin = &ext_origin;
|
||||
epol_args.ext_s = &ext_s;
|
||||
|
||||
kvz_get_extended_block(&epol_args);
|
||||
|
||||
const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x];
|
||||
|
||||
unsigned satd = kvz_satd_any_size(block_width,
|
||||
block_height,
|
||||
pic_data,
|
||||
pic->stride,
|
||||
block.buffer,
|
||||
block.stride) >> (KVZ_BIT_DEPTH - 8);
|
||||
|
||||
if (block.malloc_used) {
|
||||
FREE_POINTER(block.buffer);
|
||||
}
|
||||
block_height,
|
||||
pic_data,
|
||||
pic->stride,
|
||||
ext_origin,
|
||||
ext_s) >> (KVZ_BIT_DEPTH - 8);
|
||||
|
||||
return satd;
|
||||
}
|
||||
|
|
406
src/inter.c
406
src/inter.c
|
@ -40,224 +40,258 @@ typedef struct {
|
|||
} merge_candidates_t;
|
||||
|
||||
|
||||
static void inter_recon_frac_luma(const encoder_state_t * const state,
|
||||
const kvz_picture * const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
lcu_t *lcu)
|
||||
static void inter_recon_frac_luma(const encoder_state_t *const state,
|
||||
const kvz_picture *const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
lcu_t *lcu)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 3);
|
||||
int mv_frac_y = (mv_param[1] & 3);
|
||||
|
||||
// Fractional luma 1/4-pel
|
||||
kvz_extended_block src = {0, 0, 0, 0};
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
// The extrapolation function will set the pointers and stride.
|
||||
kvz_pixel ext_buffer[KVZ_IPOL_MAX_INPUT_SIZE_LUMA_SIMD];
|
||||
kvz_pixel *ext = NULL;
|
||||
kvz_pixel *ext_origin = NULL;
|
||||
int ext_s = 0;
|
||||
kvz_epol_args epol_args = {
|
||||
.src = ref->y,
|
||||
.src_w = ref->width,
|
||||
.src_h = ref->height,
|
||||
.src_s = ref->stride,
|
||||
.blk_x = state->tile->offset_x + xpos + (mv_param[0] >> 2),
|
||||
.blk_y = state->tile->offset_y + ypos + (mv_param[1] >> 2),
|
||||
.blk_w = block_width,
|
||||
.blk_h = block_height,
|
||||
.pad_l = KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_r = KVZ_EXT_PADDING_LUMA - KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_t = KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_b = KVZ_EXT_PADDING_LUMA - KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_b_simd = 1 // One row for AVX2
|
||||
};
|
||||
|
||||
// Fractional luma
|
||||
kvz_get_extended_block(xpos,
|
||||
ypos,
|
||||
mv_param[0] >> 2,
|
||||
mv_param[1] >> 2,
|
||||
state->tile->offset_x,
|
||||
state->tile->offset_y,
|
||||
ref->y,
|
||||
ref->width,
|
||||
ref->height,
|
||||
KVZ_LUMA_FILTER_TAPS,
|
||||
block_width,
|
||||
block_height,
|
||||
&src);
|
||||
// Initialize separately. Gets rid of warning
|
||||
// about using nonstandard extension.
|
||||
epol_args.buf = ext_buffer;
|
||||
epol_args.ext = &ext;
|
||||
epol_args.ext_origin = &ext_origin;
|
||||
epol_args.ext_s = &ext_s;
|
||||
|
||||
kvz_get_extended_block(&epol_args);
|
||||
kvz_sample_quarterpel_luma(state->encoder_control,
|
||||
src.orig_topleft,
|
||||
src.stride,
|
||||
block_width,
|
||||
block_height,
|
||||
lcu->rec.y + (ypos % LCU_WIDTH) * LCU_WIDTH + (xpos % LCU_WIDTH),
|
||||
LCU_WIDTH,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
|
||||
if (src.malloc_used) free(src.buffer);
|
||||
ext_origin,
|
||||
ext_s,
|
||||
block_width,
|
||||
block_height,
|
||||
lcu->rec.y + (ypos % LCU_WIDTH) * LCU_WIDTH + (xpos % LCU_WIDTH),
|
||||
LCU_WIDTH,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
}
|
||||
|
||||
static void inter_recon_14bit_frac_luma(const encoder_state_t * const state,
|
||||
const kvz_picture * const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
hi_prec_buf_t *hi_prec_out)
|
||||
static void inter_recon_frac_luma_hi(const encoder_state_t *const state,
|
||||
const kvz_picture *const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
hi_prec_buf_t *hi_prec_out)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 3);
|
||||
int mv_frac_y = (mv_param[1] & 3);
|
||||
|
||||
// Fractional luma 1/4-pel
|
||||
kvz_extended_block src = { 0, 0, 0, 0 };
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
// The extrapolation function will set the pointers and stride.
|
||||
kvz_pixel ext_buffer[KVZ_IPOL_MAX_INPUT_SIZE_LUMA_SIMD];
|
||||
kvz_pixel *ext = NULL;
|
||||
kvz_pixel *ext_origin = NULL;
|
||||
int ext_s = 0;
|
||||
kvz_epol_args epol_args = {
|
||||
.src = ref->y,
|
||||
.src_w = ref->width,
|
||||
.src_h = ref->height,
|
||||
.src_s = ref->stride,
|
||||
.blk_x = state->tile->offset_x + xpos + (mv_param[0] >> 2),
|
||||
.blk_y = state->tile->offset_y + ypos + (mv_param[1] >> 2),
|
||||
.blk_w = block_width,
|
||||
.blk_h = block_height,
|
||||
.pad_l = KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_r = KVZ_EXT_PADDING_LUMA - KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_t = KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_b = KVZ_EXT_PADDING_LUMA - KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_b_simd = 1 // One row for AVX2
|
||||
};
|
||||
|
||||
// Fractional luma
|
||||
kvz_get_extended_block(xpos,
|
||||
ypos,
|
||||
mv_param[0] >> 2,
|
||||
mv_param[1] >> 2,
|
||||
state->tile->offset_x,
|
||||
state->tile->offset_y,
|
||||
ref->y,
|
||||
ref->width,
|
||||
ref->height,
|
||||
KVZ_LUMA_FILTER_TAPS,
|
||||
block_width,
|
||||
block_height,
|
||||
&src);
|
||||
kvz_sample_14bit_quarterpel_luma(state->encoder_control,
|
||||
src.orig_topleft,
|
||||
src.stride,
|
||||
block_width,
|
||||
block_height,
|
||||
hi_prec_out->y + (ypos % LCU_WIDTH) * LCU_WIDTH + (xpos % LCU_WIDTH),
|
||||
LCU_WIDTH,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
// Initialize separately. Gets rid of warning
|
||||
// about using nonstandard extension.
|
||||
epol_args.buf = ext_buffer;
|
||||
epol_args.ext = &ext;
|
||||
epol_args.ext_origin = &ext_origin;
|
||||
epol_args.ext_s = &ext_s;
|
||||
|
||||
if (src.malloc_used) free(src.buffer);
|
||||
kvz_get_extended_block(&epol_args);
|
||||
kvz_sample_quarterpel_luma_hi(state->encoder_control,
|
||||
ext_origin,
|
||||
ext_s,
|
||||
block_width,
|
||||
block_height,
|
||||
hi_prec_out->y + (ypos % LCU_WIDTH) * LCU_WIDTH + (xpos % LCU_WIDTH),
|
||||
LCU_WIDTH,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
}
|
||||
|
||||
static void inter_recon_frac_chroma(const encoder_state_t * const state,
|
||||
const kvz_picture * const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
lcu_t *lcu)
|
||||
static void inter_recon_frac_chroma(const encoder_state_t *const state,
|
||||
const kvz_picture *const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
lcu_t *lcu)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 7);
|
||||
int mv_frac_y = (mv_param[1] & 7);
|
||||
|
||||
// Translate to chroma
|
||||
xpos >>= 1;
|
||||
ypos >>= 1;
|
||||
block_width >>= 1;
|
||||
block_height >>= 1;
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
// The extrapolation function will set the pointers and stride.
|
||||
kvz_pixel ext_buffer[KVZ_IPOL_MAX_INPUT_SIZE_CHROMA_SIMD];
|
||||
kvz_pixel *ext = NULL;
|
||||
kvz_pixel *ext_origin = NULL;
|
||||
int ext_s = 0;
|
||||
|
||||
// Fractional chroma 1/8-pel
|
||||
kvz_extended_block src_u = { 0, 0, 0, 0 };
|
||||
kvz_extended_block src_v = { 0, 0, 0, 0 };
|
||||
// Chroma U
|
||||
// Divisions by 2 due to 4:2:0 chroma subsampling
|
||||
kvz_epol_args epol_args = {
|
||||
.src = ref->u,
|
||||
.src_w = ref->width / 2,
|
||||
.src_h = ref->height / 2,
|
||||
.src_s = ref->stride / 2,
|
||||
.blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> 3),
|
||||
.blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> 3),
|
||||
.blk_w = block_width / 2,
|
||||
.blk_h = block_height / 2,
|
||||
.pad_l = KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_r = KVZ_EXT_PADDING_CHROMA - KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_t = KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_b = KVZ_EXT_PADDING_CHROMA - KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_b_simd = 3 // Three rows for AVX2
|
||||
};
|
||||
|
||||
//Fractional chroma U
|
||||
kvz_get_extended_block(xpos, ypos,
|
||||
(mv_param[0] >> 2) >> 1,
|
||||
(mv_param[1] >> 2) >> 1,
|
||||
state->tile->offset_x >> 1,
|
||||
state->tile->offset_y >> 1,
|
||||
ref->u,
|
||||
ref->width >> 1,
|
||||
ref->height >> 1,
|
||||
KVZ_CHROMA_FILTER_TAPS,
|
||||
block_width,
|
||||
block_height,
|
||||
&src_u);
|
||||
kvz_sample_octpel_chroma(state->encoder_control, src_u.orig_topleft, src_u.stride, block_width,
|
||||
block_height, lcu->rec.u + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);
|
||||
// Initialize separately. Gets rid of warning
|
||||
// about using nonstandard extension.
|
||||
epol_args.buf = ext_buffer;
|
||||
epol_args.ext = &ext;
|
||||
epol_args.ext_origin = &ext_origin;
|
||||
epol_args.ext_s = &ext_s;
|
||||
|
||||
//Fractional chroma V
|
||||
kvz_get_extended_block(xpos, ypos,
|
||||
(mv_param[0] >> 2) >> 1,
|
||||
(mv_param[1] >> 2) >> 1,
|
||||
state->tile->offset_x >> 1,
|
||||
state->tile->offset_y >> 1,
|
||||
ref->v,
|
||||
ref->width >> 1,
|
||||
ref->height >> 1,
|
||||
KVZ_CHROMA_FILTER_TAPS,
|
||||
block_width,
|
||||
block_height,
|
||||
&src_v);
|
||||
kvz_sample_octpel_chroma(state->encoder_control, src_v.orig_topleft, src_v.stride, block_width,
|
||||
block_height, lcu->rec.v + (ypos % LCU_WIDTH_C) * LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);
|
||||
kvz_get_extended_block(&epol_args);
|
||||
kvz_sample_octpel_chroma(state->encoder_control,
|
||||
ext_origin,
|
||||
ext_s,
|
||||
block_width / 2,
|
||||
block_height / 2,
|
||||
lcu->rec.u + ((ypos / 2) % LCU_WIDTH_C) * LCU_WIDTH_C + ((xpos / 2) % LCU_WIDTH_C),
|
||||
LCU_WIDTH_C,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
|
||||
if (src_u.malloc_used) free(src_u.buffer);
|
||||
if (src_v.malloc_used) free(src_v.buffer);
|
||||
// Chroma V
|
||||
epol_args.src = ref->v;
|
||||
kvz_get_extended_block(&epol_args);
|
||||
kvz_sample_octpel_chroma(state->encoder_control,
|
||||
ext_origin,
|
||||
ext_s,
|
||||
block_width / 2,
|
||||
block_height / 2,
|
||||
lcu->rec.v + ((ypos / 2) % LCU_WIDTH_C) * LCU_WIDTH_C + ((xpos / 2) % LCU_WIDTH_C),
|
||||
LCU_WIDTH_C,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
}
|
||||
|
||||
static void inter_recon_14bit_frac_chroma(const encoder_state_t * const state,
|
||||
const kvz_picture * const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
hi_prec_buf_t *hi_prec_out)
|
||||
static void inter_recon_frac_chroma_hi(const encoder_state_t *const state,
|
||||
const kvz_picture *const ref,
|
||||
int32_t xpos,
|
||||
int32_t ypos,
|
||||
int32_t block_width,
|
||||
int32_t block_height,
|
||||
const int16_t mv_param[2],
|
||||
hi_prec_buf_t *hi_prec_out)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 7);
|
||||
int mv_frac_y = (mv_param[1] & 7);
|
||||
|
||||
// Translate to chroma
|
||||
xpos >>= 1;
|
||||
ypos >>= 1;
|
||||
block_width >>= 1;
|
||||
block_height >>= 1;
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
// The extrapolation function will set the pointers and stride.
|
||||
kvz_pixel ext_buffer[KVZ_IPOL_MAX_INPUT_SIZE_CHROMA_SIMD];
|
||||
kvz_pixel *ext = NULL;
|
||||
kvz_pixel *ext_origin = NULL;
|
||||
int ext_s = 0;
|
||||
|
||||
// Fractional chroma 1/8-pel
|
||||
kvz_extended_block src_u = { 0, 0, 0, 0 };
|
||||
kvz_extended_block src_v = { 0, 0, 0, 0 };
|
||||
// Chroma U
|
||||
// Divisions by 2 due to 4:2:0 chroma subsampling
|
||||
kvz_epol_args epol_args = {
|
||||
.src = ref->u,
|
||||
.src_w = ref->width / 2,
|
||||
.src_h = ref->height / 2,
|
||||
.src_s = ref->stride / 2,
|
||||
.blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> 3),
|
||||
.blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> 3),
|
||||
.blk_w = block_width / 2,
|
||||
.blk_h = block_height / 2,
|
||||
.pad_l = KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_r = KVZ_EXT_PADDING_CHROMA - KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_t = KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_b = KVZ_EXT_PADDING_CHROMA - KVZ_CHROMA_FILTER_OFFSET,
|
||||
.pad_b_simd = 3 // Three rows for AVX2
|
||||
};
|
||||
|
||||
//Fractional chroma U
|
||||
kvz_get_extended_block(xpos,
|
||||
ypos,
|
||||
(mv_param[0] >> 2) >> 1,
|
||||
(mv_param[1] >> 2) >> 1,
|
||||
state->tile->offset_x >> 1,
|
||||
state->tile->offset_y >> 1,
|
||||
ref->u,
|
||||
ref->width >> 1,
|
||||
ref->height >> 1,
|
||||
KVZ_CHROMA_FILTER_TAPS,
|
||||
block_width,
|
||||
block_height,
|
||||
&src_u);
|
||||
kvz_sample_14bit_octpel_chroma(state->encoder_control,
|
||||
src_u.orig_topleft,
|
||||
src_u.stride,
|
||||
block_width,
|
||||
block_height,
|
||||
hi_prec_out->u + (ypos % LCU_WIDTH_C) * LCU_WIDTH_C + (xpos % LCU_WIDTH_C),
|
||||
LCU_WIDTH_C,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
// Initialize separately. Gets rid of warning
|
||||
// about using nonstandard extension.
|
||||
epol_args.buf = ext_buffer;
|
||||
epol_args.ext = &ext;
|
||||
epol_args.ext_origin = &ext_origin;
|
||||
epol_args.ext_s = &ext_s;
|
||||
|
||||
//Fractional chroma V
|
||||
kvz_get_extended_block(xpos,
|
||||
ypos,
|
||||
(mv_param[0] >> 2) >> 1,
|
||||
(mv_param[1] >> 2) >> 1,
|
||||
state->tile->offset_x >> 1,
|
||||
state->tile->offset_y >> 1,
|
||||
ref->v,
|
||||
ref->width >> 1,
|
||||
ref->height >> 1,
|
||||
KVZ_CHROMA_FILTER_TAPS,
|
||||
block_width,
|
||||
block_height,
|
||||
&src_v);
|
||||
kvz_sample_14bit_octpel_chroma(state->encoder_control,
|
||||
src_v.orig_topleft,
|
||||
src_v.stride,
|
||||
block_width,
|
||||
block_height,
|
||||
hi_prec_out->v + (ypos % LCU_WIDTH_C) * LCU_WIDTH_C + (xpos % LCU_WIDTH_C),
|
||||
LCU_WIDTH_C,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
kvz_get_extended_block(&epol_args);
|
||||
kvz_sample_octpel_chroma_hi(state->encoder_control,
|
||||
ext_origin,
|
||||
ext_s,
|
||||
block_width / 2,
|
||||
block_height / 2,
|
||||
hi_prec_out->u + ((ypos / 2) % LCU_WIDTH_C) * LCU_WIDTH_C + ((xpos / 2) % LCU_WIDTH_C),
|
||||
LCU_WIDTH_C,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
|
||||
if (src_u.malloc_used) free(src_u.buffer);
|
||||
if (src_v.malloc_used) free(src_v.buffer);
|
||||
// Chroma V
|
||||
epol_args.src = ref->v;
|
||||
kvz_get_extended_block(&epol_args);
|
||||
kvz_sample_octpel_chroma_hi(state->encoder_control,
|
||||
ext_origin,
|
||||
ext_s,
|
||||
block_width / 2,
|
||||
block_height / 2,
|
||||
hi_prec_out->v + ((ypos / 2) % LCU_WIDTH_C) * LCU_WIDTH_C + ((xpos / 2) % LCU_WIDTH_C),
|
||||
LCU_WIDTH_C,
|
||||
mv_frac_x,
|
||||
mv_frac_y,
|
||||
mv_param);
|
||||
}
|
||||
|
||||
|
||||
|
@ -348,7 +382,7 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
if (fractional_luma) {
|
||||
// With a fractional MV, do interpolation.
|
||||
if (state->encoder_control->cfg.bipred && hi_prec_out) {
|
||||
inter_recon_14bit_frac_luma(state, ref,
|
||||
inter_recon_frac_luma_hi(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param, hi_prec_out);
|
||||
|
@ -388,7 +422,7 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
if (fractional_luma || fractional_chroma) {
|
||||
// With a fractional MV, do interpolation.
|
||||
if (state->encoder_control->cfg.bipred && hi_prec_out) {
|
||||
inter_recon_14bit_frac_chroma(state, ref,
|
||||
inter_recon_frac_chroma_hi(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param, hi_prec_out);
|
||||
|
|
|
@ -992,12 +992,11 @@ static void search_frac(inter_search_info_t *info)
|
|||
|
||||
unsigned costs[4] = { 0 };
|
||||
|
||||
kvz_extended_block src = { 0, 0, 0, 0 };
|
||||
ALIGNED(64) kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH];
|
||||
ALIGNED(64) kvz_pixel filtered[4][LCU_LUMA_SIZE];
|
||||
|
||||
// Storage buffers for intermediate horizontally filtered results.
|
||||
// Have the first columns in contiguous memory for vectorization.
|
||||
ALIGNED(64) int16_t intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH];
|
||||
ALIGNED(64) int16_t intermediate[5][KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD];
|
||||
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1];
|
||||
|
||||
const kvz_picture *ref = info->ref;
|
||||
|
@ -1013,20 +1012,45 @@ static void search_frac(inter_search_info_t *info)
|
|||
int8_t sample_off_x = 0;
|
||||
int8_t sample_off_y = 0;
|
||||
|
||||
kvz_get_extended_block(orig.x, orig.y, mv.x - 1, mv.y - 1,
|
||||
state->tile->offset_x,
|
||||
state->tile->offset_y,
|
||||
ref->y, ref->width, ref->height, KVZ_LUMA_FILTER_TAPS,
|
||||
internal_width+1, internal_height+1,
|
||||
&src);
|
||||
// Space for (possibly) extrapolated pixels and the part from the picture
|
||||
// One extra row and column compared to normal interpolation and some extra for AVX2.
|
||||
// The extrapolation function will set the pointers and stride.
|
||||
kvz_pixel ext_buffer[KVZ_FME_MAX_INPUT_SIZE_SIMD];
|
||||
kvz_pixel *ext = NULL;
|
||||
kvz_pixel *ext_origin = NULL;
|
||||
int ext_s = 0;
|
||||
kvz_epol_args epol_args = {
|
||||
.src = ref->y,
|
||||
.src_w = ref->width,
|
||||
.src_h = ref->height,
|
||||
.src_s = ref->stride,
|
||||
.blk_x = state->tile->offset_x + orig.x + mv.x - 1,
|
||||
.blk_y = state->tile->offset_y + orig.y + mv.y - 1,
|
||||
.blk_w = internal_width + 1, // TODO: real width
|
||||
.blk_h = internal_height + 1, // TODO: real height
|
||||
.pad_l = KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_r = KVZ_EXT_PADDING_LUMA - KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_t = KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_b = KVZ_EXT_PADDING_LUMA - KVZ_LUMA_FILTER_OFFSET,
|
||||
.pad_b_simd = 0 // AVX2 padding unnecessary because of blk_h
|
||||
};
|
||||
|
||||
// Initialize separately. Gets rid of warning
|
||||
// about using nonstandard extension.
|
||||
epol_args.buf = ext_buffer;
|
||||
epol_args.ext = &ext;
|
||||
epol_args.ext_origin = &ext_origin;
|
||||
epol_args.ext_s = &ext_s;
|
||||
|
||||
kvz_get_extended_block(&epol_args);
|
||||
|
||||
kvz_pixel *tmp_pic = pic->y + orig.y * pic->stride + orig.x;
|
||||
int tmp_stride = pic->stride;
|
||||
|
||||
// Search integer position
|
||||
costs[0] = kvz_satd_any_size(width, height,
|
||||
tmp_pic, tmp_stride,
|
||||
src.orig_topleft + src.stride + 1, src.stride);
|
||||
tmp_pic, tmp_stride,
|
||||
ext_origin + ext_s + 1, ext_s);
|
||||
|
||||
costs[0] += info->mvd_cost_func(state,
|
||||
mv.x, mv.y, 2,
|
||||
|
@ -1056,8 +1080,8 @@ static void search_frac(inter_search_info_t *info)
|
|||
const int mv_shift = (step < 2) ? 1 : 0;
|
||||
|
||||
filter_steps[step](state->encoder_control,
|
||||
src.orig_topleft,
|
||||
src.stride,
|
||||
ext_origin,
|
||||
ext_s,
|
||||
internal_width,
|
||||
internal_height,
|
||||
filtered,
|
||||
|
@ -1131,8 +1155,6 @@ static void search_frac(inter_search_info_t *info)
|
|||
info->best_mv = mv;
|
||||
info->best_cost = best_cost;
|
||||
info->best_bitcost = best_bitcost;
|
||||
|
||||
if (src.malloc_used) free(src.buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -156,7 +156,7 @@ void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder,
|
|||
}
|
||||
}
|
||||
|
||||
void kvz_sample_14bit_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
|
||||
void kvz_sample_quarterpel_luma_hi_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
|
||||
{
|
||||
//TODO: horizontal and vertical only filtering
|
||||
int32_t x, y;
|
||||
|
@ -194,8 +194,8 @@ void kvz_filter_hpel_blocks_hor_ver_luma_generic(const encoder_control_t * encod
|
|||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
||||
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
||||
kvz_pixel filtered[4][LCU_LUMA_SIZE],
|
||||
int16_t hor_intermediate[5][KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD],
|
||||
int8_t fme_level,
|
||||
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
||||
int8_t hpel_off_x, int8_t hpel_off_y)
|
||||
|
@ -309,8 +309,8 @@ void kvz_filter_hpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
|
|||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
||||
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
||||
kvz_pixel filtered[4][LCU_LUMA_SIZE],
|
||||
int16_t hor_intermediate[5][KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD],
|
||||
int8_t fme_level,
|
||||
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
||||
int8_t hpel_off_x, int8_t hpel_off_y)
|
||||
|
@ -390,8 +390,8 @@ void kvz_filter_qpel_blocks_hor_ver_luma_generic(const encoder_control_t * encod
|
|||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
||||
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
||||
kvz_pixel filtered[4][LCU_LUMA_SIZE],
|
||||
int16_t hor_intermediate[5][KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD],
|
||||
int8_t fme_level,
|
||||
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
||||
int8_t hpel_off_x, int8_t hpel_off_y)
|
||||
|
@ -550,8 +550,8 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
|
|||
int16_t src_stride,
|
||||
int width,
|
||||
int height,
|
||||
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
||||
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
||||
kvz_pixel filtered[4][LCU_LUMA_SIZE],
|
||||
int16_t hor_intermediate[5][KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD],
|
||||
int8_t fme_level,
|
||||
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
||||
int8_t hpel_off_x, int8_t hpel_off_y)
|
||||
|
@ -694,7 +694,7 @@ void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, k
|
|||
}
|
||||
}
|
||||
|
||||
void kvz_sample_14bit_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
|
||||
void kvz_sample_octpel_chroma_hi_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
|
||||
{
|
||||
//TODO: horizontal and vertical only filtering
|
||||
int32_t x, y;
|
||||
|
@ -728,59 +728,55 @@ void kvz_sample_14bit_octpel_chroma_generic(const encoder_control_t * const enco
|
|||
}
|
||||
|
||||
|
||||
void kvz_get_extended_block_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
|
||||
int filter_size, int width, int height, kvz_extended_block *out) {
|
||||
void kvz_get_extended_block_generic(kvz_epol_args *args) {
|
||||
|
||||
int half_filter_size = filter_size >> 1;
|
||||
int min_y = args->blk_y - args->pad_t;
|
||||
int max_y = args->blk_y + args->blk_h + args->pad_b + args->pad_b_simd - 1;
|
||||
bool out_of_bounds_y = (min_y < 0) || (max_y >= args->src_h);
|
||||
|
||||
out->buffer = ref + (ypos - half_filter_size + off_y + mv_y) * ref_width + (xpos - half_filter_size + off_x + mv_x);
|
||||
out->stride = ref_width;
|
||||
out->orig_topleft = out->buffer + out->stride * half_filter_size + half_filter_size;
|
||||
out->malloc_used = 0;
|
||||
int min_x = args->blk_x - args->pad_l;
|
||||
int max_x = args->blk_x + args->blk_w + args->pad_r - 1;
|
||||
bool out_of_bounds_x = (min_x < 0) || (max_x >= args->src_w);
|
||||
|
||||
int min_y = ypos - half_filter_size + off_y + mv_y;
|
||||
int max_y = min_y + height + filter_size;
|
||||
int out_of_bounds_y = (min_y < 0) || (max_y >= ref_height);
|
||||
if (out_of_bounds_y || out_of_bounds_x) {
|
||||
|
||||
int min_x = xpos - half_filter_size + off_x + mv_x;
|
||||
int max_x = min_x + width + filter_size;
|
||||
int out_of_bounds_x = (min_x < 0) || (max_x >= ref_width);
|
||||
*args->ext = args->buf;
|
||||
*args->ext_s = args->pad_l + args->blk_w + args->pad_r;
|
||||
*args->ext_origin = args->buf + args->pad_t * (*args->ext_s) + args->pad_l;
|
||||
|
||||
int sample_out_of_bounds = out_of_bounds_y || out_of_bounds_x;
|
||||
// Note that stride equals width here.
|
||||
int cnt_l = CLIP(0, *args->ext_s, -min_x);
|
||||
int cnt_r = CLIP(0, *args->ext_s, max_x - (args->src_w - 1));
|
||||
int cnt_m = CLIP(0, *args->ext_s, *args->ext_s - cnt_l - cnt_r);
|
||||
|
||||
if (sample_out_of_bounds){
|
||||
out->buffer = MALLOC(kvz_pixel, (width + filter_size) * (height + filter_size));
|
||||
if (!out->buffer){
|
||||
fprintf(stderr, "Memory allocation failed!\n");
|
||||
assert(0);
|
||||
// For each row including real padding.
|
||||
// Don't read "don't care" values (SIMD padding). Zero them out.
|
||||
int y;
|
||||
for (y = -args->pad_t; y < args->blk_h + args->pad_b; ++y) {
|
||||
|
||||
int clipped_y = CLIP(0, args->src_h - 1, args->blk_y + y);
|
||||
kvz_pixel *sample_l = args->src + clipped_y * args->src_s;
|
||||
kvz_pixel *sample_r = args->src + clipped_y * args->src_s + args->src_w - 1;
|
||||
kvz_pixel *src_m = args->src + clipped_y * args->src_s + MAX(min_x, 0);
|
||||
kvz_pixel *dst_l = args->buf + (y + args->pad_t) * (*args->ext_s);
|
||||
kvz_pixel *dst_m = dst_l + cnt_l;
|
||||
kvz_pixel *dst_r = dst_m + cnt_m;
|
||||
for (int i = 0; i < cnt_l; ++i) *(dst_l + i) = *sample_l;
|
||||
for (int i = 0; i < cnt_m; ++i) *(dst_m + i) = *(src_m + i);
|
||||
for (int i = 0; i < cnt_r; ++i) *(dst_r + i) = *sample_r;
|
||||
}
|
||||
out->stride = width + filter_size;
|
||||
out->orig_topleft = out->buffer + out->stride * half_filter_size + half_filter_size;
|
||||
out->malloc_used = 1;
|
||||
|
||||
int dst_y; int y; int dst_x; int x; int coord_x; int coord_y;
|
||||
|
||||
for (dst_y = 0, y = ypos - half_filter_size; y < ((ypos + height)) + half_filter_size; dst_y++, y++) {
|
||||
|
||||
// calculate y-pixel offset
|
||||
coord_y = y + off_y + mv_y;
|
||||
coord_y = CLIP(0, (ref_height)-1, coord_y);
|
||||
coord_y *= ref_width;
|
||||
|
||||
if (!out_of_bounds_x){
|
||||
memcpy(&out->buffer[dst_y * out->stride + 0], &ref[coord_y + min_x], out->stride * sizeof(kvz_pixel));
|
||||
} else {
|
||||
for (dst_x = 0, x = (xpos)-half_filter_size; x < ((xpos + width)) + half_filter_size; dst_x++, x++) {
|
||||
|
||||
coord_x = x + off_x + mv_x;
|
||||
coord_x = CLIP(0, (ref_width)-1, coord_x);
|
||||
|
||||
// Store source block data (with extended borders)
|
||||
out->buffer[dst_y * out->stride + dst_x] = ref[coord_y + coord_x];
|
||||
}
|
||||
}
|
||||
for (int y_simd = 0; y_simd < args->pad_b_simd; ++y_simd) {
|
||||
kvz_pixel *dst = args->buf + (y + args->pad_t + y_simd) * (*args->ext_s);
|
||||
FILL_ARRAY(dst, 0, *args->ext_s);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
*args->ext = args->src + (args->blk_y - args->pad_t) * args->src_s + (args->blk_x - args->pad_l);
|
||||
*args->ext_origin = args->src + args->blk_y * args->src_s + args->blk_x;
|
||||
*args->ext_s = args->src_s;
|
||||
}
|
||||
}
|
||||
|
||||
int kvz_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth)
|
||||
|
@ -793,8 +789,8 @@ int kvz_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth)
|
|||
success &= kvz_strategyselector_register(opaque, "filter_qpel_blocks_diag_luma", "generic", 0, &kvz_filter_qpel_blocks_diag_luma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_quarterpel_luma", "generic", 0, &kvz_sample_quarterpel_luma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_octpel_chroma", "generic", 0, &kvz_sample_octpel_chroma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_14bit_quarterpel_luma", "generic", 0, &kvz_sample_14bit_quarterpel_luma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_14bit_octpel_chroma", "generic", 0, &kvz_sample_14bit_octpel_chroma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_quarterpel_luma_hi", "generic", 0, &kvz_sample_quarterpel_luma_hi_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_octpel_chroma_hi", "generic", 0, &kvz_sample_octpel_chroma_hi_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "get_extended_block", "generic", 0, &kvz_get_extended_block_generic);
|
||||
|
||||
return success;
|
||||
|
|
|
@ -32,9 +32,9 @@
|
|||
|
||||
int kvz_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth);
|
||||
void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
void kvz_sample_14bit_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
void kvz_sample_quarterpel_luma_hi_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
void kvz_sample_14bit_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
void kvz_sample_octpel_chroma_hi_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
|
||||
|
||||
#endif //STRATEGIES_IPOL_GENERIC_H_
|
||||
|
|
|
@ -33,8 +33,8 @@ ipol_blocks_func * kvz_filter_qpel_blocks_diag_luma;
|
|||
epol_func *kvz_get_extended_block;
|
||||
kvz_sample_quarterpel_luma_func * kvz_sample_quarterpel_luma;
|
||||
kvz_sample_octpel_chroma_func * kvz_sample_octpel_chroma;
|
||||
kvz_sample_14bit_quarterpel_luma_func * kvz_sample_14bit_quarterpel_luma;
|
||||
kvz_sample_14bit_octpel_chroma_func * kvz_sample_14bit_octpel_chroma;
|
||||
kvz_sample_quarterpel_luma_hi_func * kvz_sample_quarterpel_luma_hi;
|
||||
kvz_sample_octpel_chroma_hi_func * kvz_sample_octpel_chroma_hi;
|
||||
|
||||
|
||||
int kvz_strategy_register_ipol(void* opaque, uint8_t bitdepth) {
|
||||
|
|
|
@ -31,21 +31,63 @@
|
|||
#include "kvazaar.h"
|
||||
#include "search_inter.h"
|
||||
|
||||
// AVX2 implementation of horizontal filter reads and
|
||||
// writes two rows for luma and four for chroma at a time.
|
||||
// Extra vertical padding is added to prevent segfaults.
|
||||
// Horizontal padding is not needed even if one extra byte
|
||||
// is read because kvz_image_alloc adds enough padding.
|
||||
// Maximum input sizes in samples. NOTE(review): the +1 (luma) and
// +3 (chroma) terms appear to be the extra vertical padding rows
// mentioned above -- confirm against the AVX2 implementation.
#define KVZ_IPOL_MAX_INPUT_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * KVZ_EXT_BLOCK_W_LUMA)
|
||||
#define KVZ_IPOL_MAX_INPUT_SIZE_CHROMA_SIMD ((KVZ_EXT_BLOCK_W_CHROMA + 3) * KVZ_EXT_BLOCK_W_CHROMA)
|
||||
// Same padded extent multiplied by LCU_WIDTH / LCU_WIDTH_C instead of
// the extended block width. The luma value is the per-buffer size of
// the hor_intermediate argument of ipol_blocks_func.
#define KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH)
|
||||
#define KVZ_IPOL_MAX_IM_SIZE_CHROMA_SIMD ((KVZ_EXT_BLOCK_W_CHROMA + 3) * LCU_WIDTH_C)
|
||||
|
||||
// On top of basic interpolation, FME needs one extra
|
||||
// column and row for ME (left and up). Adding the
|
||||
// extra row happens to satisfy AVX2 requirements for
|
||||
// row count. No other extra rows are needed.
|
||||
#define KVZ_FME_MAX_INPUT_SIZE_SIMD ((KVZ_EXT_BLOCK_W_LUMA + 1) * (KVZ_EXT_BLOCK_W_LUMA + 1))
|
||||
|
||||
typedef struct { kvz_pixel *buffer; kvz_pixel *orig_topleft; unsigned stride; unsigned malloc_used; } kvz_extended_block;
|
||||
|
||||
typedef void(ipol_blocks_func)(const encoder_control_t * encoder, kvz_pixel *src, int16_t src_stride, int width, int height,
|
||||
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH], int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH], int8_t fme_level, int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
||||
kvz_pixel filtered[4][LCU_LUMA_SIZE], int16_t hor_intermediate[5][KVZ_IPOL_MAX_IM_SIZE_LUMA_SIMD], int8_t fme_level, int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
||||
int8_t sample_off_x, int8_t sample_off_y);
|
||||
|
||||
typedef unsigned(epol_func)(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
|
||||
int filter_size, int width, int height, kvz_extended_block *out);
|
||||
// Argument bundle passed to an epol_func (e.g. kvz_get_extended_block).
// The caller describes the source plane, the requested block with its
// padding, and supplies a scratch buffer. The function reports its
// result through the ext, ext_origin and ext_s pointers.
typedef struct {
|
||||
// Source samples
|
||||
kvz_pixel *src; // Top-left sample
|
||||
int src_w; // Width
|
||||
int src_h; // Height
|
||||
int src_s; // Stride
|
||||
|
||||
// Requested sampling position, base dimensions, and padding
|
||||
int blk_x; // Horizontal sampling position (presumably in source sample coordinates -- confirm)
|
||||
int blk_y; // Vertical sampling position (presumably in source sample coordinates -- confirm)
|
||||
int blk_w; // Width
|
||||
int blk_h; // Height
|
||||
int pad_l; // Left
|
||||
int pad_r; // Right
|
||||
int pad_t; // Top
|
||||
int pad_b; // Bottom
|
||||
int pad_b_simd; // "Don't care" rows in the end. Zeroed out.
|
||||
|
||||
// Buffer for possible extrapolation. Free memory provided by the caller.
|
||||
kvz_pixel *buf;
|
||||
|
||||
// Extended block data. These are set by the function.
|
||||
kvz_pixel **ext; // Top-left sample with padding
|
||||
kvz_pixel **ext_origin; // Top-left sample without padding
|
||||
int *ext_s; // Stride
|
||||
} kvz_epol_args;
|
||||
|
||||
typedef void(epol_func)(kvz_epol_args *args);
|
||||
|
||||
|
||||
typedef void(kvz_sample_quarterpel_luma_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
typedef void(kvz_sample_octpel_chroma_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
|
||||
typedef void(kvz_sample_14bit_quarterpel_luma_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
typedef void(kvz_sample_14bit_octpel_chroma_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
typedef void(kvz_sample_quarterpel_luma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
typedef void(kvz_sample_octpel_chroma_hi_func)(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2]);
|
||||
|
||||
// Declare function pointers.
|
||||
extern ipol_blocks_func * kvz_filter_hpel_blocks_hor_ver_luma;
|
||||
|
@ -55,8 +97,8 @@ extern ipol_blocks_func * kvz_filter_qpel_blocks_diag_luma;
|
|||
extern epol_func * kvz_get_extended_block;
|
||||
extern kvz_sample_quarterpel_luma_func * kvz_sample_quarterpel_luma;
|
||||
extern kvz_sample_octpel_chroma_func * kvz_sample_octpel_chroma;
|
||||
extern kvz_sample_14bit_quarterpel_luma_func * kvz_sample_14bit_quarterpel_luma;
|
||||
extern kvz_sample_14bit_octpel_chroma_func * kvz_sample_14bit_octpel_chroma;
|
||||
extern kvz_sample_quarterpel_luma_hi_func * kvz_sample_quarterpel_luma_hi;
|
||||
extern kvz_sample_octpel_chroma_hi_func * kvz_sample_octpel_chroma_hi;
|
||||
|
||||
|
||||
int kvz_strategy_register_ipol(void* opaque, uint8_t bitdepth);
|
||||
|
@ -69,8 +111,8 @@ int kvz_strategy_register_ipol(void* opaque, uint8_t bitdepth);
|
|||
{"filter_qpel_blocks_diag_luma", (void**) &kvz_filter_qpel_blocks_diag_luma}, \
|
||||
{"sample_quarterpel_luma", (void**) &kvz_sample_quarterpel_luma}, \
|
||||
{"sample_octpel_chroma", (void**) &kvz_sample_octpel_chroma}, \
|
||||
{"sample_14bit_quarterpel_luma", (void**) &kvz_sample_14bit_quarterpel_luma}, \
|
||||
{"sample_14bit_octpel_chroma", (void**) &kvz_sample_14bit_octpel_chroma}, \
|
||||
{"sample_quarterpel_luma_hi", (void**) &kvz_sample_quarterpel_luma_hi}, \
|
||||
{"sample_octpel_chroma_hi", (void**) &kvz_sample_octpel_chroma_hi}, \
|
||||
{"get_extended_block", (void**) &kvz_get_extended_block}, \
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
race:kvz_eight_tap_filter_hor_8x1_avx2
|
||||
# AVX2 interpolation reads some extra pixels
|
||||
race:kvz_ipol_8tap_hor_px_im_avx2
|
||||
race:kvz_filter_hpel_blocks_hor_ver_luma_avx2
|
||||
race:kvz_eight_tap_filter_hor_avx2
|
||||
race:kvz_eight_tap_filter_hor_avx2
|
||||
|
|
Loading…
Reference in a new issue