mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Merge branch 'fme_1_16th_px'
This commit is contained in:
commit
b11eb32e9a
50
src/filter.c
50
src/filter.c
|
@ -46,24 +46,60 @@ const uint8_t kvz_g_beta_table_8x8[64] =
|
|||
58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88
|
||||
};
|
||||
|
||||
const int8_t kvz_g_luma_filter[4][8] =
|
||||
const int8_t kvz_g_luma_filter[16][8] =
|
||||
{
|
||||
{ 0, 0, 0, 64, 0, 0, 0, 0 },
|
||||
{ -1, 4, -10, 58, 17, -5, 1, 0 },
|
||||
{ -1, 4, -11, 40, 40, -11, 4, -1 },
|
||||
{ 0, 1, -5, 17, 58, -10, 4, -1 }
|
||||
{ 0, 0, 0, 64, 0, 0, 0, 0 },
|
||||
{ 0, 1, -3, 63, 4, -2, 1, 0 },
|
||||
{ -1, 2, -5, 62, 8, -3, 1, 0 },
|
||||
{ -1, 3, -8, 60, 13, -4, 1, 0 },
|
||||
{ -1, 4, -10, 58, 17, -5, 1, 0 }, //1/4
|
||||
{ -1, 4, -11, 52, 26, -8, 3, -1 },
|
||||
{ -1, 3, -9, 47, 31, -10, 4, -1 },
|
||||
{ -1, 4, -11, 45, 34, -10, 4, -1 },
|
||||
{ -1, 4, -11, 40, 40, -11, 4, -1 }, //1/2
|
||||
{ -1, 4, -10, 34, 45, -11, 4, -1 },
|
||||
{ -1, 4, -10, 31, 47, -9, 3, -1 },
|
||||
{ -1, 3, -8, 26, 52, -11, 4, -1 },
|
||||
{ 0, 1, -5, 17, 58, -10, 4, -1 }, //3/4
|
||||
{ 0, 1, -4, 13, 60, -8, 3, -1 },
|
||||
{ 0, 1, -3, 8, 62, -5, 2, -1 },
|
||||
{ 0, 1, -2, 4, 63, -3, 1, 0 }
|
||||
};
|
||||
|
||||
const int8_t kvz_g_chroma_filter[8][4] =
|
||||
const int8_t kvz_g_chroma_filter[32][4] =
|
||||
{
|
||||
{ 0, 64, 0, 0 },
|
||||
{ -1, 63, 2, 0 },
|
||||
{ -2, 62, 4, 0 },
|
||||
{ -2, 60, 7, -1 },
|
||||
{ -2, 58, 10, -2 },
|
||||
{ -3, 57, 12, -2 },
|
||||
{ -4, 56, 14, -2 },
|
||||
{ -4, 55, 15, -2 },
|
||||
{ -4, 54, 16, -2 },
|
||||
{ -5, 53, 18, -2 },
|
||||
{ -6, 52, 20, -2 },
|
||||
{ -6, 49, 24, -3 },
|
||||
{ -6, 46, 28, -4 },
|
||||
{ -5, 44, 29, -4 },
|
||||
{ -4, 42, 30, -4 },
|
||||
{ -4, 39, 33, -4 },
|
||||
{ -4, 36, 36, -4 },
|
||||
{ -4, 33, 39, -4 },
|
||||
{ -4, 30, 42, -4 },
|
||||
{ -4, 29, 44, -5 },
|
||||
{ -4, 28, 46, -6 },
|
||||
{ -3, 24, 49, -6 },
|
||||
{ -2, 20, 52, -6 },
|
||||
{ -2, 18, 53, -5 },
|
||||
{ -2, 16, 54, -4 },
|
||||
{ -2, 10, 58, -2 }
|
||||
{ -2, 15, 55, -4 },
|
||||
{ -2, 14, 56, -4 },
|
||||
{ -2, 12, 57, -3 },
|
||||
{ -2, 10, 58, -2 },
|
||||
{ -1, 7, 60, -2 },
|
||||
{ 0, 4, 62, -2 },
|
||||
{ 0, 2, 63, -1 },
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
|
|
72
src/inter.c
72
src/inter.c
|
@ -62,8 +62,8 @@ static void inter_recon_frac_luma(const encoder_state_t *const state,
|
|||
const mv_t mv_param[2],
|
||||
lcu_t *lcu)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 3);
|
||||
int mv_frac_y = (mv_param[1] & 3);
|
||||
int mv_frac_x = (mv_param[0] & 15);
|
||||
int mv_frac_y = (mv_param[1] & 15);
|
||||
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
|
@ -77,8 +77,8 @@ static void inter_recon_frac_luma(const encoder_state_t *const state,
|
|||
.src_w = ref->width,
|
||||
.src_h = ref->height,
|
||||
.src_s = ref->stride,
|
||||
.blk_x = state->tile->offset_x + xpos + (mv_param[0] >> 2),
|
||||
.blk_y = state->tile->offset_y + ypos + (mv_param[1] >> 2),
|
||||
.blk_x = state->tile->offset_x + xpos + (mv_param[0] >> INTERNAL_MV_PREC),
|
||||
.blk_y = state->tile->offset_y + ypos + (mv_param[1] >> INTERNAL_MV_PREC),
|
||||
.blk_w = block_width,
|
||||
.blk_h = block_height,
|
||||
.pad_l = KVZ_LUMA_FILTER_OFFSET,
|
||||
|
@ -117,8 +117,8 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state,
|
|||
const mv_t mv_param[2],
|
||||
hi_prec_buf_t *hi_prec_out)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 3);
|
||||
int mv_frac_y = (mv_param[1] & 3);
|
||||
int mv_frac_x = (mv_param[0] & 15);
|
||||
int mv_frac_y = (mv_param[1] & 15);
|
||||
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
|
@ -132,8 +132,8 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state,
|
|||
.src_w = ref->width,
|
||||
.src_h = ref->height,
|
||||
.src_s = ref->stride,
|
||||
.blk_x = state->tile->offset_x + xpos + (mv_param[0] >> 2),
|
||||
.blk_y = state->tile->offset_y + ypos + (mv_param[1] >> 2),
|
||||
.blk_x = state->tile->offset_x + xpos + (mv_param[0] >> INTERNAL_MV_PREC),
|
||||
.blk_y = state->tile->offset_y + ypos + (mv_param[1] >> INTERNAL_MV_PREC),
|
||||
.blk_w = block_width,
|
||||
.blk_h = block_height,
|
||||
.pad_l = KVZ_LUMA_FILTER_OFFSET,
|
||||
|
@ -172,8 +172,8 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state,
|
|||
const mv_t mv_param[2],
|
||||
lcu_t *lcu)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 7);
|
||||
int mv_frac_y = (mv_param[1] & 7);
|
||||
int mv_frac_x = (mv_param[0] & 31);
|
||||
int mv_frac_y = (mv_param[1] & 31);
|
||||
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
|
@ -190,8 +190,8 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state,
|
|||
.src_w = ref->width / 2,
|
||||
.src_h = ref->height / 2,
|
||||
.src_s = ref->stride / 2,
|
||||
.blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> 3),
|
||||
.blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> 3),
|
||||
.blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> (INTERNAL_MV_PREC + 1)),
|
||||
.blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> (INTERNAL_MV_PREC + 1)),
|
||||
.blk_w = block_width / 2,
|
||||
.blk_h = block_height / 2,
|
||||
.pad_l = KVZ_CHROMA_FILTER_OFFSET,
|
||||
|
@ -244,8 +244,8 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state,
|
|||
const mv_t mv_param[2],
|
||||
hi_prec_buf_t *hi_prec_out)
|
||||
{
|
||||
int mv_frac_x = (mv_param[0] & 7);
|
||||
int mv_frac_y = (mv_param[1] & 7);
|
||||
int mv_frac_x = (mv_param[0] & 31);
|
||||
int mv_frac_y = (mv_param[1] & 31);
|
||||
|
||||
// Space for extrapolated pixels and the part from the picture.
|
||||
// Some extra for AVX2.
|
||||
|
@ -262,8 +262,8 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state,
|
|||
.src_w = ref->width / 2,
|
||||
.src_h = ref->height / 2,
|
||||
.src_s = ref->stride / 2,
|
||||
.blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> 3),
|
||||
.blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> 3),
|
||||
.blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> (INTERNAL_MV_PREC + 1) ),
|
||||
.blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> (INTERNAL_MV_PREC + 1) ),
|
||||
.blk_w = block_width / 2,
|
||||
.blk_h = block_height / 2,
|
||||
.pad_l = KVZ_CHROMA_FILTER_OFFSET,
|
||||
|
@ -372,9 +372,7 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
bool predict_luma,
|
||||
bool predict_chroma)
|
||||
{
|
||||
mv_t mv_param_qpel[2] = { mv_param[0], mv_param[1] };
|
||||
mv_t mv_param_fpel[2] = { mv_param[0], mv_param[1] };
|
||||
kvz_change_precision(INTERNAL_MV_PREC, 2, &mv_param_qpel[0], &mv_param_qpel[1]);
|
||||
|
||||
kvz_change_precision(INTERNAL_MV_PREC, 0, &mv_param_fpel[0], &mv_param_fpel[1]);
|
||||
|
||||
|
@ -394,7 +392,7 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
|
||||
// With 420, odd coordinates need interpolation.
|
||||
const int8_t fractional_chroma = (mv_in_pu.x & 1) || (mv_in_pu.y & 1);
|
||||
const int8_t fractional_luma = ((mv_param_qpel[0] & 3) || (mv_param_qpel[1] & 3));
|
||||
const int8_t fractional_luma = ((mv_param[0] & 15) || (mv_param[1] & 15));
|
||||
|
||||
// Generate prediction for luma.
|
||||
if (predict_luma) {
|
||||
|
@ -404,13 +402,13 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
inter_recon_frac_luma_hi(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param_qpel, hi_prec_out);
|
||||
mv_param, hi_prec_out);
|
||||
}
|
||||
else {
|
||||
inter_recon_frac_luma(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param_qpel, lcu);
|
||||
mv_param, lcu);
|
||||
}
|
||||
} else {
|
||||
// With an integer MV, copy pixels directly from the reference.
|
||||
|
@ -443,12 +441,12 @@ static void inter_recon_unipred(const encoder_state_t * const state,
|
|||
inter_recon_frac_chroma_hi(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param_qpel, hi_prec_out);
|
||||
mv_param, hi_prec_out);
|
||||
} else {
|
||||
inter_recon_frac_chroma(state, ref,
|
||||
pu_in_tile.x, pu_in_tile.y,
|
||||
width, height,
|
||||
mv_param_qpel, lcu);
|
||||
mv_param, lcu);
|
||||
}
|
||||
} else {
|
||||
// With an integer MV, copy pixels directly from the reference.
|
||||
|
@ -511,11 +509,11 @@ void kvz_inter_recon_bipred(const encoder_state_t * const state,
|
|||
kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C];
|
||||
kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C];
|
||||
|
||||
const int hi_prec_luma_rec0 = (mv_param[0][0]>>2) & 3 || (mv_param[0][1]>>2) & 3;
|
||||
const int hi_prec_luma_rec1 = (mv_param[1][0]>>2) & 3 || (mv_param[1][1]>>2) & 3;
|
||||
const int hi_prec_luma_rec0 = (mv_param[0][0] & 15) || (mv_param[0][1] & 15);
|
||||
const int hi_prec_luma_rec1 = (mv_param[1][0] & 15) || (mv_param[1][1] & 15);
|
||||
|
||||
const int hi_prec_chroma_rec0 = (mv_param[0][0]>>2) & 7 || (mv_param[0][1]>>2) & 7;
|
||||
const int hi_prec_chroma_rec1 = (mv_param[1][0]>>2) & 7 || (mv_param[1][1]>>2) & 7;
|
||||
const int hi_prec_chroma_rec0 = (mv_param[0][0] & 31) || (mv_param[0][1] & 31);
|
||||
const int hi_prec_chroma_rec1 = (mv_param[1][0] & 31) || (mv_param[1][1] & 31);
|
||||
|
||||
hi_prec_buf_t* high_precision_rec0 = 0;
|
||||
hi_prec_buf_t* high_precision_rec1 = 0;
|
||||
|
@ -1511,6 +1509,26 @@ void kvz_change_precision(int src, int dst, mv_t* hor, mv_t* ver) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Convert both components of a vector between two precisions, in place.
 *
 * The precisions are given as power-of-two exponents: the components are
 * scaled by 2^(dst - src).
 *
 * \param src  exponent of the precision the vector is currently stored in
 * \param dst  exponent of the precision to convert the vector to
 * \param mv   vector whose x and y members are overwritten with the result
 */
void kvz_change_precision_vector2d(int src, int dst, vector2d_t *mv) {
  const int shift = dst - src;

  if (shift >= 0) {
    // Scale with a multiplication instead of a left shift: the components
    // may be negative, and left-shifting a negative int is undefined
    // behavior in C. For non-negative values the result is identical.
    const int scale = 1 << shift;
    mv->x *= scale;
    mv->y *= scale;
  } else {
    // Reducing precision: round to the nearest representable value, with
    // exact halves going toward zero. The bias differs by sign so that
    // positive and negative inputs round symmetrically.
    // NOTE(review): the negative case relies on >> of a negative int being
    // an arithmetic shift, which is implementation-defined.
    const int right_shift = -shift;
    const int offset = 1 << (right_shift - 1);
    mv->x = mv->x >= 0 ? (mv->x + offset - 1) >> right_shift
                       : (mv->x + offset) >> right_shift;
    mv->y = mv->y >= 0 ? (mv->y + offset - 1) >> right_shift
                       : (mv->y + offset) >> right_shift;
  }
}
|
||||
|
||||
void kvz_round_precision(int src, int dst, mv_t* hor, mv_t* ver) {
|
||||
kvz_change_precision(src, dst, hor, ver);
|
||||
kvz_change_precision(dst, src, hor, ver);
|
||||
|
|
|
@ -42,6 +42,7 @@ typedef struct {
|
|||
} inter_merge_cand_t;
|
||||
|
||||
void kvz_change_precision(int src, int dst, mv_t* hor, mv_t* ver);
|
||||
void kvz_change_precision_vector2d(int src, int dst, vector2d_t* mv);
|
||||
void kvz_round_precision(int src, int dst, mv_t* hor, mv_t* ver);
|
||||
void kvz_round_precision_vector2d(int src, int dst, vector2d_t* mv);
|
||||
|
||||
|
|
14
src/rdo.c
14
src/rdo.c
|
@ -1753,7 +1753,7 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
int x,
|
||||
int y,
|
||||
int mv_shift,
|
||||
int16_t mv_cand[2][2],
|
||||
mv_t mv_cand[2][2],
|
||||
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
|
||||
int16_t num_cand,
|
||||
int32_t ref_idx,
|
||||
|
@ -1802,6 +1802,10 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
x - mv_cand[1][0],
|
||||
y - mv_cand[1][1],
|
||||
};
|
||||
|
||||
kvz_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd1);
|
||||
kvz_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2);
|
||||
|
||||
uint32_t cand1_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd1.x, mvd1.y);
|
||||
uint32_t cand2_cost = kvz_get_mvd_coding_cost_cabac(state, cabac, mvd2.x, mvd2.y);
|
||||
|
||||
|
@ -1881,12 +1885,8 @@ uint32_t kvz_calc_mvd_cost_cabac(const encoder_state_t * state,
|
|||
}
|
||||
|
||||
// Signal which candidate MV to use
|
||||
kvz_cabac_write_unary_max_symbol(
|
||||
cabac,
|
||||
&cabac->ctx.mvp_idx_model,
|
||||
cur_mv_cand,
|
||||
1,
|
||||
AMVP_MAX_NUM_CANDS - 1);
|
||||
cabac->cur_ctx = &(cabac->ctx.mvp_idx_model);
|
||||
CABAC_BIN(cabac, cur_mv_cand, "mvp_flag");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -272,10 +272,10 @@ static bool mv_in_merge(const inter_search_info_t *info, vector2d_t mv)
|
|||
for (int i = 0; i < info->num_merge_cand; ++i) {
|
||||
if (info->merge_cand[i].dir == 3) continue;
|
||||
const vector2d_t merge_mv = {
|
||||
(info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0] + 2) >> INTERNAL_MV_PREC,
|
||||
(info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] + 2) >> INTERNAL_MV_PREC
|
||||
info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0],
|
||||
info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1]
|
||||
};
|
||||
if (merge_mv.x == mv.x && merge_mv.y == mv.y) {
|
||||
if (merge_mv.x == mv.x * (1 << (INTERNAL_MV_PREC)) && merge_mv.y == mv.y * (1 << (INTERNAL_MV_PREC))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -356,19 +356,25 @@ static int select_mv_cand(const encoder_state_t *state,
|
|||
mvd_coding_cost = get_mvd_coding_cost;
|
||||
}
|
||||
|
||||
vector2d_t mvd = { mv_x - mv_cand[0][0], mv_y - mv_cand[0][1] };
|
||||
|
||||
kvz_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd);
|
||||
|
||||
uint32_t cand1_cost = mvd_coding_cost(
|
||||
state, &state->cabac,
|
||||
mv_x - mv_cand[0][0],
|
||||
mv_y - mv_cand[0][1]);
|
||||
mvd.x,
|
||||
mvd.y);
|
||||
|
||||
uint32_t cand2_cost;
|
||||
if (same_cand) {
|
||||
cand2_cost = cand1_cost;
|
||||
} else {
|
||||
vector2d_t mvd2 = { mv_x - mv_cand[1][0], mv_y - mv_cand[1][1] };
|
||||
kvz_change_precision_vector2d(INTERNAL_MV_PREC, 2, &mvd2);
|
||||
cand2_cost = mvd_coding_cost(
|
||||
state, &state->cabac,
|
||||
mv_x - mv_cand[1][0],
|
||||
mv_y - mv_cand[1][1]);
|
||||
mvd2.x,
|
||||
mvd2.y);
|
||||
}
|
||||
|
||||
if (cost_out) {
|
||||
|
@ -1684,9 +1690,6 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
for (int merge_idx = 0; merge_idx < info.num_merge_cand; ++merge_idx) {
|
||||
inter_merge_cand_t *cur_cand = &info.merge_cand[merge_idx];
|
||||
|
||||
if ((cur_cand->dir & 1 && (cur_cand->mv[0][0] & 3 || cur_cand->mv[0][1] & 3)) ||
|
||||
(cur_cand->dir & 2 && (cur_cand->mv[1][0] & 3 || cur_cand->mv[1][1] & 3))) continue;
|
||||
|
||||
cur_cu->inter.mv_dir = cur_cand->dir;
|
||||
cur_cu->inter.mv_ref[0] = cur_cand->ref[0];
|
||||
cur_cu->inter.mv_ref[1] = cur_cand->ref[1];
|
||||
|
|
|
@ -38,8 +38,8 @@
|
|||
#include "strategies/generic/ipol-generic.h"
|
||||
|
||||
|
||||
extern int8_t kvz_g_luma_filter[4][8];
|
||||
extern int8_t kvz_g_chroma_filter[8][4];
|
||||
extern int8_t kvz_g_luma_filter[16][8];
|
||||
extern int8_t kvz_g_chroma_filter[32][4];
|
||||
|
||||
static int32_t kvz_eight_tap_filter_hor_avx2(int8_t *filter, kvz_pixel *data)
|
||||
{
|
||||
|
@ -635,7 +635,7 @@ static void kvz_filter_hpel_blocks_hor_ver_luma_avx2(const encoder_control_t * e
|
|||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir0 = kvz_g_luma_filter[0];
|
||||
int8_t *fir2 = kvz_g_luma_filter[2];
|
||||
int8_t *fir2 = kvz_g_luma_filter[8];
|
||||
|
||||
int16_t dst_stride = LCU_WIDTH;
|
||||
int16_t hor_stride = LCU_WIDTH;
|
||||
|
@ -745,7 +745,7 @@ static void kvz_filter_hpel_blocks_diag_luma_avx2(const encoder_control_t * enco
|
|||
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
||||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir2 = kvz_g_luma_filter[2];
|
||||
int8_t *fir2 = kvz_g_luma_filter[8];
|
||||
|
||||
int16_t dst_stride = LCU_WIDTH;
|
||||
int16_t hor_stride = LCU_WIDTH;
|
||||
|
@ -826,9 +826,9 @@ static void kvz_filter_qpel_blocks_hor_ver_luma_avx2(const encoder_control_t * e
|
|||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir0 = kvz_g_luma_filter[0];
|
||||
int8_t *fir2 = kvz_g_luma_filter[2];
|
||||
int8_t *fir1 = kvz_g_luma_filter[1];
|
||||
int8_t *fir3 = kvz_g_luma_filter[3];
|
||||
int8_t *fir2 = kvz_g_luma_filter[8];
|
||||
int8_t *fir1 = kvz_g_luma_filter[4];
|
||||
int8_t *fir3 = kvz_g_luma_filter[12];
|
||||
|
||||
// Horizontal positions. Positions 0 and 2 have already been calculated in filtered.
|
||||
int16_t *hor_pos0 = hor_intermediate[0];
|
||||
|
@ -1004,8 +1004,8 @@ static void kvz_filter_qpel_blocks_diag_luma_avx2(const encoder_control_t * enco
|
|||
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
||||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir1 = kvz_g_luma_filter[1];
|
||||
int8_t *fir3 = kvz_g_luma_filter[3];
|
||||
int8_t *fir1 = kvz_g_luma_filter[4];
|
||||
int8_t *fir3 = kvz_g_luma_filter[12];
|
||||
|
||||
int16_t *hor_pos_l = hor_intermediate[3];
|
||||
int16_t *hor_pos_r = hor_intermediate[4];
|
||||
|
@ -1137,8 +1137,8 @@ static void kvz_sample_quarterpel_luma_avx2(const encoder_control_t * const enco
|
|||
const mv_t mv[2])
|
||||
{
|
||||
// TODO: horizontal and vertical only filtering
|
||||
int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 3];
|
||||
int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 3];
|
||||
int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 15];
|
||||
int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 15];
|
||||
|
||||
// Buffer for intermediate values with one extra row
|
||||
// because the loop writes two rows each iteration.
|
||||
|
@ -1162,8 +1162,8 @@ static void kvz_sample_quarterpel_luma_hi_avx2(const encoder_control_t * const e
|
|||
const mv_t mv[2])
|
||||
{
|
||||
// TODO: horizontal and vertical only filtering
|
||||
int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 3];
|
||||
int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 3];
|
||||
int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 15];
|
||||
int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 15];
|
||||
|
||||
// Buffer for intermediate values with one extra row
|
||||
// because the loop writes two rows each iteration.
|
||||
|
@ -1191,8 +1191,8 @@ static void kvz_sample_octpel_chroma_avx2(const encoder_control_t *const encoder
|
|||
kvz_sample_octpel_chroma_generic(encoder, src, src_stride, width, height, dst, dst_stride, hor_flag, ver_flag, mv);
|
||||
return;
|
||||
}
|
||||
int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 7];
|
||||
int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 7];
|
||||
int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 31];
|
||||
int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 31];
|
||||
|
||||
// Buffer for intermediate values with 3 extra rows
|
||||
// because the loop writes four rows each iteration.
|
||||
|
@ -1219,8 +1219,8 @@ static void kvz_sample_octpel_chroma_hi_avx2(const encoder_control_t *const enco
|
|||
kvz_sample_octpel_chroma_hi_generic(encoder, src, src_stride, width, height, dst, dst_stride, hor_flag, ver_flag, mv);
|
||||
return;
|
||||
}
|
||||
int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 7];
|
||||
int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 7];
|
||||
int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 31];
|
||||
int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 31];
|
||||
|
||||
// Buffer for intermediate values with 3 extra rows
|
||||
// because the loop writes four rows each iteration.
|
||||
|
|
|
@ -28,8 +28,8 @@
|
|||
#include "strategies/strategies-ipol.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
extern int8_t kvz_g_luma_filter[4][8];
|
||||
extern int8_t kvz_g_chroma_filter[8][4];
|
||||
extern int8_t kvz_g_luma_filter[16][8];
|
||||
extern int8_t kvz_g_chroma_filter[32][4];
|
||||
|
||||
int32_t kvz_eight_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data)
|
||||
{
|
||||
|
@ -133,8 +133,8 @@ void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder,
|
|||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
// Select filters according to the fractional part of the x and y mv components
|
||||
int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 3];
|
||||
int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 3];
|
||||
int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 15];
|
||||
int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 15];
|
||||
|
||||
int16_t hor_filtered[KVZ_EXT_BLOCK_W_LUMA][LCU_WIDTH];
|
||||
int16_t hor_stride = LCU_WIDTH;
|
||||
|
@ -166,8 +166,8 @@ void kvz_sample_quarterpel_luma_hi_generic(const encoder_control_t * const encod
|
|||
int32_t shift2 = 6;
|
||||
|
||||
// Select filters according to the fractional part of the x and y mv components
|
||||
int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 3];
|
||||
int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 3];
|
||||
int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 15];
|
||||
int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 15];
|
||||
|
||||
int16_t hor_filtered[KVZ_EXT_BLOCK_W_LUMA][LCU_WIDTH];
|
||||
int16_t hor_stride = LCU_WIDTH;
|
||||
|
@ -210,7 +210,7 @@ void kvz_filter_hpel_blocks_hor_ver_luma_generic(const encoder_control_t * encod
|
|||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir0 = kvz_g_luma_filter[0];
|
||||
int8_t *fir2 = kvz_g_luma_filter[2];
|
||||
int8_t *fir2 = kvz_g_luma_filter[8];
|
||||
|
||||
int16_t dst_stride = LCU_WIDTH;
|
||||
int16_t hor_stride = LCU_WIDTH;
|
||||
|
@ -324,7 +324,7 @@ void kvz_filter_hpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
|
|||
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
||||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir2 = kvz_g_luma_filter[2];
|
||||
int8_t *fir2 = kvz_g_luma_filter[8];
|
||||
|
||||
int16_t dst_stride = LCU_WIDTH;
|
||||
int16_t hor_stride = LCU_WIDTH;
|
||||
|
@ -407,9 +407,9 @@ void kvz_filter_qpel_blocks_hor_ver_luma_generic(const encoder_control_t * encod
|
|||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir0 = kvz_g_luma_filter[0];
|
||||
int8_t *fir2 = kvz_g_luma_filter[2];
|
||||
int8_t *fir1 = kvz_g_luma_filter[1];
|
||||
int8_t *fir3 = kvz_g_luma_filter[3];
|
||||
int8_t *fir2 = kvz_g_luma_filter[8];
|
||||
int8_t *fir1 = kvz_g_luma_filter[4];
|
||||
int8_t *fir3 = kvz_g_luma_filter[12];
|
||||
|
||||
// Horizontal positions. Positions 0 and 2 have already been calculated in filtered.
|
||||
int16_t *hor_pos0 = hor_intermediate[0];
|
||||
|
@ -565,8 +565,8 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
|
|||
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
||||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
int8_t *fir1 = kvz_g_luma_filter[1];
|
||||
int8_t *fir3 = kvz_g_luma_filter[3];
|
||||
int8_t *fir1 = kvz_g_luma_filter[4];
|
||||
int8_t *fir3 = kvz_g_luma_filter[12];
|
||||
|
||||
// Horizontal positions.
|
||||
int16_t *hor_pos_l = hor_intermediate[3];
|
||||
|
@ -671,8 +671,8 @@ void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, k
|
|||
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
||||
|
||||
// Select filters according to the fractional part of the x and y mv components
|
||||
int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 7];
|
||||
int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 7];
|
||||
int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 31];
|
||||
int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 31];
|
||||
|
||||
int16_t hor_filtered[KVZ_EXT_BLOCK_W_CHROMA][LCU_WIDTH_C];
|
||||
int16_t hor_stride = LCU_WIDTH_C;
|
||||
|
@ -704,8 +704,8 @@ void kvz_sample_octpel_chroma_hi_generic(const encoder_control_t * const encoder
|
|||
int32_t shift2 = 6;
|
||||
|
||||
// Select filters according to the fractional part of the x and y mv components
|
||||
int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 7];
|
||||
int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 7];
|
||||
int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 31];
|
||||
int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 31];
|
||||
|
||||
int16_t hor_filtered[KVZ_EXT_BLOCK_W_CHROMA][LCU_WIDTH_C];
|
||||
int16_t hor_stride = LCU_WIDTH_C;
|
||||
|
|
|
@ -10,4 +10,4 @@ common_args='264x128 10 yuv420p -p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --pu-de
|
|||
valgrind_test $common_args --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3
|
||||
valgrind_test $common_args --no-rdoq --no-signhide --subme=0
|
||||
valgrind_test $common_args --rdoq --no-deblock --no-sao --subme=0
|
||||
valgrind_test $common_args --gop=8 --subme=0
|
||||
valgrind_test $common_args --gop=8 --subme=4
|
||||
|
|
Loading…
Reference in a new issue