From 64705a27ee27c1a77537e7525321cd5622680c9f Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 22 Nov 2021 16:04:57 +0200 Subject: [PATCH] [fme] Enable 1/16th luma pixel fme --- src/filter.c | 50 ++++++++++++++++++++++---- src/inter.c | 52 +++++++++++++-------------- src/search_inter.c | 9 ++--- src/strategies/avx2/ipol-avx2.c | 30 ++++++++-------- src/strategies/generic/ipol-generic.c | 34 +++++++++--------- 5 files changed, 103 insertions(+), 72 deletions(-) diff --git a/src/filter.c b/src/filter.c index 9eb7327d..cc517e8a 100644 --- a/src/filter.c +++ b/src/filter.c @@ -46,24 +46,60 @@ const uint8_t kvz_g_beta_table_8x8[64] = 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88 }; -const int8_t kvz_g_luma_filter[4][8] = +const int8_t kvz_g_luma_filter[16][8] = { - { 0, 0, 0, 64, 0, 0, 0, 0 }, - { -1, 4, -10, 58, 17, -5, 1, 0 }, - { -1, 4, -11, 40, 40, -11, 4, -1 }, - { 0, 1, -5, 17, 58, -10, 4, -1 } + { 0, 0, 0, 64, 0, 0, 0, 0 }, + { 0, 1, -3, 63, 4, -2, 1, 0 }, + { -1, 2, -5, 62, 8, -3, 1, 0 }, + { -1, 3, -8, 60, 13, -4, 1, 0 }, + { -1, 4, -10, 58, 17, -5, 1, 0 }, //1/4 + { -1, 4, -11, 52, 26, -8, 3, -1 }, + { -1, 3, -9, 47, 31, -10, 4, -1 }, + { -1, 4, -11, 45, 34, -10, 4, -1 }, + { -1, 4, -11, 40, 40, -11, 4, -1 }, //1/2 + { -1, 4, -10, 34, 45, -11, 4, -1 }, + { -1, 4, -10, 31, 47, -9, 3, -1 }, + { -1, 3, -8, 26, 52, -11, 4, -1 }, + { 0, 1, -5, 17, 58, -10, 4, -1 }, //3/4 + { 0, 1, -4, 13, 60, -8, 3, -1 }, + { 0, 1, -3, 8, 62, -5, 2, -1 }, + { 0, 1, -2, 4, 63, -3, 1, 0 } }; -const int8_t kvz_g_chroma_filter[8][4] = +const int8_t kvz_g_chroma_filter[32][4] = { { 0, 64, 0, 0 }, + { -1, 63, 2, 0 }, + { -2, 62, 4, 0 }, + { -2, 60, 7, -1 }, { -2, 58, 10, -2 }, + { -3, 57, 12, -2 }, + { -4, 56, 14, -2 }, + { -4, 55, 15, -2 }, { -4, 54, 16, -2 }, + { -5, 53, 18, -2 }, + { -6, 52, 20, -2 }, + { -6, 49, 24, -3 }, { -6, 46, 28, -4 }, + { -5, 44, 29, -4 }, + { -4, 42, 30, -4 }, + { -4, 39, 33, -4 }, { -4, 36, 36, -4 }, + { -4, 33, 39, -4 }, + { -4, 30, 42, -4 }, + { -4, 29, 44, -5 }, { -4, 28, 46, -6 }, + { -3, 24, 49, -6 }, + { -2, 20, 52, -6 }, + { -2, 18, 53, -5 }, { -2, 16, 54, -4 }, - { -2, 10, 58, -2 } + { -2, 15, 55, -4 }, + { -2, 14, 56, -4 }, + { -2, 12, 57, -3 }, + { -2, 10, 58, -2 }, + { -1, 7, 60, -2 }, + { 0, 4, 62, -2 }, + { 0, 2, 63, -1 }, }; ////////////////////////////////////////////////////////////////////////// diff --git a/src/inter.c b/src/inter.c index 24211d2b..91af7330 100644 --- a/src/inter.c +++ b/src/inter.c @@ -62,8 +62,8 @@ static void inter_recon_frac_luma(const encoder_state_t *const state, const mv_t mv_param[2], lcu_t *lcu) { - int mv_frac_x = (mv_param[0] & 3); - int mv_frac_y = (mv_param[1] & 3); + int mv_frac_x = (mv_param[0] & 15); + int mv_frac_y = (mv_param[1] & 15); // Space for extrapolated pixels and the part from the picture. // Some extra for AVX2. @@ -77,8 +77,8 @@ static void inter_recon_frac_luma(const encoder_state_t *const state, .src_w = ref->width, .src_h = ref->height, .src_s = ref->stride, - .blk_x = state->tile->offset_x + xpos + (mv_param[0] >> 2), - .blk_y = state->tile->offset_y + ypos + (mv_param[1] >> 2), + .blk_x = state->tile->offset_x + xpos + (mv_param[0] >> INTERNAL_MV_PREC), + .blk_y = state->tile->offset_y + ypos + (mv_param[1] >> INTERNAL_MV_PREC), .blk_w = block_width, .blk_h = block_height, .pad_l = KVZ_LUMA_FILTER_OFFSET, @@ -117,8 +117,8 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state, const mv_t mv_param[2], hi_prec_buf_t *hi_prec_out) { - int mv_frac_x = (mv_param[0] & 3); - int mv_frac_y = (mv_param[1] & 3); + int mv_frac_x = (mv_param[0] & 15); + int mv_frac_y = (mv_param[1] & 15); // Space for extrapolated pixels and the part from the picture. // Some extra for AVX2. @@ -132,8 +132,8 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state, .src_w = ref->width, .src_h = ref->height, .src_s = ref->stride, - .blk_x = state->tile->offset_x + xpos + (mv_param[0] >> 2), - .blk_y = state->tile->offset_y + ypos + (mv_param[1] >> 2), + .blk_x = state->tile->offset_x + xpos + (mv_param[0] >> INTERNAL_MV_PREC), + .blk_y = state->tile->offset_y + ypos + (mv_param[1] >> INTERNAL_MV_PREC), .blk_w = block_width, .blk_h = block_height, .pad_l = KVZ_LUMA_FILTER_OFFSET, @@ -172,8 +172,8 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state, const mv_t mv_param[2], lcu_t *lcu) { - int mv_frac_x = (mv_param[0] & 7); - int mv_frac_y = (mv_param[1] & 7); + int mv_frac_x = (mv_param[0] & 31); + int mv_frac_y = (mv_param[1] & 31); // Space for extrapolated pixels and the part from the picture. // Some extra for AVX2. @@ -190,8 +190,8 @@ static void inter_recon_frac_chroma(const encoder_state_t *const state, .src_w = ref->width / 2, .src_h = ref->height / 2, .src_s = ref->stride / 2, - .blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> 3), - .blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> 3), + .blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> (INTERNAL_MV_PREC + 1)), + .blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> (INTERNAL_MV_PREC + 1)), .blk_w = block_width / 2, .blk_h = block_height / 2, .pad_l = KVZ_CHROMA_FILTER_OFFSET, @@ -244,8 +244,8 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state, const mv_t mv_param[2], hi_prec_buf_t *hi_prec_out) { - int mv_frac_x = (mv_param[0] & 7); - int mv_frac_y = (mv_param[1] & 7); + int mv_frac_x = (mv_param[0] & 31); + int mv_frac_y = (mv_param[1] & 31); // Space for extrapolated pixels and the part from the picture. // Some extra for AVX2. @@ -262,8 +262,8 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state, .src_w = ref->width / 2, .src_h = ref->height / 2, .src_s = ref->stride / 2, - .blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> 3), - .blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> 3), + .blk_x = (state->tile->offset_x + xpos) / 2 + (mv_param[0] >> (INTERNAL_MV_PREC + 1) ), + .blk_y = (state->tile->offset_y + ypos) / 2 + (mv_param[1] >> (INTERNAL_MV_PREC + 1) ), .blk_w = block_width / 2, .blk_h = block_height / 2, .pad_l = KVZ_CHROMA_FILTER_OFFSET, @@ -372,9 +372,7 @@ static void inter_recon_unipred(const encoder_state_t * const state, bool predict_luma, bool predict_chroma) { - mv_t mv_param_qpel[2] = { mv_param[0], mv_param[1] }; mv_t mv_param_fpel[2] = { mv_param[0], mv_param[1] }; - kvz_change_precision(INTERNAL_MV_PREC, 2, &mv_param_qpel[0], &mv_param_qpel[1]); kvz_change_precision(INTERNAL_MV_PREC, 0, &mv_param_fpel[0], &mv_param_fpel[1]); @@ -394,7 +392,7 @@ static void inter_recon_unipred(const encoder_state_t * const state, // With 420, odd coordinates need interpolation. const int8_t fractional_chroma = (mv_in_pu.x & 1) || (mv_in_pu.y & 1); - const int8_t fractional_luma = ((mv_param_qpel[0] & 3) || (mv_param_qpel[1] & 3)); + const int8_t fractional_luma = ((mv_param[0] & 15) || (mv_param[1] & 15)); // Generate prediction for luma. if (predict_luma) { @@ -404,13 +402,13 @@ static void inter_recon_unipred(const encoder_state_t * const state, inter_recon_frac_luma_hi(state, ref, pu_in_tile.x, pu_in_tile.y, width, height, - mv_param_qpel, hi_prec_out); + mv_param, hi_prec_out); } else { inter_recon_frac_luma(state, ref, pu_in_tile.x, pu_in_tile.y, width, height, - mv_param_qpel, lcu); + mv_param, lcu); } } else { // With an integer MV, copy pixels directly from the reference. @@ -443,12 +441,12 @@ static void inter_recon_unipred(const encoder_state_t * const state, inter_recon_frac_chroma_hi(state, ref, pu_in_tile.x, pu_in_tile.y, width, height, - mv_param_qpel, hi_prec_out); + mv_param, hi_prec_out); } else { inter_recon_frac_chroma(state, ref, pu_in_tile.x, pu_in_tile.y, width, height, - mv_param_qpel, lcu); + mv_param, lcu); } } else { // With an integer MV, copy pixels directly from the reference. @@ -511,11 +509,11 @@ void kvz_inter_recon_bipred(const encoder_state_t * const state, kvz_pixel temp_lcu_u[LCU_WIDTH_C*LCU_WIDTH_C]; kvz_pixel temp_lcu_v[LCU_WIDTH_C*LCU_WIDTH_C]; - const int hi_prec_luma_rec0 = (mv_param[0][0]>>2) & 3 || (mv_param[0][1]>>2) & 3; - const int hi_prec_luma_rec1 = (mv_param[1][0]>>2) & 3 || (mv_param[1][1]>>2) & 3; + const int hi_prec_luma_rec0 = (mv_param[0][0] & 15) || (mv_param[0][1] & 15); + const int hi_prec_luma_rec1 = (mv_param[1][0] & 15) || (mv_param[1][1] & 15); - const int hi_prec_chroma_rec0 = (mv_param[0][0]>>2) & 7 || (mv_param[0][1]>>2) & 7; - const int hi_prec_chroma_rec1 = (mv_param[1][0]>>2) & 7 || (mv_param[1][1]>>2) & 7; + const int hi_prec_chroma_rec0 = (mv_param[0][0] & 31) || (mv_param[0][1] & 31); + const int hi_prec_chroma_rec1 = (mv_param[1][0] & 31) || (mv_param[1][1] & 31); hi_prec_buf_t* high_precision_rec0 = 0; hi_prec_buf_t* high_precision_rec1 = 0; diff --git a/src/search_inter.c b/src/search_inter.c index f061c281..cb1896c8 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -272,10 +272,10 @@ static bool mv_in_merge(const inter_search_info_t *info, vector2d_t mv) for (int i = 0; i < info->num_merge_cand; ++i) { if (info->merge_cand[i].dir == 3) continue; const vector2d_t merge_mv = { - (info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0] + 2) >> INTERNAL_MV_PREC, - (info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] + 2) >> INTERNAL_MV_PREC + info->merge_cand[i].mv[info->merge_cand[i].dir - 1][0], + info->merge_cand[i].mv[info->merge_cand[i].dir - 1][1] }; - if (merge_mv.x == mv.x && merge_mv.y == mv.y) { + if (merge_mv.x == mv.x * (1 << (INTERNAL_MV_PREC)) && merge_mv.y == mv.y * (1 << (INTERNAL_MV_PREC))) { return true; } } @@ -1684,9 +1684,6 @@ static void search_pu_inter(encoder_state_t * const state, for (int merge_idx = 0; merge_idx < info.num_merge_cand; ++merge_idx) { inter_merge_cand_t *cur_cand = &info.merge_cand[merge_idx]; - if ((cur_cand->dir & 1 && (cur_cand->mv[0][0] & 3 || cur_cand->mv[0][1] & 3)) || - (cur_cand->dir & 2 && (cur_cand->mv[1][0] & 3 || cur_cand->mv[1][1] & 3))) continue; - cur_cu->inter.mv_dir = cur_cand->dir; cur_cu->inter.mv_ref[0] = cur_cand->ref[0]; cur_cu->inter.mv_ref[1] = cur_cand->ref[1]; diff --git a/src/strategies/avx2/ipol-avx2.c b/src/strategies/avx2/ipol-avx2.c index a60b7ae7..5c7f8e3a 100644 --- a/src/strategies/avx2/ipol-avx2.c +++ b/src/strategies/avx2/ipol-avx2.c @@ -38,8 +38,8 @@ #include "strategies/generic/ipol-generic.h" -extern int8_t kvz_g_luma_filter[4][8]; -extern int8_t kvz_g_chroma_filter[8][4]; +extern int8_t kvz_g_luma_filter[16][8]; +extern int8_t kvz_g_chroma_filter[32][4]; static int32_t kvz_eight_tap_filter_hor_avx2(int8_t *filter, kvz_pixel *data) { @@ -635,7 +635,7 @@ static void kvz_filter_hpel_blocks_hor_ver_luma_avx2(const encoder_control_t * e int32_t wp_offset1 = 1 << (wp_shift1 - 1); int8_t *fir0 = kvz_g_luma_filter[0]; - int8_t *fir2 = kvz_g_luma_filter[2]; + int8_t *fir2 = kvz_g_luma_filter[8]; int16_t dst_stride = LCU_WIDTH; int16_t hor_stride = LCU_WIDTH; @@ -745,7 +745,7 @@ static void kvz_filter_hpel_blocks_diag_luma_avx2(const encoder_control_t * enco int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH; int32_t wp_offset1 = 1 << (wp_shift1 - 1); - int8_t *fir2 = kvz_g_luma_filter[2]; + int8_t *fir2 = kvz_g_luma_filter[8]; int16_t dst_stride = LCU_WIDTH; int16_t hor_stride = LCU_WIDTH; @@ -826,9 +826,9 @@ static void kvz_filter_qpel_blocks_hor_ver_luma_avx2(const encoder_control_t * e int32_t wp_offset1 = 1 << (wp_shift1 - 1); int8_t *fir0 = kvz_g_luma_filter[0]; - int8_t *fir2 = kvz_g_luma_filter[2]; - int8_t *fir1 = kvz_g_luma_filter[1]; - int8_t *fir3 = kvz_g_luma_filter[3]; + int8_t *fir2 = kvz_g_luma_filter[8]; + int8_t *fir1 = kvz_g_luma_filter[4]; + int8_t *fir3 = kvz_g_luma_filter[12]; // Horiziontal positions. Positions 0 and 2 have already been calculated in filtered. int16_t *hor_pos0 = hor_intermediate[0]; @@ -1137,8 +1137,8 @@ static void kvz_sample_quarterpel_luma_avx2(const encoder_control_t * const enco const mv_t mv[2]) { // TODO: horizontal and vertical only filtering - int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 3]; - int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 3]; + int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 15]; + int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 15]; // Buffer for intermediate values with one extra row // because the loop writes two rows each iteration. @@ -1162,8 +1162,8 @@ static void kvz_sample_quarterpel_luma_hi_avx2(const encoder_control_t * const e const mv_t mv[2]) { // TODO: horizontal and vertical only filtering - int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 3]; - int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 3]; + int8_t *hor_fir = kvz_g_luma_filter[mv[0] & 15]; + int8_t *ver_fir = kvz_g_luma_filter[mv[1] & 15]; // Buffer for intermediate values with one extra row // because the loop writes two rows each iteration. @@ -1191,8 +1191,8 @@ static void kvz_sample_octpel_chroma_avx2(const encoder_control_t *const encoder kvz_sample_octpel_chroma_generic(encoder, src, src_stride, width, height, dst, dst_stride, hor_flag, ver_flag, mv); return; } - int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 7]; - int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 7]; + int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 31]; + int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 31]; // Buffer for intermediate values with 3 extra rows // because the loop writes four rows each iteration. @@ -1219,8 +1219,8 @@ static void kvz_sample_octpel_chroma_hi_avx2(const encoder_control_t *const enco kvz_sample_octpel_chroma_hi_generic(encoder, src, src_stride, width, height, dst, dst_stride, hor_flag, ver_flag, mv); return; } - int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 7]; - int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 7]; + int8_t *hor_fir = kvz_g_chroma_filter[mv[0] & 31]; + int8_t *ver_fir = kvz_g_chroma_filter[mv[1] & 31]; // Buffer for intermediate values with 3 extra rows // because the loop writes four rows each iteration. diff --git a/src/strategies/generic/ipol-generic.c b/src/strategies/generic/ipol-generic.c index e5c5cef6..423d0602 100644 --- a/src/strategies/generic/ipol-generic.c +++ b/src/strategies/generic/ipol-generic.c @@ -28,8 +28,8 @@ #include "strategies/strategies-ipol.h" #include "strategyselector.h" -extern int8_t kvz_g_luma_filter[4][8]; -extern int8_t kvz_g_chroma_filter[8][4]; +extern int8_t kvz_g_luma_filter[16][8]; +extern int8_t kvz_g_chroma_filter[32][4]; int32_t kvz_eight_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data) { @@ -133,8 +133,8 @@ void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, int32_t wp_offset1 = 1 << (wp_shift1 - 1); // Select filters according to the fractional part of the x and y mv components - int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 3]; - int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 3]; + int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 15]; + int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 15]; int16_t hor_filtered[KVZ_EXT_BLOCK_W_LUMA][LCU_WIDTH]; int16_t hor_stride = LCU_WIDTH; @@ -166,8 +166,8 @@ void kvz_sample_quarterpel_luma_hi_generic(const encoder_control_t * const encod int32_t shift2 = 6; // Select filters according to the fractional part of the x and y mv components - int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 3]; - int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 3]; + int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 15]; + int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 15]; int16_t hor_filtered[KVZ_EXT_BLOCK_W_LUMA][LCU_WIDTH]; int16_t hor_stride = LCU_WIDTH; @@ -210,7 +210,7 @@ void kvz_filter_hpel_blocks_hor_ver_luma_generic(const encoder_control_t * encod int32_t wp_offset1 = 1 << (wp_shift1 - 1); int8_t *fir0 = kvz_g_luma_filter[0]; - int8_t *fir2 = kvz_g_luma_filter[2]; + int8_t *fir2 = kvz_g_luma_filter[8]; int16_t dst_stride = LCU_WIDTH; int16_t hor_stride = LCU_WIDTH; @@ -324,7 +324,7 @@ void kvz_filter_hpel_blocks_diag_luma_generic(const encoder_control_t * encoder, int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH; int32_t wp_offset1 = 1 << (wp_shift1 - 1); - int8_t *fir2 = kvz_g_luma_filter[2]; + int8_t *fir2 = kvz_g_luma_filter[8]; int16_t dst_stride = LCU_WIDTH; int16_t hor_stride = LCU_WIDTH; @@ -407,9 +407,9 @@ void kvz_filter_qpel_blocks_hor_ver_luma_generic(const encoder_control_t * encod int32_t wp_offset1 = 1 << (wp_shift1 - 1); int8_t *fir0 = kvz_g_luma_filter[0]; - int8_t *fir2 = kvz_g_luma_filter[2]; - int8_t *fir1 = kvz_g_luma_filter[1]; - int8_t *fir3 = kvz_g_luma_filter[3]; + int8_t *fir2 = kvz_g_luma_filter[8]; + int8_t *fir1 = kvz_g_luma_filter[4]; + int8_t *fir3 = kvz_g_luma_filter[12]; // Horiziontal positions. Positions 0 and 2 have already been calculated in filtered. int16_t *hor_pos0 = hor_intermediate[0]; @@ -565,8 +565,8 @@ void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder, int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH; int32_t wp_offset1 = 1 << (wp_shift1 - 1); - int8_t *fir1 = kvz_g_luma_filter[1]; - int8_t *fir3 = kvz_g_luma_filter[3]; + int8_t *fir1 = kvz_g_luma_filter[4]; + int8_t *fir3 = kvz_g_luma_filter[12]; // Horiziontal positions. int16_t *hor_pos_l = hor_intermediate[3]; @@ -671,8 +671,8 @@ void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, k int32_t wp_offset1 = 1 << (wp_shift1 - 1); // Select filters according to the fractional part of the x and y mv components - int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 7]; - int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 7]; + int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 31]; + int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 31]; int16_t hor_filtered[KVZ_EXT_BLOCK_W_CHROMA][LCU_WIDTH_C]; int16_t hor_stride = LCU_WIDTH_C; @@ -704,8 +704,8 @@ void kvz_sample_octpel_chroma_hi_generic(const encoder_control_t * const encoder int32_t shift2 = 6; // Select filters according to the fractional part of the x and y mv components - int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 7]; - int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 7]; + int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 31]; + int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 31]; int16_t hor_filtered[KVZ_EXT_BLOCK_W_CHROMA][LCU_WIDTH_C]; int16_t hor_stride = LCU_WIDTH_C;