Enable usage of MV RDO with a config parameter (in hexbs, tz, frac, bipred)

This commit is contained in:
Marko Viitanen 2015-11-05 12:24:03 +02:00
parent 9a535e1c56
commit 4e7e9eefbf
2 changed files with 61 additions and 13 deletions

View file

@ -175,6 +175,8 @@ typedef struct kvz_config
kvz_gop_config gop[KVZ_MAX_GOP_LENGTH]; /*!< \brief Array of GOP settings */ kvz_gop_config gop[KVZ_MAX_GOP_LENGTH]; /*!< \brief Array of GOP settings */
int32_t target_bitrate; int32_t target_bitrate;
int8_t mv_rdo; /*!< \brief MV RDO calculation in search (0: estimation, 1: RDO). */
} kvz_config; } kvz_config;
/** /**

View file

@ -141,6 +141,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
vector2d_t mv_best = { 0, 0 }; vector2d_t mv_best = { 0, 0 };
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
assert(pattern_type < 4); assert(pattern_type < 4);
//implemented search patterns //implemented search patterns
@ -252,7 +260,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
@ -293,6 +301,13 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
int k; int k;
vector2d_t mv_best = { 0, 0 }; vector2d_t mv_best = { 0, 0 };
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
//compute SAD values for every point in the iRaster downsampled version of the current search area //compute SAD values for every point in the iRaster downsampled version of the current search area
for (i = iSearchRange; i >= -iSearchRange; i -= iRaster) for (i = iSearchRange; i >= -iSearchRange; i -= iRaster)
@ -309,7 +324,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
@ -362,6 +377,13 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
unsigned best_index = num_cand; unsigned best_index = num_cand;
int max_lcu_below = -1; int max_lcu_below = -1;
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
if (state->encoder_control->owf) { if (state->encoder_control->owf) {
max_lcu_below = 1; max_lcu_below = 1;
} }
@ -377,7 +399,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost); best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -402,7 +424,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -531,6 +553,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
unsigned i; unsigned i;
unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs. unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
int max_lcu_below = -1; int max_lcu_below = -1;
int (*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
if (state->encoder_control->owf) { if (state->encoder_control->owf) {
max_lcu_below = 1; max_lcu_below = 1;
@ -554,7 +584,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
best_cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
best_bitcost = bitcost; best_bitcost = bitcost;
best_index = num_cand; best_index = num_cand;
@ -578,7 +608,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -611,7 +641,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
@ -654,7 +684,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
@ -686,7 +716,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
block_width, block_width, max_lcu_below); block_width, block_width, max_lcu_below);
cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
@ -823,6 +853,13 @@ static unsigned search_frac(const encoder_state_t * const state,
kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16]; kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
kvz_pixel* dst_off = &dst[dst_stride*4+4]; kvz_pixel* dst_off = &dst[dst_stride*4+4];
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1, kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1,
state->tile->lcu_offset_x * LCU_WIDTH, state->tile->lcu_offset_x * LCU_WIDTH,
state->tile->lcu_offset_y * LCU_WIDTH, state->tile->lcu_offset_y * LCU_WIDTH,
@ -856,7 +893,7 @@ static unsigned search_frac(const encoder_state_t * const state,
cost = satd(tmp_pic,tmp_filtered); cost = satd(tmp_pic,tmp_filtered);
cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
if (cost < best_cost) { if (cost < best_cost) {
best_cost = cost; best_cost = cost;
@ -892,7 +929,7 @@ static unsigned search_frac(const encoder_state_t * const state,
cost = satd(tmp_pic,tmp_filtered); cost = satd(tmp_pic,tmp_filtered);
cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
if (cost < best_cost) { if (cost < best_cost) {
best_cost = cost; best_cost = cost;
@ -1060,6 +1097,15 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }; static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }; static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
uint8_t cutoff = num_cand; uint8_t cutoff = num_cand;
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
for (int32_t idx = 0; idx<cutoff*(cutoff - 1); idx++) { for (int32_t idx = 0; idx<cutoff*(cutoff - 1); idx++) {
uint8_t i = priorityList0[idx]; uint8_t i = priorityList0[idx];
uint8_t j = priorityList1[idx]; uint8_t j = priorityList1[idx];
@ -1110,8 +1156,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
cost = satd(tmp_pic, tmp_block); cost = satd(tmp_pic, tmp_block);
cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]); cost += calc_mvd(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]); cost += calc_mvd(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
if (cost < cur_cu->inter.cost) { if (cost < cur_cu->inter.cost) {