mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-18 03:04:06 +00:00
Make MV RDO selectable through the `mv_rdo` config parameter (applies to the hexagon-based, TZ, fractional-pixel and bi-prediction searches)
This commit is contained in:
parent
9a535e1c56
commit
4e7e9eefbf
|
@ -175,6 +175,8 @@ typedef struct kvz_config
|
|||
kvz_gop_config gop[KVZ_MAX_GOP_LENGTH]; /*!< \brief Array of GOP settings */
|
||||
|
||||
int32_t target_bitrate;
|
||||
|
||||
int8_t mv_rdo; /*!< \brief MV RDO calculation in search (0: estimation, 1: RDO). */
|
||||
} kvz_config;
|
||||
|
||||
/**
|
||||
|
|
|
@ -141,6 +141,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
|
|||
|
||||
vector2d_t mv_best = { 0, 0 };
|
||||
|
||||
|
||||
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
||||
if (state->encoder_control->cfg->mv_rdo) {
|
||||
calc_mvd = kvz_calc_mvd_cost_cabac;
|
||||
}
|
||||
|
||||
assert(pattern_type < 4);
|
||||
|
||||
//implemented search patterns
|
||||
|
@ -252,7 +260,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
|
||||
|
@ -293,6 +301,13 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
|
|||
int k;
|
||||
|
||||
vector2d_t mv_best = { 0, 0 };
|
||||
|
||||
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
||||
if (state->encoder_control->cfg->mv_rdo) {
|
||||
calc_mvd = kvz_calc_mvd_cost_cabac;
|
||||
}
|
||||
|
||||
//compute SAD values for every point in the iRaster downsampled version of the current search area
|
||||
for (i = iSearchRange; i >= -iSearchRange; i -= iRaster)
|
||||
|
@ -309,7 +324,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
|
||||
block_width, block_width, max_lcu_below);
|
||||
cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
|
||||
|
@ -362,6 +377,13 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
|
|||
unsigned best_index = num_cand;
|
||||
int max_lcu_below = -1;
|
||||
|
||||
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
||||
if (state->encoder_control->cfg->mv_rdo) {
|
||||
calc_mvd = kvz_calc_mvd_cost_cabac;
|
||||
}
|
||||
|
||||
if (state->encoder_control->owf) {
|
||||
max_lcu_below = 1;
|
||||
}
|
||||
|
@ -377,7 +399,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
|
||||
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
|
||||
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||
|
@ -402,7 +424,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||
|
@ -531,6 +553,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
|||
unsigned i;
|
||||
unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
|
||||
int max_lcu_below = -1;
|
||||
|
||||
int (*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
||||
if (state->encoder_control->cfg->mv_rdo) {
|
||||
calc_mvd = kvz_calc_mvd_cost_cabac;
|
||||
}
|
||||
|
||||
|
||||
if (state->encoder_control->owf) {
|
||||
max_lcu_below = 1;
|
||||
|
@ -554,7 +584,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
best_cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
best_bitcost = bitcost;
|
||||
best_index = num_cand;
|
||||
|
||||
|
@ -578,7 +608,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
|
||||
|
@ -611,7 +641,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||
|
@ -654,7 +684,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
|
||||
|
@ -686,7 +716,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
|
|||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||
block_width, block_width, max_lcu_below);
|
||||
cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
|
||||
|
@ -823,6 +853,13 @@ static unsigned search_frac(const encoder_state_t * const state,
|
|||
kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
|
||||
kvz_pixel* dst_off = &dst[dst_stride*4+4];
|
||||
|
||||
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
||||
if (state->encoder_control->cfg->mv_rdo) {
|
||||
calc_mvd = kvz_calc_mvd_cost_cabac;
|
||||
}
|
||||
|
||||
kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1,
|
||||
state->tile->lcu_offset_x * LCU_WIDTH,
|
||||
state->tile->lcu_offset_y * LCU_WIDTH,
|
||||
|
@ -856,7 +893,7 @@ static unsigned search_frac(const encoder_state_t * const state,
|
|||
|
||||
cost = satd(tmp_pic,tmp_filtered);
|
||||
|
||||
cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
|
||||
if (cost < best_cost) {
|
||||
best_cost = cost;
|
||||
|
@ -892,7 +929,7 @@ static unsigned search_frac(const encoder_state_t * const state,
|
|||
|
||||
cost = satd(tmp_pic,tmp_filtered);
|
||||
|
||||
cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
|
||||
|
||||
if (cost < best_cost) {
|
||||
best_cost = cost;
|
||||
|
@ -1060,6 +1097,15 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
|
|||
static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
|
||||
static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
|
||||
uint8_t cutoff = num_cand;
|
||||
|
||||
|
||||
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
|
||||
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
|
||||
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
|
||||
if (state->encoder_control->cfg->mv_rdo) {
|
||||
calc_mvd = kvz_calc_mvd_cost_cabac;
|
||||
}
|
||||
|
||||
for (int32_t idx = 0; idx<cutoff*(cutoff - 1); idx++) {
|
||||
uint8_t i = priorityList0[idx];
|
||||
uint8_t j = priorityList1[idx];
|
||||
|
@ -1110,8 +1156,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
|
|||
|
||||
cost = satd(tmp_pic, tmp_block);
|
||||
|
||||
cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
|
||||
cost += kvz_calc_mvd_cost_cabac(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
|
||||
cost += calc_mvd(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
|
||||
cost += calc_mvd(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
|
||||
|
||||
if (cost < cur_cu->inter.cost) {
|
||||
|
||||
|
|
Loading…
Reference in a new issue