diff --git a/src/kvazaar.h b/src/kvazaar.h index df4146bc..1b28397a 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -175,6 +175,8 @@ typedef struct kvz_config kvz_gop_config gop[KVZ_MAX_GOP_LENGTH]; /*!< \brief Array of GOP settings */ int32_t target_bitrate; + + int8_t mv_rdo; /*!< \brief MV RDO calculation in search (0: estimation, 1: RDO). */ } kvz_config; /** diff --git a/src/search_inter.c b/src/search_inter.c index d2f232c5..df5faadb 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -141,6 +141,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi vector2d_t mv_best = { 0, 0 }; + + int(*calc_mvd)(const encoder_state_t * const, int, int, int, + int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS], + int16_t, int32_t, uint32_t *) = calc_mvd_cost; + if (state->encoder_control->cfg->mv_rdo) { + calc_mvd = kvz_calc_mvd_cost_cabac; + } + assert(pattern_type < 4); //implemented search patterns @@ -252,7 +260,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y, block_width, block_width, max_lcu_below); - cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x, @@ -293,6 +301,13 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic int k; vector2d_t mv_best = { 0, 0 }; + + int(*calc_mvd)(const encoder_state_t * const, int, int, int, + int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS], + int16_t, int32_t, uint32_t *) = calc_mvd_cost; + if (state->encoder_control->cfg->mv_rdo) { + calc_mvd = kvz_calc_mvd_cost_cabac; + } //compute SAD values for every point in the iRaster downsampled version of the current search area for (i = iSearchRange; i >= -iSearchRange; i -= iRaster) @@ -309,7 +324,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i, block_width, block_width, max_lcu_below); - cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k, @@ -362,6 +377,13 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth, unsigned best_index = num_cand; int max_lcu_below = -1; + int(*calc_mvd)(const encoder_state_t * const, int, int, int, + int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS], + int16_t, int32_t, uint32_t *) = calc_mvd_cost; + if (state->encoder_control->cfg->mv_rdo) { + calc_mvd = kvz_calc_mvd_cost_cabac; + } + if (state->encoder_control->owf) { max_lcu_below = 1; } @@ -377,7 +399,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, block_width, block_width, max_lcu_below); - best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost); + best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, @@ -402,7 +424,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, block_width, block_width, max_lcu_below); - cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, @@ -531,6 +553,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep unsigned i; unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs. int max_lcu_below = -1; + + int (*calc_mvd)(const encoder_state_t * const, int, int, int, + int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS], + int16_t, int32_t, uint32_t *) = calc_mvd_cost; + if (state->encoder_control->cfg->mv_rdo) { + calc_mvd = kvz_calc_mvd_cost_cabac; + } + if (state->encoder_control->owf) { max_lcu_below = 1; @@ -554,7 +584,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, block_width, block_width, max_lcu_below); - best_cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); best_bitcost = bitcost; best_index = num_cand; @@ -578,7 +608,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y, block_width, block_width, max_lcu_below); - cost += kvz_calc_mvd_cost_cabac(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x, @@ -611,7 +641,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y, block_width, block_width, max_lcu_below); - cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x, @@ -654,7 +684,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width, max_lcu_below); - cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, @@ -686,7 +716,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y, block_width, block_width, max_lcu_below); - cost += kvz_calc_mvd_cost_cabac(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x, (state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width, @@ -823,6 +853,13 @@ static unsigned search_frac(const encoder_state_t * const state, kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16]; kvz_pixel* dst_off = &dst[dst_stride*4+4]; + int(*calc_mvd)(const encoder_state_t * const, int, int, int, + int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS], + int16_t, int32_t, uint32_t *) = calc_mvd_cost; + if (state->encoder_control->cfg->mv_rdo) { + calc_mvd = kvz_calc_mvd_cost_cabac; + } + kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1, state->tile->lcu_offset_x * LCU_WIDTH, state->tile->lcu_offset_y * LCU_WIDTH, @@ -856,7 +893,7 @@ static unsigned search_frac(const encoder_state_t * const state, cost = satd(tmp_pic,tmp_filtered); - cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); if (cost < best_cost) { best_cost = cost; @@ -892,7 +929,7 @@ static unsigned search_frac(const encoder_state_t * const state, cost = satd(tmp_pic,tmp_filtered); - cost += kvz_calc_mvd_cost_cabac(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); + cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost); if (cost < best_cost) { best_cost = cost; @@ -1060,6 +1097,15 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }; static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }; uint8_t cutoff = num_cand; + + + int(*calc_mvd)(const encoder_state_t * const, int, int, int, + int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS], + int16_t, int32_t, uint32_t *) = calc_mvd_cost; + if (state->encoder_control->cfg->mv_rdo) { + calc_mvd = kvz_calc_mvd_cost_cabac; + } + for (int32_t idx = 0; idxinter.cost) {