Merge branch 'mv_rdo'

This commit is contained in:
Marko Viitanen 2015-11-05 15:23:55 +02:00
commit d60baf64df
9 changed files with 321 additions and 23 deletions

View file

@ -42,6 +42,7 @@ http://ultravideo.cs.tut.fi/#encoder for more information.
0: no RDO
1: estimated RDO
2: full RDO
--mv-rdo : Enable Rate-Distortion Optimized motion vector costs
--full-intra-search : Try all intra modes.
--no-transform-skip : Disable transform skip
--aud : Use access unit delimiters
@ -148,6 +149,7 @@ subme | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1
sao | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1
rdoq | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1
transform-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1
mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1
full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1

View file

@ -13,7 +13,7 @@ DLLDIR = $(BINDIR)
# Library version number
VER_MAJOR = 2
VER_MINOR = 1
VER_MINOR = 2
VER_RELEASE = 0
PROG = kvazaar

View file

@ -91,6 +91,8 @@ static const struct option long_options[] = {
{ "no-bipred", no_argument, NULL, 0 },
{ "bitrate", required_argument, NULL, 0 },
{ "preset", required_argument, NULL, 0 },
{ "mv-rdo", no_argument, NULL, 0 },
{ "no-mv-rdo", no_argument, NULL, 0 },
{0, 0, 0, 0}
};
@ -291,6 +293,7 @@ void print_help(void)
" 0: no RDO\n"
" 1: estimated RDO\n"
" 2: full RDO\n"
" --mv-rdo : Enable Rate-Distortion Optimized motion vector costs\n"
" --full-intra-search : Try all intra modes.\n"
" --me <string> : Set integer motion estimation algorithm [\"hexbs\"]\n"
" \"hexbs\": Hexagon Based Search (faster)\n"

View file

@ -56,6 +56,7 @@ int kvz_config_init(kvz_config *cfg)
cfg->rdoq_enable = 1;
cfg->signhide_enable = true;
cfg->rdo = 1;
cfg->mv_rdo = 0;
cfg->full_intra_search = 0;
cfg->trskip_enable = 1;
cfg->tr_depth_intra = 0;
@ -278,7 +279,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
static const char * const colormatrix_names[] = { "GBR", "bt709", "undef", "", "fcc", "bt470bg", "smpte170m",
"smpte240m", "YCgCo", "bt2020nc", "bt2020c", NULL };
static const char * const preset_values[11][26] = {
static const char * const preset_values[11][28] = {
{
"ultrafast",
"pu-depth-intra", "2-3",
@ -293,6 +294,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -309,6 +311,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -325,6 +328,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -341,6 +345,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -357,6 +362,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -373,6 +379,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -389,6 +396,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "0",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -405,6 +413,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "1",
"transform-skip", "0",
"full-intra-search", "0",
"mv-rdo", "0",
NULL
},
{
@ -421,6 +430,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "1",
"transform-skip", "1",
"full-intra-search", "0",
"mv-rdo", "1",
NULL
},
{
@ -437,6 +447,7 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
"rdoq", "1",
"transform-skip", "1",
"full-intra-search", "1",
"mv-rdo", "1",
NULL
},
{ NULL }
@ -631,6 +642,8 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
return 0;
}
}
else if OPT("mv-rdo")
cfg->mv_rdo = atobool(value);
else
return 0;
#undef OPT

View file

@ -175,6 +175,8 @@ typedef struct kvz_config
kvz_gop_config gop[KVZ_MAX_GOP_LENGTH]; /*!< \brief Array of GOP settings */
int32_t target_bitrate;
int8_t mv_rdo; /*!< \brief MV RDO calculation in search (0: estimation, 1: RDO). */
} kvz_config;
/**

View file

@ -21,6 +21,6 @@
****************************************************************************/
// KVZ_API_VERSION is incremented every time the public api changes.
#define KVZ_API_VERSION 8
#define KVZ_API_VERSION 9
#endif // KVAZAAR_VERSION_H_

219
src/rdo.c
View file

@ -32,6 +32,7 @@
#include "cabac.h"
#include "transform.h"
#include "strategies/strategies-quant.h"
#include "inter.h"
#define QUANT_SHIFT 14
@ -890,3 +891,221 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff
width, coef, dest_coeff);
}
}
/**
 * \brief Count the bits CABAC spends on one motion vector difference.
 *
 * The coder state behind `cabac` is snapshotted, switched to counting mode
 * (only_count = 1), fed the MVD syntax elements and then put back exactly
 * as it was, so the caller sees no change in `*cabac` after the call.
 *
 * \param mvd    motion vector difference to price (only x and y are read)
 * \param cabac  CABAC state used for counting; modified during the call and
 *               restored from the snapshot before returning
 * \returns growth of (23 - bits_left) + 8 * num_buffered_bytes caused by
 *          coding the MVD
 */
uint32_t kvz_get_mvd_coding_cost_cabac(vector2d_t *mvd, cabac_data_t* cabac)
{
  const int32_t dx = mvd->x;
  const int32_t dy = mvd->y;
  const uint32_t abs_dx = abs(dx);
  const uint32_t abs_dy = abs(dy);
  const int8_t dx_nonzero = (dx != 0);
  const int8_t dy_nonzero = (dy != 0);
  uint32_t bitcost = 0;

  // Snapshot the coder so that it can be restored once counting is done.
  cabac_data_t saved = *cabac;

  cabac->only_count = 1;

  // "Greater than 0" flags, horizontal first, both with the first MVD context.
  cabac->cur_ctx = &(cabac->ctx.cu_mvd_model[0]);
  CABAC_BIN(cabac, dx_nonzero, "abs_mvd_greater0_flag_hor");
  CABAC_BIN(cabac, dy_nonzero, "abs_mvd_greater0_flag_ver");

  // "Greater than 1" flags, only for non-zero components, second MVD context.
  cabac->cur_ctx = &(cabac->ctx.cu_mvd_model[1]);
  if (dx_nonzero) {
    CABAC_BIN(cabac, (abs_dx > 1), "abs_mvd_greater1_flag_hor");
  }
  if (dy_nonzero) {
    CABAC_BIN(cabac, (abs_dy > 1), "abs_mvd_greater1_flag_ver");
  }

  // Remaining magnitude (abs - 2) and sign of the horizontal component.
  if (dx_nonzero) {
    if (abs_dx > 1) {
      kvz_cabac_write_ep_ex_golomb(cabac, abs_dx - 2, 1);
    }
    // dx is non-zero here, so "not positive" means negative.
    CABAC_BIN_EP(cabac, (dx < 0), "mvd_sign_flag_hor");
  }

  // Remaining magnitude (abs - 2) and sign of the vertical component.
  if (dy_nonzero) {
    if (abs_dy > 1) {
      kvz_cabac_write_ep_ex_golomb(cabac, abs_dy - 2, 1);
    }
    // dy is non-zero here, so "not positive" means negative.
    CABAC_BIN_EP(cabac, (dy < 0), "mvd_sign_flag_ver");
  }

  // Bit position after coding minus bit position in the snapshot.
  bitcost = ((23 - cabac->bits_left) + (cabac->num_buffered_bytes << 3))
          - ((23 - saved.bits_left) + (saved.num_buffered_bytes << 3));

  // Undo every change made to the coder above.
  *cabac = saved;

  return bitcost;
}
/**
 * \brief Rate cost of a candidate motion vector, measured with CABAC.
 *
 * Works on a private copy of state->cabac that is put into counting mode
 * with its bit position reset, so state->cabac itself is never modified.
 * The copy is fed the merge flag and then either the merge index (when the
 * vector matches one of the merge candidates) or the reference index, the
 * motion vector difference against the cheaper of the two MV candidates
 * and the index of that candidate.
 *
 * \param state       encoder state; only read
 * \param x           horizontal MV component, scaled by 2^mv_shift before use
 * \param y           vertical MV component, scaled by 2^mv_shift before use
 * \param mv_shift    scaling exponent for x and y; must be non-negative
 * \param mv_cand     the two MV predictor candidates, each stored as {x, y}
 * \param merge_cand  merge candidate list
 * \param num_cand    number of entries in merge_cand to compare against
 * \param ref_idx     reference index of the candidate vector
 * \param bitcost     output: number of bits counted for the syntax elements
 * \returns *bitcost multiplied by cur_lambda_cost_sqrt rounded to an integer
 */
int kvz_calc_mvd_cost_cabac(const encoder_state_t * const state, int x, int y, int mv_shift,
                            int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
                            int16_t num_cand, int32_t ref_idx, uint32_t *bitcost)
{
  cabac_data_t state_cabac_copy;
  cabac_data_t* cabac;
  uint32_t merge_idx;
  int cand1_cost, cand2_cost;
  vector2d_t mvd_temp1, mvd_temp2, mvd = { 0, 0 };
  int8_t merged = 0;
  int8_t cur_mv_cand = 0;

  // Scale with a multiplication instead of "<<=": x and y can be negative and
  // left-shifting a negative value is undefined behaviour in C.
  x *= 1 << mv_shift;
  y *= 1 << mv_shift;

  // Check every candidate to find a match
  for (merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
    // Candidates with dir == 3 are never matched against a single vector.
    if (merge_cand[merge_idx].dir == 3) continue;
    if (merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][0] == x &&
        merge_cand[merge_idx].mv[merge_cand[merge_idx].dir - 1][1] == y &&
        merge_cand[merge_idx].ref[merge_cand[merge_idx].dir - 1] == ref_idx) {
      merged = 1;
      break;
    }
  }

  // Store cabac state and contexts
  memcpy(&state_cabac_copy, &state->cabac, sizeof(cabac_data_t));

  // Clear bytes and bits and set mode to "count"
  state_cabac_copy.only_count = 1;
  state_cabac_copy.num_buffered_bytes = 0;
  state_cabac_copy.bits_left = 23;

  cabac = &state_cabac_copy;

  if (!merged) {
    // Price the MVD against both predictor candidates. The helper restores
    // the coder after each call, so these two calls leave *cabac unchanged.
    mvd_temp1.x = x - mv_cand[0][0];
    mvd_temp1.y = y - mv_cand[0][1];
    cand1_cost = kvz_get_mvd_coding_cost_cabac(&mvd_temp1, cabac);

    mvd_temp2.x = x - mv_cand[1][0];
    mvd_temp2.y = y - mv_cand[1][1];
    cand2_cost = kvz_get_mvd_coding_cost_cabac(&mvd_temp2, cabac);

    // Use the second candidate (index 1) only if it is strictly cheaper
    if (cand2_cost < cand1_cost) {
      cur_mv_cand = 1;
      mvd = mvd_temp2;
    } else {
      mvd = mvd_temp1;
    }
  }

  cabac->cur_ctx = &(cabac->ctx.cu_merge_flag_ext_model);
  CABAC_BIN(cabac, merged, "MergeFlag");

  // From here on num_cand bounds the merge index coding, not the search above.
  num_cand = MRG_MAX_NUM_CANDS;
  if (merged) {
    if (num_cand > 1) {
      int32_t ui;
      // One bin per position up to the matched index: the first bin uses a
      // context, the rest are bypass bins.
      for (ui = 0; ui < num_cand - 1; ui++) {
        int32_t symbol = (ui != merge_idx);
        if (ui == 0) {
          cabac->cur_ctx = &(cabac->ctx.cu_merge_idx_ext_model);
          CABAC_BIN(cabac, symbol, "MergeIndex");
        } else {
          CABAC_BIN_EP(cabac, symbol, "MergeIndex");
        }
        if (symbol == 0) break;
      }
    }
  } else {
    uint32_t ref_list_idx;
    uint32_t j;
    int ref_list[2] = { 0, 0 };

    // Count reference pictures with POC below the current POC (ref_list[0])
    // and the remaining ones (ref_list[1]).
    for (j = 0; j < state->global->ref->used_size; j++) {
      if (state->global->ref->pocs[j] < state->global->poc) {
        ref_list[0]++;
      } else {
        ref_list[1]++;
      }
    }

    //ToDo: bidir mv support
    for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) {
      // The direction mask is hard-coded to 1, so only ref_list_idx 0 passes.
      if (/*cur_cu->inter.mv_dir*/ 1 & (1 << ref_list_idx)) {
        if (ref_list[ref_list_idx] > 1) {
          // parseRefFrmIdx
          int32_t ref_frame = ref_idx;

          cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[0]);
          CABAC_BIN(cabac, (ref_frame != 0), "ref_idx_lX");

          if (ref_frame > 0) {
            int32_t i;
            int32_t ref_num = ref_list[ref_list_idx] - 2;

            cabac->cur_ctx = &(cabac->ctx.cu_ref_pic_model[1]);
            ref_frame--;

            for (i = 0; i < ref_num; ++i) {
              const uint32_t symbol = (i == ref_frame) ? 0 : 1;

              if (i == 0) {
                CABAC_BIN(cabac, symbol, "ref_idx_lX");
              } else {
                CABAC_BIN_EP(cabac, symbol, "ref_idx_lX");
              }
              if (symbol == 0) break;
            }
          }
        }

        // ToDo: Bidir vector support
        // The "&& 0" keeps this condition always true until bidir is handled.
        if (!(state->global->ref_list == REF_PIC_LIST_1 && /*cur_cu->inter.mv_dir == 3*/ 0)) {
          const int32_t mvd_hor = mvd.x;
          const int32_t mvd_ver = mvd.y;
          const int8_t hor_abs_gr0 = mvd_hor != 0;
          const int8_t ver_abs_gr0 = mvd_ver != 0;
          const uint32_t mvd_hor_abs = abs(mvd_hor);
          const uint32_t mvd_ver_abs = abs(mvd_ver);

          cabac->cur_ctx = &(cabac->ctx.cu_mvd_model[0]);
          CABAC_BIN(cabac, (mvd_hor != 0), "abs_mvd_greater0_flag_hor");
          CABAC_BIN(cabac, (mvd_ver != 0), "abs_mvd_greater0_flag_ver");

          cabac->cur_ctx = &(cabac->ctx.cu_mvd_model[1]);
          if (hor_abs_gr0) {
            CABAC_BIN(cabac, (mvd_hor_abs > 1), "abs_mvd_greater1_flag_hor");
          }
          if (ver_abs_gr0) {
            CABAC_BIN(cabac, (mvd_ver_abs > 1), "abs_mvd_greater1_flag_ver");
          }

          if (hor_abs_gr0) {
            if (mvd_hor_abs > 1) {
              kvz_cabac_write_ep_ex_golomb(cabac, mvd_hor_abs - 2, 1);
            }
            CABAC_BIN_EP(cabac, (mvd_hor > 0) ? 0 : 1, "mvd_sign_flag_hor");
          }
          if (ver_abs_gr0) {
            if (mvd_ver_abs > 1) {
              kvz_cabac_write_ep_ex_golomb(cabac, mvd_ver_abs - 2, 1);
            }
            CABAC_BIN_EP(cabac, (mvd_ver > 0) ? 0 : 1, "mvd_sign_flag_ver");
          }
        }

        // Signal which candidate MV to use
        kvz_cabac_write_unary_max_symbol(cabac, cabac->ctx.mvp_idx_model, cur_mv_cand, 1,
                                         AMVP_MAX_NUM_CANDS - 1);
      }
    }
  }

  // The copy started at bits_left = 23 with no buffered bytes, so this is the
  // number of bits produced by the syntax elements above.
  *bitcost = (23 - state_cabac_copy.bits_left) + (state_cabac_copy.num_buffered_bytes << 3);

  // Weight the bit count with the rounded square-root lambda.
  return *bitcost * (int32_t)(state->global->cur_lambda_cost_sqrt + 0.5);
}

View file

@ -29,6 +29,7 @@
#include "encoder.h"
#include "encoderstate.h"
#include "inter.h"
extern const uint32_t kvz_g_go_rice_range[5];
@ -52,6 +53,11 @@ uint32_t kvz_get_coded_level(encoder_state_t * state, double* coded_cost, double
uint32_t c1_idx, uint32_t c2_idx,
int32_t q_bits,double temp, int8_t last, int8_t type);
/**
 * \brief Rate cost of the motion vector (x, y) counted on a copy of state->cabac.
 *
 * x and y are scaled by mv_shift before use. The number of counted bits is
 * written to *bitcost; the return value is that bit count multiplied by the
 * rounded state->global->cur_lambda_cost_sqrt.
 */
int kvz_calc_mvd_cost_cabac(const encoder_state_t * const state, int x, int y, int mv_shift,
int16_t mv_cand[2][2], inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand, int32_t ref_idx, uint32_t *bitcost);
/**
 * \brief Number of bits CABAC needs for the motion vector difference *mvd.
 *
 * *cabac is used in counting mode during the call and restored to its
 * previous contents before the function returns.
 */
uint32_t kvz_get_mvd_coding_cost_cabac(vector2d_t *mvd, cabac_data_t* cabac);
extern const float kvz_f_entropy_bits[128];
#define CTX_ENTROPY_FBITS(ctx,val) kvz_f_entropy_bits[(ctx)->uc_state ^ (val)]

View file

@ -29,6 +29,7 @@
#include "inter.h"
#include "strategies/strategies-picture.h"
#include "strategies/strategies-ipol.h"
#include "rdo.h"
// Temporarily for debugging.
@ -49,7 +50,7 @@ static uint32_t get_ep_ex_golomb_bitcost(uint32_t symbol, uint32_t count)
}
static uint32_t get_mvd_coding_cost(vector2d_t *mvd)
static uint32_t get_mvd_coding_cost(vector2d_t *mvd, cabac_data_t* cabac)
{
uint32_t bitcost = 0;
const int32_t mvd_hor = mvd->x;
@ -112,11 +113,11 @@ static int calc_mvd_cost(const encoder_state_t * const state, int x, int y, int
if(!merged) {
mvd_temp1.x = x - mv_cand[0][0];
mvd_temp1.y = y - mv_cand[0][1];
cand1_cost = get_mvd_coding_cost(&mvd_temp1);
cand1_cost = get_mvd_coding_cost(&mvd_temp1, NULL);
mvd_temp2.x = x - mv_cand[1][0];
mvd_temp2.y = y - mv_cand[1][1];
cand2_cost = get_mvd_coding_cost(&mvd_temp2);
cand2_cost = get_mvd_coding_cost(&mvd_temp2, NULL);
// Select candidate 1 if it has lower cost
if (cand2_cost < cand1_cost) {
@ -140,6 +141,14 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
vector2d_t mv_best = { 0, 0 };
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
assert(pattern_type < 4);
//implemented search patterns
@ -251,7 +260,7 @@ unsigned kvz_tz_pattern_search(const encoder_state_t * const state, const kvz_pi
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + current->y,
block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
cost += calc_mvd(state, mv->x + current->x, mv->y + current->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + current->x,
@ -292,6 +301,13 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
int k;
vector2d_t mv_best = { 0, 0 };
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
//compute SAD values for every point in the iRaster downsampled version of the current search area
for (i = iSearchRange; i >= -iSearchRange; i -= iRaster)
@ -308,7 +324,7 @@ unsigned kvz_tz_raster_search(const encoder_state_t * const state, const kvz_pic
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv->y + i,
block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
cost += calc_mvd(state, mv->x + k, mv->y + i, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv->x + k,
@ -361,6 +377,13 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
unsigned best_index = num_cand;
int max_lcu_below = -1;
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
if (state->encoder_control->owf) {
max_lcu_below = 1;
}
@ -376,7 +399,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below);
best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &best_bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -401,7 +424,7 @@ static unsigned tz_search(const encoder_state_t * const state, unsigned depth,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -530,6 +553,14 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
unsigned i;
unsigned best_index = 0; // Index of large_hexbs or finally small_hexbs.
int max_lcu_below = -1;
int (*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
if (state->encoder_control->owf) {
max_lcu_below = 1;
@ -553,7 +584,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below);
best_cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
best_cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
best_bitcost = bitcost;
best_index = num_cand;
@ -577,7 +608,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y,
block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
cost += calc_mvd(state, mv.x, mv.y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x,
@ -610,7 +641,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
@ -653,7 +684,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=large_hexbs_iterative,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
@ -685,7 +716,7 @@ static unsigned hexagon_search(const encoder_state_t * const state, unsigned dep
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
block_width, block_width, max_lcu_below);
cost += calc_mvd_cost(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd(state, mv.x + offset->x, mv.y + offset->y, 2, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
PERFORMANCE_MEASURE_END(KVZ_PERF_SEARCHPX, state->encoder_control->threadqueue, "type=sad,step=small_hexbs,frame=%d,tile=%d,px_x=%d-%d,px_y=%d-%d,ref_px_x=%d-%d,ref_px_y=%d-%d", state->global->frame, state->tile->id, orig->x, orig->x + block_width, orig->y, orig->y + block_width,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
(state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x + block_width,
@ -822,6 +853,13 @@ static unsigned search_frac(const encoder_state_t * const state,
kvz_pixel dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
kvz_pixel* dst_off = &dst[dst_stride*4+4];
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
kvz_get_extended_block(orig->x, orig->y, mv.x-1, mv.y-1,
state->tile->lcu_offset_x * LCU_WIDTH,
state->tile->lcu_offset_y * LCU_WIDTH,
@ -855,7 +893,7 @@ static unsigned search_frac(const encoder_state_t * const state,
cost = satd(tmp_pic,tmp_filtered);
cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
if (cost < best_cost) {
best_cost = cost;
@ -891,7 +929,7 @@ static unsigned search_frac(const encoder_state_t * const state,
cost = satd(tmp_pic,tmp_filtered);
cost += calc_mvd_cost(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
cost += calc_mvd(state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand, merge_cand, num_cand, ref_idx, &bitcost);
if (cost < best_cost) {
best_cost = cost;
@ -934,6 +972,12 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
// Get list of candidates
int16_t num_cand = kvz_inter_get_merge_cand(state, x, y, depth, merge_cand, lcu);
uint32_t(*get_mvd_cost)(vector2d_t *, cabac_data_t*) = get_mvd_coding_cost;
if (state->encoder_control->cfg->mv_rdo) {
get_mvd_cost = kvz_get_mvd_coding_cost_cabac;
}
int max_lcu_below = -1;
if (state->encoder_control->owf) {
@ -1018,11 +1062,11 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
mvd_temp1.x = mv.x - mv_cand[0][0];
mvd_temp1.y = mv.y - mv_cand[0][1];
cand1_cost = get_mvd_coding_cost(&mvd_temp1);
cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac);
mvd_temp2.x = mv.x - mv_cand[1][0];
mvd_temp2.y = mv.y - mv_cand[1][1];
cand2_cost = get_mvd_coding_cost(&mvd_temp2);
cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac);
// Select candidate 1 if it has lower cost
if (cand2_cost < cand1_cost) {
@ -1059,6 +1103,15 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
static const uint8_t priorityList0[] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
static const uint8_t priorityList1[] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
uint8_t cutoff = num_cand;
int(*calc_mvd)(const encoder_state_t * const, int, int, int,
int16_t[2][2], inter_merge_cand_t[MRG_MAX_NUM_CANDS],
int16_t, int32_t, uint32_t *) = calc_mvd_cost;
if (state->encoder_control->cfg->mv_rdo) {
calc_mvd = kvz_calc_mvd_cost_cabac;
}
for (int32_t idx = 0; idx<cutoff*(cutoff - 1); idx++) {
uint8_t i = priorityList0[idx];
uint8_t j = priorityList1[idx];
@ -1109,8 +1162,8 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
cost = satd(tmp_pic, tmp_block);
cost += calc_mvd_cost(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
cost += calc_mvd_cost(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
cost += calc_mvd(state, merge_cand[i].mv[0][0], merge_cand[i].mv[0][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[0]);
cost += calc_mvd(state, merge_cand[i].mv[1][0], merge_cand[i].mv[1][1], 0, mv_cand, merge_cand, 0, ref_idx, &bitcost[1]);
if (cost < cur_cu->inter.cost) {
@ -1154,11 +1207,11 @@ int kvz_search_cu_inter(const encoder_state_t * const state, int x, int y, int d
mvd_temp1.x = cur_cu->inter.mv[reflist][0] - mv_cand[0][0];
mvd_temp1.y = cur_cu->inter.mv[reflist][1] - mv_cand[0][1];
cand1_cost = get_mvd_coding_cost(&mvd_temp1);
cand1_cost = get_mvd_cost(&mvd_temp1, (cabac_data_t*)&state->cabac);
mvd_temp2.x = cur_cu->inter.mv[reflist][0] - mv_cand[1][0];
mvd_temp2.y = cur_cu->inter.mv[reflist][1] - mv_cand[1][1];
cand2_cost = get_mvd_coding_cost(&mvd_temp2);
cand2_cost = get_mvd_cost(&mvd_temp2, (cabac_data_t*)&state->cabac);
// Select candidate 1 if it has lower cost
if (cand2_cost < cand1_cost) {