Preparation for accurately counting inter bits

This commit is contained in:
Joose Sainio 2021-12-13 10:43:19 +02:00
parent 311fceade7
commit d8648fe1de
11 changed files with 96 additions and 80 deletions

View file

@ -308,11 +308,11 @@ static void encode_transform_coeff(encoder_state_t * const state,
}
}
static void encode_inter_prediction_unit(encoder_state_t * const state,
void kvz_encode_inter_prediction_unit(encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int x, int y, int width, int height,
int depth)
int depth, lcu_t* lcu)
{
// Mergeflag
int16_t num_cand = 0;
@ -385,10 +385,20 @@ static void encode_inter_prediction_unit(encoder_state_t * const state,
if (state->frame->ref_list != REF_PIC_LIST_1 || cur_cu->inter.mv_dir != 3) {
int16_t mv_cand[2][2];
kvz_inter_get_mv_cand_cua(
if (lcu) {
kvz_inter_get_mv_cand(
state,
x, y, width, height,
mv_cand, cur_cu, ref_list_idx);
mv_cand, cur_cu,
lcu, ref_list_idx);
}
else {
kvz_inter_get_mv_cand_cua(
state,
x, y, width, height,
mv_cand, cur_cu, ref_list_idx
);
}
uint8_t cu_mv_cand = CU_GET_MV_CAND(cur_cu, ref_list_idx);
const int32_t mvd_hor = cur_cu->inter.mv[ref_list_idx][0] - mv_cand[cu_mv_cand][0];
@ -855,7 +865,7 @@ void kvz_encode_coding_tree(encoder_state_t * const state,
const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, i);
const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y);
encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth);
kvz_encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth, NULL);
}
{

View file

@ -51,6 +51,12 @@ void kvz_encode_mvd(encoder_state_t * const state,
int32_t mvd_hor,
int32_t mvd_ver);
/**
 * \brief Encode one inter prediction unit with the given CABAC instance.
 *
 * \param state   encoder state
 * \param cabac   CABAC data used for the coding
 * \param cur_cu  CU info of the prediction unit being coded
 * \param x       PU x position (kvz_encode_coding_tree passes pu_x;
 *                presumably in luma pixels -- TODO confirm)
 * \param y       PU y position (see x)
 * \param width   PU width
 * \param height  PU height
 * \param depth   depth of the CU (presumably the quadtree depth -- TODO confirm)
 * \param lcu     Selects how motion vector candidates are fetched when they
 *                are needed: if non-NULL, kvz_inter_get_mv_cand() is called
 *                with this LCU; if NULL, kvz_inter_get_mv_cand_cua() is
 *                called instead. kvz_encode_coding_tree passes NULL.
 */
void kvz_encode_inter_prediction_unit(encoder_state_t* const state,
cabac_data_t* const cabac,
const cu_info_t* const cur_cu,
int x, int y, int width, int height,
int depth, lcu_t* lcu);
void kvz_encode_last_significant_xy(cabac_data_t * const cabac,
uint8_t lastpos_x, uint8_t lastpos_y,
uint8_t width, uint8_t height,

View file

@ -40,7 +40,7 @@ static uint16_t to_q88(float f)
return (uint16_t)(f * 256.0f + 0.5f);
}
static uint64_t to_4xq88(const float f[4])
static uint64_t to_4xq88(const double f[4])
{
int i;
uint64_t result = 0;
@ -58,9 +58,9 @@ int kvz_fast_coeff_table_parse(fast_coeff_table_t *fast_coeff_table, FILE *fast_
uint64_t *wts_by_qp = fast_coeff_table->wts_by_qp;
for (i = 0; i < MAX_FAST_COEFF_COST_QP; i++) {
float curr_wts[4];
double curr_wts[4];
if (fscanf(fast_coeff_table_f, "%f %f %f %f\n", curr_wts + 0,
if (fscanf(fast_coeff_table_f, "%lf %lf %lf %lf\n", curr_wts + 0,
curr_wts + 1,
curr_wts + 2,
curr_wts + 3) != 4) {

View file

@ -45,7 +45,7 @@ typedef struct {
// Weights for 4 buckets (coeff 0, coeff 1, coeff 2, coeff >= 3), for QPs from
// 0 to MAX_FAST_COEFF_COST_QP
static const float default_fast_coeff_cost_wts[][4] = {
static const double default_fast_coeff_cost_wts[][4] = {
// Just extend it by stretching the first actual values..
{0.164240, 4.161530, 3.509033, 6.928047},
{0.164240, 4.161530, 3.509033, 6.928047},

View file

@ -110,7 +110,7 @@
typedef int16_t coeff_t;
// #define VERBOSE 1
#define VERBOSE 1
/* CONFIG VARIABLES */

View file

@ -1228,7 +1228,7 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
int32_t width,
int32_t height,
const merge_candidates_t *merge_cand,
const cu_info_t *cur_cu,
const cu_info_t * const cur_cu,
int8_t reflist,
int16_t mv_cand[2][2])
{
@ -1335,7 +1335,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state,
int32_t width,
int32_t height,
int16_t mv_cand[2][2],
cu_info_t* cur_cu,
const cu_info_t * const cur_cu,
lcu_t *lcu,
int8_t reflist)
{

View file

@ -88,7 +88,7 @@ void kvz_inter_get_mv_cand(const encoder_state_t * const state,
int32_t width,
int32_t height,
int16_t mv_cand[2][2],
cu_info_t* cur_cu,
const cu_info_t* cur_cu,
lcu_t *lcu,
int8_t reflist);

View file

@ -318,7 +318,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
// However, if we have different tr_depth, the bits cannot be written in correct
// order anyways so do not touch the chroma cbf here.
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
const cabac_ctx_t* cr_ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]);
cabac_ctx_t* cr_ctx = &(state->search_cabac.ctx.qt_cbf_model_chroma[tr_depth]);
cabac->cur_ctx = cr_ctx;
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
@ -536,7 +536,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
int cu_width = LCU_WIDTH >> depth;
double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE;
uint32_t inter_bitcost = MAX_INT;
double inter_bitcost = MAX_INT;
cu_info_t *cur_cu;
cabac_data_t pre_search_cabac;
memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
@ -600,7 +600,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (can_use_inter) {
double mode_cost;
uint32_t mode_bitcost;
double mode_bitcost;
kvz_search_cu_inter(state,
x, y,
depth,
@ -760,7 +760,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}
else if(depth == MAX_DEPTH && cur_cu->type == CU_INTRA) {
// Add cost of intra part_size.
const cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
bits += CTX_ENTROPY_FBITS(ctx, 1); // NxN
state->search_cabac.cur_ctx = ctx;
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");
@ -835,7 +835,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (depth < MAX_DEPTH) {
// Add cost of cu_split_flag.
uint8_t split_model = get_ctx_cu_split_model(lcu, x, y, depth);
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
cabac_ctx_t *ctx = &(state->search_cabac.ctx.split_flag_model[split_model]);
split_cost += CTX_ENTROPY_FBITS(ctx, 1) * state->lambda;
state->search_cabac.cur_ctx = ctx;
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");
@ -844,7 +844,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (cur_cu->type == CU_INTRA && depth == MAX_DEPTH) {
// Add cost of intra part_size.
const cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]);
cabac_ctx_t *ctx = &(state->search_cabac.ctx.part_size_model[0]);
split_cost += CTX_ENTROPY_FBITS(ctx, 0) * state->lambda; // NxN
state->search_cabac.cur_ctx = ctx;
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 0), x, y, depth, "split");
@ -893,7 +893,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}
else if (depth == MAX_DEPTH && cur_cu->type == CU_INTRA) {
// Add cost of intra part_size.
const cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
cabac_ctx_t* ctx = &(state->search_cabac.ctx.part_size_model[0]);
bits += CTX_ENTROPY_FBITS(ctx, 1); // NxN
state->search_cabac.cur_ctx = ctx;
FILE_BITS(CTX_ENTROPY_FBITS(ctx, 1), x, y, depth, "split");

View file

@ -59,7 +59,7 @@ typedef struct unit_stats_map_t {
cu_info_t unit[MAX_UNIT_STATS_MAP_SIZE]; //!< list of searched units
double cost[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching RD costs
uint32_t bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs
double bits[MAX_UNIT_STATS_MAP_SIZE]; //!< list of matching bit costs
int8_t keys[MAX_UNIT_STATS_MAP_SIZE]; //!< list of keys (indices) to elements in the other arrays
int size; //!< number of active elements in the lists
} unit_stats_map_t;

View file

@ -199,15 +199,15 @@ static INLINE bool intmv_within_tile(const inter_search_info_t *info, int x, int
* \return true if best_mv was changed, false otherwise
*/
static bool check_mv_cost(inter_search_info_t *info,
int x,
int y,
double *best_cost,
uint32_t *best_bits,
vector2d_t *best_mv)
int x,
int y,
double *best_cost,
double* best_bits,
vector2d_t *best_mv)
{
if (!intmv_within_tile(info, x, y)) return false;
uint32_t bitcost = 0;
double bitcost = 0;
double cost = kvz_image_calc_sad(
info->pic,
info->ref,
@ -292,10 +292,10 @@ static bool mv_in_merge(const inter_search_info_t *info, vector2d_t mv)
* best_mv to the best one.
*/
static void select_starting_point(inter_search_info_t *info,
vector2d_t extra_mv,
double *best_cost,
uint32_t *best_bits,
vector2d_t *best_mv)
vector2d_t extra_mv,
double *best_cost,
double* best_bits,
vector2d_t *best_mv)
{
// Check the 0-vector, so we can ignore all 0-vectors in the merge cand list.
check_mv_cost(info, 0, 0, best_cost, best_bits, best_mv);
@ -394,9 +394,9 @@ static double calc_mvd_cost(const encoder_state_t *state,
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost)
double* bitcost)
{
uint32_t temp_bitcost = 0;
double temp_bitcost = 0;
uint32_t merge_idx;
int8_t merged = 0;
@ -429,9 +429,9 @@ static double calc_mvd_cost(const encoder_state_t *state,
static bool early_terminate(inter_search_info_t *info,
double *best_cost,
uint32_t *best_bits,
vector2d_t *best_mv)
double *best_cost,
double* best_bits,
vector2d_t *best_mv)
{
static const vector2d_t small_hexbs[7] = {
{ 0, -1 }, { -1, 0 }, { 0, 1 }, { 1, 0 },
@ -485,7 +485,7 @@ void kvz_tz_pattern_search(inter_search_info_t *info,
vector2d_t mv,
int *best_dist,
double *best_cost,
uint32_t *best_bits,
double* best_bits,
vector2d_t *best_mv)
{
assert(pattern_type < 4);
@ -603,7 +603,7 @@ void kvz_tz_raster_search(inter_search_info_t *info,
int iSearchRange,
int iRaster,
double *best_cost,
uint32_t *best_bits,
double* best_bits,
vector2d_t *best_mv)
{
const vector2d_t mv = { best_mv->x >> 2, best_mv->y >> 2 };
@ -618,10 +618,10 @@ void kvz_tz_raster_search(inter_search_info_t *info,
static void tz_search(inter_search_info_t *info,
vector2d_t extra_mv,
double *best_cost,
uint32_t *best_bits,
vector2d_t *best_mv)
vector2d_t extra_mv,
double *best_cost,
double* best_bits,
vector2d_t *best_mv)
{
//TZ parameters
const int iSearchRange = 96; // search range for each stage
@ -705,11 +705,11 @@ static void tz_search(inter_search_info_t *info,
* points like 0,0 might be used, such as vectors from top or left.
*/
static void hexagon_search(inter_search_info_t *info,
vector2d_t extra_mv,
uint32_t steps,
double *best_cost,
uint32_t *best_bits,
vector2d_t *best_mv)
vector2d_t extra_mv,
uint32_t steps,
double *best_cost,
double* best_bits,
vector2d_t *best_mv)
{
// The start of the hexagonal pattern has been repeated at the end so that
// the indices between 1-6 can be used as the start of a 3-point list of new
@ -803,11 +803,11 @@ static void hexagon_search(inter_search_info_t *info,
* points like 0,0 might be used, such as vectors from top or left.
**/
static void diamond_search(inter_search_info_t *info,
vector2d_t extra_mv,
uint32_t steps,
double *best_cost,
uint32_t *best_bits,
vector2d_t *best_mv)
vector2d_t extra_mv,
uint32_t steps,
double *best_cost,
double* best_bits,
vector2d_t *best_mv)
{
enum diapos {
DIA_UP = 0,
@ -888,7 +888,7 @@ static void search_mv_full(inter_search_info_t *info,
int32_t search_range,
vector2d_t extra_mv,
double *best_cost,
uint32_t *best_bits,
double* best_bits,
vector2d_t *best_mv)
{
// Search around the 0-vector.
@ -968,7 +968,7 @@ static void search_mv_full(inter_search_info_t *info,
*/
static void search_frac(inter_search_info_t *info,
double *best_cost,
uint32_t *best_bits,
double *best_bits,
vector2d_t *best_mv)
{
// Map indexes to relative coordinates in the following way:
@ -985,8 +985,8 @@ static void search_frac(inter_search_info_t *info,
vector2d_t mv = { best_mv->x >> 2, best_mv->y >> 2 };
double cost = MAX_DOUBLE;
uint32_t bitcost = 0;
uint32_t bitcosts[4] = { 0 };
double bitcost = 0;
double bitcosts[4] = { 0 };
unsigned best_index = 0;
// Keep this as unsigned until SAD / SATD functions are updated
@ -1314,7 +1314,7 @@ static void search_pu_inter_ref(inter_search_info_t *info,
}
double best_cost = MAX_DOUBLE;
uint32_t best_bits = MAX_INT;
double best_bits = MAX_INT;
// Select starting point from among merge candidates. These should
// include both mv_cand vectors and (0, 0).
@ -1338,12 +1338,12 @@ static void search_pu_inter_ref(inter_search_info_t *info,
case KVZ_IME_DIA:
diamond_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps,
&best_cost, &best_bits, &best_mv);
&best_cost, &best_bits, &best_mv);
break;
default:
hexagon_search(info, best_mv, info->state->encoder_control->cfg.me_max_steps,
&best_cost, &best_bits, &best_mv);
&best_cost, &best_bits, &best_mv);
break;
}
}
@ -1484,7 +1484,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
double cost =
kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, frame->source->width);
uint32_t bitcost[2] = { 0, 0 };
double bitcost[2] = { 0, 0 };
cost += info->mvd_cost_func(info->state,
merge_cand[i].mv[0][0],
@ -1827,7 +1827,7 @@ static void search_pu_inter(encoder_state_t * const state,
list);
double frac_cost = MAX_DOUBLE;
uint32_t frac_bits = MAX_INT;
double frac_bits = MAX_INT;
vector2d_t frac_mv = { unipred_pu->inter.mv[list][0], unipred_pu->inter.mv[list][1] };
search_frac(info, &frac_cost, &frac_bits, &frac_mv);
@ -1917,7 +1917,7 @@ static void search_pu_inter(encoder_state_t * const state,
best_bipred_cost =
kvz_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH);
uint32_t bitcost[2] = { 0, 0 };
double bitcost[2] = { 0, 0 };
best_bipred_cost += info->mvd_cost_func(info->state,
bipred_pu->inter.mv[0][0],
@ -1990,10 +1990,10 @@ static void search_pu_inter(encoder_state_t * const state,
* \param inter_bitcost Return inter bitcost
*/
void kvz_cu_cost_inter_rd2(encoder_state_t * const state,
int x, int y, int depth,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost){
int x, int y, int depth,
lcu_t *lcu,
double *inter_cost,
double* inter_bitcost){
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
int tr_depth = MAX(1, depth);
@ -2040,7 +2040,7 @@ void kvz_search_cu_inter(encoder_state_t * const state,
int x, int y, int depth,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost)
double* inter_bitcost)
{
*inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_INT;
@ -2108,10 +2108,10 @@ void kvz_search_cu_inter(encoder_state_t * const state,
// Calculate more accurate cost when needed
if (state->encoder_control->cfg.rdo >= 2) {
kvz_cu_cost_inter_rd2(state,
x, y, depth,
lcu,
inter_cost,
inter_bitcost);
x, y, depth,
lcu,
inter_cost,
inter_bitcost);
}
if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) {
@ -2146,7 +2146,7 @@ void kvz_search_cu_smp(encoder_state_t * const state,
part_mode_t part_mode,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost)
double* inter_bitcost)
{
*inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_INT;
@ -2173,7 +2173,7 @@ void kvz_search_cu_smp(encoder_state_t * const state,
const int height_pu = PU_GET_H(part_mode, width, i);
double cost = MAX_DOUBLE;
uint32_t bitcost = MAX_INT;
double bitcost = MAX_INT;
search_pu_inter(state, x, y, depth, part_mode, i, lcu, amvp, &merge, &info);
@ -2250,10 +2250,10 @@ void kvz_search_cu_smp(encoder_state_t * const state,
// Calculate more accurate cost when needed
if (state->encoder_control->cfg.rdo >= 2) {
kvz_cu_cost_inter_rd2(state,
x, y, depth,
lcu,
inter_cost,
inter_bitcost);
x, y, depth,
lcu,
inter_cost,
inter_bitcost);
} else {
*inter_cost += state->lambda_sqrt * smp_extra_bits;
}

View file

@ -71,13 +71,13 @@ typedef double kvz_mvd_cost_func(const encoder_state_t *state,
inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS],
int16_t num_cand,
int32_t ref_idx,
uint32_t *bitcost);
double *bitcost);
void kvz_search_cu_inter(encoder_state_t * const state,
int x, int y, int depth,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost);
double* inter_bitcost);
void kvz_search_cu_smp(encoder_state_t * const state,
int x, int y,
@ -85,7 +85,7 @@ void kvz_search_cu_smp(encoder_state_t * const state,
part_mode_t part_mode,
lcu_t *lcu,
double *inter_cost,
uint32_t *inter_bitcost);
double* inter_bitcost);
unsigned kvz_inter_satd_cost(const encoder_state_t* state,