[rdoq] improve the cost calculations and clean up unused code

This commit is contained in:
Marko Viitanen 2021-02-26 16:41:54 +02:00
parent d6379c02e0
commit 6544c25daf
4 changed files with 30 additions and 120 deletions

View file

@ -502,76 +502,6 @@ uint32_t kvz_context_get_sig_coeff_group( uint32_t *sig_coeff_group_flag,
} }
/**
 * \brief Pattern decision for context derivation process of significant_coeff_flag
 *
 * Looks at the coefficient group to the right of and the one below the
 * current group and reports which of them are flagged.
 *
 * \param sig_coeff_group_flag  flags of the coefficient groups, read row by row
 *                              with (width >> 2) entries per row
 * \param pos_x                 column of current coefficient group
 * \param pos_y                 row of current coefficient group
 * \param width                 width of the block
 * \returns -1 when width is 4; otherwise a value in 0..3 where bit 0 is set if
 *          the right neighbour flag is non-zero and bit 1 is set if the lower
 *          neighbour flag is non-zero
 */
int32_t kvz_context_calc_pattern_sig_ctx(const uint32_t *sig_coeff_group_flag, uint32_t pos_x, uint32_t pos_y, int32_t width)
{
  // Width 4 is reported as "no pattern".
  if (width == 4) {
    return -1;
  }

  // Number of coefficient groups on one side of the block.
  const uint32_t groups_per_side = (uint32_t)(width >> 2);
  const uint32_t last_group = groups_per_side - 1;

  uint32_t pattern = 0;

  // Right neighbour exists only when we are not in the last column.
  if (pos_x < last_group
      && sig_coeff_group_flag[pos_y * groups_per_side + pos_x + 1] != 0) {
    pattern |= 1;
  }

  // Lower neighbour exists only when we are not in the last row.
  if (pos_y < last_group
      && sig_coeff_group_flag[(pos_y + 1) * groups_per_side + pos_x] != 0) {
    pattern |= 2;
  }

  return (int32_t)pattern;
}
/**
 * \brief Context derivation process of coeff_abs_significant_flag
 * \param pattern_sig_ctx  pattern for current coefficient group
 * \param scan_idx         pixel scan type in use
 * \param pos_x            column of current scan position
 * \param pos_y            row of current scan position
 * \param block_type       log2 value of block size if square block, or 4 otherwise
 * \param texture_type     texture type (TEXT_LUMA...); only the value 0 is
 *                         treated specially
 * \returns ctx_inc for current scan position
 */
int32_t kvz_context_get_sig_ctx_inc(int32_t pattern_sig_ctx, uint32_t scan_idx, int32_t pos_x,
                                    int32_t pos_y, int32_t block_type, int8_t texture_type)
{
  // Fixed context increments for block_type 2, indexed by 4 * pos_y + pos_x.
  static const int32_t small_block_ctx[16] = {
    0, 1, 4, 5,
    2, 3, 4, 5,
    6, 6, 8, 8,
    7, 7, 8, 8
  };

  // The position whose coordinates sum to zero always gets increment 0.
  if (pos_x + pos_y == 0) {
    return 0;
  }

  if (block_type == 2) {
    return small_block_ctx[4 * pos_y + pos_x];
  }

  // Base offset: depends on the scan for block_type 3, on the texture otherwise.
  int32_t base;
  if (block_type == 3) {
    base = (scan_idx == SCAN_DIAG) ? 9 : 15;
  } else {
    base = (texture_type == 0) ? 21 : 12;
  }

  // Split the position into its 4x4 group index and the offset inside the group.
  const int32_t group_x = pos_x >> 2;
  const int32_t group_y = pos_y >> 2;
  const int32_t sub_x = pos_x - (group_x << 2);
  const int32_t sub_y = pos_y - (group_y << 2);

  // The neighbour pattern decides which in-group coordinate drives the count.
  int32_t weight;
  switch (pattern_sig_ctx) {
    case 0: {
      const int32_t dist = sub_x + sub_y;
      weight = (dist == 0) ? 2 : ((dist <= 2) ? 1 : 0);
      break;
    }
    case 1:
      weight = (sub_y == 0) ? 2 : ((sub_y <= 1) ? 1 : 0);
      break;
    case 2:
      weight = (sub_x == 0) ? 2 : ((sub_x <= 1) ? 1 : 0);
      break;
    default:
      weight = 2;
      break;
  }

  // Texture type 0 gets 3 extra outside the group at index (0, 0).
  const int32_t outside_first_group = (texture_type == 0 && (group_x + group_y) > 0) ? 3 : 0;

  return outside_first_group + base + weight;
}
/** /**
* \brief Context derivation process of coeff_abs_significant_flag * \brief Context derivation process of coeff_abs_significant_flag
* \param coeff pointer to the current coefficient * \param coeff pointer to the current coefficient

View file

@ -36,14 +36,9 @@ void kvz_ctx_init(cabac_ctx_t* ctx, int32_t qp, int32_t init_value, uint8_t rate
void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice); void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice);
void kvz_context_copy(encoder_state_t * target_state, const encoder_state_t * source_state); void kvz_context_copy(encoder_state_t * target_state, const encoder_state_t * source_state);
int32_t kvz_context_calc_pattern_sig_ctx( const uint32_t *sig_coeff_group_flag, uint32_t pos_x, uint32_t pos_y, int32_t width);
uint32_t kvz_context_get_sig_coeff_group( uint32_t *sig_coeff_group_flag,uint32_t pos_x, uint32_t pos_y,int32_t width); uint32_t kvz_context_get_sig_coeff_group( uint32_t *sig_coeff_group_flag,uint32_t pos_x, uint32_t pos_y,int32_t width);
int32_t kvz_context_get_sig_ctx_inc(int32_t pattern_sig_ctx,uint32_t scan_idx,int32_t pos_x,
int32_t pos_y,int32_t block_type, int8_t texture_type);
uint32_t kvz_context_get_sig_ctx_idx_abs(const coeff_t* coeff, int32_t pos_x, int32_t pos_y, uint32_t kvz_context_get_sig_ctx_idx_abs(const coeff_t* coeff, int32_t pos_x, int32_t pos_y,
uint32_t height, uint32_t width, int8_t type, uint32_t height, uint32_t width, int8_t type,
int32_t* temp_diag, int32_t* temp_sum); int32_t* temp_diag, int32_t* temp_sum);

View file

@ -305,13 +305,12 @@ INLINE int32_t kvz_get_ic_rate(encoder_state_t * const state,
uint16_t ctx_num_gt2, uint16_t ctx_num_gt2,
uint16_t ctx_num_par, uint16_t ctx_num_par,
uint16_t abs_go_rice, uint16_t abs_go_rice,
uint32_t c1_idx, uint32_t reg_bins,
uint32_t c2_idx,
int8_t type) int8_t type)
{ {
cabac_data_t * const cabac = &state->cabac; cabac_data_t * const cabac = &state->cabac;
int32_t rate = 1 << CTX_FRAC_BITS; // cost of sign bit int32_t rate = 1 << CTX_FRAC_BITS; // cost of sign bit
uint32_t base_level = (c1_idx < C1FLAG_NUMBER)? (2 + (c2_idx < C2FLAG_NUMBER)) : 1; uint32_t base_level = 4;
cabac_ctx_t *base_par_ctx = (type == 0) ? &(cabac->ctx.cu_parity_flag_model_luma[0]) : &(cabac->ctx.cu_parity_flag_model_chroma[0]); cabac_ctx_t *base_par_ctx = (type == 0) ? &(cabac->ctx.cu_parity_flag_model_luma[0]) : &(cabac->ctx.cu_parity_flag_model_chroma[0]);
cabac_ctx_t *base_gt1_ctx = (type == 0) ? &(cabac->ctx.cu_gtx_flag_model_luma[0][0]) : &(cabac->ctx.cu_gtx_flag_model_luma[0][0]); cabac_ctx_t *base_gt1_ctx = (type == 0) ? &(cabac->ctx.cu_gtx_flag_model_luma[0][0]) : &(cabac->ctx.cu_gtx_flag_model_luma[0][0]);
cabac_ctx_t* base_gt2_ctx = (type == 0) ? &(cabac->ctx.cu_gtx_flag_model_luma[1][0]) : &(cabac->ctx.cu_gtx_flag_model_luma[1][0]); cabac_ctx_t* base_gt2_ctx = (type == 0) ? &(cabac->ctx.cu_gtx_flag_model_luma[1][0]) : &(cabac->ctx.cu_gtx_flag_model_luma[1][0]);
@ -383,7 +382,7 @@ INLINE uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *cod
int32_t level_double, uint32_t max_abs_level, int32_t level_double, uint32_t max_abs_level,
uint16_t ctx_num_sig, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par, uint16_t ctx_num_sig, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par,
uint16_t abs_go_rice, uint16_t abs_go_rice,
uint32_t c1_idx, uint32_t c2_idx, uint32_t reg_bins,
int32_t q_bits,double temp, int8_t last, int8_t type) int32_t q_bits,double temp, int8_t last, int8_t type)
{ {
cabac_data_t * const cabac = &state->cabac; cabac_data_t * const cabac = &state->cabac;
@ -410,7 +409,7 @@ INLINE uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *cod
double err = (double)(level_double - ( abs_level * (1 << q_bits) ) ); double err = (double)(level_double - ( abs_level * (1 << q_bits) ) );
double cur_cost = err * err * temp + state->lambda * double cur_cost = err * err * temp + state->lambda *
kvz_get_ic_rate( state, abs_level, ctx_num_gt1, ctx_num_gt2, ctx_num_par, kvz_get_ic_rate( state, abs_level, ctx_num_gt1, ctx_num_gt2, ctx_num_par,
abs_go_rice, c1_idx, c2_idx, type); abs_go_rice, reg_bins, type);
cur_cost += cur_cost_sig; cur_cost += cur_cost_sig;
if( cur_cost < *coded_cost ) { if( cur_cost < *coded_cost ) {
@ -640,6 +639,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
uint32_t log2_tr_size = kvz_g_convert_to_bit[ width ] + 2; uint32_t log2_tr_size = kvz_g_convert_to_bit[ width ] + 2;
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; // Represents scaling through forward transform int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - log2_tr_size; // Represents scaling through forward transform
uint16_t go_rice_param = 0; uint16_t go_rice_param = 0;
const uint32_t reg_bins = (width * height * 28) >> 4;
const uint32_t log2_block_size = kvz_g_convert_to_bit[ width ] + 2; const uint32_t log2_block_size = kvz_g_convert_to_bit[ width ] + 2;
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
@ -671,12 +671,10 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
uint32_t sig_coeffgroup_flag[ 64 ]; uint32_t sig_coeffgroup_flag[ 64 ];
uint16_t ctx_set = 0; uint16_t ctx_set = 0;
int16_t c1 = 1;
int16_t c2 = 0;
double base_cost = 0; double base_cost = 0;
int32_t temp_diag = -1;
int32_t temp_sum = -1;
uint32_t c1_idx = 0;
uint32_t c2_idx = 0;
int32_t base_level; int32_t base_level;
const uint32_t *scan = kvz_g_sig_last_scan[ scan_mode ][ log2_block_size - 1 ]; const uint32_t *scan = kvz_g_sig_last_scan[ scan_mode ][ log2_block_size - 1 ];
@ -722,7 +720,6 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
if (max_abs_level > 0) { if (max_abs_level > 0) {
last_scanpos = scanpos; last_scanpos = scanpos;
ctx_set = (scanpos > 0 && type == 0) ? 2 : 0;
cg_last_scanpos = cg_scanpos; cg_last_scanpos = cg_scanpos;
sh_rates.sig_coeff_inc[blkpos] = 0; sh_rates.sig_coeff_inc[blkpos] = 0;
break; break;
@ -736,6 +733,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
return; return;
} }
for (; cg_scanpos >= 0; cg_scanpos--) cost_coeffgroup_sig[cg_scanpos] = 0; for (; cg_scanpos >= 0; cg_scanpos--) cost_coeffgroup_sig[cg_scanpos] = 0;
int32_t last_x_bits[32], last_y_bits[32]; int32_t last_x_bits[32], last_y_bits[32];
@ -746,9 +744,6 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
uint32_t cg_pos_y = cg_blkpos / num_blk_side; uint32_t cg_pos_y = cg_blkpos / num_blk_side;
uint32_t cg_pos_x = cg_blkpos - (cg_pos_y * num_blk_side); uint32_t cg_pos_x = cg_blkpos - (cg_pos_y * num_blk_side);
int32_t pattern_sig_ctx = kvz_context_calc_pattern_sig_ctx(sig_coeffgroup_flag,
cg_pos_x, cg_pos_y, width);
FILL(rd_stats, 0); FILL(rd_stats, 0);
for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) { for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
int32_t scanpos = cg_scanpos*cg_size + scanpos_in_cg; int32_t scanpos = cg_scanpos*cg_size + scanpos_in_cg;
@ -765,22 +760,27 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
block_uncoded_cost += cost_coeff0[ scanpos ]; block_uncoded_cost += cost_coeff0[ scanpos ];
//===== coefficient level estimation ===== //===== coefficient level estimation =====
int32_t level; int32_t level;
uint16_t gt1_ctx = 4 * ctx_set + c1;
uint16_t gt2_ctx = 4 * ctx_set + c1; uint16_t gt1_ctx = ctx_set;
uint16_t par_ctx = ctx_set + c2; uint16_t gt2_ctx = ctx_set;
uint16_t par_ctx = ctx_set;
if( scanpos == last_scanpos ) { if( scanpos == last_scanpos ) {
level = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ], level = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
level_double, max_abs_level, 0, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, level_double, max_abs_level, 0, gt1_ctx, gt2_ctx, par_ctx, go_rice_param,
c1_idx, c2_idx, q_bits, temp, 1, type ); reg_bins, q_bits, temp, 1, type );
} else { } else {
uint32_t pos_y = blkpos >> log2_block_size; uint32_t pos_y = blkpos >> log2_block_size;
uint32_t pos_x = blkpos - ( pos_y << log2_block_size ); uint32_t pos_x = blkpos - ( pos_y << log2_block_size );
uint16_t ctx_sig = (uint16_t)kvz_context_get_sig_ctx_inc(pattern_sig_ctx, scan_mode, pos_x, pos_y, uint16_t ctx_sig = kvz_context_get_sig_ctx_idx_abs(coef, pos_x, pos_y, width, height, type, &temp_diag, &temp_sum);
log2_block_size, type); if (temp_diag != -1) {
ctx_set = (MIN(temp_sum, 4) + 1) + (!temp_diag ? ((type == 0) ? 15 : 5) : (type == 0) ? temp_diag < 3 ? 10 : (temp_diag < 10 ? 5 : 0) : 0);
}
else ctx_set = 0;
level = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ], level = kvz_get_coded_level(state, &cost_coeff[ scanpos ], &cost_coeff0[ scanpos ], &cost_sig[ scanpos ],
level_double, max_abs_level, ctx_sig, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, level_double, max_abs_level, ctx_sig, gt1_ctx, gt2_ctx, par_ctx, go_rice_param,
c1_idx, c2_idx, q_bits, temp, 0, type ); reg_bins, q_bits, temp, 0, type );
if (encoder->cfg.signhide_enable) { if (encoder->cfg.signhide_enable) {
int greater_than_zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 1); int greater_than_zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 1);
int zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 0); int zero = CTX_ENTROPY_BITS(&baseCtx[ctx_sig], 0);
@ -788,12 +788,14 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
} }
} }
if (encoder->cfg.signhide_enable) { if (encoder->cfg.signhide_enable) {
sh_rates.quant_delta[blkpos] = (level_double - level * (1 << q_bits)) >> (q_bits - 8); sh_rates.quant_delta[blkpos] = (level_double - level * (1 << q_bits)) >> (q_bits - 8);
if (level > 0) { if (level > 0) {
int32_t rate_now = kvz_get_ic_rate(state, level, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, c1_idx, c2_idx, type); int32_t rate_now = kvz_get_ic_rate(state, level, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type);
int32_t rate_up = kvz_get_ic_rate(state, level + 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, c1_idx, c2_idx, type); int32_t rate_up = kvz_get_ic_rate(state, level + 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type);
int32_t rate_down = kvz_get_ic_rate(state, level - 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, c1_idx, c2_idx, type); int32_t rate_down = kvz_get_ic_rate(state, level - 1, gt1_ctx, gt2_ctx, par_ctx, go_rice_param, reg_bins, type);
sh_rates.inc[blkpos] = rate_up - rate_now; sh_rates.inc[blkpos] = rate_up - rate_now;
sh_rates.dec[blkpos] = rate_down - rate_now; sh_rates.dec[blkpos] = rate_down - rate_now;
} else { // level == 0 } else { // level == 0
@ -803,35 +805,18 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
dest_coeff[blkpos] = (coeff_t)level; dest_coeff[blkpos] = (coeff_t)level;
base_cost += cost_coeff[scanpos]; base_cost += cost_coeff[scanpos];
base_level = (c1_idx < C1FLAG_NUMBER) ? (2 + (c2_idx < C2FLAG_NUMBER)) : 1; base_level = 4;
if (level >= base_level) { if (level >= base_level) {
if(level > 3*(1<<go_rice_param)) { if(level > 3*(1<<go_rice_param)) {
go_rice_param = MIN(go_rice_param + 1, 4); go_rice_param = MIN(go_rice_param + 1, 4);
} }
} }
if (level >= 1) c1_idx ++;
//===== update bin model =====
if (level > 1) {
c1 = 0;
c2 += (c2 < 2);
c2_idx ++;
} else if( (c1 < 3) && (c1 > 0) && level) {
c1++;
}
//===== context set update ===== //===== context set update =====
if ((scanpos % SCAN_SET_SIZE == 0) && scanpos > 0) { if ((scanpos % SCAN_SET_SIZE == 0) && scanpos > 0) {
c2 = 0;
go_rice_param = 0; go_rice_param = 0;
c1_idx = 0;
c2_idx = 0;
ctx_set = (scanpos == SCAN_SET_SIZE || type != 0) ? 0 : 2; ctx_set = (scanpos == SCAN_SET_SIZE || type != 0) ? 0 : 2;
if( c1 == 0 ) {
ctx_set++;
}
c1 = 1;
} }
rd_stats.sig_cost += cost_sig[scanpos]; rd_stats.sig_cost += cost_sig[scanpos];

View file

@ -46,12 +46,12 @@ uint32_t kvz_get_coeff_cost(const encoder_state_t * const state,
int8_t scan_mode); int8_t scan_mode);
int32_t kvz_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par, int32_t kvz_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par,
uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type); uint16_t abs_go_rice, uint32_t reg_bins, int8_t type);
uint32_t kvz_get_coded_level(encoder_state_t * state, double* coded_cost, double* coded_cost0, double* coded_cost_sig, uint32_t kvz_get_coded_level(encoder_state_t * state, double* coded_cost, double* coded_cost0, double* coded_cost_sig,
int32_t level_double, uint32_t max_abs_level, int32_t level_double, uint32_t max_abs_level,
uint16_t ctx_num_sig, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par, uint16_t ctx_num_sig, uint16_t ctx_num_gt1, uint16_t ctx_num_gt2, uint16_t ctx_num_par,
uint16_t abs_go_rice, uint16_t abs_go_rice,
uint32_t c1_idx, uint32_t c2_idx, uint32_t reg_bins,
int32_t q_bits,double temp, int8_t last, int8_t type); int32_t q_bits,double temp, int8_t last, int8_t type);
kvz_mvd_cost_func kvz_calc_mvd_cost_cabac; kvz_mvd_cost_func kvz_calc_mvd_cost_cabac;