Enable chroma scaling

WIP: user defined scaling array
This commit is contained in:
Joose Sainio 2021-03-16 10:22:40 +02:00
parent 412781db41
commit b2076d3b39
12 changed files with 129 additions and 35 deletions

View file

@ -175,6 +175,20 @@ int kvz_config_init(kvz_config *cfg)
cfg->file_format = KVZ_FORMAT_AUTO;
cfg->stats_file_prefix = NULL;
int8_t in[] = { 17, 27, 32, 44 };
int8_t out[] = { 17, 29, 34, 41 };
cfg->num_used_table = 1;
cfg->qp_table_length_minus1[0] = 2;
cfg->qp_table_start_minus26[0] = 17 - 26;
cfg->delta_qp_in_val_minus1[0] = malloc(cfg->qp_table_length_minus1[0] + 1);
cfg->delta_qp_out_val[0] = malloc(cfg->qp_table_length_minus1[0] + 1);
for (int i = 0; i < cfg->qp_table_length_minus1[0] + 1; i++) {
cfg->delta_qp_in_val_minus1[0][i] = in[i + 1] - in[i] - (int8_t)1;
cfg->delta_qp_out_val[0][i] = out[i + 1] - out[i];
}
return 1;
}
@ -191,6 +205,10 @@ int kvz_config_destroy(kvz_config *cfg)
{
FREE_POINTER(cfg->param_set_map);
}
for (int i = 0; i < cfg->num_used_table; i++) {
if (cfg->delta_qp_in_val_minus1[i]) FREE_POINTER(cfg->delta_qp_in_val_minus1[i]);
if (cfg->delta_qp_out_val[i]) FREE_POINTER(cfg->delta_qp_out_val[i]);
}
}
free(cfg);

View file

@ -199,6 +199,49 @@ static void init_erp_aqp_roi(encoder_control_t* encoder,
}
/**
 * \brief Derive one chroma QP mapping table from its pivot-point description.
 *
 * The configuration describes the table as a start QP followed by a list of
 * (input delta, output delta) pairs. The pivots are expanded to absolute
 * QP values, the table is linearly interpolated between consecutive pivots,
 * and it is extended with a slope of one (clipped to the valid QP range)
 * below the first pivot and above the last one.
 *
 * \param cfg  encoder configuration holding the table description
 * \param i    index of the table to derive
 * \return     newly allocated table of MAX_QP + 1 entries, indexed by QP in
 *             the range 0..MAX_QP; the caller owns it and must free it.
 *             NULL if the allocation fails or if the described pivots do not
 *             fit in the table.
 */
static int8_t* derive_chroma_QP_mapping_table(const kvz_config* const cfg, int i)
{
  const int MAX_QP = 63;
  // Capacity of the pivot arrays; pivot j + 1 is written for every j up to
  // and including qp_table_length_minus1, so that value must stay below
  // MAX_PIVOTS - 1.
  enum { MAX_PIVOTS = 16 };
  // Pivots are accumulated in int so that sums of int8_t deltas cannot wrap.
  int qpInVal[MAX_PIVOTS], qpOutVal[MAX_PIVOTS];
  const int qpBdOffsetC = (cfg->input_bitdepth - 8) * 6;
  const int numPtsInCQPTableMinus1 = cfg->qp_table_length_minus1[i];
  const int8_t* const deltaIn = cfg->delta_qp_in_val_minus1[i];
  const int8_t* const deltaOut = cfg->delta_qp_out_val[i];

  if (deltaIn == NULL || deltaOut == NULL) {
    return NULL;
  }
  if (numPtsInCQPTableMinus1 < 0 || numPtsInCQPTableMinus1 + 1 >= MAX_PIVOTS) {
    return NULL;
  }

  qpInVal[0] = cfg->qp_table_start_minus26[i] + 26;
  qpOutVal[0] = qpInVal[0];
  // The table is indexed directly by QP, so every pivot has to lie in 0..MAX_QP.
  if (qpInVal[0] < 0 || qpInVal[0] > MAX_QP) {
    return NULL;
  }
  for (int j = 0; j <= numPtsInCQPTableMinus1; j++)
  {
    // A negative "minus1" delta would make the pivots non-increasing.
    if (deltaIn[j] < 0) {
      return NULL;
    }
    qpInVal[j + 1] = qpInVal[j] + deltaIn[j] + 1;
    qpOutVal[j + 1] = qpOutVal[j] + deltaOut[j];
    if (qpInVal[j + 1] > MAX_QP) {
      return NULL;
    }
  }

  int8_t* table = calloc(MAX_QP + 1, sizeof *table);
  if (table == NULL) {
    return NULL;
  }

  table[qpInVal[0]] = qpOutVal[0];

  // Below the first pivot: slope of one, clipped to the valid range. The
  // buffer has no entries for negative QPs, so the fill stops at index 0
  // even when -qpBdOffsetC is negative.
  const int lowestIndex = (-qpBdOffsetC > 0) ? -qpBdOffsetC : 0;
  for (int k = qpInVal[0] - 1; k >= lowestIndex; k--)
  {
    table[k] = CLIP(-qpBdOffsetC, MAX_QP, table[k + 1] - 1);
  }

  // Between pivots: linear interpolation with rounding (sh is half the
  // input distance between the two pivots).
  for (int j = 0; j <= numPtsInCQPTableMinus1; j++)
  {
    const int inDelta = deltaIn[j] + 1;
    const int sh = inDelta >> 1;
    for (int k = qpInVal[j] + 1, m = 1; k <= qpInVal[j + 1]; k++, m++)
    {
      table[k] = table[qpInVal[j]] + ((qpOutVal[j + 1] - qpOutVal[j]) * m + sh) / inDelta;
    }
  }

  // Above the last pivot: slope of one, clipped to the valid range.
  for (int k = qpInVal[numPtsInCQPTableMinus1 + 1] + 1; k <= MAX_QP; k++)
  {
    table[k] = CLIP(-qpBdOffsetC, MAX_QP, table[k - 1] + 1);
  }

  return table;
}
/**
* \brief Allocate and initialize an encoder control structure.
*
@ -655,6 +698,10 @@ encoder_control_t* kvz_encoder_control_init(const kvz_config *const cfg)
memcpy(encoder->cfg.optional_key, cfg->optional_key, 16);
}
for (int i = 0; i < cfg->num_used_table; i++) {
encoder->qp_map[i] = derive_chroma_QP_mapping_table(cfg, i);
}
return encoder;
init_failed:
@ -691,6 +738,9 @@ void kvz_encoder_control_free(encoder_control_t *const encoder)
kvz_threadqueue_free(encoder->threadqueue);
encoder->threadqueue = NULL;
for (int i = 0; i < encoder->cfg.num_used_table; i++) {
if (encoder->qp_map[i]) FREE_POINTER(encoder->qp_map[i]);
}
free(encoder);
}

View file

@ -135,6 +135,8 @@ typedef struct encoder_control_t
int32_t poc_lsb_bits;
int8_t* qp_map[3];
} encoder_control_t;
encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg);

View file

@ -608,13 +608,17 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
WRITE_U(stream, 0, 1, "sps_joint_cbcr_enabled_flag");
if (encoder->chroma_format != KVZ_CSP_400) {
WRITE_U(stream, 1, 1, "same_qp_table_for_chroma"); //TODO: Enable chroma QP scaling and fix kvz_get_scaled_qp()
WRITE_U(stream, 1, 1, "same_qp_table_for_chroma");
WRITE_SE(stream, 0, "qp_table_starts_minus26");
WRITE_UE(stream, 0, "num_points_in_qp_table_minus1");
for (int i = 0; i < encoder->cfg.num_used_table; i++) {
WRITE_SE(stream, encoder->cfg.qp_table_start_minus26[i], "qp_table_starts_minus26");
WRITE_UE(stream, encoder->cfg.qp_table_length_minus1[i], "num_points_in_qp_table_minus1");
WRITE_UE(stream, 0, "delta_qp_in_val_minus1");
WRITE_UE(stream, 1, "delta_qp_diff_val");
for (int j = 0; j <= encoder->cfg.qp_table_length_minus1[i]; j++) {
WRITE_UE(stream, encoder->cfg.delta_qp_in_val_minus1[i][j], "delta_qp_in_val_minus1");
WRITE_UE(stream, encoder->cfg.delta_qp_out_val[i][j] ^ encoder->cfg.delta_qp_in_val_minus1[i][j], "delta_qp_diff_val");
}
}
}

View file

@ -314,6 +314,8 @@ typedef struct encoder_state_t {
//! \brief Quantization parameter for the current LCU
int8_t qp;
double c_lambda;
/**
* \brief Whether a QP delta value must be coded for the current LCU.
*/

View file

@ -475,6 +475,12 @@ typedef struct kvz_config
char *stats_file_prefix;
struct param_set_map *param_set_map;
int8_t num_used_table;
int8_t qp_table_start_minus26[3];
int8_t qp_table_length_minus1[3];
int8_t* delta_qp_in_val_minus1[3];
int8_t* delta_qp_out_val[3];
} kvz_config;
/**

View file

@ -787,6 +787,9 @@ static double qp_to_lambda(encoder_state_t* const state, int qp)
state->lambda = est_lambda;
state->lambda_sqrt = sqrt(est_lambda);
state->qp = est_qp;
int8_t chroma_qp = encoder->qp_map[0][est_qp];
double tmpWeight = pow(2.0, (est_qp - chroma_qp) / 3.0);
state->c_lambda = est_lambda / tmpWeight;
ctu->qp = est_qp;
ctu->lambda = est_lambda;
ctu->i_cost = 0;
@ -1086,7 +1089,6 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
state->qp = CLIP_TO_QP(state->frame->QP + dqp);
state->lambda = qp_to_lambda(state, state->qp);
state->lambda_sqrt = sqrt(state->lambda);
}
else if (ctrl->cfg.target_bitrate > 0) {
const uint32_t pixels = MIN(LCU_WIDTH, state->tile->frame->width - LCU_WIDTH * pos.x) *
@ -1134,6 +1136,10 @@ void kvz_set_lcu_lambda_and_qp(encoder_state_t * const state,
lcu->lambda = state->lambda;
lcu->qp = state->qp;
int8_t chroma_qp = ctrl->qp_map[0][state->qp];
double tmpWeight = pow(2.0, (state->qp - chroma_qp) / 3.0);
state->c_lambda = state->lambda / tmpWeight;
// Apply variance adaptive quantization
if (ctrl->cfg.vaq) {
vector2d_t lcu_pos = {

View file

@ -412,9 +412,10 @@ INLINE uint32_t kvz_get_coded_level( encoder_state_t * const state, double *code
int32_t abs_level;
int32_t min_abs_level;
cabac_ctx_t* base_sig_model = type?(cabac->ctx.cu_sig_model_chroma[0]):(cabac->ctx.cu_sig_model_luma[0]);
const double lambda = type ? state->c_lambda : state->lambda;
if( !last && max_abs_level < 3 ) {
*coded_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
*coded_cost_sig = lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 0);
*coded_cost = *coded_cost0 + *coded_cost_sig;
if (max_abs_level == 0) return best_abs_level;
} else {
@ -422,13 +423,13 @@ INLINE uint32_t kvz_get_coded_level( encoder_state_t * const state, double *code
}
if( !last ) {
cur_cost_sig = state->lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
cur_cost_sig = lambda * CTX_ENTROPY_BITS(&base_sig_model[ctx_num_sig], 1);
}
min_abs_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 );
for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) {
double err = (double)(level_double - ( abs_level * (1 << q_bits) ) );
double cur_cost = err * err * temp + state->lambda *
double cur_cost = err * err * temp + lambda *
kvz_get_ic_rate( state, abs_level, ctx_num_gt1, ctx_num_gt2, ctx_num_par,
abs_go_rice, reg_bins, type);
cur_cost += cur_cost_sig;
@ -452,7 +453,7 @@ INLINE uint32_t kvz_get_coded_level( encoder_state_t * const state, double *code
*
* From HM 12.0
*/
static double get_rate_last(const encoder_state_t * const state,
static double get_rate_last(double lambda,
const uint32_t pos_x, const uint32_t pos_y,
int32_t* last_x_bits, int32_t* last_y_bits)
{
@ -465,7 +466,7 @@ static double get_rate_last(const encoder_state_t * const state,
if( ctx_y > 3 ) {
uiCost += CTX_FRAC_ONE_BIT * ((ctx_y - 2) >> 1);
}
return state->lambda * uiCost;
return lambda * uiCost;
}
static void calc_last_bits(encoder_state_t * const state, int32_t width, int32_t height, int8_t type,
@ -514,16 +515,18 @@ void kvz_rdoq_sign_hiding(
const struct sh_rates_t *const sh_rates,
const int32_t last_pos,
const coeff_t *const coeffs,
coeff_t *const quant_coeffs)
coeff_t *const quant_coeffs,
const int8_t type)
{
const encoder_control_t * const ctrl = state->encoder_control;
const double lambda = type ? state->c_lambda : state->lambda;
int inv_quant = kvz_g_inv_quant_scales[qp_scaled % 6];
// This somehow scales quant_delta into fractional bits. Instead of the bits
// being multiplied by lambda, the residual is divided by it, or something
// like that.
const int64_t rd_factor = (inv_quant * inv_quant * (1 << (2 * (qp_scaled / 6)))
/ state->lambda / 16 / (1 << (2 * (ctrl->bitdepth - 8))) + 0.5);
/ lambda / 16 / (1 << (2 * (ctrl->bitdepth - 8))) + 0.5);
const int last_cg = (last_pos - 1) >> LOG2_SCAN_SET_SIZE;
for (int32_t cg_scan = last_cg; cg_scan >= 0; cg_scan--) {
@ -665,10 +668,12 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
const uint32_t log2_block_size = kvz_g_convert_to_bit[ width ] + 2;
int32_t scalinglist_type= (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
int32_t q_bits = QUANT_SHIFT + qp_scaled/6 + transform_shift;
const double lambda = type ? state->c_lambda : state->lambda;
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
const double *err_scale = encoder->scaling_list.error_scale[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled%6];
@ -866,7 +871,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
if (sig_coeffgroup_flag[cg_blkpos] == 0) {
uint32_t ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
cg_pos_y, cg_width);
cost_coeffgroup_sig[cg_scanpos] = state->lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
cost_coeffgroup_sig[cg_scanpos] = lambda *CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig],0);
base_cost += cost_coeffgroup_sig[cg_scanpos] - rd_stats.sig_cost;
} else {
if (cg_scanpos < cg_last_scanpos){
@ -883,9 +888,9 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
ctx_sig = kvz_context_get_sig_coeff_group(sig_coeffgroup_flag, cg_pos_x,
cg_pos_y, cg_width);
cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
cost_coeffgroup_sig[cg_scanpos] = lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 1);
base_cost += cost_coeffgroup_sig[cg_scanpos];
cost_zero_cg += state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
cost_zero_cg += lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
// try to convert the current coeff group from non-zero to all-zero
cost_zero_cg += rd_stats.uncoded_dist; // distortion for resetting non-zero levels to zero levels
@ -898,7 +903,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
sig_coeffgroup_flag[cg_blkpos] = 0;
base_cost = cost_zero_cg;
cost_coeffgroup_sig[cg_scanpos] = state->lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
cost_coeffgroup_sig[cg_scanpos] = lambda * CTX_ENTROPY_BITS(&base_coeff_group_ctx[ctx_sig], 0);
// reset coeffs to 0 in this block
for (int32_t scanpos_in_cg = cg_size - 1; scanpos_in_cg >= 0; scanpos_in_cg--) {
@ -925,14 +930,14 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
int32_t best_last_idx_p1 = 0;
if( block_type != CU_INTRA && !type ) {
best_cost = block_uncoded_cost + state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
base_cost += state->lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
best_cost = block_uncoded_cost + lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),0);
base_cost += lambda * CTX_ENTROPY_BITS(&(cabac->ctx.cu_qt_root_cbf_model),1);
} else {
// ToDo: update for VVC contexts
cabac_ctx_t* base_cbf_model = type?(cabac->ctx.qt_cbf_model_cb):(cabac->ctx.qt_cbf_model_luma);
ctx_cbf = ( type ? tr_depth : !tr_depth);
best_cost = block_uncoded_cost + state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
base_cost += state->lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
best_cost = block_uncoded_cost + lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],0);
base_cost += lambda * CTX_ENTROPY_BITS(&base_cbf_model[ctx_cbf],1);
}
for ( int32_t cg_scanpos = cg_last_scanpos; cg_scanpos >= 0; cg_scanpos--) {
@ -949,7 +954,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
uint32_t pos_y = blkpos >> log2_block_size;
uint32_t pos_x = blkpos - ( pos_y << log2_block_size );
double cost_last = get_rate_last(state, pos_x, pos_y, last_x_bits,last_y_bits );
double cost_last = get_rate_last(lambda, pos_x, pos_y, last_x_bits,last_y_bits );
double totalCost = base_cost + cost_last - cost_sig[ scanpos ];
if( totalCost < best_cost ) {
@ -983,7 +988,7 @@ void kvz_rdoq(encoder_state_t * const state, coeff_t *coef, coeff_t *dest_coeff,
}
if (encoder->cfg.signhide_enable && abs_sum >= 2) {
kvz_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff);
kvz_rdoq_sign_hiding(state, qp_scaled, scan, &sh_rates, best_last_idx_p1, coef, dest_coeff, type);
}
}

View file

@ -368,7 +368,7 @@ void kvz_quant_avx2(const encoder_state_t * const state, const coeff_t * __restr
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
uint32_t log2_tr_width = kvz_math_floor_log2(height);
uint32_t log2_tr_height = kvz_math_floor_log2(width);
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
@ -738,7 +738,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
shift = 20 - QUANT_SHIFT - transform_shift;

View file

@ -42,7 +42,7 @@ void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
uint32_t log2_tr_width = kvz_math_floor_log2(height);
uint32_t log2_tr_height = kvz_math_floor_log2(width);
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
@ -295,7 +295,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6);
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
shift = 20 - QUANT_SHIFT - transform_shift;

View file

@ -126,17 +126,18 @@ static void rdpcm(const int width,
* \brief Get scaled QP used in quantization
*
*/
int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset)
int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const * const chroma_scale)
{
int32_t qp_scaled = 0;
if(type == 0) {
qp_scaled = qp + qp_offset;
} else {
qp_scaled = CLIP(-qp_offset, 57, qp);
if(true||qp_scaled < 0) { //TODO: Enable chroma QP scaling in the SPS headers
if (chroma_scale) {
qp_scaled = chroma_scale[qp] + qp_offset;
}
else {
qp_scaled = qp_scaled + qp_offset;
} else {
qp_scaled = kvz_g_chroma_scale[qp_scaled] + qp_offset;
}
}
return qp_scaled;

View file

@ -53,7 +53,7 @@ void kvz_itransform2d(const encoder_control_t * const encoder,
const cu_info_t *tu);
int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset);
int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale);
void kvz_quantize_lcu_residual(encoder_state_t *state,
bool luma,