[mtt] Fix various small issues and DepQuant for non-square blocks

This commit is contained in:
Joose Sainio 2023-02-15 14:23:55 +02:00 committed by Marko Viitanen
parent d222718c22
commit 0f50caa2d0
5 changed files with 15 additions and 15 deletions

View file

@ -1153,7 +1153,7 @@ int uvg_dep_quant(
int32_t qp_scaled = uvg_get_scaled_qp(compID, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]); int32_t qp_scaled = uvg_get_scaled_qp(compID, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
qp_scaled = is_ts ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; qp_scaled = is_ts ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
bool needs_block_size_trafo_scale = is_ts && ((log2_tr_height + log2_tr_width) % 2 == 1); bool needs_block_size_trafo_scale = !is_ts && ((log2_tr_height + log2_tr_width) % 2 == 1);
needs_block_size_trafo_scale |= 0; // Non log2 block size needs_block_size_trafo_scale |= 0; // Non log2 block size
const int32_t scalinglist_type = (cur_tu->type == CU_INTRA ? 0 : 3) + (int8_t)compID; const int32_t scalinglist_type = (cur_tu->type == CU_INTRA ? 0 : 3) + (int8_t)compID;
@ -1252,8 +1252,8 @@ int uvg_dep_quant(
uint32_t pos_y_next = blkpos_next >> log2_tr_width; uint32_t pos_y_next = blkpos_next >> log2_tr_width;
uint32_t pos_x_next = blkpos_next - (pos_y_next << log2_tr_width); uint32_t pos_x_next = blkpos_next - (pos_y_next << log2_tr_width);
uint32_t cg_blockpos_next = scanIdx ? cg_scan[(scanIdx -1) >> 4] : 0; uint32_t cg_blockpos_next = scanIdx ? cg_scan[(scanIdx -1) >> 4] : 0;
uint32_t cg_pos_y_next = cg_blockpos_next / height_in_sbb; uint32_t cg_pos_y_next = cg_blockpos_next / width_in_sbb;
uint32_t cg_pos_x_next = cg_blockpos_next - (cg_pos_y_next * height_in_sbb); uint32_t cg_pos_x_next = cg_blockpos_next - (cg_pos_y_next * width_in_sbb);
uint32_t diag = pos_y_next + pos_x_next; uint32_t diag = pos_y_next + pos_x_next;
uint32_t sig_ctx_offset = compID == COLOR_Y ? (diag < 2 ? 8 : diag < 5 ? 4 : 0) : (diag < 2 ? 4 : 0); uint32_t sig_ctx_offset = compID == COLOR_Y ? (diag < 2 ? 8 : diag < 5 ? 4 : 0) : (diag < 2 ? 4 : 0);
@ -1308,10 +1308,7 @@ int uvg_dep_quant(
width, width,
height); //tu.cu->slice->getReverseLastSigCoeffFlag()); height); //tu.cu->slice->getReverseLastSigCoeffFlag());
} }
Decision temp[8];
Decision* decisions = ctxs->m_trellis[scanIdx];
memcpy(temp, decisions, sizeof(Decision) * 8);
decisions++;
} }
//===== find best path ===== //===== find best path =====

View file

@ -2070,7 +2070,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state,
int cbf = cbf_is_set(search_data->pred_cu.cbf, COLOR_Y); int cbf = cbf_is_set(search_data->pred_cu.cbf, COLOR_Y);
if (i + 1 != split_limit && search_data->best_isp_cbfs != 0) { if (i + 1 != split_limit || search_data->best_isp_cbfs != 1 << (split_limit - 1)) {
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_luma[cbf_context], cbf, coeff_bits, "cbf_luma_isp_recon"); CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_luma[cbf_context], cbf, coeff_bits, "cbf_luma_isp_recon");
} }
cost += ssd + coeff_bits * state->lambda; cost += ssd + coeff_bits * state->lambda;

View file

@ -1798,7 +1798,7 @@ static double search_cu(
// 3.9 // 3.9
const double factor = state->qp > 30 ? 1.1 : 1.075; const double factor = state->qp > 30 ? 1.1 : 1.075;
if (split_bits * state->frame->lambda + cost / factor > cost) { if (split_bits * state->lambda + cost / factor > cost) {
can_split[split_type] = false; can_split[split_type] = false;
continue; continue;
} }

View file

@ -457,7 +457,7 @@ static double search_intra_trdepth(
double transform_bits = 0; double transform_bits = 0;
if (state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) && if (state->encoder_control->cfg.lfnst && PU_IS_TU(pred_cu) &&
trafo != MTS_SKIP && end_lfnst_idx != 0) { trafo != MTS_SKIP && end_lfnst_idx != 0) {
if (!constraints[0] && constraints[1]) { if ((!constraints[0] && constraints[1]) || lfnst_idx != 0) {
transform_bits += CTX_ENTROPY_FBITS( transform_bits += CTX_ENTROPY_FBITS(
&state->search_cabac.ctx.lfnst_idx_model[tree_type == UVG_LUMA_T], &state->search_cabac.ctx.lfnst_idx_model[tree_type == UVG_LUMA_T],
lfnst_idx != 0); lfnst_idx != 0);
@ -468,7 +468,10 @@ static double search_intra_trdepth(
} }
} }
} }
if (num_transforms > 2 && trafo != MTS_SKIP && width <= 32 if (num_transforms > 2 && trafo != MTS_SKIP
&& pred_cu->intra.isp_mode == ISP_MODE_NO_ISP
&& lfnst_idx == 0
&& width <= 32
&& height <= 32 && height <= 32
&& !pred_cu->violates_mts_coeff_constraint && pred_cu-> && !pred_cu->violates_mts_coeff_constraint && pred_cu->
mts_last_scan_pos) { mts_last_scan_pos) {
@ -488,7 +491,7 @@ static double search_intra_trdepth(
} }
} }
rd_cost += transform_bits * state->frame->lambda; rd_cost += transform_bits * state->lambda;
search_data->lfnst_costs[lfnst_idx] = MIN( search_data->lfnst_costs[lfnst_idx] = MIN(
search_data->lfnst_costs[lfnst_idx], search_data->lfnst_costs[lfnst_idx],

View file

@ -782,8 +782,8 @@ void uvg_chroma_transform_search(
pred_cu->violates_lfnst_constrained_chroma = false; pred_cu->violates_lfnst_constrained_chroma = false;
} }
if (!is_jccr) { if (!is_jccr) {
double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->frame->lambda; double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->c_lambda;
double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda; double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->c_lambda;
if (u_cost < chorma_ts_out->best_u_cost) { if (u_cost < chorma_ts_out->best_u_cost) {
chorma_ts_out->best_u_cost = u_cost; chorma_ts_out->best_u_cost = u_cost;
chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL; chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL;
@ -794,7 +794,7 @@ void uvg_chroma_transform_search(
} }
} }
else { else {
double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->frame->lambda; double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->c_lambda;
if (cost < chorma_ts_out->best_combined_cost) { if (cost < chorma_ts_out->best_combined_cost) {
chorma_ts_out->best_combined_cost = cost; chorma_ts_out->best_combined_cost = cost;
chorma_ts_out->best_combined_index = transforms[i]; chorma_ts_out->best_combined_index = transforms[i];