[jccr] Chroma tr_search for inter

This commit is contained in:
Joose Sainio 2022-06-06 12:10:31 +03:00
parent 85bfeea602
commit 658d639f8b
6 changed files with 93 additions and 29 deletions

View file

@ -471,6 +471,7 @@ void uvg_ctx_init(cabac_ctx_t *ctx, int32_t qp, int32_t init_value, uint8_t rate
void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) void uvg_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
{ {
cabac_data_t * const cabac = &state->cabac; cabac_data_t * const cabac = &state->cabac;
memset(&state->cabac.ctx, 0, sizeof(state->cabac.ctx));
uint16_t i, ii; uint16_t i, ii;
// Initialize contexts // Initialize contexts

View file

@ -1077,20 +1077,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
} }
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
true, has_chroma, true, has_chroma && !cur_cu->joint_cb_cr,
state->encoder_control->cfg.jccr, x, y, cur_cu->joint_cb_cr, x, y,
depth, depth,
NULL, NULL,
lcu, lcu,
false); false);
if (cur_cu->depth == cur_cu->tr_depth && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr) {
uvg_select_jccr_mode(state,
x, y,
depth,
NULL,
lcu,
NULL);
}
int cbf = cbf_is_set_any(cur_cu->cbf, depth); int cbf = cbf_is_set_any(cur_cu->cbf, depth);

View file

@ -2101,7 +2101,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
const int width = LCU_WIDTH >> depth; const int width = LCU_WIDTH >> depth;
cabac_data_t cabac_copy; cabac_data_t cabac_copy;
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
cabac_copy.update = 1; cabac_data_t* cabac = &state->search_cabac;
state->search_cabac.update = 1;
cu_info_t* cur_pu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); cu_info_t* cur_pu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
*cur_pu = *cur_cu; *cur_pu = *cur_cu;
@ -2128,21 +2129,90 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
const int skip_context = uvg_get_skip_context(x, y, lcu, NULL, NULL); const int skip_context = uvg_get_skip_context(x, y, lcu, NULL, NULL);
if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) {
no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost; no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
bits += uvg_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); bits += uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu);
} }
else { else {
no_cbf_bits = uvg_mock_encode_coding_unit(state, &cabac_copy, x, y, depth, lcu, cur_cu); no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu);
bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac_copy.ctx.cu_qt_root_cbf_model, 1); bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1);
} }
double no_cbf_cost = ssd + no_cbf_bits * state->lambda; double no_cbf_cost = ssd + no_cbf_bits * state->lambda;
uvg_quantize_lcu_residual(state, const int can_use_chroma_tr_skip = state->encoder_control->cfg.trskip_enable &&
true, reconstruct_chroma, (1 << state->encoder_control->cfg.trskip_max_size) >= width &&
reconstruct_chroma && state->encoder_control->cfg.jccr, x, y, state->encoder_control->cfg.chroma_trskip_enable;
depth,
cur_cu, double chroma_cost = 0;
lcu, if((state->encoder_control->cfg.jccr || can_use_chroma_tr_skip) && cur_cu->depth == cur_cu->tr_depth && reconstruct_chroma) {
false); uvg_quantize_lcu_residual(state,
true, false,false, x, y,
depth,
cur_cu,
lcu,
false);
ALIGNED(64) kvz_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) kvz_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
kvz_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width);
kvz_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width);
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
uvg_generate_residual(
&lcu->ref.u[index],
u_pred,
u_resi,
width,
LCU_WIDTH_C,
width);
uvg_generate_residual(
&lcu->ref.v[index],
v_pred,
v_resi,
width,
LCU_WIDTH_C,
width);
kvz_chorma_ts_out_t chorma_ts_out;
kvz_chroma_transform_search(
state,
depth,
lcu,
&cabac_copy,
width,
width,
index,
0,
cur_cu,
u_pred,
v_pred,
u_resi,
v_resi,
&chorma_ts_out);
cbf_clear(&cur_cu->cbf, depth, COLOR_U);
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
if (chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost < chorma_ts_out.best_combined_cost) {
cur_cu->joint_cb_cr = 0;
cur_cu->tr_skip |= (chorma_ts_out.best_u_index == CHROMA_TS) << COLOR_U;
cur_cu->tr_skip |= (chorma_ts_out.best_v_index == CHROMA_TS) << COLOR_V;
if(chorma_ts_out.best_u_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, depth, COLOR_U);
if(chorma_ts_out.best_v_index != NO_RESIDUAL) cbf_set(&cur_cu->cbf, depth, COLOR_V);
chroma_cost += chorma_ts_out.best_u_cost + chorma_ts_out.best_v_cost;
}
else {
cur_cu->joint_cb_cr = chorma_ts_out.best_combined_index;
if (chorma_ts_out.best_combined_index & 2) cbf_set(&cur_cu->cbf, depth, COLOR_U);
if (chorma_ts_out.best_combined_index & 1) cbf_set(&cur_cu->cbf, depth, COLOR_V);
chroma_cost += chorma_ts_out.best_combined_cost;
}
}
else {
uvg_quantize_lcu_residual(state,
true, reconstruct_chroma,
reconstruct_chroma && state->encoder_control->cfg.jccr, x, y,
depth,
cur_cu,
lcu,
false);
}
int cbf = cbf_is_set_any(cur_cu->cbf, depth); int cbf = cbf_is_set_any(cur_cu->cbf, depth);
@ -2153,7 +2223,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
*inter_cost += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu); *inter_cost += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu);
} }
else { else {
uvg_select_jccr_mode(state, x_px, y_px, depth, cur_cu, lcu, inter_cost); *inter_cost += chroma_cost;
} }
} }
} }

View file

@ -1436,7 +1436,7 @@ int8_t uvg_search_intra_chroma_rdo(
state, state,
depth, depth,
lcu, lcu,
temp_cabac, &temp_cabac,
width, width,
height, height,
offset, offset,

View file

@ -256,7 +256,7 @@ static void generate_jccr_transforms(
int64_t d1 = 0; int64_t d1 = 0;
int64_t d2 = 0; int64_t d2 = 0;
const int cbf_mask = jccr * (state->frame->jccr_sign ? -1 : 1); const int cbf_mask = jccr * (state->frame->jccr_sign ? -1 : 1);
int16_t* current_resi = &temp_resi[(jccr - 1) * trans_offset]; int16_t* current_resi = &temp_resi[MAX((jccr - 1) , 0) * trans_offset];
for (int y = 0; y < height; y++) for (int y = 0; y < height; y++)
{ {
for (int x = 0; x < width; x++) for (int x = 0; x < width; x++)
@ -428,7 +428,7 @@ void kvz_chroma_transform_search(
encoder_state_t* const state, encoder_state_t* const state,
int depth, int depth,
lcu_t* const lcu, lcu_t* const lcu,
cabac_data_t temp_cabac, cabac_data_t* temp_cabac,
int8_t width, int8_t width,
int8_t height, int8_t height,
const int offset, const int offset,
@ -634,11 +634,11 @@ void kvz_chroma_transform_search(
double v_cost = KVZ_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda; double v_cost = KVZ_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda;
if (u_cost < chorma_ts_out->best_u_cost) { if (u_cost < chorma_ts_out->best_u_cost) {
chorma_ts_out->best_u_cost = u_cost; chorma_ts_out->best_u_cost = u_cost;
chorma_ts_out->best_u_index = transforms[i]; chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL;
} }
if (v_cost < chorma_ts_out->best_v_cost) { if (v_cost < chorma_ts_out->best_v_cost) {
chorma_ts_out->best_v_cost = v_cost; chorma_ts_out->best_v_cost = v_cost;
chorma_ts_out->best_v_index = transforms[i]; chorma_ts_out->best_v_index = v_has_coeffs ? transforms[i] : NO_RESIDUAL;
} }
} }
else { else {
@ -648,7 +648,7 @@ void kvz_chroma_transform_search(
chorma_ts_out->best_combined_index = transforms[i]; chorma_ts_out->best_combined_index = transforms[i];
} }
} }
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t)); memcpy(&state->search_cabac, temp_cabac, sizeof(cabac_data_t));
} }
} }

View file

@ -92,7 +92,7 @@ void kvz_chroma_transform_search(
encoder_state_t* const state, encoder_state_t* const state,
int depth, int depth,
lcu_t* const lcu, lcu_t* const lcu,
cabac_data_t temp_cabac, cabac_data_t* temp_cabac,
int8_t width, int8_t width,
int8_t height, int8_t height,
const int offset, const int offset,
@ -107,6 +107,7 @@ void kvz_chroma_transform_search(
enum kvz_chroma_transforms { enum kvz_chroma_transforms {
DCT7_CHROMA = 0, DCT7_CHROMA = 0,
CHROMA_TS = 4, CHROMA_TS = 4,
NO_RESIDUAL = 8,
JCCR_1 = 1, JCCR_1 = 1,
JCCR_2 = 2, JCCR_2 = 2,
JCCR_3 = 3, JCCR_3 = 3,