Fix rebase mistakes

This commit is contained in:
Joose Sainio 2022-06-13 11:17:39 +03:00
parent 882b00068b
commit 3da4a313ce
12 changed files with 222 additions and 203 deletions

View file

@ -42,7 +42,7 @@
bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu);
void kvz_encode_coding_tree(encoder_state_t * const state,
void uvg_encode_coding_tree(encoder_state_t * const state,
uint16_t x_ctb,
uint16_t y_ctb,
uint8_t depth,

View file

@ -579,8 +579,8 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
if(chroma_can_use_tr_skip && cb_flag_v) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag");
}
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], NULL, chroma_width, COLOR_U, scan_order, tr_cu->tr_skip & 2);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], NULL, chroma_width, COLOR_V, scan_order, tr_cu->tr_skip & 4);
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, chroma_width, COLOR_U, scan_order, tr_cu->tr_skip & 2);
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, chroma_width, COLOR_V, scan_order, tr_cu->tr_skip & 4);
}
else {
@ -600,7 +600,7 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, chroma_width, COLOR_U, scan_order, 0);
}
}
if (kvz_is_mts_allowed(state, tr_cu)) {
if (uvg_is_mts_allowed(state, tr_cu)) {
bool symbol = tr_cu->tr_idx != 0;
int ctx_idx = 0;
@ -872,7 +872,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
intra_search.pred_cu.joint_cb_cr = 0;
// TODO: This heavily relies to square CUs
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) {
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400) {
// There is almost no benefit to doing the chroma mode search for
// rd2. Possibly because the luma mode search already takes chroma
// into account, so there is less of a chanse of luma mode being
@ -908,7 +908,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (cur_cu->type == CU_INTRA) {
assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
if ((depth == 4 && (x % 8 == 0 || y % 8 == 0)) || state->encoder_control->chroma_format == KVZ_CSP_400) {
if ((depth == 4 && (x % 8 == 0 || y % 8 == 0)) || state->encoder_control->chroma_format == UVG_CSP_400) {
intra_search.pred_cu.intra.mode_chroma = -1;
}
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
@ -1045,7 +1045,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (depth < MAX_DEPTH) {
// Add cost of cu_split_flag.
kvz_write_split_flag(state, &state->search_cabac,
uvg_write_split_flag(state, &state->search_cabac,
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
1, depth, cu_width, x, y, &split_bits);

View file

@ -2149,10 +2149,10 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
cur_cu,
lcu,
false);
ALIGNED(64) kvz_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) kvz_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
kvz_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width);
kvz_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width);
ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width);
uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width);
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
@ -2171,8 +2171,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
LCU_WIDTH_C,
width);
kvz_chorma_ts_out_t chorma_ts_out;
kvz_chroma_transform_search(
uvg_chorma_ts_out_t chorma_ts_out;
uvg_chroma_transform_search(
state,
depth,
lcu,

View file

@ -397,147 +397,148 @@ static double search_intra_trdepth(
pred_cu->mts_last_scan_pos = 0;
pred_cu->violates_mts_coeff_constraint = 0;
const int max_tb_size = TR_MAX_WIDTH;
// LFNST search params
const int max_lfnst_idx = width > max_tb_size || height > max_tb_size ? 0 : 2;
const int max_tb_size = TR_MAX_WIDTH;
// LFNST search params
const int max_lfnst_idx = width > max_tb_size || height > max_tb_size ? 0 : 2;
int start_idx = 0;
int end_idx = state->encoder_control->cfg.lfnst && depth == pred_cu->tr_depth ? max_lfnst_idx : 0;
int start_idx = 0;
int end_idx = state->encoder_control->cfg.lfnst && depth == pred_cu->tr_depth ? max_lfnst_idx : 0;
for (int lfnst_idx = start_idx; lfnst_idx <= end_idx; lfnst_idx++) {
// Initialize lfnst variables
pred_cu->lfnst_idx = lfnst_idx;
pred_cu->violates_lfnst_constrained[0] = false;
pred_cu->violates_lfnst_constrained[1] = false;
pred_cu->lfnst_last_scan_pos = false;
for (int lfnst_idx = start_idx; lfnst_idx <= end_idx; lfnst_idx++) {
// Initialize lfnst variables
pred_cu->lfnst_idx = lfnst_idx;
pred_cu->violates_lfnst_constrained[0] = false;
pred_cu->violates_lfnst_constrained[1] = false;
pred_cu->lfnst_last_scan_pos = false;
for (trafo = mts_start; trafo < num_transforms; trafo++) {
pred_cu->tr_idx = trafo;
if (mts_enabled) {
pred_cu->mts_last_scan_pos = 0;
pred_cu->violates_mts_coeff_constraint = 0;
for (trafo = mts_start; trafo < num_transforms; trafo++) {
pred_cu->tr_idx = trafo;
if (mts_enabled) {
pred_cu->mts_last_scan_pos = 0;
pred_cu->violates_mts_coeff_constraint = 0;
if (trafo == MTS_SKIP && width > (1 << state->encoder_control->cfg.trskip_max_size)) {
//TODO: parametrize that this is not hardcoded
// TODO: this probably should currently trip for chroma?
continue;
}
}
// MTS and LFNST cannot be on at the same time
if (pred_cu->lfnst_idx > 0 && pred_cu->tr_idx > 0) {
continue;
}
uvg_intra_recon_cu(state,
x_px, y_px,
depth, search_data,
pred_cu,
lcu);
if(trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue;
if (trafo == MTS_SKIP && width > (1 << state->encoder_control->cfg.trskip_max_size)) {
//TODO: parametrize that this is not hardcoded
// TODO: this probably should currently trip for chroma?
continue;
}
}
// MTS and LFNST cannot be on at the same time
if (pred_cu->lfnst_idx > 0 && pred_cu->tr_idx > 0) {
continue;
}
uvg_intra_recon_cu(state,
x_px, y_px,
depth, search_data,
pred_cu,
lcu);
if (trafo != 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) continue;
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
derive_mts_constraints(pred_cu, lcu, depth, lcu_px);
if (pred_cu->tr_idx > 1)
{
if (pred_cu->violates_mts_coeff_constraint || !pred_cu->mts_last_scan_pos)
{
continue;
}
}
if (pred_cu->lfnst_idx > 0) {
// Temp constraints. Updating the actual pred_cu constraints here will break things later
bool constraints[2] = { pred_cu->violates_lfnst_constrained[0],
pred_cu->lfnst_last_scan_pos };
derive_lfnst_constraints(pred_cu, lcu, depth, COLOR_Y, lcu_px, constraints);
if (constraints[0] || !constraints[1]) {
continue;
}
double rd_cost = uvg_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
double mts_bits = 0;
if (num_transforms > 1 && trafo != MTS_SKIP && width <= 32 /*&& height <= 32*/
&& !pred_cu->violates_mts_coeff_constraint && pred_cu->mts_last_scan_pos) {
bool symbol = trafo != 0;
int ctx_idx = 0;
mts_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.mts_idx_model[ctx_idx], symbol);
ctx_idx++;
for (int i = 0; i < 3 && symbol; i++, ctx_idx++)
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
derive_mts_constraints(pred_cu, lcu, depth, lcu_px);
if (pred_cu->tr_idx > 1)
{
symbol = trafo > i + MTS_DST7_DST7 ? 1 : 0;
if (pred_cu->violates_mts_coeff_constraint || !pred_cu->mts_last_scan_pos)
{
continue;
}
}
if (pred_cu->lfnst_idx > 0) {
// Temp constraints. Updating the actual pred_cu constraints here will break things later
bool constraints[2] = { pred_cu->violates_lfnst_constrained[0],
pred_cu->lfnst_last_scan_pos };
derive_lfnst_constraints(pred_cu, lcu, depth, COLOR_Y, lcu_px, constraints);
if (constraints[0] || !constraints[1]) {
continue;
}
}
double rd_cost = uvg_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
double mts_bits = 0;
if (num_transforms > 1 && trafo != MTS_SKIP && width <= 32 /*&& height <= 32*/
&& !pred_cu->violates_mts_coeff_constraint && pred_cu->mts_last_scan_pos) {
bool symbol = trafo != 0;
int ctx_idx = 0;
mts_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.mts_idx_model[ctx_idx], symbol);
ctx_idx++;
for (int i = 0; i < 3 && symbol; i++, ctx_idx++)
{
symbol = trafo > i + MTS_DST7_DST7 ? 1 : 0;
mts_bits += CTX_ENTROPY_FBITS(&state->search_cabac.ctx.mts_idx_model[ctx_idx], symbol);
}
}
rd_cost += mts_bits * state->frame->lambda;
}
rd_cost += mts_bits * state->frame->lambda;
// TODO: there is an error in this cost calculation. This will be fixed when merged into current master
// This is compared to the previous best, which may have chroma cost included
if (rd_cost < best_rd_cost) {
best_rd_cost = rd_cost;
best_lfnst_idx = pred_cu->lfnst_idx;
best_tr_idx = pred_cu->tr_idx;
if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
}
} // end mts index loop (tr_idx)
if (reconstruct_chroma) {
int8_t luma_mode = pred_cu->intra.mode;
pred_cu->intra.mode = -1;
pred_cu->intra.mode_chroma = chroma_mode;
pred_cu->joint_cb_cr = 4; // TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
uvg_intra_recon_cu(state,
x_px, y_px,
depth, search_data,
pred_cu,
lcu);
best_rd_cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
pred_cu->intra.mode = luma_mode;
// TODO: there is an error in this cost calculation. This will be fixed when merged into current master
// This is compared to the previous best, which may have chroma cost included
if (rd_cost < best_rd_cost) {
best_rd_cost = rd_cost;
best_lfnst_idx = pred_cu->lfnst_idx;
best_tr_idx = pred_cu->tr_idx;
if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
// Check lfnst constraints for chroma
if (pred_cu->lfnst_idx > 0) {
// Temp constraints. Updating the actual pred_cu constraints here will break things later
bool constraints[2] = { pred_cu->violates_lfnst_constrained[1],
pred_cu->lfnst_last_scan_pos };
derive_lfnst_constraints(pred_cu, lcu, depth, COLOR_U, lcu_px, constraints);
if (constraints[0] || !constraints[1]) {
best_lfnst_idx = 0;
continue;
}
derive_lfnst_constraints(pred_cu, lcu, depth, COLOR_V, lcu_px, constraints);
if (constraints[0] || !constraints[1]) {
best_lfnst_idx = 0;
continue;
}
}
}
} // end mts index loop (tr_idx)
if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
} // end lfnst_index loop
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
pred_cu->tr_idx = best_tr_idx;
pred_cu->lfnst_idx = best_lfnst_idx;
nosplit_cost += best_rd_cost;
// Early stop condition for the recursive search.
// If the cost of any 1/4th of the transform is already larger than the
// whole transform, assume that splitting further is a bad idea.
if (nosplit_cost >= cost_treshold) {
return nosplit_cost;
if (trafo == 0 && !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) {
break;
}
}
nosplit_cbf = pred_cu->cbf;
uvg_pixels_blit(lcu->rec.y, nosplit_pixels.y, width, width, LCU_WIDTH, width);
if (reconstruct_chroma) {
int8_t luma_mode = pred_cu->intra.mode;
pred_cu->intra.mode = -1;
pred_cu->intra.mode_chroma = chroma_mode;
pred_cu->joint_cb_cr = 4; // TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
uvg_intra_recon_cu(state,
x_px, y_px,
depth, search_data,
pred_cu,
lcu);
best_rd_cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
pred_cu->intra.mode = luma_mode;
// Check lfnst constraints for chroma
if (pred_cu->lfnst_idx > 0) {
// Temp constraints. Updating the actual pred_cu constraints here will break things later
bool constraints[2] = { pred_cu->violates_lfnst_constrained[1],
pred_cu->lfnst_last_scan_pos };
derive_lfnst_constraints(pred_cu, lcu, depth, COLOR_U, lcu_px, constraints);
if (constraints[0] || !constraints[1]) {
best_lfnst_idx = 0;
continue;
}
derive_lfnst_constraints(pred_cu, lcu, depth, COLOR_V, lcu_px, constraints);
if (constraints[0] || !constraints[1]) {
best_lfnst_idx = 0;
continue;
}
}
uvg_pixels_blit(lcu->rec.u, nosplit_pixels.u, width_c, width_c, LCU_WIDTH_C, width_c);
uvg_pixels_blit(lcu->rec.v, nosplit_pixels.v, width_c, width_c, LCU_WIDTH_C, width_c);
}
if (best_tr_idx == MTS_SKIP) break; // Very unlikely that further search is necessary if skip seems best option
} // end lfnst_index loop
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
pred_cu->tr_idx = best_tr_idx;
pred_cu->lfnst_idx = best_lfnst_idx;
nosplit_cost += best_rd_cost;
// Early stop condition for the recursive search.
// If the cost of any 1/4th of the transform is already larger than the
// whole transform, assume that splitting further is a bad idea.
if (nosplit_cost >= cost_treshold) {
return nosplit_cost;
if(trafo == 0&& !cbf_is_set(pred_cu->cbf, depth, COLOR_Y)) {
break;
}
}
nosplit_cbf = pred_cu->cbf;
uvg_pixels_blit(lcu->rec.y, nosplit_pixels.y, width, width, LCU_WIDTH, width);
if (reconstruct_chroma) {
uvg_pixels_blit(lcu->rec.u, nosplit_pixels.u, width_c, width_c, LCU_WIDTH_C, width_c);
uvg_pixels_blit(lcu->rec.v, nosplit_pixels.v, width_c, width_c, LCU_WIDTH_C, width_c);
}
}
}
// Recurse further if all of the following:
// - Current depth is less than maximum depth of the search (max_depth).
// - Maximum transform hierarchy depth is constrained by clipping
@ -638,8 +639,7 @@ static int search_intra_chroma_rough(
assert(depth != 4 || (x_px & 4 && y_px & 4));
const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH);
cost_pixel_nxn_func *const satd_func = uvg_pixels_get_satd_func(width);
//cost_pixel_nxn_func *const sad_func = uvg_pixels_get_sad_func(width);
cu_loc_t loc = { x_px & ~7, y_px & ~7, width, width, width, width };
@ -656,16 +656,37 @@ static int search_intra_chroma_rough(
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
uvg_intra_predict(state, refs_u, &loc, COLOR_U, pred, &chroma_data[i], lcu);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
chroma_data[i].cost += satd_func(pred, orig_block);
switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
break;
case 8: chroma_data[i].cost += uvg_satd_8x8(pred, orig_block);
break;
case 16: chroma_data[i].cost += uvg_satd_16x16(pred, orig_block);
break;
case 32: chroma_data[i].cost += uvg_satd_32x32(pred, orig_block);
break;
default: assert(0);
}
}
uvg_pixels_blit(orig_v, orig_block, width, width, origstride, width);
for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81)
continue;
uvg_intra_predict(state, refs_v, &loc, COLOR_V, pred, &chroma_data[i], lcu);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
chroma_data[i].cost += satd_func(pred, orig_block);
switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
break;
case 8: chroma_data[i].cost += uvg_satd_8x8(pred, orig_block);
break;
case 16: chroma_data[i].cost += uvg_satd_16x16(pred, orig_block);
break;
case 32: chroma_data[i].cost += uvg_satd_32x32(pred, orig_block);
break;
default: assert(0);
}
}
sort_modes(chroma_data, modes_count);
if (modes_count > 5 && chroma_data[7].pred_cu.intra.mode_chroma > 81) modes_count--;
@ -1393,12 +1414,12 @@ int8_t uvg_search_intra_chroma_rdo(
for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) {
const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma;
double mode_bits = kvz_chroma_mode_bits(state, mode, luma_mode);
double mode_bits = uvg_chroma_mode_bits(state, mode, luma_mode);
chroma_data[mode_i].cost = mode_bits * state->lambda;
cu_info_t* pred_cu = &chroma_data[mode_i].pred_cu;
if (pred_cu->tr_depth == pred_cu->depth) {
ALIGNED(64) kvz_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) kvz_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
uvg_intra_predict(
@ -1431,8 +1452,8 @@ int8_t uvg_search_intra_chroma_rdo(
width,
LCU_WIDTH_C,
width);
kvz_chorma_ts_out_t chorma_ts_out;
kvz_chroma_transform_search(
uvg_chorma_ts_out_t chorma_ts_out;
uvg_chroma_transform_search(
state,
depth,
lcu,

View file

@ -1811,7 +1811,7 @@ int uvg_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
success &= uvg_strategyselector_register(opaque, "pixel_var", "avx2", 40, &pixel_var_avx2);
success &= kvz_strategyselector_register(opaque, "generate_residual", "avx2", 0, &generate_residual_avx2);
success &= uvg_strategyselector_register(opaque, "generate_residual", "avx2", 0, &generate_residual_avx2);
}
#endif // UVG_BIT_DEPTH == 8

View file

@ -623,7 +623,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
assert(width >= TR_MIN_WIDTH);
// Get residual. (ref_in - pred_in -> residual)
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
int y, x;

View file

@ -782,7 +782,7 @@ static double pixel_var_generic(const uvg_pixel *arr, const uint32_t len)
}
static void generate_residual_generic(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual,
static void generate_residual_generic(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual,
int width, int ref_stride, int pred_stride)
{
int y, x;
@ -834,7 +834,7 @@ int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
success &= uvg_strategyselector_register(opaque, "pixel_var", "generic", 0, &pixel_var_generic);
success &= kvz_strategyselector_register(opaque, "generate_residual", "generic", 0, &generate_residual_generic);
success &= uvg_strategyselector_register(opaque, "generate_residual", "generic", 0, &generate_residual_generic);
return success;
}

View file

@ -219,8 +219,8 @@ int uvg_quant_cbcr_residual_generic(
}
}
}
kvz_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride, in_stride);
kvz_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride, in_stride);
uvg_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride, in_stride);
uvg_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride, in_stride);
const int cbf_mask = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1);
@ -409,7 +409,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
const int height = width; // TODO: height for non-square blocks
// Get residual. (ref_in - pred_in -> residual)
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
int y, x;

View file

@ -80,7 +80,7 @@ hor_sad_func *uvg_hor_sad = 0;
pixel_var_func *uvg_pixel_var = 0;
generate_residual_func *kvz_generate_residual = 0;
generate_residual_func *uvg_generate_residual = 0;
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {

View file

@ -149,7 +149,7 @@ typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len);
typedef void (generate_residual_func)(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
// Declare function pointers.
extern reg_sad_func * uvg_reg_sad;
@ -191,13 +191,11 @@ extern hor_sad_func *uvg_hor_sad;
extern pixel_var_func *uvg_pixel_var;
extern generate_residual_func* kvz_generate_residual;
extern generate_residual_func* uvg_generate_residual;
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth);
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n);
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
cost_pixel_nxn_multi_func * kvz_pixels_get_satd_dual_func(unsigned n);
cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
#define STRATEGIES_PICTURE_EXPORTS \
{"reg_sad", (void**) &uvg_reg_sad}, \
@ -229,7 +227,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
{"ver_sad", (void**) &uvg_ver_sad}, \
{"hor_sad", (void**) &uvg_hor_sad}, \
{"pixel_var", (void**) &uvg_pixel_var}, \
{"generate_residual", (void**) &kvz_generate_residual}, \
{"generate_residual", (void**) &uvg_generate_residual}, \

View file

@ -245,7 +245,7 @@ static void generate_jccr_transforms(
int16_t u_resi[1024],
int16_t v_resi[1024],
coeff_t u_coeff[5120],
enum kvz_chroma_transforms transforms[5],
enum uvg_chroma_transforms transforms[5],
const int trans_offset,
int* num_transforms)
{
@ -325,7 +325,7 @@ static void generate_jccr_transforms(
}
if (cbf_mask1)
{
kvz_transform2d(
uvg_transform2d(
state->encoder_control,
&temp_resi[(cbf_mask1 - 1) * trans_offset],
&u_coeff[*num_transforms * trans_offset],
@ -338,7 +338,7 @@ static void generate_jccr_transforms(
}
if (cbf_mask2 && ((min_dist2 < (9 * min_dist1) / 8) || (!cbf_mask1 && min_dist2 < (3 * min_dist1) / 2)))
{
kvz_transform2d(
uvg_transform2d(
state->encoder_control,
&temp_resi[(cbf_mask2 - 1) * trans_offset],
&u_coeff[*num_transforms * trans_offset],
@ -363,7 +363,7 @@ static void quantize_chroma(
int8_t height,
coeff_t u_coeff[5120],
coeff_t v_coeff[2048],
enum kvz_chroma_transforms transforms[5],
enum uvg_chroma_transforms transforms[5],
const int trans_offset,
int i,
coeff_t u_quant_coeff[1024],
@ -375,7 +375,7 @@ static void quantize_chroma(
if (state->encoder_control->cfg.rdoq_enable &&
(transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip))
{
kvz_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
uvg_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
scan_order, CU_INTRA, depth, 0);
int j;
@ -389,21 +389,21 @@ static void quantize_chroma(
if (transforms[i] == DCT7_CHROMA) {
uint16_t temp_cbf = 0;
if (*u_has_coeffs)cbf_set(&temp_cbf, depth, COLOR_U);
kvz_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
uvg_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
scan_order, CU_INTRA, depth, temp_cbf);
}
}
else if (state->encoder_control->cfg.rdoq_enable && transforms[i] == CHROMA_TS) {
kvz_ts_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, COLOR_U, scan_order);
kvz_ts_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V, scan_order);
uvg_ts_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, COLOR_U, scan_order);
uvg_ts_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V, scan_order);
}
else {
kvz_quant(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
uvg_quant(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
if (!IS_JCCR_MODE(transforms[i])) {
kvz_quant(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
uvg_quant(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
}
}
@ -424,7 +424,7 @@ static void quantize_chroma(
}
}
void kvz_chroma_transform_search(
void uvg_chroma_transform_search(
encoder_state_t* const state,
int depth,
lcu_t* const lcu,
@ -434,23 +434,23 @@ void kvz_chroma_transform_search(
const int offset,
const uint8_t mode,
cu_info_t* pred_cu,
kvz_pixel u_pred[1024],
kvz_pixel v_pred[1024],
uvg_pixel u_pred[1024],
uvg_pixel v_pred[1024],
int16_t u_resi[1024],
int16_t v_resi[1024],
kvz_chorma_ts_out_t* chorma_ts_out)
uvg_chorma_ts_out_t* chorma_ts_out)
{
ALIGNED(64) coeff_t u_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 5];
ALIGNED(64) uint8_t u_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5];
ALIGNED(64) coeff_t v_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 2];
ALIGNED(64) uint8_t v_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5];
kvz_transform2d(
uvg_transform2d(
state->encoder_control, u_resi, u_coeff, width, COLOR_U, pred_cu
);
kvz_transform2d(
uvg_transform2d(
state->encoder_control, v_resi, v_coeff, width, COLOR_V, pred_cu
);
enum kvz_chroma_transforms transforms[5];
enum uvg_chroma_transforms transforms[5];
transforms[0] = DCT7_CHROMA;
const int trans_offset = width * height;
int num_transforms = 1;
@ -458,8 +458,8 @@ void kvz_chroma_transform_search(
(1 << state->encoder_control->cfg.trskip_max_size) >= width &&
state->encoder_control->cfg.chroma_trskip_enable;
if (can_use_tr_skip) {
kvz_transformskip(state->encoder_control, u_resi, u_coeff + num_transforms * trans_offset, width);
kvz_transformskip(state->encoder_control, v_resi, v_coeff + num_transforms * trans_offset, width);
uvg_transformskip(state->encoder_control, u_resi, u_coeff + num_transforms * trans_offset, width);
uvg_transformskip(state->encoder_control, v_resi, v_coeff + num_transforms * trans_offset, width);
transforms[num_transforms] = CHROMA_TS;
num_transforms++;
}
@ -488,7 +488,7 @@ void kvz_chroma_transform_search(
int16_t u_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C];
int16_t v_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C];
const coeff_scan_order_t scan_order =
kvz_get_scan_order(pred_cu->type, mode, depth);
uvg_get_scan_order(pred_cu->type, mode, depth);
bool u_has_coeffs = false;
bool v_has_coeffs = false;
quantize_chroma(
@ -510,18 +510,18 @@ void kvz_chroma_transform_search(
if (IS_JCCR_MODE(transforms[i]) && !u_has_coeffs) continue;
if (u_has_coeffs) {
kvz_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
pred_cu->type, transforms[i] == CHROMA_TS);
if (transforms[i] != CHROMA_TS) {
kvz_itransform2d(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width,
uvg_itransform2d(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width,
transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu);
}
else {
kvz_itransformskip(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width);
uvg_itransformskip(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width);
}
if (transforms[i] != JCCR_1) {
for (int j = 0; j < width * height; j++) {
u_recon[trans_offset * i + j] = CLIP_TO_PIXEL((kvz_pixel)(u_pred[j] + u_recon_resi[j]));
u_recon[trans_offset * i + j] = CLIP_TO_PIXEL((uvg_pixel)(u_pred[j] + u_recon_resi[j]));
}
}
else {
@ -531,17 +531,17 @@ void kvz_chroma_transform_search(
}
}
else {
kvz_pixels_blit(u_pred, &u_recon[trans_offset * i], width, height, width, width);
uvg_pixels_blit(u_pred, &u_recon[trans_offset * i], width, height, width, width);
}
if (v_has_coeffs && !(IS_JCCR_MODE(transforms[i]))) {
kvz_dequant(state, v_quant_coeff, &v_coeff[i * trans_offset], width, width, COLOR_V,
uvg_dequant(state, v_quant_coeff, &v_coeff[i * trans_offset], width, width, COLOR_V,
pred_cu->type, transforms[i] == CHROMA_TS);
if (transforms[i] != CHROMA_TS) {
kvz_itransform2d(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width,
uvg_itransform2d(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width,
transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu);
}
else {
kvz_itransformskip(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width);
uvg_itransformskip(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width);
}
for (int j = 0; j < width * height; j++) {
v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + v_recon_resi[j]);
@ -565,16 +565,16 @@ void kvz_chroma_transform_search(
}
}
else {
kvz_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width);
uvg_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width);
}
unsigned ssd_u = 0;
unsigned ssd_v = 0;
if (!state->encoder_control->cfg.lossless) {
ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i],
ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i],
LCU_WIDTH_C, width,
width);
ssd_v = kvz_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i],
ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i],
LCU_WIDTH_C, width,
width);
}
@ -604,7 +604,7 @@ void kvz_chroma_transform_search(
transforms[i] == CHROMA_TS, u_bits, "tr_skip_u"
);
}
double coeff_cost = kvz_get_coeff_cost(
double coeff_cost = uvg_get_coeff_cost(
state,
u_quant_coeff,
NULL,
@ -620,7 +620,7 @@ void kvz_chroma_transform_search(
transforms[i] == CHROMA_TS, v_bits, "tr_skip_v"
);
}
v_bits += kvz_get_coeff_cost(
v_bits += uvg_get_coeff_cost(
state,
v_quant_coeff,
NULL,
@ -630,8 +630,8 @@ void kvz_chroma_transform_search(
transforms[i] == CHROMA_TS);
}
if (!IS_JCCR_MODE(transforms[i])) {
double u_cost = KVZ_CHROMA_MULT * ssd_u + u_bits * state->frame->lambda;
double v_cost = KVZ_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda;
double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->frame->lambda;
double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda;
if (u_cost < chorma_ts_out->best_u_cost) {
chorma_ts_out->best_u_cost = u_cost;
chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL;
@ -642,7 +642,7 @@ void kvz_chroma_transform_search(
}
}
else {
double cost = KVZ_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->frame->lambda;
double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->frame->lambda;
if (cost < chorma_ts_out->best_combined_cost) {
chorma_ts_out->best_combined_cost = cost;
chorma_ts_out->best_combined_index = transforms[i];

View file

@ -74,7 +74,7 @@ typedef struct {
int best_u_index;
int best_v_index;
int best_combined_index;
} kvz_chorma_ts_out_t;
} uvg_chorma_ts_out_t;
void uvg_quantize_lcu_residual(
encoder_state_t *state,
@ -88,7 +88,7 @@ void uvg_quantize_lcu_residual(
lcu_t* lcu,
bool early_skip);
void kvz_chroma_transform_search(
void uvg_chroma_transform_search(
encoder_state_t* const state,
int depth,
lcu_t* const lcu,
@ -98,13 +98,13 @@ void kvz_chroma_transform_search(
const int offset,
const uint8_t mode,
cu_info_t* pred_cu,
kvz_pixel u_pred[1024],
kvz_pixel v_pred[1024],
uvg_pixel u_pred[1024],
uvg_pixel v_pred[1024],
int16_t u_resi[1024],
int16_t v_resi[1024],
kvz_chorma_ts_out_t* chorma_ts_out);
uvg_chorma_ts_out_t* chorma_ts_out);
enum kvz_chroma_transforms {
enum uvg_chroma_transforms {
DCT7_CHROMA = 0,
CHROMA_TS = 4,
NO_RESIDUAL = 8,