Fix rebase mistakes

This commit is contained in:
Joose Sainio 2022-06-13 11:17:39 +03:00
parent 882b00068b
commit 3da4a313ce
12 changed files with 222 additions and 203 deletions

View file

@ -42,7 +42,7 @@
bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu); bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu);
void kvz_encode_coding_tree(encoder_state_t * const state, void uvg_encode_coding_tree(encoder_state_t * const state,
uint16_t x_ctb, uint16_t x_ctb,
uint16_t y_ctb, uint16_t y_ctb,
uint8_t depth, uint8_t depth,

View file

@ -579,8 +579,8 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
if(chroma_can_use_tr_skip && cb_flag_v) { if(chroma_can_use_tr_skip && cb_flag_v) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag"); CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag");
} }
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], NULL, chroma_width, COLOR_U, scan_order, tr_cu->tr_skip & 2); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, chroma_width, COLOR_U, scan_order, tr_cu->tr_skip & 2);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], NULL, chroma_width, COLOR_V, scan_order, tr_cu->tr_skip & 4); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, chroma_width, COLOR_V, scan_order, tr_cu->tr_skip & 4);
} }
else { else {
@ -600,7 +600,7 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, chroma_width, COLOR_U, scan_order, 0); coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, chroma_width, COLOR_U, scan_order, 0);
} }
} }
if (kvz_is_mts_allowed(state, tr_cu)) { if (uvg_is_mts_allowed(state, tr_cu)) {
bool symbol = tr_cu->tr_idx != 0; bool symbol = tr_cu->tr_idx != 0;
int ctx_idx = 0; int ctx_idx = 0;
@ -872,7 +872,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
intra_search.pred_cu.joint_cb_cr = 0; intra_search.pred_cu.joint_cb_cr = 0;
// TODO: This relies heavily on square CUs // TODO: This relies heavily on square CUs
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) { if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400) {
// There is almost no benefit to doing the chroma mode search for // There is almost no benefit to doing the chroma mode search for
// rd2. Possibly because the luma mode search already takes chroma // rd2. Possibly because the luma mode search already takes chroma
// into account, so there is less of a chance of luma mode being // into account, so there is less of a chance of luma mode being
@ -908,7 +908,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (cur_cu->type == CU_INTRA) { if (cur_cu->type == CU_INTRA) {
assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN); assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
if ((depth == 4 && (x % 8 == 0 || y % 8 == 0)) || state->encoder_control->chroma_format == KVZ_CSP_400) { if ((depth == 4 && (x % 8 == 0 || y % 8 == 0)) || state->encoder_control->chroma_format == UVG_CSP_400) {
intra_search.pred_cu.intra.mode_chroma = -1; intra_search.pred_cu.intra.mode_chroma = -1;
} }
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
@ -1045,7 +1045,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (depth < MAX_DEPTH) { if (depth < MAX_DEPTH) {
// Add cost of cu_split_flag. // Add cost of cu_split_flag.
kvz_write_split_flag(state, &state->search_cabac, uvg_write_split_flag(state, &state->search_cabac,
x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL, x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL,
y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL,
1, depth, cu_width, x, y, &split_bits); 1, depth, cu_width, x, y, &split_bits);

View file

@ -2149,10 +2149,10 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
cur_cu, cur_cu,
lcu, lcu,
false); false);
ALIGNED(64) kvz_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) kvz_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
kvz_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width); uvg_pixels_blit(&lcu->ref.u[index], u_pred, width, width, LCU_WIDTH_C, width);
kvz_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width); uvg_pixels_blit(&lcu->ref.v[index], v_pred, width, width, LCU_WIDTH_C, width);
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
@ -2171,8 +2171,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
LCU_WIDTH_C, LCU_WIDTH_C,
width); width);
kvz_chorma_ts_out_t chorma_ts_out; uvg_chorma_ts_out_t chorma_ts_out;
kvz_chroma_transform_search( uvg_chroma_transform_search(
state, state,
depth, depth,
lcu, lcu,

View file

@ -452,6 +452,7 @@ static double search_intra_trdepth(
if (constraints[0] || !constraints[1]) { if (constraints[0] || !constraints[1]) {
continue; continue;
} }
}
double rd_cost = uvg_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); double rd_cost = uvg_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
double mts_bits = 0; double mts_bits = 0;
if (num_transforms > 1 && trafo != MTS_SKIP && width <= 32 /*&& height <= 32*/ if (num_transforms > 1 && trafo != MTS_SKIP && width <= 32 /*&& height <= 32*/
@ -537,7 +538,7 @@ static double search_intra_trdepth(
} }
} }
} }
}
// Recurse further if all of the following: // Recurse further if all of the following:
// - Current depth is less than maximum depth of the search (max_depth). // - Current depth is less than maximum depth of the search (max_depth).
// - Maximum transform hierarchy depth is constrained by clipping // - Maximum transform hierarchy depth is constrained by clipping
@ -639,7 +640,6 @@ static int search_intra_chroma_rough(
const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH);
cost_pixel_nxn_func *const satd_func = uvg_pixels_get_satd_func(width);
//cost_pixel_nxn_func *const sad_func = uvg_pixels_get_sad_func(width); //cost_pixel_nxn_func *const sad_func = uvg_pixels_get_sad_func(width);
cu_loc_t loc = { x_px & ~7, y_px & ~7, width, width, width, width }; cu_loc_t loc = { x_px & ~7, y_px & ~7, width, width, width, width };
@ -656,16 +656,37 @@ static int search_intra_chroma_rough(
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue;
uvg_intra_predict(state, refs_u, &loc, COLOR_U, pred, &chroma_data[i], lcu); uvg_intra_predict(state, refs_u, &loc, COLOR_U, pred, &chroma_data[i], lcu);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
chroma_data[i].cost += satd_func(pred, orig_block); switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
break;
case 8: chroma_data[i].cost += uvg_satd_8x8(pred, orig_block);
break;
case 16: chroma_data[i].cost += uvg_satd_16x16(pred, orig_block);
break;
case 32: chroma_data[i].cost += uvg_satd_32x32(pred, orig_block);
break;
default: assert(0);
}
} }
uvg_pixels_blit(orig_v, orig_block, width, width, origstride, width); uvg_pixels_blit(orig_v, orig_block, width, width, origstride, width);
for (int i = 0; i < modes_count; ++i) { for (int i = 0; i < modes_count; ++i) {
const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma;
if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81)
continue;
uvg_intra_predict(state, refs_v, &loc, COLOR_V, pred, &chroma_data[i], lcu); uvg_intra_predict(state, refs_v, &loc, COLOR_V, pred, &chroma_data[i], lcu);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
chroma_data[i].cost += satd_func(pred, orig_block); switch (width) {
case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block);
break;
case 8: chroma_data[i].cost += uvg_satd_8x8(pred, orig_block);
break;
case 16: chroma_data[i].cost += uvg_satd_16x16(pred, orig_block);
break;
case 32: chroma_data[i].cost += uvg_satd_32x32(pred, orig_block);
break;
default: assert(0);
}
} }
sort_modes(chroma_data, modes_count); sort_modes(chroma_data, modes_count);
if (modes_count > 5 && chroma_data[7].pred_cu.intra.mode_chroma > 81) modes_count--; if (modes_count > 5 && chroma_data[7].pred_cu.intra.mode_chroma > 81) modes_count--;
@ -1393,12 +1414,12 @@ int8_t uvg_search_intra_chroma_rdo(
for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) { for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) {
const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma; const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma;
double mode_bits = kvz_chroma_mode_bits(state, mode, luma_mode); double mode_bits = uvg_chroma_mode_bits(state, mode, luma_mode);
chroma_data[mode_i].cost = mode_bits * state->lambda; chroma_data[mode_i].cost = mode_bits * state->lambda;
cu_info_t* pred_cu = &chroma_data[mode_i].pred_cu; cu_info_t* pred_cu = &chroma_data[mode_i].pred_cu;
if (pred_cu->tr_depth == pred_cu->depth) { if (pred_cu->tr_depth == pred_cu->depth) {
ALIGNED(64) kvz_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) uvg_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) kvz_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) uvg_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C]; ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
uvg_intra_predict( uvg_intra_predict(
@ -1431,8 +1452,8 @@ int8_t uvg_search_intra_chroma_rdo(
width, width,
LCU_WIDTH_C, LCU_WIDTH_C,
width); width);
kvz_chorma_ts_out_t chorma_ts_out; uvg_chorma_ts_out_t chorma_ts_out;
kvz_chroma_transform_search( uvg_chroma_transform_search(
state, state,
depth, depth,
lcu, lcu,

View file

@ -1811,7 +1811,7 @@ int uvg_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
success &= uvg_strategyselector_register(opaque, "pixel_var", "avx2", 40, &pixel_var_avx2); success &= uvg_strategyselector_register(opaque, "pixel_var", "avx2", 40, &pixel_var_avx2);
success &= kvz_strategyselector_register(opaque, "generate_residual", "avx2", 0, &generate_residual_avx2); success &= uvg_strategyselector_register(opaque, "generate_residual", "avx2", 0, &generate_residual_avx2);
} }
#endif // UVG_BIT_DEPTH == 8 #endif // UVG_BIT_DEPTH == 8

View file

@ -623,7 +623,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
assert(width >= TR_MIN_WIDTH); assert(width >= TR_MIN_WIDTH);
// Get residual. (ref_in - pred_in -> residual) // Get residual. (ref_in - pred_in -> residual)
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride); uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) { if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
int y, x; int y, x;

View file

@ -782,7 +782,7 @@ static double pixel_var_generic(const uvg_pixel *arr, const uint32_t len)
} }
static void generate_residual_generic(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual, static void generate_residual_generic(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual,
int width, int ref_stride, int pred_stride) int width, int ref_stride, int pred_stride)
{ {
int y, x; int y, x;
@ -834,7 +834,7 @@ int uvg_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
success &= uvg_strategyselector_register(opaque, "pixel_var", "generic", 0, &pixel_var_generic); success &= uvg_strategyselector_register(opaque, "pixel_var", "generic", 0, &pixel_var_generic);
success &= kvz_strategyselector_register(opaque, "generate_residual", "generic", 0, &generate_residual_generic); success &= uvg_strategyselector_register(opaque, "generate_residual", "generic", 0, &generate_residual_generic);
return success; return success;
} }

View file

@ -219,8 +219,8 @@ int uvg_quant_cbcr_residual_generic(
} }
} }
} }
kvz_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride, in_stride); uvg_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride, in_stride);
kvz_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride, in_stride); uvg_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride, in_stride);
const int cbf_mask = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1); const int cbf_mask = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1);
@ -409,7 +409,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
const int height = width; // TODO: height for non-square blocks const int height = width; // TODO: height for non-square blocks
// Get residual. (ref_in - pred_in -> residual) // Get residual. (ref_in - pred_in -> residual)
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride); uvg_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) { if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
int y, x; int y, x;

View file

@ -80,7 +80,7 @@ hor_sad_func *uvg_hor_sad = 0;
pixel_var_func *uvg_pixel_var = 0; pixel_var_func *uvg_pixel_var = 0;
generate_residual_func *kvz_generate_residual = 0; generate_residual_func *uvg_generate_residual = 0;
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) { int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth) {

View file

@ -149,7 +149,7 @@ typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len); typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len);
typedef void (generate_residual_func)(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride); typedef void (generate_residual_func)(const uvg_pixel* ref_in, const uvg_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
// Declare function pointers. // Declare function pointers.
extern reg_sad_func * uvg_reg_sad; extern reg_sad_func * uvg_reg_sad;
@ -191,13 +191,11 @@ extern hor_sad_func *uvg_hor_sad;
extern pixel_var_func *uvg_pixel_var; extern pixel_var_func *uvg_pixel_var;
extern generate_residual_func* kvz_generate_residual; extern generate_residual_func* uvg_generate_residual;
int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth); int uvg_strategy_register_picture(void* opaque, uint8_t bitdepth);
cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n); cost_pixel_nxn_multi_func * uvg_pixels_get_satd_dual_func(unsigned n);
cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n); cost_pixel_nxn_multi_func * uvg_pixels_get_sad_dual_func(unsigned n);
cost_pixel_nxn_multi_func * kvz_pixels_get_satd_dual_func(unsigned n);
cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
#define STRATEGIES_PICTURE_EXPORTS \ #define STRATEGIES_PICTURE_EXPORTS \
{"reg_sad", (void**) &uvg_reg_sad}, \ {"reg_sad", (void**) &uvg_reg_sad}, \
@ -229,7 +227,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
{"ver_sad", (void**) &uvg_ver_sad}, \ {"ver_sad", (void**) &uvg_ver_sad}, \
{"hor_sad", (void**) &uvg_hor_sad}, \ {"hor_sad", (void**) &uvg_hor_sad}, \
{"pixel_var", (void**) &uvg_pixel_var}, \ {"pixel_var", (void**) &uvg_pixel_var}, \
{"generate_residual", (void**) &kvz_generate_residual}, \ {"generate_residual", (void**) &uvg_generate_residual}, \

View file

@ -245,7 +245,7 @@ static void generate_jccr_transforms(
int16_t u_resi[1024], int16_t u_resi[1024],
int16_t v_resi[1024], int16_t v_resi[1024],
coeff_t u_coeff[5120], coeff_t u_coeff[5120],
enum kvz_chroma_transforms transforms[5], enum uvg_chroma_transforms transforms[5],
const int trans_offset, const int trans_offset,
int* num_transforms) int* num_transforms)
{ {
@ -325,7 +325,7 @@ static void generate_jccr_transforms(
} }
if (cbf_mask1) if (cbf_mask1)
{ {
kvz_transform2d( uvg_transform2d(
state->encoder_control, state->encoder_control,
&temp_resi[(cbf_mask1 - 1) * trans_offset], &temp_resi[(cbf_mask1 - 1) * trans_offset],
&u_coeff[*num_transforms * trans_offset], &u_coeff[*num_transforms * trans_offset],
@ -338,7 +338,7 @@ static void generate_jccr_transforms(
} }
if (cbf_mask2 && ((min_dist2 < (9 * min_dist1) / 8) || (!cbf_mask1 && min_dist2 < (3 * min_dist1) / 2))) if (cbf_mask2 && ((min_dist2 < (9 * min_dist1) / 8) || (!cbf_mask1 && min_dist2 < (3 * min_dist1) / 2)))
{ {
kvz_transform2d( uvg_transform2d(
state->encoder_control, state->encoder_control,
&temp_resi[(cbf_mask2 - 1) * trans_offset], &temp_resi[(cbf_mask2 - 1) * trans_offset],
&u_coeff[*num_transforms * trans_offset], &u_coeff[*num_transforms * trans_offset],
@ -363,7 +363,7 @@ static void quantize_chroma(
int8_t height, int8_t height,
coeff_t u_coeff[5120], coeff_t u_coeff[5120],
coeff_t v_coeff[2048], coeff_t v_coeff[2048],
enum kvz_chroma_transforms transforms[5], enum uvg_chroma_transforms transforms[5],
const int trans_offset, const int trans_offset,
int i, int i,
coeff_t u_quant_coeff[1024], coeff_t u_quant_coeff[1024],
@ -375,7 +375,7 @@ static void quantize_chroma(
if (state->encoder_control->cfg.rdoq_enable && if (state->encoder_control->cfg.rdoq_enable &&
(transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip)) (transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip))
{ {
kvz_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, uvg_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
scan_order, CU_INTRA, depth, 0); scan_order, CU_INTRA, depth, 0);
int j; int j;
@ -389,21 +389,21 @@ static void quantize_chroma(
if (transforms[i] == DCT7_CHROMA) { if (transforms[i] == DCT7_CHROMA) {
uint16_t temp_cbf = 0; uint16_t temp_cbf = 0;
if (*u_has_coeffs)cbf_set(&temp_cbf, depth, COLOR_U); if (*u_has_coeffs)cbf_set(&temp_cbf, depth, COLOR_U);
kvz_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V, uvg_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
scan_order, CU_INTRA, depth, temp_cbf); scan_order, CU_INTRA, depth, temp_cbf);
} }
} }
else if (state->encoder_control->cfg.rdoq_enable && transforms[i] == CHROMA_TS) { else if (state->encoder_control->cfg.rdoq_enable && transforms[i] == CHROMA_TS) {
kvz_ts_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, COLOR_U, scan_order); uvg_ts_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, COLOR_U, scan_order);
kvz_ts_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V, scan_order); uvg_ts_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V, scan_order);
} }
else { else {
kvz_quant(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, uvg_quant(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
scan_order, CU_INTRA, transforms[i] == CHROMA_TS); scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
if (!IS_JCCR_MODE(transforms[i])) { if (!IS_JCCR_MODE(transforms[i])) {
kvz_quant(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V, uvg_quant(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
scan_order, CU_INTRA, transforms[i] == CHROMA_TS); scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
} }
} }
@ -424,7 +424,7 @@ static void quantize_chroma(
} }
} }
void kvz_chroma_transform_search( void uvg_chroma_transform_search(
encoder_state_t* const state, encoder_state_t* const state,
int depth, int depth,
lcu_t* const lcu, lcu_t* const lcu,
@ -434,23 +434,23 @@ void kvz_chroma_transform_search(
const int offset, const int offset,
const uint8_t mode, const uint8_t mode,
cu_info_t* pred_cu, cu_info_t* pred_cu,
kvz_pixel u_pred[1024], uvg_pixel u_pred[1024],
kvz_pixel v_pred[1024], uvg_pixel v_pred[1024],
int16_t u_resi[1024], int16_t u_resi[1024],
int16_t v_resi[1024], int16_t v_resi[1024],
kvz_chorma_ts_out_t* chorma_ts_out) uvg_chorma_ts_out_t* chorma_ts_out)
{ {
ALIGNED(64) coeff_t u_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 5]; ALIGNED(64) coeff_t u_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 5];
ALIGNED(64) uint8_t u_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5]; ALIGNED(64) uint8_t u_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5];
ALIGNED(64) coeff_t v_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 2]; ALIGNED(64) coeff_t v_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 2];
ALIGNED(64) uint8_t v_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5]; ALIGNED(64) uint8_t v_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5];
kvz_transform2d( uvg_transform2d(
state->encoder_control, u_resi, u_coeff, width, COLOR_U, pred_cu state->encoder_control, u_resi, u_coeff, width, COLOR_U, pred_cu
); );
kvz_transform2d( uvg_transform2d(
state->encoder_control, v_resi, v_coeff, width, COLOR_V, pred_cu state->encoder_control, v_resi, v_coeff, width, COLOR_V, pred_cu
); );
enum kvz_chroma_transforms transforms[5]; enum uvg_chroma_transforms transforms[5];
transforms[0] = DCT7_CHROMA; transforms[0] = DCT7_CHROMA;
const int trans_offset = width * height; const int trans_offset = width * height;
int num_transforms = 1; int num_transforms = 1;
@ -458,8 +458,8 @@ void kvz_chroma_transform_search(
(1 << state->encoder_control->cfg.trskip_max_size) >= width && (1 << state->encoder_control->cfg.trskip_max_size) >= width &&
state->encoder_control->cfg.chroma_trskip_enable; state->encoder_control->cfg.chroma_trskip_enable;
if (can_use_tr_skip) { if (can_use_tr_skip) {
kvz_transformskip(state->encoder_control, u_resi, u_coeff + num_transforms * trans_offset, width); uvg_transformskip(state->encoder_control, u_resi, u_coeff + num_transforms * trans_offset, width);
kvz_transformskip(state->encoder_control, v_resi, v_coeff + num_transforms * trans_offset, width); uvg_transformskip(state->encoder_control, v_resi, v_coeff + num_transforms * trans_offset, width);
transforms[num_transforms] = CHROMA_TS; transforms[num_transforms] = CHROMA_TS;
num_transforms++; num_transforms++;
} }
@ -488,7 +488,7 @@ void kvz_chroma_transform_search(
int16_t u_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C]; int16_t u_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C];
int16_t v_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C]; int16_t v_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C];
const coeff_scan_order_t scan_order = const coeff_scan_order_t scan_order =
kvz_get_scan_order(pred_cu->type, mode, depth); uvg_get_scan_order(pred_cu->type, mode, depth);
bool u_has_coeffs = false; bool u_has_coeffs = false;
bool v_has_coeffs = false; bool v_has_coeffs = false;
quantize_chroma( quantize_chroma(
@ -510,18 +510,18 @@ void kvz_chroma_transform_search(
if (IS_JCCR_MODE(transforms[i]) && !u_has_coeffs) continue; if (IS_JCCR_MODE(transforms[i]) && !u_has_coeffs) continue;
if (u_has_coeffs) { if (u_has_coeffs) {
kvz_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
pred_cu->type, transforms[i] == CHROMA_TS); pred_cu->type, transforms[i] == CHROMA_TS);
if (transforms[i] != CHROMA_TS) { if (transforms[i] != CHROMA_TS) {
kvz_itransform2d(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width, uvg_itransform2d(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width,
transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu); transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu);
} }
else { else {
kvz_itransformskip(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width); uvg_itransformskip(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width);
} }
if (transforms[i] != JCCR_1) { if (transforms[i] != JCCR_1) {
for (int j = 0; j < width * height; j++) { for (int j = 0; j < width * height; j++) {
u_recon[trans_offset * i + j] = CLIP_TO_PIXEL((kvz_pixel)(u_pred[j] + u_recon_resi[j])); u_recon[trans_offset * i + j] = CLIP_TO_PIXEL((uvg_pixel)(u_pred[j] + u_recon_resi[j]));
} }
} }
else { else {
@ -531,17 +531,17 @@ void kvz_chroma_transform_search(
} }
} }
else { else {
kvz_pixels_blit(u_pred, &u_recon[trans_offset * i], width, height, width, width); uvg_pixels_blit(u_pred, &u_recon[trans_offset * i], width, height, width, width);
} }
if (v_has_coeffs && !(IS_JCCR_MODE(transforms[i]))) { if (v_has_coeffs && !(IS_JCCR_MODE(transforms[i]))) {
kvz_dequant(state, v_quant_coeff, &v_coeff[i * trans_offset], width, width, COLOR_V, uvg_dequant(state, v_quant_coeff, &v_coeff[i * trans_offset], width, width, COLOR_V,
pred_cu->type, transforms[i] == CHROMA_TS); pred_cu->type, transforms[i] == CHROMA_TS);
if (transforms[i] != CHROMA_TS) { if (transforms[i] != CHROMA_TS) {
kvz_itransform2d(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width, uvg_itransform2d(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width,
transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu); transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu);
} }
else { else {
kvz_itransformskip(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width); uvg_itransformskip(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width);
} }
for (int j = 0; j < width * height; j++) { for (int j = 0; j < width * height; j++) {
v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + v_recon_resi[j]); v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + v_recon_resi[j]);
@ -565,16 +565,16 @@ void kvz_chroma_transform_search(
} }
} }
else { else {
kvz_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width); uvg_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width);
} }
unsigned ssd_u = 0; unsigned ssd_u = 0;
unsigned ssd_v = 0; unsigned ssd_v = 0;
if (!state->encoder_control->cfg.lossless) { if (!state->encoder_control->cfg.lossless) {
ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i], ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i],
LCU_WIDTH_C, width, LCU_WIDTH_C, width,
width); width);
ssd_v = kvz_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i], ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i],
LCU_WIDTH_C, width, LCU_WIDTH_C, width,
width); width);
} }
@ -604,7 +604,7 @@ void kvz_chroma_transform_search(
transforms[i] == CHROMA_TS, u_bits, "tr_skip_u" transforms[i] == CHROMA_TS, u_bits, "tr_skip_u"
); );
} }
double coeff_cost = kvz_get_coeff_cost( double coeff_cost = uvg_get_coeff_cost(
state, state,
u_quant_coeff, u_quant_coeff,
NULL, NULL,
@ -620,7 +620,7 @@ void kvz_chroma_transform_search(
transforms[i] == CHROMA_TS, v_bits, "tr_skip_v" transforms[i] == CHROMA_TS, v_bits, "tr_skip_v"
); );
} }
v_bits += kvz_get_coeff_cost( v_bits += uvg_get_coeff_cost(
state, state,
v_quant_coeff, v_quant_coeff,
NULL, NULL,
@ -630,8 +630,8 @@ void kvz_chroma_transform_search(
transforms[i] == CHROMA_TS); transforms[i] == CHROMA_TS);
} }
if (!IS_JCCR_MODE(transforms[i])) { if (!IS_JCCR_MODE(transforms[i])) {
double u_cost = KVZ_CHROMA_MULT * ssd_u + u_bits * state->frame->lambda; double u_cost = UVG_CHROMA_MULT * ssd_u + u_bits * state->frame->lambda;
double v_cost = KVZ_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda; double v_cost = UVG_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda;
if (u_cost < chorma_ts_out->best_u_cost) { if (u_cost < chorma_ts_out->best_u_cost) {
chorma_ts_out->best_u_cost = u_cost; chorma_ts_out->best_u_cost = u_cost;
chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL; chorma_ts_out->best_u_index = u_has_coeffs ? transforms[i] : NO_RESIDUAL;
@ -642,7 +642,7 @@ void kvz_chroma_transform_search(
} }
} }
else { else {
double cost = KVZ_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->frame->lambda; double cost = UVG_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->frame->lambda;
if (cost < chorma_ts_out->best_combined_cost) { if (cost < chorma_ts_out->best_combined_cost) {
chorma_ts_out->best_combined_cost = cost; chorma_ts_out->best_combined_cost = cost;
chorma_ts_out->best_combined_index = transforms[i]; chorma_ts_out->best_combined_index = transforms[i];

View file

@ -74,7 +74,7 @@ typedef struct {
int best_u_index; int best_u_index;
int best_v_index; int best_v_index;
int best_combined_index; int best_combined_index;
} kvz_chorma_ts_out_t; } uvg_chorma_ts_out_t;
void uvg_quantize_lcu_residual( void uvg_quantize_lcu_residual(
encoder_state_t *state, encoder_state_t *state,
@ -88,7 +88,7 @@ void uvg_quantize_lcu_residual(
lcu_t* lcu, lcu_t* lcu,
bool early_skip); bool early_skip);
void kvz_chroma_transform_search( void uvg_chroma_transform_search(
encoder_state_t* const state, encoder_state_t* const state,
int depth, int depth,
lcu_t* const lcu, lcu_t* const lcu,
@ -98,13 +98,13 @@ void kvz_chroma_transform_search(
const int offset, const int offset,
const uint8_t mode, const uint8_t mode,
cu_info_t* pred_cu, cu_info_t* pred_cu,
kvz_pixel u_pred[1024], uvg_pixel u_pred[1024],
kvz_pixel v_pred[1024], uvg_pixel v_pred[1024],
int16_t u_resi[1024], int16_t u_resi[1024],
int16_t v_resi[1024], int16_t v_resi[1024],
kvz_chorma_ts_out_t* chorma_ts_out); uvg_chorma_ts_out_t* chorma_ts_out);
enum kvz_chroma_transforms { enum uvg_chroma_transforms {
DCT7_CHROMA = 0, DCT7_CHROMA = 0,
CHROMA_TS = 4, CHROMA_TS = 4,
NO_RESIDUAL = 8, NO_RESIDUAL = 8,