mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
[jccr] Chroma transform search kinda working
This commit is contained in:
parent
27b730c2e9
commit
f056178e80
2
src/cu.h
2
src/cu.h
|
@ -146,7 +146,7 @@ typedef struct
|
|||
uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped
|
||||
uint8_t merged : 1; //!< \brief flag to indicate this block is merged
|
||||
uint8_t merge_idx : 3; //!< \brief merge index
|
||||
uint8_t tr_skip : 1; //!< \brief transform skip flag
|
||||
uint8_t tr_skip : 3; //!< \brief transform skip flag
|
||||
uint8_t tr_idx : 3; //!< \brief transform index
|
||||
uint8_t joint_cb_cr : 3; //!< \brief joint chroma residual coding
|
||||
|
||||
|
|
|
@ -514,7 +514,7 @@ static void encode_chroma_tu(encoder_state_t* const state, int x, int y, int dep
|
|||
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
|
||||
// HEVC only supports transform_skip for Luma
|
||||
// TODO: transform skip for chroma blocks
|
||||
CABAC_BIN(cabac, 0, "transform_skip_flag");
|
||||
CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_U) & 1, "transform_skip_flag");
|
||||
}
|
||||
uvg_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, COLOR_U, *scan_idx, NULL, cur_pu);
|
||||
}
|
||||
|
@ -522,7 +522,7 @@ static void encode_chroma_tu(encoder_state_t* const state, int x, int y, int dep
|
|||
if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) {
|
||||
if (state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)) {
|
||||
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
|
||||
CABAC_BIN(cabac, 0, "transform_skip_flag");
|
||||
CABAC_BIN(cabac, (cur_pu->tr_skip >> COLOR_V) & 1, "transform_skip_flag");
|
||||
}
|
||||
uvg_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, COLOR_V, *scan_idx, NULL, cur_pu);
|
||||
}
|
||||
|
|
18
src/rdo.c
18
src/rdo.c
|
@ -233,10 +233,10 @@ int uvg_init_rdcost_outfiles(const char *dir_path)
|
|||
// As long as QP is a two-digit number, template and produced string should
|
||||
// be equal in length ("%i" -> "22")
|
||||
assert(RD_SAMPLING_MAX_LAST_QP <= 99);
|
||||
assert(strlen(fn_template) <= RD_SAMPLING_MAX_FN_LENGTH);
|
||||
|
||||
strncpy(fn_template, dir_path, RD_SAMPLING_MAX_FN_LENGTH);
|
||||
strncat(fn_template, basename_tmpl, RD_SAMPLING_MAX_FN_LENGTH - strlen(dir_path));
|
||||
assert(strlen(fn_template) <= RD_SAMPLING_MAX_FN_LENGTH);
|
||||
|
||||
for (qp = 0; qp <= RD_SAMPLING_MAX_LAST_QP; qp++) {
|
||||
pthread_mutex_t *curr = outfile_mutex + qp;
|
||||
|
@ -290,7 +290,7 @@ out:
|
|||
*
|
||||
* \param coeff coefficient array
|
||||
* \param width coeff block width
|
||||
* \param type data type (0 == luma)
|
||||
* \param color data type (0 == luma)
|
||||
*
|
||||
* \returns bits needed to code input coefficients
|
||||
*/
|
||||
|
@ -298,7 +298,7 @@ static INLINE double get_coeff_cabac_cost(
|
|||
const encoder_state_t * const state,
|
||||
const coeff_t *coeff,
|
||||
int32_t width,
|
||||
int32_t type,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip,
|
||||
cu_info_t* cur_tu)
|
||||
|
@ -331,7 +331,7 @@ static INLINE double get_coeff_cabac_cost(
|
|||
&cabac_copy,
|
||||
coeff,
|
||||
width,
|
||||
type,
|
||||
color,
|
||||
scan_mode,
|
||||
cur_tu,
|
||||
&bits);
|
||||
|
@ -341,7 +341,7 @@ static INLINE double get_coeff_cabac_cost(
|
|||
&cabac_copy,
|
||||
coeff,
|
||||
width,
|
||||
type,
|
||||
color,
|
||||
scan_mode,
|
||||
&bits);
|
||||
}
|
||||
|
@ -383,7 +383,7 @@ static INLINE void save_accuracy(int qp, double ccc, uint32_t fast_cost)
|
|||
*
|
||||
* \param coeff coefficient array
|
||||
* \param width coeff block width
|
||||
* \param type data type (0 == luma)
|
||||
* \param color data type (0 == luma)
|
||||
*
|
||||
* \returns number of bits needed to code coefficients
|
||||
*/
|
||||
|
@ -392,7 +392,7 @@ double uvg_get_coeff_cost(
|
|||
const coeff_t *coeff,
|
||||
cu_info_t* cur_tu,
|
||||
int32_t width,
|
||||
int32_t type,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip)
|
||||
{
|
||||
|
@ -411,13 +411,13 @@ double uvg_get_coeff_cost(
|
|||
uint64_t weights = uvg_fast_coeff_get_weights(state);
|
||||
uint32_t fast_cost = uvg_fast_coeff_cost(coeff, width, weights);
|
||||
if (check_accuracy) {
|
||||
double ccc = get_coeff_cabac_cost(state, coeff, width, type, scan_mode, tr_skip, cur_tu);
|
||||
double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu);
|
||||
save_accuracy(state->qp, ccc, fast_cost);
|
||||
}
|
||||
return fast_cost;
|
||||
}
|
||||
} else {
|
||||
double ccc = get_coeff_cabac_cost(state, coeff, width, type, scan_mode, tr_skip, cur_tu);
|
||||
double ccc = get_coeff_cabac_cost(state, coeff, width, color, scan_mode, tr_skip, cur_tu);
|
||||
if (save_cccs) {
|
||||
save_ccc(state->qp, coeff, width * width, ccc);
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ double uvg_get_coeff_cost(
|
|||
const coeff_t *coeff,
|
||||
cu_info_t* cur_tu,
|
||||
int32_t width,
|
||||
int32_t type,
|
||||
color_t color,
|
||||
int8_t scan_mode,
|
||||
int8_t tr_skip);
|
||||
|
||||
|
|
49
src/search.c
49
src/search.c
|
@ -473,8 +473,8 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
|||
|
||||
const uint8_t tr_depth = tr_cu->tr_depth - depth;
|
||||
|
||||
const int cb_flag_u = cbf_is_set(tr_cu->cbf, depth, COLOR_U);
|
||||
const int cb_flag_v = cbf_is_set(tr_cu->cbf, depth, COLOR_V);
|
||||
const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, depth, COLOR_U);
|
||||
const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, depth, COLOR_V);
|
||||
|
||||
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
|
||||
|
||||
|
@ -488,7 +488,8 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
|||
|
||||
}
|
||||
|
||||
if(state->encoder_control->chroma_format != UVG_CSP_400 && !skip_residual_coding && (depth != 4 || (x_px % 8 && y_px % 8))) {
|
||||
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (x_px % 8 && y_px % 8));
|
||||
if( !skip_residual_coding && has_chroma) {
|
||||
if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb");
|
||||
}
|
||||
|
@ -522,10 +523,10 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
|||
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search");
|
||||
}
|
||||
|
||||
if (cb_flag_y | cb_flag_u | cb_flag_v) {
|
||||
if (cb_flag_y || cb_flag_u || cb_flag_v) {
|
||||
// TODO qp_delta_sign_flag
|
||||
|
||||
if ((cb_flag_u | cb_flag_v) && x_px % 8 == 0 && y_px % 8 == 0 && state->encoder_control->cfg.jccr) {
|
||||
if ((cb_flag_u || cb_flag_v) && has_chroma && state->encoder_control->cfg.jccr) {
|
||||
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, tr_tree_bits, "tu_joint_cbcr_residual_flag");
|
||||
}
|
||||
}
|
||||
|
@ -547,11 +548,11 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
|||
int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);
|
||||
const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)];
|
||||
|
||||
coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, width, 0, luma_scan_mode, tr_cu->tr_skip);
|
||||
coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, width, 0, luma_scan_mode, tr_cu->tr_skip & 1);
|
||||
}
|
||||
|
||||
unsigned chroma_ssd = 0;
|
||||
if(state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (x_px % 8 != 0 && y_px % 8 != 0))) {
|
||||
if(has_chroma) {
|
||||
const vector2d_t lcu_px = { (x_px & ~7 ) / 2, (y_px & ~7) / 2 };
|
||||
const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1));
|
||||
int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
|
||||
|
@ -566,22 +567,23 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
|
|||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
chroma_ssd = ssd_u + ssd_v;
|
||||
}
|
||||
|
||||
}
|
||||
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], NULL, chroma_width, COLOR_U, scan_order, tr_cu->tr_skip & 2);
|
||||
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], NULL, chroma_width, COLOR_V, scan_order, tr_cu->tr_skip & 4);
|
||||
|
||||
}
|
||||
else {
|
||||
{
|
||||
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.u[index], NULL, chroma_width, 2, scan_order, 0);
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.v[index], NULL, chroma_width, 2, scan_order, 0);
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
chroma_ssd = ssd_u_joint + ssd_v_joint;
|
||||
}
|
||||
} else {
|
||||
int ssd_u_joint = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
width);
|
||||
int ssd_v_joint = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
chroma_width);
|
||||
chroma_ssd = ssd_u_joint + ssd_v_joint;
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, width, 2, scan_order, 0);
|
||||
coeff_bits += uvg_get_coeff_cost(state, &lcu->coeff.joint_uv[index], NULL, chroma_width, COLOR_U, scan_order, 0);
|
||||
}
|
||||
}
|
||||
if (kvz_is_mts_allowed(state, tr_cu)) {
|
||||
|
@ -986,7 +988,10 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
if (ctrl->cfg.rdo >= 3) {
|
||||
cur_cu->intra.mode_chroma = uvg_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search);
|
||||
|
||||
if (intra_search.pred_cu.joint_cb_cr == 0) intra_search.pred_cu.joint_cb_cr = 4;
|
||||
if (intra_search.pred_cu.joint_cb_cr == 0) {
|
||||
intra_search.pred_cu.joint_cb_cr = 4;
|
||||
cur_cu->tr_skip |= intra_search.pred_cu.tr_skip;
|
||||
}
|
||||
else cur_cu->joint_cb_cr = intra_search.pred_cu.joint_cb_cr;
|
||||
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
|
|
|
@ -388,6 +388,14 @@ static double search_intra_trdepth(
|
|||
}
|
||||
pred_cu->intra.mode_chroma = -1;
|
||||
pred_cu->joint_cb_cr = 4;
|
||||
for (; trafo < num_transforms; trafo++) {
|
||||
pred_cu->tr_idx = trafo;
|
||||
if (trafo == MTS_SKIP) pred_cu->tr_skip |= 1;
|
||||
else pred_cu->tr_skip &= 6; // Keep chroma tr_skip untouched allthough it probably won't matter here
|
||||
if (mts_enabled)
|
||||
{
|
||||
pred_cu->mts_last_scan_pos = 0;
|
||||
pred_cu->violates_mts_coeff_constraint = 0;
|
||||
|
||||
const int max_tb_size = TR_MAX_WIDTH;
|
||||
// LFNST search params
|
||||
|
@ -1350,6 +1358,7 @@ double uvg_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
|
|||
}
|
||||
|
||||
|
||||
#define IS_JCCR_MODE(t) ((t) != DCT7_CHROMA && (t) != CHROMA_TS)
|
||||
/**
 * \brief Square an int, carrying out the multiplication in 64 bits.
 *
 * \param x  value to square
 * \returns  x * x as int64_t
 */
static INLINE int64_t square(int x) {
  const int64_t wide = x;  // widen first so the product is evaluated as int64_t
  return wide * wide;
}
|
||||
|
@ -1362,6 +1371,181 @@ enum chroma_transforms {
|
|||
JCCR_3 = 3,
|
||||
};
|
||||
|
||||
static void generate_jccr_transforms(encoder_state_t* const state,
|
||||
intra_search_data_t* chroma_data, int8_t width, int8_t mode_i,
|
||||
int16_t u_resi[1024], int16_t v_resi[1024], coeff_t u_coeff[5120],
|
||||
enum chroma_transforms transforms[5], const int trans_offset, int* num_transforms)
|
||||
{
|
||||
ALIGNED(64) int16_t temp_resi[LCU_WIDTH_C * LCU_WIDTH_C * 3];
|
||||
int64_t costs[4];
|
||||
costs[0] = INT64_MAX;
|
||||
for (int jccr = chroma_data[mode_i].pred_cu.type == CU_INTRA ? 0 : 3; jccr < 4; jccr++) {
|
||||
int64_t d1 = 0;
|
||||
int64_t d2 = 0;
|
||||
const int cbf_mask = jccr * (state->frame->jccr_sign ? -1 : 1);
|
||||
int16_t *current_resi = &temp_resi[(jccr - 1) * trans_offset];
|
||||
for (int y = 0; y < width; y++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
const int16_t cbx = u_resi[x + y * width], crx = v_resi[x + y * width];
|
||||
if (cbf_mask == 2)
|
||||
{
|
||||
const int16_t resi = ((4 * cbx + 2 * crx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx - (resi >> 1));
|
||||
}
|
||||
else if (cbf_mask == -2)
|
||||
{
|
||||
const int16_t resi = ((4 * cbx - 2 * crx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx - (-resi >> 1));
|
||||
}
|
||||
else if (cbf_mask == 3)
|
||||
{
|
||||
const int16_t resi = ((cbx + crx) / 2);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx - resi);
|
||||
}
|
||||
else if (cbf_mask == -3)
|
||||
{
|
||||
const int16_t resi = ((cbx - crx) / 2);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx + resi);
|
||||
}
|
||||
else if (cbf_mask == 1)
|
||||
{
|
||||
const int16_t resi = ((4 * crx + 2 * cbx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - (resi >> 1)) + square(crx - resi);
|
||||
}
|
||||
else if (cbf_mask == -1)
|
||||
{
|
||||
const int16_t resi = ((4 * crx - 2 * cbx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - (-resi >> 1)) + square(crx - resi);
|
||||
}
|
||||
else
|
||||
{
|
||||
d1 += square(cbx);
|
||||
d2 += square(crx);
|
||||
}
|
||||
}
|
||||
}
|
||||
costs[jccr] = d2 != 0 ? MIN(d1, d2) : d1;
|
||||
}
|
||||
int64_t min_dist1 = costs[0];
|
||||
int64_t min_dist2 = INT64_MAX;
|
||||
int cbf_mask1 = 0;
|
||||
int cbf_mask2 = 0;
|
||||
for (int cbfMask = 1; cbfMask < 4; cbfMask++)
|
||||
{
|
||||
if (costs[cbfMask] < min_dist1)
|
||||
{
|
||||
cbf_mask2 = cbf_mask1; min_dist2 = min_dist1;
|
||||
cbf_mask1 = cbfMask; min_dist1 = costs[cbf_mask1];
|
||||
}
|
||||
else if (costs[cbfMask] < min_dist2)
|
||||
{
|
||||
cbf_mask2 = cbfMask; min_dist2 = costs[cbf_mask2];
|
||||
}
|
||||
}
|
||||
if (cbf_mask1)
|
||||
{
|
||||
kvz_transform2d(
|
||||
state->encoder_control,
|
||||
&temp_resi[(cbf_mask1 - 1) * trans_offset],
|
||||
&u_coeff[*num_transforms * trans_offset],
|
||||
width,
|
||||
COLOR_U,
|
||||
&chroma_data[cbf_mask1].pred_cu
|
||||
);
|
||||
transforms[(*num_transforms)] = cbf_mask1;
|
||||
(*num_transforms)++;
|
||||
}
|
||||
if (cbf_mask2 && ((min_dist2 < (9 * min_dist1) / 8) || (!cbf_mask1 && min_dist2 < (3 * min_dist1) / 2)))
|
||||
{
|
||||
kvz_transform2d(
|
||||
state->encoder_control,
|
||||
&temp_resi[(cbf_mask2 - 1) * trans_offset],
|
||||
&u_coeff[*num_transforms * trans_offset],
|
||||
width,
|
||||
COLOR_U,
|
||||
&chroma_data[cbf_mask2].pred_cu
|
||||
);
|
||||
transforms[(*num_transforms)] = cbf_mask2;
|
||||
(*num_transforms)++;
|
||||
}
|
||||
}
|
||||
|
||||
static void quantize_chroma(
|
||||
encoder_state_t* const state,
|
||||
int depth,
|
||||
int8_t width,
|
||||
int8_t height,
|
||||
coeff_t u_coeff[5120],
|
||||
coeff_t v_coeff[2048],
|
||||
enum chroma_transforms transforms[5],
|
||||
const int trans_offset,
|
||||
int i,
|
||||
coeff_t u_quant_coeff[1024],
|
||||
coeff_t v_quant_coeff[1024],
|
||||
const coeff_scan_order_t scan_order,
|
||||
bool* u_has_coeffs,
|
||||
bool* v_has_coeffs)
|
||||
{
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
uvg_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
scan_order, CU_INTRA, depth, 0);
|
||||
|
||||
int j;
|
||||
for (j = 0; j < width * height; ++j) {
|
||||
if (u_quant_coeff[j]) {
|
||||
*u_has_coeffs = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(transforms[i] == DCT7_CHROMA) {
|
||||
uint16_t temp_cbf = 0;
|
||||
if (*u_has_coeffs)cbf_set(&temp_cbf, depth, COLOR_U);
|
||||
uvg_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
|
||||
scan_order, CU_INTRA, depth, temp_cbf);
|
||||
|
||||
}
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && transforms[i] == CHROMA_TS) {
|
||||
uvg_ts_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, COLOR_U,scan_order);
|
||||
uvg_ts_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,scan_order);
|
||||
}
|
||||
else {
|
||||
uvg_quant(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
|
||||
|
||||
if(!IS_JCCR_MODE(transforms[i])) {
|
||||
uvg_quant(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
|
||||
scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; j < width * height; ++j) {
|
||||
if (u_quant_coeff[j]) {
|
||||
*u_has_coeffs = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!IS_JCCR_MODE(transforms[i])) {
|
||||
for (int j = 0; j < width * height; ++j) {
|
||||
if (v_quant_coeff[j]) {
|
||||
*v_has_coeffs = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int8_t uvg_search_intra_chroma_rdo(
|
||||
encoder_state_t * const state,
|
||||
int x_px,
|
||||
|
@ -1384,159 +1568,103 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
|
||||
|
||||
if (reconstruct_chroma) {
|
||||
uvg_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0);
|
||||
uvg_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0);
|
||||
int log2_width = MAX(LOG2_LCU_WIDTH - depth - 1, 2);
|
||||
uvg_intra_build_reference(log2_width, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0);
|
||||
uvg_intra_build_reference(log2_width, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0);
|
||||
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
cabac_data_t temp_cabac;
|
||||
memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t));
|
||||
int8_t width = MAX(4, LCU_CU_WIDTH >> (depth - 1));
|
||||
int8_t height = MAX(4, LCU_CU_WIDTH >> (depth - 1));
|
||||
const cu_loc_t loc = { x_px, y_px, width, height, width, height};
|
||||
const int offset = (lcu_px.x >> 1) + (lcu_px.y >> 1)* LCU_WIDTH_C;
|
||||
int8_t width = 1 << log2_width;
|
||||
int8_t height = 1 << log2_width;
|
||||
const cu_loc_t loc = { x_px &~7, y_px & ~7, width, height, width, height};
|
||||
const int offset = ((lcu_px.x & ~7) >> 1) + ((lcu_px.y & ~7) >> 1)* LCU_WIDTH_C;
|
||||
|
||||
for (int8_t i = 0; i < num_modes; ++i) {
|
||||
const uint8_t mode = chroma_data[i].pred_cu.intra.mode_chroma;
|
||||
for (int8_t mode_i = 0; mode_i < num_modes; ++mode_i) {
|
||||
const uint8_t mode = chroma_data[mode_i].pred_cu.intra.mode_chroma;
|
||||
double mode_bits = kvz_chroma_mode_bits(state, mode, luma_mode);
|
||||
chroma_data[mode_i].cost = mode_bits * state->lambda;
|
||||
if ((state->encoder_control->cfg.jccr ||
|
||||
(state->encoder_control->cfg.trskip_enable &&
|
||||
(1 << state->encoder_control->cfg.trskip_max_size) >= width)) &&
|
||||
chroma_data[i].pred_cu.tr_depth == chroma_data[i].pred_cu.depth) {
|
||||
chroma_data[mode_i].pred_cu.tr_depth == chroma_data[mode_i].pred_cu.depth) {
|
||||
ALIGNED(64) kvz_pixel u_pred[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) kvz_pixel v_pred[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) int16_t u_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
ALIGNED(64) int16_t v_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
uvg_intra_predict(
|
||||
state,
|
||||
&refs[COLOR_U],
|
||||
&refs[COLOR_U - 1],
|
||||
&loc,
|
||||
COLOR_U,
|
||||
u_pred,
|
||||
&chroma_data[i],
|
||||
&chroma_data[mode_i],
|
||||
lcu);
|
||||
uvg_intra_predict(
|
||||
state,
|
||||
&refs[COLOR_V],
|
||||
&refs[COLOR_V - 1],
|
||||
&loc,
|
||||
COLOR_V,
|
||||
v_pred,
|
||||
&chroma_data[i],
|
||||
&chroma_data[mode_i],
|
||||
lcu);
|
||||
uvg_generate_residual(
|
||||
&lcu->ref.u[offset],
|
||||
u_pred,
|
||||
u_resi,
|
||||
width,
|
||||
LCU_WIDTH_C,
|
||||
width);
|
||||
uvg_generate_residual(
|
||||
&lcu->ref.v[offset],
|
||||
v_pred,
|
||||
v_resi,
|
||||
width,
|
||||
LCU_WIDTH_C,
|
||||
width);
|
||||
ALIGNED(64) coeff_t u_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 5];
|
||||
ALIGNED(64) uint8_t u_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5];
|
||||
ALIGNED(64) coeff_t v_coeff[LCU_WIDTH_C * LCU_WIDTH_C * 2];
|
||||
ALIGNED(64) uint8_t v_recon[LCU_WIDTH_C * LCU_WIDTH_C * 5];
|
||||
uvg_transform2d(
|
||||
state->encoder_control, u_resi, u_coeff, width, COLOR_U, &chroma_data[i].pred_cu
|
||||
state->encoder_control, u_resi, u_coeff, width, COLOR_U, &chroma_data[mode_i].pred_cu
|
||||
);
|
||||
uvg_transform2d(
|
||||
state->encoder_control, v_resi, v_coeff, width, COLOR_V, &chroma_data[i].pred_cu
|
||||
state->encoder_control, v_resi, v_coeff, width, COLOR_V, &chroma_data[mode_i].pred_cu
|
||||
);
|
||||
enum chroma_transforms transforms[5];
|
||||
transforms[0] = DCT7_CHROMA;
|
||||
const int trans_offset = width * height;
|
||||
int num_transforms = 1;
|
||||
if(state->encoder_control->cfg.trskip_enable &&
|
||||
(1 << state->encoder_control->cfg.trskip_max_size) >= width) {
|
||||
const int can_use_tr_skip = state->encoder_control->cfg.trskip_enable &&
|
||||
(1 << state->encoder_control->cfg.trskip_max_size) >= width;
|
||||
if(can_use_tr_skip) {
|
||||
uvg_transformskip(state->encoder_control, u_resi, u_coeff + num_transforms * trans_offset, width);
|
||||
uvg_transformskip(state->encoder_control, v_resi, v_coeff + num_transforms * trans_offset, width);
|
||||
transforms[num_transforms] = CHROMA_TS;
|
||||
num_transforms++;
|
||||
}
|
||||
if(state->encoder_control->cfg.jccr) {
|
||||
ALIGNED(64) int16_t temp_resi[LCU_WIDTH_C * LCU_WIDTH_C * 3];
|
||||
int64_t costs[4];
|
||||
costs[0] = INT64_MAX;
|
||||
for (int jccr = chroma_data[i].pred_cu.type == CU_INTRA ? 0 : 3; jccr < 4; jccr++) {
|
||||
int64_t d1 = 0;
|
||||
int64_t d2 = 0;
|
||||
const int cbf_mask = jccr * (state->frame->jccr_sign ? -1 : 1);
|
||||
int16_t *current_resi = &temp_resi[(jccr - 1) + trans_offset];
|
||||
for (int y = 0; y < width; y++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
int cbx = u_resi[x + y * width], crx = v_resi[x + y * width];
|
||||
if (cbf_mask == 2)
|
||||
{
|
||||
const int resi = ((4 * cbx + 2 * crx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx - (resi >> 1));
|
||||
}
|
||||
else if (cbf_mask == -2)
|
||||
{
|
||||
const int resi = ((4 * cbx - 2 * crx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx - (resi >> 1));
|
||||
}
|
||||
else if (cbf_mask == 3)
|
||||
{
|
||||
const int resi = ((cbx + crx) / 2);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx - resi);
|
||||
}
|
||||
else if (cbf_mask == -3)
|
||||
{
|
||||
const int resi = ((cbx - crx) / 2);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - resi) + square(crx + resi);
|
||||
}
|
||||
else if (cbf_mask == 1)
|
||||
{
|
||||
const int resi = ((4 * crx + 2 * cbx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - (resi >> 1)) + square(crx - resi);
|
||||
}
|
||||
else if (cbf_mask == -1)
|
||||
{
|
||||
const int resi = ((4 * crx - 2 * cbx) / 5);
|
||||
current_resi[x + y * width] = resi;
|
||||
d1 += square(cbx - (resi >> 1)) + square(crx - resi);
|
||||
}
|
||||
else
|
||||
{
|
||||
d1 += square(cbx);
|
||||
d2 += square(crx);
|
||||
}
|
||||
}
|
||||
}
|
||||
costs[jccr] = d2 != 0 ? MIN(d1, d2) : d1;
|
||||
}
|
||||
for(int jccr = chroma_data[i].pred_cu.type == CU_INTRA ? 1 : 3; jccr < 4; jccr++) {
|
||||
if(costs[jccr] < costs[0]) {
|
||||
uvg_transform2d(
|
||||
state->encoder_control,
|
||||
&temp_resi[(jccr - 1) + trans_offset],
|
||||
&u_coeff[num_transforms * trans_offset],
|
||||
width,
|
||||
COLOR_U,
|
||||
&chroma_data[jccr].pred_cu
|
||||
);
|
||||
transforms[num_transforms] = jccr;
|
||||
num_transforms++;
|
||||
}
|
||||
}
|
||||
generate_jccr_transforms(
|
||||
state,
|
||||
chroma_data,
|
||||
width,
|
||||
mode_i,
|
||||
u_resi,
|
||||
v_resi,
|
||||
u_coeff,
|
||||
transforms,
|
||||
trans_offset,
|
||||
&num_transforms);
|
||||
}
|
||||
|
||||
double best_u_cost = MAX_INT64;
|
||||
double best_v_cost = MAX_INT64;
|
||||
double best_combined_cost = MAX_INT64;
|
||||
int best_u_index = -1;
|
||||
int best_v_index = -1;
|
||||
int best_combined_index = -1;
|
||||
for(int trans = 0; trans < num_transforms; trans++) {
|
||||
for(int i = 0; i < num_transforms; i++) {
|
||||
coeff_t u_quant_coeff[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
coeff_t v_quant_coeff[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
int16_t u_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C];
|
||||
|
@ -1545,56 +1673,24 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
uvg_get_scan_order(CU_INTRA, mode, depth);
|
||||
bool u_has_coeffs = false;
|
||||
bool v_has_coeffs = false;
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
uvg_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
scan_order, CU_INTRA, depth, 0);
|
||||
|
||||
int j;
|
||||
for (j = 0; i < width * height; ++j) {
|
||||
if (u_quant_coeff[num_transforms * trans_offset + j]) {
|
||||
u_has_coeffs = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(transforms[i] == DCT7_CHROMA) {
|
||||
int16_t temp_cbf = 0;
|
||||
if (u_has_coeffs)cbf_set(&temp_cbf, depth, COLOR_U);
|
||||
uvg_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
|
||||
scan_order, CU_INTRA, depth, temp_cbf);
|
||||
|
||||
}
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && transforms[i] == CHROMA_TS) {
|
||||
uvg_ts_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, COLOR_U,scan_order);
|
||||
uvg_ts_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,scan_order);
|
||||
}
|
||||
else {
|
||||
uvg_quant(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
|
||||
quantize_chroma(
|
||||
state,
|
||||
depth,
|
||||
width,
|
||||
height,
|
||||
u_coeff,
|
||||
v_coeff,
|
||||
transforms,
|
||||
trans_offset,
|
||||
i,
|
||||
u_quant_coeff,
|
||||
v_quant_coeff,
|
||||
scan_order,
|
||||
&u_has_coeffs,
|
||||
&v_has_coeffs);
|
||||
|
||||
if(IS_JCCR_MODE(transforms[i]) && !u_has_coeffs) continue;
|
||||
|
||||
if(transforms[i] != CHROMA_TS && transforms[i] != DCT7_CHROMA) {
|
||||
uvg_quant(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V,
|
||||
scan_order, CU_INTRA, transforms[i] == CHROMA_TS);
|
||||
}
|
||||
}
|
||||
|
||||
for (int j = 0; i < width * height; ++j) {
|
||||
if (u_quant_coeff[num_transforms * trans_offset + j]) {
|
||||
u_has_coeffs = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (transforms[i] != CHROMA_TS && transforms[i] != DCT7_CHROMA) {
|
||||
for (int j = 0; i < width * height; ++j) {
|
||||
if (v_quant_coeff[num_transforms * trans_offset + j]) {
|
||||
v_has_coeffs = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(u_has_coeffs) {
|
||||
uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V,
|
||||
CU_INTRA, transforms[i] == CHROMA_TS);
|
||||
|
@ -1607,19 +1703,19 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
}
|
||||
if(transforms[i] != JCCR_1) {
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
u_recon[offset * i + j] = CLIP_TO_PIXEL((int16_t)u_pred[j] + u_recon_resi[j]);
|
||||
u_recon[trans_offset * i + j] = CLIP_TO_PIXEL((kvz_pixel)(u_pred[j] + u_recon_resi[j]));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
u_recon[offset * i + j] = CLIP_TO_PIXEL((int16_t)u_pred[j] + (u_recon_resi[j] >> 1));
|
||||
u_recon[trans_offset * i + j] = CLIP_TO_PIXEL(u_pred[j] + ((state->frame->jccr_sign ? -u_recon_resi[j] : u_recon_resi[j]) >> 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
uvg_pixels_blit(u_pred, &u_recon[offset * i], width, height, width, width);
|
||||
uvg_pixels_blit(u_pred, &u_recon[trans_offset * i], width, height, width, width);
|
||||
}
|
||||
if(v_has_coeffs && (transforms[i] == DCT7_CHROMA || transforms[i] == CHROMA_TS)) {
|
||||
if(v_has_coeffs && !(IS_JCCR_MODE(transforms[i]))) {
|
||||
uvg_dequant(state, v_quant_coeff, &v_coeff[i * trans_offset], width, width, COLOR_V,
|
||||
CU_INTRA, transforms[i] == CHROMA_TS);
|
||||
if (transforms[i] != CHROMA_TS) {
|
||||
|
@ -1630,70 +1726,135 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
uvg_itransformskip(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width);
|
||||
}
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
v_recon[offset * i + j] = CLIP_TO_PIXEL((int16_t)u_pred[j] + v_recon_resi[j]);
|
||||
v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + v_recon_resi[j]);
|
||||
}
|
||||
}
|
||||
else if(u_has_coeffs && (transforms[i] != DCT7_CHROMA && transforms[i] != CHROMA_TS)) {
|
||||
if(transforms[i] != JCCR_2) {
|
||||
else if(u_has_coeffs && IS_JCCR_MODE(transforms[i])) {
|
||||
if (transforms[i] == JCCR_1) {
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
v_recon[offset * i + j] = CLIP_TO_PIXEL((int16_t)v_pred[j] + (state->frame->jccr_sign ? -u_recon_resi[j] : u_recon_resi[j]));
|
||||
v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + u_recon_resi[j]);
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else if(transforms[i] == JCCR_3) {
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
v_recon[offset * i + j] = CLIP_TO_PIXEL((int16_t)v_pred[j] + (state->frame->jccr_sign ? -u_recon_resi[j] : u_recon_resi[j]));
|
||||
v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + (state->frame->jccr_sign ? -u_recon_resi[j] : u_recon_resi[j]));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int j = 0; j < width * height; j++) {
|
||||
v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + ((state->frame->jccr_sign ? -u_recon_resi[j] : u_recon_resi[j]) >> 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
uvg_pixels_blit(v_pred, &v_recon[offset * i], width, height, width, width);
|
||||
uvg_pixels_blit(v_pred, &v_recon[trans_offset * i], width, height, width, width);
|
||||
}
|
||||
|
||||
int ssd_u;
|
||||
int ssd_v;
|
||||
unsigned ssd_u = 0;
|
||||
unsigned ssd_v = 0;
|
||||
if (!state->encoder_control->cfg.lossless) {
|
||||
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
|
||||
ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[offset], &u_recon[trans_offset * i],
|
||||
LCU_WIDTH_C, width,
|
||||
width);
|
||||
ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
|
||||
LCU_WIDTH_C, LCU_WIDTH_C,
|
||||
ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[offset], &v_recon[trans_offset * i],
|
||||
LCU_WIDTH_C, width,
|
||||
width);
|
||||
}
|
||||
|
||||
double u_bits = 0;
|
||||
double v_bits = 0;
|
||||
state->search_cabac.update = 1;
|
||||
if(state->encoder_control->cfg.jccr) {
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.joint_cb_cr[transforms[i]],
|
||||
transforms[i] != DCT7_CHROMA && transforms[i] != CHROMA_TS, u_bits, "jccr_flag"
|
||||
);
|
||||
}
|
||||
|
||||
int cbf_u = transforms[i] & 2 || (u_has_coeffs && !(transforms[i] & 1));
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_cb[0],
|
||||
cbf_u, u_bits, "cbf_u"
|
||||
);
|
||||
int cbf_v = transforms[i] & 1 || (v_has_coeffs && !(transforms[i] & 2));
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.qt_cbf_model_cr[cbf_u],
|
||||
transforms[i] & 1 || (v_has_coeffs && !(transforms[i] & 2)), v_bits, "cbf_v"
|
||||
cbf_v, v_bits, "cbf_v"
|
||||
);
|
||||
|
||||
if (state->encoder_control->cfg.jccr && (cbf_u || cbf_v)) {
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.joint_cb_cr[cbf_u * 2 + cbf_v - 1],
|
||||
transforms[i] != DCT7_CHROMA && transforms[i] != CHROMA_TS, v_bits, "jccr_flag"
|
||||
);
|
||||
}
|
||||
|
||||
if (cbf_u || (transforms[i] == JCCR_1 && u_has_coeffs)) {
|
||||
if(can_use_tr_skip && !IS_JCCR_MODE(transforms[i])) {
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.transform_skip_model_chroma,
|
||||
transforms[i] == CHROMA_TS, u_bits, "tr_skip_u"
|
||||
);
|
||||
}
|
||||
double coeff_cost = kvz_get_coeff_cost(
|
||||
state,
|
||||
u_quant_coeff,
|
||||
NULL,
|
||||
width,
|
||||
COLOR_U,
|
||||
scan_order,
|
||||
transforms[i] == CHROMA_TS);
|
||||
u_bits += coeff_cost;
|
||||
}
|
||||
if (cbf_v && !IS_JCCR_MODE(transforms[i])) {
|
||||
if (can_use_tr_skip) {
|
||||
CABAC_FBITS_UPDATE(&state->search_cabac, &state->search_cabac.ctx.transform_skip_model_chroma,
|
||||
transforms[i] == CHROMA_TS, v_bits, "tr_skip_v"
|
||||
);
|
||||
}
|
||||
v_bits += kvz_get_coeff_cost(
|
||||
state,
|
||||
v_quant_coeff,
|
||||
NULL,
|
||||
width,
|
||||
COLOR_V,
|
||||
scan_order,
|
||||
transforms[i] == CHROMA_TS);
|
||||
}
|
||||
if(!IS_JCCR_MODE(transforms[i])) {
|
||||
double u_cost = KVZ_CHROMA_MULT * ssd_u + u_bits * state->frame->lambda;
|
||||
double v_cost = KVZ_CHROMA_MULT * ssd_v + v_bits * state->frame->lambda;
|
||||
if(u_cost < best_u_cost) {
|
||||
best_u_cost = u_cost;
|
||||
best_u_index = transforms[i];
|
||||
}
|
||||
if(v_cost < best_v_cost) {
|
||||
best_v_cost = v_cost;
|
||||
best_v_index = transforms[i];
|
||||
}
|
||||
}
|
||||
else {
|
||||
double cost = KVZ_CHROMA_MULT * (ssd_u + ssd_v) + (u_bits + v_bits) * state->frame->lambda;
|
||||
if (cost < best_combined_cost) {
|
||||
best_combined_cost = cost;
|
||||
best_combined_index = transforms[i];
|
||||
}
|
||||
}
|
||||
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
|
||||
}
|
||||
|
||||
if(best_u_cost + best_v_cost < best_combined_cost) {
|
||||
chroma_data[mode_i].pred_cu.joint_cb_cr = 0;
|
||||
chroma_data[mode_i].pred_cu.tr_skip |= (best_u_index == CHROMA_TS) << COLOR_U;
|
||||
chroma_data[mode_i].pred_cu.tr_skip |= (best_v_index == CHROMA_TS) << COLOR_V;
|
||||
chroma_data[mode_i].cost += best_u_cost + best_v_cost;
|
||||
}
|
||||
else {
|
||||
chroma_data[mode_i].pred_cu.joint_cb_cr = best_combined_index;
|
||||
chroma_data[mode_i].cost += best_combined_cost;
|
||||
}
|
||||
}
|
||||
else {
|
||||
state->search_cabac.update = 1;
|
||||
chroma_data[mode_i].cost = mode_bits * state->lambda;
|
||||
uvg_intra_recon_cu(state,
|
||||
x_px, y_px,
|
||||
depth, &chroma_data[i],
|
||||
&chroma_data[i].pred_cu,
|
||||
depth, &chroma_data[mode_i],
|
||||
&chroma_data[mode_i].pred_cu,
|
||||
lcu);
|
||||
double mode_bits = uvg_chroma_mode_bits(state, mode, luma_mode);
|
||||
chroma_data[i].cost = mode_bits * state->lambda;
|
||||
|
||||
if(tr_cu->depth != tr_cu->tr_depth || !state->encoder_control->cfg.jccr) {
|
||||
chroma_data[i].cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, &chroma_data[i].pred_cu, lcu);
|
||||
chroma_data[mode_i].cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, &chroma_data[mode_i].pred_cu, lcu);
|
||||
} else {
|
||||
uvg_select_jccr_mode(state, lcu_px.x, lcu_px.y, depth, &chroma_data[i].pred_cu, lcu, &chroma_data[i].cost);
|
||||
uvg_select_jccr_mode(state, lcu_px.x, lcu_px.y, depth, &chroma_data[mode_i].pred_cu, lcu, &chroma_data[mode_i].cost);
|
||||
}
|
||||
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
|
||||
}
|
||||
|
@ -1701,13 +1862,14 @@ int8_t uvg_search_intra_chroma_rdo(
|
|||
|
||||
}
|
||||
sort_modes(chroma_data, num_modes);
|
||||
|
||||
|
||||
return chroma_data[0].pred_cu.intra.mode_chroma;
|
||||
}
|
||||
|
||||
return 100;
|
||||
}
|
||||
|
||||
#undef IS_JCCR_MODE
|
||||
|
||||
int8_t uvg_search_cu_intra_chroma(encoder_state_t * const state,
|
||||
const int x_px, const int y_px,
|
||||
|
@ -2090,5 +2252,6 @@ void uvg_search_cu_intra(
|
|||
search_data[0].pred_cu.mts_last_scan_pos = false;
|
||||
search_data[0].pred_cu.violates_mts_coeff_constraint = false;
|
||||
}
|
||||
printf("%f\n", search_data[0].cost);
|
||||
*mode_out = search_data[0];
|
||||
}
|
||||
|
|
|
@ -1723,44 +1723,44 @@ static INLINE __m128i get_residual_8x1_avx2(const uint8_t* a_in, const uint8_t*
|
|||
return diff;
|
||||
}
|
||||
|
||||
void generate_residual_avx2(const uint8_t* ref_in, const uint8_t* pred_in, int16_t* residual, int width, int in_stride) {
|
||||
static void generate_residual_avx2(const uint8_t* ref_in, const uint8_t* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride) {
|
||||
|
||||
__m128i diff = _mm_setzero_si128();
|
||||
switch (width) {
|
||||
case 4:
|
||||
diff = get_residual_4x1_avx2(ref_in + 0 * in_stride, pred_in + 0 * in_stride);
|
||||
diff = get_residual_4x1_avx2(ref_in + 0 * ref_stride, pred_in + 0 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[0]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 1 * in_stride, pred_in + 1 * in_stride);
|
||||
diff = get_residual_4x1_avx2(ref_in + 1 * ref_stride, pred_in + 1 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[4]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 2 * in_stride, pred_in + 2 * in_stride);
|
||||
diff = get_residual_4x1_avx2(ref_in + 2 * ref_stride, pred_in + 2 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[8]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 3 * in_stride, pred_in + 3 * in_stride);
|
||||
diff = get_residual_4x1_avx2(ref_in + 3 * ref_stride, pred_in + 3 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[12]), diff);
|
||||
break;
|
||||
case 8:
|
||||
diff = get_residual_8x1_avx2(&ref_in[0 * in_stride], &pred_in[0 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[0 * ref_stride], &pred_in[0 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[0]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[1 * in_stride], &pred_in[1 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[1 * ref_stride], &pred_in[1 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[8]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[2 * in_stride], &pred_in[2 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[2 * ref_stride], &pred_in[2 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[16]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[3 * in_stride], &pred_in[3 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[3 * ref_stride], &pred_in[3 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[24]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[4 * in_stride], &pred_in[4 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[4 * ref_stride], &pred_in[4 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[32]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[5 * in_stride], &pred_in[5 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[5 * ref_stride], &pred_in[5 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[40]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[6 * in_stride], &pred_in[6 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[6 * ref_stride], &pred_in[6 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[48]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[7 * in_stride], &pred_in[7 * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[7 * ref_stride], &pred_in[7 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[56]), diff);
|
||||
break;
|
||||
default:
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int x = 0; x < width; x += 16) {
|
||||
diff = get_residual_8x1_avx2(&ref_in[x + y * in_stride], &pred_in[x + y * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[x + y * ref_stride], &pred_in[x + y * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & residual[x + y * width], diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[(x + 8) + y * in_stride], &pred_in[(x + 8) + y * in_stride]);
|
||||
diff = get_residual_8x1_avx2(&ref_in[(x + 8) + y * ref_stride], &pred_in[(x + 8) + y * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & residual[(x + 8) + y * width], diff);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -623,7 +623,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
|
|||
assert(width >= TR_MIN_WIDTH);
|
||||
|
||||
// Get residual. (ref_in - pred_in -> residual)
|
||||
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride);
|
||||
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
int y, x;
|
||||
|
|
|
@ -782,12 +782,13 @@ static double pixel_var_generic(const uvg_pixel *arr, const uint32_t len)
|
|||
}
|
||||
|
||||
|
||||
void generate_residual_generic(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual, int width, int in_stride)
|
||||
static void generate_residual_generic(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual,
|
||||
int width, int ref_stride, int pred_stride)
|
||||
{
|
||||
int y, x;
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
residual[x + y * width] = (int16_t)(ref_in[x + y * in_stride] - pred_in[x + y * in_stride]);
|
||||
residual[x + y * width] = (int16_t)(ref_in[x + y * ref_stride] - pred_in[x + y * pred_stride]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -207,8 +207,7 @@ int uvg_quant_cbcr_residual_generic(
|
|||
) {
|
||||
ALIGNED(64) int16_t u_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) int16_t v_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) int16_t u1_residual[2][TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) int16_t v1_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) int16_t combined_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
|
||||
|
||||
{
|
||||
|
@ -220,80 +219,64 @@ int uvg_quant_cbcr_residual_generic(
|
|||
}
|
||||
}
|
||||
}
|
||||
kvz_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride);
|
||||
kvz_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride);
|
||||
|
||||
int best_cbf_mask = -1;
|
||||
int64_t best_cost = INT64_MAX;
|
||||
|
||||
// This changes the order of the cbf_masks so 2 and 3 are swapped compared with VTM
|
||||
for(int i = cur_cu->type == CU_INTRA ? 1 : 3; i < 4; i++) {
|
||||
int64_t d1 = 0;
|
||||
const int cbf_mask = i * (state->frame->jccr_sign ? -1 : 1);
|
||||
for (int y = 0; y < width; y++)
|
||||
kvz_generate_residual(u_ref_in, u_pred_in, u_residual, width, in_stride, in_stride);
|
||||
kvz_generate_residual(v_ref_in, v_pred_in, v_residual, width, in_stride, in_stride);
|
||||
|
||||
|
||||
const int cbf_mask = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1);
|
||||
for (int y = 0; y < width; y++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
const int16_t cbx = u_residual[x + y * width], crx = v_residual[x + y * width];
|
||||
if (cbf_mask == 2)
|
||||
{
|
||||
int cbx = u_residual[x + y * width], crx = v_residual[x + y * width];
|
||||
if (cbf_mask == 2)
|
||||
{
|
||||
u1_residual[i - 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (u1_residual[i - 2][x + y * width] >> 1));
|
||||
}
|
||||
else if (cbf_mask == -2)
|
||||
{
|
||||
u1_residual[i - 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (-u1_residual[i - 2][x + y * width] >> 1));
|
||||
}
|
||||
else if (cbf_mask == 3)
|
||||
{
|
||||
u1_residual[i - 2][x + y * width] = ((cbx + crx) / 2);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - u1_residual[i - 2][x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == -3)
|
||||
{
|
||||
u1_residual[i - 2][x + y * width] = ((cbx - crx) / 2);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx + u1_residual[i - 2][x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == 1)
|
||||
{
|
||||
v1_residual[x + y * width] = ((4 * crx + 2 * cbx) / 5);
|
||||
d1 += square(cbx - (v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == -1)
|
||||
{
|
||||
v1_residual[x + y * width] = ((4 * crx - 2 * cbx) / 5);
|
||||
d1 += square(cbx - (-v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
|
||||
}
|
||||
else
|
||||
{
|
||||
d1 += square(cbx);
|
||||
//d2 += square(crx);
|
||||
}
|
||||
combined_residual[x + y * width] = (4 * cbx + 2 * crx) / 5;
|
||||
}
|
||||
else if (cbf_mask == -2)
|
||||
{
|
||||
combined_residual[x + y * width] = (4 * cbx - 2 * crx) / 5;
|
||||
}
|
||||
else if (cbf_mask == 3)
|
||||
{
|
||||
combined_residual[x + y * width] = (cbx + crx) / 2;
|
||||
}
|
||||
else if (cbf_mask == -3)
|
||||
{
|
||||
combined_residual[x + y * width] = (cbx - crx) / 2;
|
||||
}
|
||||
else if (cbf_mask == 1)
|
||||
{
|
||||
combined_residual[x + y * width] = (4 * crx + 2 * cbx) / 5;
|
||||
}
|
||||
else if (cbf_mask == -1)
|
||||
{
|
||||
combined_residual[x + y * width] = (4 * crx - 2 * cbx) / 5;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
if (d1 < best_cost) {
|
||||
best_cbf_mask = i;
|
||||
best_cost = d1;
|
||||
}
|
||||
}
|
||||
|
||||
uvg_transform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
uvg_transform2d(state->encoder_control, combined_residual, coeff, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && false) {
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
|
||||
uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U,
|
||||
scan_order);
|
||||
}
|
||||
else {
|
||||
uvg_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
uvg_quant(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
}
|
||||
|
||||
|
@ -309,13 +292,12 @@ int uvg_quant_cbcr_residual_generic(
|
|||
}
|
||||
|
||||
if (has_coeffs && !early_skip) {
|
||||
int y, x;
|
||||
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
uvg_dequant(state, coeff_out, coeff, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
|
||||
uvg_itransform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
uvg_itransform2d(state->encoder_control, combined_residual, coeff, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
|
||||
//if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
|
@ -336,39 +318,39 @@ int uvg_quant_cbcr_residual_generic(
|
|||
// }
|
||||
// }
|
||||
//}
|
||||
const int temp = best_cbf_mask * (state->frame->jccr_sign ? -1 : 1);
|
||||
const int temp = cur_cu->joint_cb_cr * (state->frame->jccr_sign ? -1 : 1);
|
||||
// Get quantized reconstruction. (residual + pred_in -> rec_out)
|
||||
for (int y = 0; y < width; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
if (temp == 2) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
|
||||
u_residual[x + y * width] = combined_residual[x + y * width];
|
||||
v_residual[x + y * width] = combined_residual[x + y * width] >> 1;
|
||||
}
|
||||
else if (temp == -2) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
|
||||
u_residual[x + y * width] = combined_residual[x + y * width];
|
||||
v_residual[x + y * width] = -combined_residual[x + y * width] >> 1;
|
||||
}
|
||||
else if (temp == 3) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
u_residual[x + y * width] = combined_residual[x + y * width];
|
||||
v_residual[x + y * width] = combined_residual[x + y * width];
|
||||
}
|
||||
else if (temp == -3) {
|
||||
// non-normative clipping to prevent 16-bit overflow
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
u_residual[x + y * width] = combined_residual[x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
|
||||
v_residual[x + y * width] = -combined_residual[x + y * width];
|
||||
}
|
||||
else if (temp == 1) {
|
||||
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = v1_residual[x + y * width];
|
||||
u_residual[x + y * width] = combined_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = combined_residual[x + y * width];
|
||||
}
|
||||
else if (temp == -1) {
|
||||
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = -v1_residual[x + y * width];
|
||||
u_residual[x + y * width] = -combined_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = combined_residual[x + y * width];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
int16_t u_val = u_residual[x + y * width] + u_pred_in[x + y * in_stride];
|
||||
u_rec_out[x + y * out_stride] = (uvg_pixel)CLIP(0, PIXEL_MAX, u_val);
|
||||
int16_t v_val = v_residual[x + y * width] + v_pred_in[x + y * in_stride];
|
||||
|
@ -379,20 +361,16 @@ int uvg_quant_cbcr_residual_generic(
|
|||
else/* if (rec_out != pred_in)*/ {
|
||||
// With no coeffs and rec_out == pred_int we skip copying the coefficients
|
||||
// because the reconstruction is just the prediction.
|
||||
int y, x;
|
||||
|
||||
for (y = 0; y < width; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
u_rec_out[x + y * out_stride] = u_pred_in[x + y * in_stride];
|
||||
v_rec_out[x + y * out_stride] = v_pred_in[x + y * in_stride];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
return has_coeffs ? best_cbf_mask : 0;
|
||||
|
||||
return has_coeffs ? cur_cu->joint_cb_cr : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -431,7 +409,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
|
|||
const int height = width; // TODO: height for non-square blocks
|
||||
|
||||
// Get residual. (ref_in - pred_in -> residual)
|
||||
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride);
|
||||
kvz_generate_residual(ref_in, pred_in, residual, width, in_stride, in_stride);
|
||||
|
||||
if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
int y, x;
|
||||
|
|
|
@ -149,7 +149,7 @@ typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
|
|||
|
||||
typedef double (pixel_var_func)(const uvg_pixel *buf, const uint32_t len);
|
||||
|
||||
typedef void (generate_residual_func)(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual, int width, int in_stride);
|
||||
typedef void (generate_residual_func)(const kvz_pixel* ref_in, const kvz_pixel* pred_in, int16_t* residual, int width, int ref_stride, int pred_stride);
|
||||
|
||||
// Declare function pointers.
|
||||
extern reg_sad_func * uvg_reg_sad;
|
||||
|
@ -229,6 +229,7 @@ cost_pixel_nxn_multi_func * kvz_pixels_get_sad_dual_func(unsigned n);
|
|||
{"ver_sad", (void**) &uvg_ver_sad}, \
|
||||
{"hor_sad", (void**) &uvg_hor_sad}, \
|
||||
{"pixel_var", (void**) &uvg_pixel_var}, \
|
||||
{"generate_residual", (void**) &kvz_generate_residual}, \
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -650,9 +650,8 @@ static void quantize_tr_residual(encoder_state_t * const state,
|
|||
}
|
||||
|
||||
const bool can_use_trskip = tr_width <= (1 << state->encoder_control->cfg.trskip_max_size) &&
|
||||
color == COLOR_Y &&
|
||||
cfg->trskip_enable &&
|
||||
cur_pu->tr_idx == 1;
|
||||
cur_pu->tr_skip & (1 << color);
|
||||
|
||||
uint8_t has_coeffs;
|
||||
|
||||
|
@ -696,7 +695,6 @@ static void quantize_tr_residual(encoder_state_t * const state,
|
|||
pred,
|
||||
coeff,
|
||||
lmcs_chroma_adj);
|
||||
cur_pu->tr_skip = tr_skip;
|
||||
} else {
|
||||
if(color == COLOR_UV) {
|
||||
has_coeffs = uvg_quant_cbcr_residual(
|
||||
|
|
Loading…
Reference in a new issue