fix jccr and improve intra parameter passing

This commit is contained in:
Joose Sainio 2022-03-23 13:39:38 +02:00
parent 0be443d309
commit a88553b206
9 changed files with 344 additions and 196 deletions

View file

@ -452,7 +452,7 @@ static void get_cclm_parameters(
}
}
static void linear_transform_cclm(cclm_parameters_t* cclm_params, kvz_pixel * src, kvz_pixel * dst, int stride, int height) {
static void linear_transform_cclm(const cclm_parameters_t* cclm_params, kvz_pixel * src, kvz_pixel * dst, int stride, int height) {
int scale = cclm_params->a;
int shift = cclm_params->shift;
int offset = cclm_params->b;
@ -1355,13 +1355,9 @@ static void intra_recon_tb_leaf(
int x,
int y,
int depth,
int8_t intra_mode,
cclm_parameters_t *cclm_params,
lcu_t *lcu,
color_t color,
uint8_t multi_ref_idx,
bool mip_flag,
bool mip_transp)
const intra_parameters_t* intra_paramas)
{
const kvz_config *cfg = &state->encoder_control->cfg;
const int shift = color == COLOR_Y ? 0 : 1;
@ -1383,7 +1379,7 @@ static void intra_recon_tb_leaf(
int x_scu = SUB_SCU(x);
int y_scu = SUB_SCU(y);
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
uint8_t multi_ref_index = color == COLOR_Y ? intra_paramas->multi_ref_idx : 0;
kvz_intra_references refs;
// Extra reference lines for use with MRL. Extra lines needed only for left edge.
@ -1409,7 +1405,8 @@ static void intra_recon_tb_leaf(
int stride = state->tile->frame->source->stride;
const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
bool use_mip = false;
if (mip_flag) {
int8_t intra_mode = color == COLOR_Y ? intra_paramas->luma_mode : intra_paramas->chroma_mode;
if (intra_paramas->mip_flag) {
if (color == COLOR_Y) {
use_mip = true;
} else {
@ -1426,21 +1423,15 @@ static void intra_recon_tb_leaf(
if(intra_mode < 68) {
if (use_mip) {
assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
kvz_mip_predict(state, &refs, width, height, pred, intra_mode, mip_transp);
kvz_mip_predict(state, &refs, width, height, pred, intra_mode, intra_paramas->mip_transp);
}
else {
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary, multi_ref_index);
}
} else {
kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width);
if(cclm_params == NULL) {
cclm_parameters_t temp_params;
kvz_predict_cclm(
state, color, width, width, x, y, stride, intra_mode, lcu, &refs, pred, &temp_params);
}
else {
linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width);
}
linear_transform_cclm(&intra_paramas->cclm_parameters[color == COLOR_U ? 0 : 1], pred, pred, width, width);
}
const int index = lcu_px.x + lcu_px.y * lcu_width;
@ -1487,13 +1478,8 @@ void kvz_intra_recon_cu(
int x,
int y,
int depth,
int8_t mode_luma,
int8_t mode_chroma,
const intra_parameters_t* intra_parameters,
cu_info_t *cur_cu,
cclm_parameters_t *cclm_params,
uint8_t multi_ref_idx,
bool mip_flag,
bool mip_transp,
lcu_t *lcu)
{
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
@ -1501,9 +1487,9 @@ void kvz_intra_recon_cu(
if (cur_cu == NULL) {
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
uint8_t multi_ref_index = multi_ref_idx;
bool use_mip = mip_flag;
bool mip_transposed = mip_transp;
bool use_mip = intra_parameters->mip_flag;
const int8_t mode_luma = intra_parameters->luma_mode;
const int8_t mode_chroma= intra_parameters->chroma_mode;
if (mode_luma != -1 && mode_chroma != -1) {
if (use_mip) {
@ -1527,10 +1513,10 @@ void kvz_intra_recon_cu(
const int32_t x2 = x + offset;
const int32_t y2 = y + offset;
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, use_mip, mip_transposed, lcu);
kvz_intra_recon_cu(state, x, y, depth + 1, intra_parameters, NULL, lcu);
kvz_intra_recon_cu(state, x2, y, depth + 1, intra_parameters, NULL, lcu);
kvz_intra_recon_cu(state, x, y2, depth + 1, intra_parameters, NULL, lcu);
kvz_intra_recon_cu(state, x2, y2, depth + 1, intra_parameters, NULL, lcu);
// Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = {
@ -1552,13 +1538,13 @@ void kvz_intra_recon_cu(
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y, multi_ref_index, use_mip, mip_transposed);
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_Y, intra_parameters);
}
if (has_chroma) {
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U, 0, use_mip, mip_transposed);
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V, 0, use_mip, mip_transposed);
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_U, intra_parameters);
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_V, intra_parameters);
}
kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false);
kvz_quantize_lcu_residual(state, has_luma, has_chroma, intra_parameters->jccr != -1 && state->encoder_control->cfg.jccr && (x % 8 == 0 && y % 8 == 0), x, y, depth, cur_cu, lcu, false);
}
}

View file

@ -63,6 +63,17 @@ typedef struct
int16_t b;
} cclm_parameters_t;
/**
 * \brief Intra coding decisions for one CU, passed as a unit to the intra
 *        search and reconstruction functions.
 *
 * Keep the field order stable: some users initialize this struct with
 * positional (non-designated) initializers.
 */
typedef struct {
// Luma intra mode. kvz_intra_recon_cu reads it as mode_luma; callers set it
// to -1 when only chroma should be reconstructed.
int8_t luma_mode;
// Chroma intra mode. Callers set it to -1 when only luma should be
// reconstructed.
int8_t chroma_mode;
// Linear-model parameters applied by linear_transform_cclm for chroma modes
// that are not below 68: index 0 is used for COLOR_U, index 1 otherwise.
cclm_parameters_t cclm_parameters[2];
// Reference line index; only used for COLOR_Y, chroma always uses 0.
uint8_t multi_ref_idx;
// Nonzero when the CU uses MIP; luma is then predicted with kvz_mip_predict.
bool mip_flag;
// Forwarded to kvz_mip_predict as its transpose argument.
bool mip_transp;
// Transform index; search_cu copies it to cur_cu->tr_idx when depth > 0.
int8_t mts_idx;
// -1 makes kvz_intra_recon_cu pass jccr = false to kvz_quantize_lcu_residual;
// any other value allows the joint Cb-Cr pass when cfg.jccr is enabled and
// x and y are multiples of 8.
int8_t jccr;
} intra_parameters_t;
/**
* \brief Function for deriving intra luma predictions
* \param x x-coordinate of the PU in pixels
@ -128,13 +139,8 @@ void kvz_intra_recon_cu(
int x,
int y,
int depth,
int8_t mode_luma,
int8_t mode_chroma,
const intra_parameters_t * intra_parameters,
cu_info_t *cur_cu,
cclm_parameters_t* cclm_params,
uint8_t multi_ref_idx,
bool mip_flag,
bool mip_transp,
lcu_t *lcu);

View file

@ -384,9 +384,7 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0);
double tr_tree_bits = 0;
double joint_cbcr_tr_tree_bits = 0;
double coeff_bits = 0;
double joint_coeff_bits = 0;
assert(x_px >= 0 && x_px < LCU_WIDTH);
assert(y_px >= 0 && y_px < LCU_WIDTH);
@ -407,19 +405,12 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
int u_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
CABAC_FBITS_UPDATE(cabac, ctx, u_is_set, tr_tree_bits, "cbf_cb_search");
}
if(state->encoder_control->cfg.jccr) {
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, pred_cu->joint_cb_cr & 1);
}
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
ctx = &(cabac->ctx.qt_cbf_model_cr[is_set]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cb_search");
}
if(state->encoder_control->cfg.jccr) {
ctx = &(cabac->ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, (pred_cu->joint_cb_cr & 2) >> 1);
}
}
@ -442,15 +433,10 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
ctx = &(state->cabac.ctx.joint_cb_cr[cbf_mask]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 0);
}
if(pred_cu->joint_cb_cr) {
ctx = &(state->cabac.ctx.joint_cb_cr[(pred_cu->joint_cb_cr & 1) * 2 + ((pred_cu->joint_cb_cr & 2) >> 1) - 1]);
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 1);
}
}
// Chroma SSD
int ssd = 0;
int joint_ssd = 0;
if (!state->encoder_control->cfg.lossless) {
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
int ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
@ -460,16 +446,6 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
LCU_WIDTH_C, LCU_WIDTH_C,
width);
ssd = ssd_u + ssd_v;
if(state->encoder_control->cfg.jccr) {
int ssd_u_joint = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
int ssd_v_joint = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
joint_ssd = ssd_u_joint + ssd_v_joint;
}
}
if (!skip_residual_coding)
@ -479,35 +455,12 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order, 0);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order, 0);
if(state->encoder_control->cfg.jccr) {
joint_coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
}
}
double bits = tr_tree_bits + coeff_bits;
double joint_bits = joint_cbcr_tr_tree_bits + joint_coeff_bits;
double cost = (double)ssd * KVZ_CHROMA_MULT + bits * state->c_lambda;
double joint_cost = (double)joint_ssd * KVZ_CHROMA_MULT + joint_bits * state->c_lambda;
if ((cost < joint_cost || !pred_cu->joint_cb_cr) || !state->encoder_control->cfg.jccr) {
pred_cu->joint_cb_cr = 0;
return cost;
}
cbf_clear(&pred_cu->cbf, depth, COLOR_U);
cbf_clear(&pred_cu->cbf, depth, COLOR_V);
if (pred_cu->joint_cb_cr & 1) {
cbf_set(&pred_cu->cbf, depth, COLOR_U);
}
if (pred_cu->joint_cb_cr & 2) {
cbf_set(&pred_cu->cbf, depth, COLOR_V);
}
int lcu_width = LCU_WIDTH_C;
const int index = lcu_px.x + lcu_px.y * lcu_width;
kvz_pixels_blit(&lcu->rec.joint_u[index], &lcu->rec.u[index], width, width, lcu_width, lcu_width);
kvz_pixels_blit(&lcu->rec.joint_v[index], &lcu->rec.v[index], width, width, lcu_width, lcu_width);
return joint_cost;
return (double)ssd * KVZ_CHROMA_MULT + bits * state->c_lambda;
}
static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
@ -577,6 +530,16 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
CABAC_FBITS_UPDATE(cabac, ctx, cb_flag_y, tr_tree_bits, "cbf_y_search");
}
if (cb_flag_y | cb_flag_u | cb_flag_v) {
// TODO qp_delta_sign_flag
if ((cb_flag_u | cb_flag_v) && x_px % 8 == 0 && y_px % 8 == 0 && state->encoder_control->cfg.jccr) {
CABAC_FBITS_UPDATE(cabac, &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1], tr_cu->joint_cb_cr != 0, tr_tree_bits, "tu_joint_cbcr_residual_flag");
}
}
// SSD between reconstruction and original
unsigned luma_ssd = 0;
if (!state->encoder_control->cfg.lossless) {
@ -597,23 +560,34 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
if(state->encoder_control->chroma_format != KVZ_CSP_400 && x_px % 8 == 0 && y_px % 8 == 0) {
const vector2d_t lcu_px = { x_px / 2, y_px / 2 };
const int chroma_width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
if (!state->encoder_control->cfg.lossless) {
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
unsigned ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
if(pred_cu->joint_cb_cr != 0) {
if (!state->encoder_control->cfg.lossless) {
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
unsigned ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width);
unsigned ssd_v = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width);
chroma_ssd = ssd_u + ssd_v;
}
{
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], chroma_width, 2, scan_order, 0);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], chroma_width, 2, scan_order, 0);
}
} else {
int ssd_u_joint = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
int ssd_v_joint = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width);
unsigned ssd_v = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
chroma_width);
chroma_ssd = ssd_u + ssd_v;
}
{
int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], chroma_width, 2, scan_order, 0);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], chroma_width, 2, scan_order, 0);
chroma_ssd = ssd_u_joint + ssd_v_joint;
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
}
}
@ -622,6 +596,118 @@ static double cu_rd_cost_tr_split_accurate(const encoder_state_t* const state,
}
/**
 * \brief Pick between separately coded Cb/Cr residuals and the joint Cb-Cr
 *        (JCCR) candidate of a chroma block and commit the winner.
 *
 * Compares the rate-distortion cost of lcu->rec.u / rec.v with coeff.u /
 * coeff.v against rec.joint_u / rec.joint_v with coeff.joint_uv.
 *
 * If the separate coding wins (or JCCR is not applicable),
 * pred_cu->joint_cb_cr is reset to 0. If JCCR wins, the chroma cbf bits at
 * \p depth are rewritten from pred_cu->joint_cb_cr (bit 0 -> U, bit 1 -> V)
 * and the joint reconstruction is copied over rec.u / rec.v.
 *
 * \param state     Encoder state; its search_cabac and c_lambda are used.
 * \param x_px      Luma x coordinate; aligned down to 8 internally.
 * \param y_px      Luma y coordinate; aligned down to 8 internally.
 * \param depth     CU depth; determines the chroma block width.
 * \param pred_cu   CU to evaluate, or NULL to look it up from \p lcu.
 * \param lcu       LCU holding reference, reconstruction and coefficients.
 * \param cost_out  May be NULL. Otherwise the cost of the chosen alternative
 *                  is ADDED to *cost_out, so the caller must initialize it.
 *                  It is left untouched on the early-return paths.
 */
void kvz_select_jccr_mode(
  const encoder_state_t* const state,
  const int x_px,
  const int y_px,
  const int depth,
  cu_info_t* pred_cu,
  lcu_t* const lcu,
  double* cost_out)
{
  // Chroma position inside the LCU, derived from the 8-aligned luma position.
  const vector2d_t lcu_px = { (SUB_SCU(x_px) & ~7) / 2, (SUB_SCU(y_px) & ~7) / 2 };
  const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;

  if (pred_cu == NULL) {
    pred_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x * 2, lcu_px.y * 2);
  }
  assert(pred_cu->depth == pred_cu->tr_depth && "jccr does not support transform splitting");

  // Nothing to decide and nobody wants the cost.
  if (!cost_out && !pred_cu->joint_cb_cr) {
    return;
  }

  assert(lcu_px.x >= 0 && lcu_px.x < LCU_WIDTH_C);
  assert(lcu_px.y >= 0 && lcu_px.y < LCU_WIDTH_C);

  // At depth 4 the block is only evaluated when neither coordinate is a
  // multiple of 8; otherwise return without touching *cost_out.
  // NOTE(review): this also skips the block at (8k, 8k) - confirm intended.
  const int both_unaligned = (x_px % 8 != 0) && (y_px % 8 != 0);
  if (depth == 4 && !both_unaligned) {
    return;
  }

  cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
  cabac_ctx_t* ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
  cabac->cur_ctx = ctx;

  // --- Flag bits of the separately coded alternative ---
  double separate_flag_bits = 0;
  const int cb_coded = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
  CABAC_FBITS_UPDATE(cabac, ctx, cb_coded, separate_flag_bits, "cbf_cb_search");
  ctx = &(cabac->ctx.qt_cbf_model_cr[cb_coded]);
  const int cr_coded = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
  CABAC_FBITS_UPDATE(cabac, ctx, cr_coded, separate_flag_bits, "cbf_cr_search");

  // Context index of the jccr flag; -1 means neither chroma cbf is set and
  // the flag is not coded at all.
  int cbf_mask = cb_coded * 2 + cr_coded - 1;
  if (cbf_mask != -1)
    CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 0, separate_flag_bits, "jccr_flag");

  // --- Flag bits of the joint alternative ---
  double joint_flag_bits = 0;
  if (pred_cu->joint_cb_cr) {
    const int joint_cb = pred_cu->joint_cb_cr & 1;
    const int joint_cr = (pred_cu->joint_cb_cr & 2) >> 1;

    ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
    CABAC_FBITS_UPDATE(cabac, ctx, joint_cb, joint_flag_bits, "cbf_cb_search");
    ctx = &(cabac->ctx.qt_cbf_model_cr[joint_cb]);
    CABAC_FBITS_UPDATE(cabac, ctx, joint_cr, joint_flag_bits, "cbf_cr_search");
    cbf_mask = joint_cb * 2 + joint_cr - 1;
    CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 1, joint_flag_bits, "jccr_flag");
  }

  // --- Distortion (skipped entirely in lossless mode) ---
  const int px_index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
  int separate_ssd = 0;
  int joint_ssd = 0;
  if (!state->encoder_control->cfg.lossless) {
    const int u_err = kvz_pixels_calc_ssd(&lcu->ref.u[px_index], &lcu->rec.u[px_index],
                                          LCU_WIDTH_C, LCU_WIDTH_C,
                                          width);
    const int v_err = kvz_pixels_calc_ssd(&lcu->ref.v[px_index], &lcu->rec.v[px_index],
                                          LCU_WIDTH_C, LCU_WIDTH_C,
                                          width);
    separate_ssd = u_err + v_err;

    if (pred_cu->joint_cb_cr) {
      const int u_joint_err = kvz_pixels_calc_ssd(&lcu->ref.u[px_index], &lcu->rec.joint_u[px_index],
                                                  LCU_WIDTH_C, LCU_WIDTH_C,
                                                  width);
      const int v_joint_err = kvz_pixels_calc_ssd(&lcu->ref.v[px_index], &lcu->rec.joint_v[px_index],
                                                  LCU_WIDTH_C, LCU_WIDTH_C,
                                                  width);
      joint_ssd = u_joint_err + v_joint_err;
    }
  }

  // --- Coefficient bits ---
  double separate_coeff_bits = 0;
  double joint_coeff_bits = 0;
  {
    const int8_t scan_order = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth);
    const int coeff_index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y);

    if (cb_coded) {
      separate_coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[coeff_index], width, 2, scan_order, 0);
    }
    if (cr_coded) {
      separate_coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[coeff_index], width, 2, scan_order, 0);
    }
    joint_coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.joint_uv[coeff_index], width, 2, scan_order, 0);
  }

  // --- Rate-distortion comparison ---
  const double separate_bits = separate_flag_bits + separate_coeff_bits;
  const double joint_bits = joint_flag_bits + joint_coeff_bits;
  const double separate_cost = (double)separate_ssd * KVZ_CHROMA_MULT + separate_bits * state->c_lambda;
  const double joint_cost = (double)joint_ssd * KVZ_CHROMA_MULT + joint_bits * state->c_lambda;

  // JCCR is taken only when enabled, a candidate exists, and the separate
  // coding is not strictly cheaper.
  const int joint_wins = state->encoder_control->cfg.jccr
                         && pred_cu->joint_cb_cr
                         && !(separate_cost < joint_cost);

  if (joint_wins) {
    // Rewrite the chroma cbf bits from the JCCR mode bits.
    cbf_clear(&pred_cu->cbf, depth, COLOR_U);
    cbf_clear(&pred_cu->cbf, depth, COLOR_V);
    if (pred_cu->joint_cb_cr & 1) {
      cbf_set(&pred_cu->cbf, depth, COLOR_U);
    }
    if (pred_cu->joint_cb_cr & 2) {
      cbf_set(&pred_cu->cbf, depth, COLOR_V);
    }

    // Make the joint reconstruction the active chroma reconstruction.
    kvz_pixels_blit(&lcu->rec.joint_u[px_index], &lcu->rec.u[px_index], width, width, LCU_WIDTH_C, LCU_WIDTH_C);
    kvz_pixels_blit(&lcu->rec.joint_v[px_index], &lcu->rec.v[px_index], width, width, LCU_WIDTH_C, LCU_WIDTH_C);

    if (cost_out) {
      *cost_out += joint_cost;
    }
  } else {
    pred_cu->joint_cb_cr = 0;
    if (cost_out) {
      *cost_out += separate_cost;
    }
  }
}
// Return estimate of bits used to code prediction mode of cur_cu.
static double calc_mode_bits(const encoder_state_t *state,
const lcu_t *lcu,
@ -885,15 +971,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
(y & ~(cu_width_intra_min - 1)) + cu_width_intra_min > frame->height) &&
!(state->encoder_control->cfg.force_inter && state->frame->slicetype != KVZ_SLICE_I);
intra_parameters_t intra_parameters;
if (can_use_intra && !skip_intra) {
int8_t intra_mode;
int8_t intra_trafo;
double intra_cost;
uint8_t multi_ref_index = 0;
bool mip_flag = false;
bool mip_transposed = false;
intra_parameters.jccr = -1;
kvz_search_cu_intra(state, x, y, depth, lcu,
&intra_mode, &intra_trafo, &intra_cost, &multi_ref_index, &mip_flag, &mip_transposed);
&intra_cost, &intra_parameters);
#ifdef COMPLETE_PRED_MODE_BITS
// Technically counting these bits would be correct, however counting
// them universally degrades quality so this block is disabled by default
@ -908,13 +991,13 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cost = intra_cost;
cur_cu->type = CU_INTRA;
cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N;
cur_cu->intra.mode = intra_mode;
cur_cu->intra.multi_ref_idx = multi_ref_index;
cur_cu->intra.mip_flag = mip_flag;
cur_cu->intra.mip_is_transposed = mip_transposed;
cur_cu->intra.mode = intra_parameters.luma_mode;
cur_cu->intra.multi_ref_idx = intra_parameters.multi_ref_idx;
cur_cu->intra.mip_flag = intra_parameters.mip_flag;
cur_cu->intra.mip_is_transposed = intra_parameters.mip_transp;
//If the CU is not split from 64x64 block, the MTS is disabled for that CU.
cur_cu->tr_idx = (depth > 0) ? intra_trafo : 0;
cur_cu->tr_idx = (depth > 0) ? intra_parameters.mts_idx : 0;
}
}
@ -925,12 +1008,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->intra.mode_chroma = cur_cu->intra.mode;
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
intra_parameters.chroma_mode = -1;
kvz_intra_recon_cu(state,
x, y,
depth,
cur_cu->intra.mode, -1, // skip chroma
NULL, NULL, cur_cu->intra.multi_ref_idx,
cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
&intra_parameters,
NULL,
lcu);
downsample_cclm_rec(
@ -943,19 +1026,27 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// rd2. Possibly because the luma mode search already takes chroma
// into account, so there is less of a chance of luma mode being
// really bad for chroma.
cclm_parameters_t cclm_params[2];
if (ctrl->cfg.rdo >= 3 && !cur_cu->intra.mip_flag) {
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params);
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, intra_parameters.cclm_parameters);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
}
intra_parameters.chroma_mode = cur_cu->intra.mode_chroma;
intra_parameters.luma_mode = -1; // skip luma
intra_parameters.jccr = 0;
kvz_intra_recon_cu(state,
x & ~7, y & ~7, // TODO: as does this
depth,
-1, cur_cu->intra.mode_chroma, // skip luma
NULL, cclm_params, 0,
cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
&intra_parameters,
NULL,
lcu);
if(depth != 0 && state->encoder_control->cfg.jccr) {
kvz_select_jccr_mode(state,
x & ~7, y & ~7,
depth,
NULL,
lcu,
NULL);
}
}
} else if (cur_cu->type == CU_INTER) {
@ -983,11 +1074,12 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}
kvz_quantize_lcu_residual(state,
true, has_chroma,
x, y, depth,
NULL,
lcu,
false);
true, has_chroma,
state->encoder_control->cfg.jccr, x, y,
depth,
NULL,
lcu,
false);
int cbf = cbf_is_set_any(cur_cu->cbf, depth);
@ -1142,11 +1234,21 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
const bool has_chroma = state->encoder_control->chroma_format != KVZ_CSP_400;
const int8_t mode_chroma = has_chroma ? cur_cu->intra.mode_chroma : -1;
// Parameters for reconstructing this intra CU again with its already chosen
// modes. The CU's MIP state must be carried along: with mip_flag left at 0
// a MIP-coded CU would be predicted with kvz_intra_predict instead of
// kvz_mip_predict. Every field is designated so the initializer does not
// depend on the member order of intra_parameters_t.
// NOTE(review): multi_ref_idx stays 0 here as before; confirm whether
// cur_cu->intra.multi_ref_idx should be used instead.
intra_parameters_t intra_parameters = {
  .luma_mode = cur_cu->intra.mode,
  .chroma_mode = mode_chroma,
  .cclm_parameters = {{0, 0, 0}, {0, 0, 0}},
  .multi_ref_idx = 0,
  .mip_flag = cur_cu->intra.mip_flag,
  .mip_transp = cur_cu->intra.mip_is_transposed,
  .mts_idx = 0,
  .jccr = -1,
};
kvz_intra_recon_cu(state,
x, y,
depth,
cur_cu->intra.mode, mode_chroma,
NULL,NULL, 0, cur_cu->intra.mip_flag, cur_cu->intra.mip_is_transposed,
&intra_parameters,
NULL,
lcu);
double mode_bits = calc_mode_bits(state, lcu, cur_cu, x, y, depth) + bits;

View file

@ -91,6 +91,15 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
cu_info_t *const pred_cu,
lcu_t *const lcu);
/**
 * \brief Choose between separate Cb/Cr residual coding and joint Cb-Cr
 *        (JCCR) coding for a chroma block by comparing their RD costs.
 *
 * Updates pred_cu->joint_cb_cr; when JCCR is chosen, also updates the chroma
 * cbf bits and copies lcu->rec.joint_u / joint_v over lcu->rec.u / v.
 *
 * \param pred_cu   CU to evaluate; NULL makes the function look it up in lcu.
 * \param cost_out  May be NULL. Otherwise the chosen cost is added to
 *                  *cost_out (not assigned), so initialize it before calling.
 */
void kvz_select_jccr_mode(
const encoder_state_t* const state,
const int x_px,
const int y_px,
const int depth,
cu_info_t* const pred_cu,
lcu_t* const lcu,
double* cost_out);
void kvz_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth);
void kvz_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu);

View file

@ -1800,14 +1800,16 @@ static void search_pu_inter(encoder_state_t * const state,
cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1];
kvz_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth));
kvz_inter_recon_cu(state, lcu, x, y, width, true, false);
kvz_quantize_lcu_residual(state, true, false, x, y, depth, cur_pu, lcu, true);
kvz_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true);
if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
continue;
}
else if (has_chroma) {
kvz_inter_recon_cu(state, lcu, x, y, width, false, has_chroma);
kvz_quantize_lcu_residual(state, false, has_chroma, x, y, depth, cur_pu, lcu, true);
kvz_quantize_lcu_residual(state, false, has_chroma,
false, /*we are only checking for lack of coeffs so no need to check jccr*/
x, y, depth, cur_pu, lcu, true);
if (!cbf_is_set_any(cur_pu->cbf, depth)) {
cur_pu->type = CU_INTER;
cur_pu->merge_idx = merge_idx;
@ -2159,8 +2161,10 @@ void kvz_cu_cost_inter_rd2(encoder_state_t * const state,
}
double no_cbf_cost = ssd + no_cbf_bits * state->lambda;
kvz_quantize_lcu_residual(state, true, reconstruct_chroma,
x, y, depth,
kvz_quantize_lcu_residual(state,
true, reconstruct_chroma,
reconstruct_chroma && state->encoder_control->cfg.jccr, x, y,
depth,
cur_cu,
lcu,
false);
@ -2170,7 +2174,12 @@ void kvz_cu_cost_inter_rd2(encoder_state_t * const state,
if(cbf) {
*inter_cost = kvz_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu);
if (reconstruct_chroma) {
*inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu);
if (cur_cu->depth != cur_cu->tr_depth) {
*inter_cost += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu);
}
else {
kvz_select_jccr_mode(state, x_px, y_px, depth, cur_cu, lcu, inter_cost);
}
}
}
else {

View file

@ -315,8 +315,21 @@ static double search_intra_trdepth(encoder_state_t * const state,
if(state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size) /*&& height == 4*/) {
num_transforms = MAX(num_transforms, 2);
}
// Luma-only parameter set (chroma_mode = -1) seeded from pred_cu. Every
// field is designated so the values do not depend on the member order of
// intra_parameters_t.
intra_parameters_t intra_parameters = {
  .luma_mode = intra_mode,
  .chroma_mode = -1,
  .cclm_parameters = {{0, 0, 0}, {0, 0, 0}},
  .multi_ref_idx = pred_cu->intra.multi_ref_idx,
  .mip_flag = pred_cu->intra.mip_flag,
  .mip_transp = pred_cu->intra.mip_is_transposed,
  .mts_idx = 0,
  .jccr = -1,
};
for (; trafo < num_transforms; trafo++) {
pred_cu->tr_idx = trafo;
intra_parameters.mts_idx = trafo;
if (mts_enabled)
{
pred_cu->mts_last_scan_pos = 0;
@ -332,9 +345,8 @@ static double search_intra_trdepth(encoder_state_t * const state,
kvz_intra_recon_cu(state,
x_px, y_px,
depth,
intra_mode, -1,
pred_cu, cclm_params, pred_cu->intra.multi_ref_idx,
pred_cu->intra.mip_flag, pred_cu->intra.mip_is_transposed,
&intra_parameters,
pred_cu,
lcu);
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
@ -359,12 +371,14 @@ static double search_intra_trdepth(encoder_state_t * const state,
}
}
if(reconstruct_chroma) {
intra_parameters.luma_mode = -1;
intra_parameters.chroma_mode = chroma_mode;
intra_parameters.jccr = -1; // TODO: Maybe check the jccr mode here also but holy shit is the interface of search_intra_rdo bad currently
kvz_intra_recon_cu(state,
x_px, y_px,
depth,
-1, chroma_mode,
pred_cu, cclm_params, 0,
pred_cu->intra.mip_flag, pred_cu->intra.mip_is_transposed,
&intra_parameters,
pred_cu,
lcu);
best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
}
@ -1020,22 +1034,32 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
double cost;
int8_t mode;
cclm_parameters_t cclm[2];
int8_t jccr;
} chroma, best_chroma;
// chroma.cclm = cclm_params;
best_chroma.mode = 0;
best_chroma.cost = MAX_INT;
best_chroma.jccr = 0;
intra_parameters_t intra_parameters;
memset(&intra_parameters, 0, sizeof(intra_parameters_t));
intra_parameters.luma_mode = -1; // skip luma
chroma.jccr = 0;
for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) {
chroma.mode = modes[chroma_mode_i];
if (chroma.mode == -1) continue;
intra_parameters.chroma_mode = modes[chroma_mode_i];
if(chroma.mode < 67 || depth == 0) {
kvz_intra_recon_cu(state,
x_px, y_px,
depth,
-1, chroma.mode, // skip luma
NULL, NULL, 0, false, false, lcu);
&intra_parameters,
NULL,
lcu);
}
else {
@ -1050,6 +1074,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
&cclm_params[0]);
chroma.cclm[0] = cclm_params[0];
intra_parameters.cclm_parameters[0] = cclm_params[0];
kvz_predict_cclm(
state, COLOR_V,
@ -1062,16 +1087,23 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
&cclm_params[1]);
chroma.cclm[1] = cclm_params[1];
intra_parameters.cclm_parameters[1] = cclm_params[1];
kvz_intra_recon_cu(
state,
x_px, y_px,
depth,
-1, chroma.mode, // skip luma
NULL, cclm_params, 0, false, false, lcu);
&intra_parameters,
NULL,
lcu);
}
double bits = 0;
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
if(tr_cu->depth != tr_cu->tr_depth) {
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
} else {
kvz_select_jccr_mode(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu, &chroma.cost);
chroma.jccr = tr_cu->joint_cb_cr;
}
double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode);
bits += mode_bits;
@ -1083,6 +1115,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
}
best_cclm[0] = best_chroma.cclm[0];
best_cclm[1] = best_chroma.cclm[1];
tr_cu->joint_cb_cr = best_chroma.jccr;
return best_chroma.mode;
}
@ -1154,15 +1187,14 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
* Update lcu to have best modes at this depth.
* \return Cost of best mode.
*/
void kvz_search_cu_intra(encoder_state_t * const state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu,
int8_t *mode_out,
int8_t *trafo_out,
double *cost_out,
uint8_t *multi_ref_idx_out,
bool *mip_flag_out,
bool * mip_transposed_out)
void kvz_search_cu_intra(
encoder_state_t * const state,
const int x_px,
const int y_px,
const int depth,
lcu_t *lcu,
double *cost_out,
intra_parameters_t* intra_parameters)
{
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
const int8_t cu_width = LCU_WIDTH >> depth;
@ -1333,10 +1365,10 @@ void kvz_search_cu_intra(encoder_state_t * const state,
tmp_best_mode = (tmp_mip_transp ? tmp_best_mode - (num_mip_modes >> 1) : tmp_best_mode);
}
*mode_out = tmp_best_mode;
*trafo_out = tmp_best_trafo;
intra_parameters->luma_mode = tmp_best_mode;
intra_parameters->mts_idx = tmp_best_trafo;
*cost_out = tmp_best_cost;
*mip_flag_out = tmp_mip_flag;
*mip_transposed_out = tmp_mip_transp;
*multi_ref_idx_out = tmp_mip_flag ? 0 : best_line;
intra_parameters->mip_flag = tmp_mip_flag;
intra_parameters->mip_transp = tmp_mip_transp;
intra_parameters->multi_ref_idx = tmp_mip_flag ? 0 : best_line;
}

View file

@ -53,14 +53,13 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu, cclm_parameters_t* best_cclm);
void kvz_search_cu_intra(encoder_state_t * const state,
const int x_px, const int y_px,
const int depth, lcu_t *lcu,
int8_t *mode_out,
int8_t *trafo_out,
double *cost_out,
uint8_t *multi_ref_idx_out,
bool *mip_flag,
bool *mip_transp);
/**
 * \brief Intra search for the CU at (x_px, y_px) of size LCU_WIDTH >> depth.
 *
 * \param cost_out          Receives the cost of the best candidate found.
 * \param intra_parameters  Output. The function fills in luma_mode, mts_idx,
 *                          mip_flag, mip_transp and multi_ref_idx
 *                          (multi_ref_idx is 0 whenever MIP is selected).
 *                          NOTE(review): chroma_mode, cclm_parameters and
 *                          jccr do not appear to be written here - callers
 *                          should set them themselves; confirm.
 */
void kvz_search_cu_intra(
encoder_state_t * const state,
const int x_px,
const int y_px,
const int depth,
lcu_t *lcu,
double *cost_out,
intra_parameters_t* intra_parameters);
#endif // SEARCH_INTRA_H_

View file

@ -479,15 +479,17 @@ static void quantize_tr_residual(encoder_state_t * const state,
* - lcu->cbf coded block flags for the area
* - lcu->cu.intra.tr_skip tr skip flags for the area (in case of luma)
*/
void kvz_quantize_lcu_residual(encoder_state_t * const state,
const bool luma,
const bool chroma,
const int32_t x,
const int32_t y,
const uint8_t depth,
cu_info_t *cur_pu,
lcu_t* lcu,
bool early_skip)
void kvz_quantize_lcu_residual(
encoder_state_t * const state,
const bool luma,
const bool chroma,
const bool jccr,
const int32_t x,
const int32_t y,
const uint8_t depth,
cu_info_t *cur_pu,
lcu_t* lcu,
bool early_skip)
{
const int32_t width = LCU_WIDTH >> depth;
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
@ -509,7 +511,7 @@ void kvz_quantize_lcu_residual(encoder_state_t * const state,
if (luma) {
cbf_clear(&cur_pu->cbf, depth, COLOR_Y);
}
if (chroma) {
if (chroma || jccr) {
cbf_clear(&cur_pu->cbf, depth, COLOR_U);
cbf_clear(&cur_pu->cbf, depth, COLOR_V);
}
@ -521,10 +523,11 @@ void kvz_quantize_lcu_residual(encoder_state_t * const state,
const int32_t x2 = x + offset;
const int32_t y2 = y + offset;
kvz_quantize_lcu_residual(state, luma, chroma, x, y, depth + 1, NULL, lcu, early_skip);
kvz_quantize_lcu_residual(state, luma, chroma, x2, y, depth + 1, NULL, lcu, early_skip);
kvz_quantize_lcu_residual(state, luma, chroma, x, y2, depth + 1, NULL, lcu, early_skip);
kvz_quantize_lcu_residual(state, luma, chroma, x2, y2, depth + 1, NULL, lcu, early_skip);
// jccr is currently not supported if transform is split
kvz_quantize_lcu_residual(state, luma, chroma, 0, x, y, depth + 1, NULL, lcu, early_skip);
kvz_quantize_lcu_residual(state, luma, chroma, 0, x2, y, depth + 1, NULL, lcu, early_skip);
kvz_quantize_lcu_residual(state, luma, chroma, 0, x, y2, depth + 1, NULL, lcu, early_skip);
kvz_quantize_lcu_residual(state, luma, chroma, 0, x2, y2, depth + 1, NULL, lcu, early_skip);
// Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = {
@ -546,10 +549,10 @@ void kvz_quantize_lcu_residual(encoder_state_t * const state,
}
if (chroma) {
quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu, early_skip);
quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip);
if(state->encoder_control->cfg.jccr && cur_pu->tr_depth == cur_pu->depth){
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip);
}
quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip);
}
if (jccr && cur_pu->tr_depth == cur_pu->depth) {
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip);
}
}
}

View file

@ -67,14 +67,16 @@ void kvz_itransform2d(const encoder_control_t * const encoder,
int32_t kvz_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale);
void kvz_quantize_lcu_residual(encoder_state_t *state,
bool luma,
bool chroma,
int32_t x,
int32_t y,
uint8_t depth,
cu_info_t *cur_cu,
lcu_t* lcu,
bool early_skip);
/**
 * \brief Quantize the residual of a block of width LCU_WIDTH >> depth at
 *        (x, y) inside an LCU.
 *
 * \param luma        Handle the luma plane (presumably COLOR_Y; the luma
 *                    quantization call is not visible in this view).
 * \param chroma      Handle COLOR_U and COLOR_V.
 * \param jccr        Additionally quantize the joint Cb-Cr residual
 *                    (COLOR_UV). Only takes effect when the CU's tr_depth
 *                    equals its depth; the function passes 0 for this
 *                    argument when it recurses into a transform split.
 * \param cur_cu      CU being processed; the recursive calls pass NULL.
 * \param early_skip  Forwarded unchanged to quantize_tr_residual.
 */
void kvz_quantize_lcu_residual(
encoder_state_t *state,
bool luma,
bool chroma,
const bool jccr,
int32_t x,
int32_t y,
uint8_t depth,
cu_info_t *cur_cu,
lcu_t* lcu,
bool early_skip);
#endif