Improve jccr search

This commit is contained in:
Joose Sainio 2022-04-20 08:12:42 +03:00
parent d41103385a
commit b413aa5c43
7 changed files with 90 additions and 45 deletions

View file

@ -507,8 +507,8 @@ static void encode_transform_coeff(encoder_state_t * const state,
const int cb_flag_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y);
const int cb_flag_u = cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U);
const int cb_flag_v = cur_pu->joint_cb_cr ? ((cur_pu->joint_cb_cr & 2) >> 1) : cbf_is_set(cur_cu->cbf, depth, COLOR_V);
const int cb_flag_u = cur_pu->joint_cb_cr ? (cur_pu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U);
const int cb_flag_v = cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_V);
// The split_transform_flag is not signaled when:
// - transform size is greater than 32 (depth == 0)

View file

@ -1125,7 +1125,7 @@ static void kvz_encoder_state_write_bitstream_picture_header(
}
if (encoder->cfg.jccr) {
WRITE_U(stream, 0, 1, "ph_joint_cbcr_sign_flag");
WRITE_U(stream, state->frame->jccr_sign, 1, "ph_joint_cbcr_sign_flag");
}
// END PICTURE HEADER

View file

@ -634,6 +634,38 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
}
}
static void set_joint_cb_cr_modes(encoder_state_t* state, kvz_picture* pic)
{
bool sgnFlag = true;
if (state->encoder_control->chroma_format != KVZ_CSP_400)
{
const int x1 = pic->width / 2 - 1;
const int y1 = pic->height / 2 - 1;
const int cbs = pic->stride / 2;
const int crs = pic->stride / 2;
const kvz_pixel* p_cb = pic->u + 1 * cbs;
const kvz_pixel* p_cr = pic->v + 1 * crs;
int64_t sum_cb_cr = 0;
// determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes
for (int y = 1; y < y1; y++, p_cb += cbs, p_cr += crs)
{
for (int x = 1; x < x1; x++)
{
int cb = (12 * (int)p_cb[x] - 2 * ((int)p_cb[x - 1] + (int)p_cb[x + 1] + (int)p_cb[x - cbs] + (int)p_cb[x + cbs]) - ((int)p_cb[x - 1 - cbs] + (int)p_cb[x + 1 - cbs] + (int)p_cb[x - 1 + cbs] + (int)p_cb[x + 1 + cbs]));
int cr = (12 * (int)p_cr[x] - 2 * ((int)p_cr[x - 1] + (int)p_cr[x + 1] + (int)p_cr[x - crs] + (int)p_cr[x + crs]) - ((int)p_cr[x - 1 - crs] + (int)p_cr[x + 1 - crs] + (int)p_cr[x - 1 + crs] + (int)p_cr[x + 1 + crs]));
sum_cb_cr += cb * cr;
}
}
sgnFlag = (sum_cb_cr < 0);
}
state->frame->jccr_sign = sgnFlag;
}
static void encoder_state_worker_encode_lcu_bitstream(void* opaque);
static void encoder_state_worker_encode_lcu_search(void * opaque)
@ -1870,6 +1902,7 @@ void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame)
encoder_state_init_new_frame(state, frame);
if(state->encoder_control->cfg.jccr) set_joint_cb_cr_modes(state, frame);
// Create a separate job for ALF done after everything else, and only then do final bitstream writing (for ALF parameters)
if (state->encoder_control->cfg.alf_type && state->encoder_control->cfg.wpp) {

View file

@ -195,6 +195,7 @@ typedef struct encoder_state_config_frame_t {
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
uint8_t* hmvp_size; //!< \brief HMVP LUT size
bool jccr_sign;
} encoder_state_config_frame_t;

View file

@ -637,16 +637,17 @@ void kvz_select_jccr_mode(
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cr_search");
int cbf_mask = cbf_is_set(pred_cu->cbf, depth, COLOR_U) * 2 + cbf_is_set(pred_cu->cbf, depth, COLOR_V) - 1;
int cbf_mask = u_is_set * 2 + v_is_set - 1;
if((cbf_mask != -1 && pred_cu->type == CU_INTRA) || cbf_mask == 2)
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 0, tr_tree_bits, "jccr_flag");
if(pred_cu->joint_cb_cr) {
const int u_jccr = (pred_cu->joint_cb_cr >> 1) & 1;
ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
CABAC_FBITS_UPDATE(cabac, ctx, pred_cu->joint_cb_cr & 1, joint_cbcr_tr_tree_bits, "cbf_cb_search");
ctx = &(cabac->ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
CABAC_FBITS_UPDATE(cabac, ctx, (pred_cu->joint_cb_cr & 2) >> 1, joint_cbcr_tr_tree_bits, "cbf_cr_search");
cbf_mask = (pred_cu->joint_cb_cr & 1) * 2 + ((pred_cu->joint_cb_cr & 2) >> 1) - 1;
CABAC_FBITS_UPDATE(cabac, ctx, u_jccr, joint_cbcr_tr_tree_bits, "cbf_cb_search");
ctx = &(cabac->ctx.qt_cbf_model_cr[u_jccr]);
CABAC_FBITS_UPDATE(cabac, ctx, pred_cu->joint_cb_cr & 1, joint_cbcr_tr_tree_bits, "cbf_cr_search");
cbf_mask = pred_cu->joint_cb_cr - 1;
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 1, joint_cbcr_tr_tree_bits, "jccr_flag");
}
int ssd = 0;
@ -695,10 +696,10 @@ void kvz_select_jccr_mode(
}
cbf_clear(&pred_cu->cbf, depth, COLOR_U);
cbf_clear(&pred_cu->cbf, depth, COLOR_V);
if (pred_cu->joint_cb_cr & 1) {
if (pred_cu->joint_cb_cr & 2) {
cbf_set(&pred_cu->cbf, depth, COLOR_U);
}
if (pred_cu->joint_cb_cr & 2) {
if (pred_cu->joint_cb_cr & 1) {
cbf_set(&pred_cu->cbf, depth, COLOR_V);
}
int lcu_width = LCU_WIDTH_C;
@ -989,6 +990,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
);
cur_cu->joint_cb_cr = 0;
// TODO: This relies heavily on square CUs
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) {
@ -996,7 +998,6 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// rd2. Possibly because the luma mode search already takes chroma
// into account, so there is less of a chance of luma mode being
// really bad for chroma.
cur_cu->joint_cb_cr = 0;
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; // skip luma
if (ctrl->cfg.rdo >= 3 && !cur_cu->intra.mip_flag) {
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search);
@ -1022,6 +1023,14 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
}
else if(depth != 0 && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr & 3) {
assert(cur_cu->joint_cb_cr < 4);
cbf_clear(&cur_cu->cbf, depth, COLOR_U);
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
if (cur_cu->joint_cb_cr & 2) {
cbf_set(&cur_cu->cbf, depth, COLOR_U);
}
if (cur_cu->joint_cb_cr & 1) {
cbf_set(&cur_cu->cbf, depth, COLOR_V);
}
const vector2d_t lcu_px = { (x_local & ~7) / 2, (y_local & ~7) / 2 };
int lcu_width = LCU_WIDTH_C;
const int index = lcu_px.x + lcu_px.y * lcu_width;

View file

@ -320,6 +320,7 @@ static double search_intra_trdepth(
num_transforms = MAX(num_transforms, 2);
}
pred_cu->intra.mode_chroma = -1;
pred_cu->joint_cb_cr = 4;
for (; trafo < num_transforms; trafo++) {
pred_cu->tr_idx = trafo;
if (mts_enabled)

View file

@ -225,39 +225,40 @@ int kvz_quant_cbcr_residual_generic(
int64_t best_cost = INT64_MAX;
// This changes the order of the cbf_masks so 2 and 3 are swapped compared with VTM
for(int cbf_mask = cur_cu->type == CU_INTRA ? 1 : 3; cbf_mask < 4; cbf_mask++) {
for(int i = cur_cu->type == CU_INTRA ? 1 : 3; i < 4; i++) {
int64_t d1 = 0;
const int cbf_mask = i * (state->frame->jccr_sign ? -1 : 1);
for (int y = 0; y < width; y++)
{
for (int x = 0; x < width; x++)
{
int cbx = u_residual[x + y * width], crx = v_residual[x + y * width];
if (cbf_mask == 1)
if (cbf_mask == 2)
{
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (u1_residual[cbf_mask / 2][x + y * width] >> 1));
u1_residual[i - 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (u1_residual[i - 2][x + y * width] >> 1));
}
else if (cbf_mask == -1)
else if (cbf_mask == -2)
{
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (-u1_residual[cbf_mask / 2][x + y * width] >> 1));
u1_residual[i - 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (-u1_residual[i - 2][x + y * width] >> 1));
}
else if (cbf_mask == 3)
{
u1_residual[cbf_mask / 2][x + y * width] = ((cbx + crx) / 2);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - u1_residual[cbf_mask / 2][x + y * width]);
u1_residual[i - 2][x + y * width] = ((cbx + crx) / 2);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - u1_residual[i - 2][x + y * width]);
}
else if (cbf_mask == -3)
{
u1_residual[cbf_mask / 2][x + y * width] = ((cbx - crx) / 2);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx + u1_residual[cbf_mask / 2][x + y * width]);
u1_residual[i - 2][x + y * width] = ((cbx - crx) / 2);
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx + u1_residual[i - 2][x + y * width]);
}
else if (cbf_mask == 2)
else if (cbf_mask == 1)
{
v1_residual[x + y * width] = ((4 * crx + 2 * cbx) / 5);
d1 += square(cbx - (v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
}
else if (cbf_mask == -2)
else if (cbf_mask == -1)
{
v1_residual[x + y * width] = ((4 * crx - 2 * cbx) / 5);
d1 += square(cbx - (-v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
@ -270,19 +271,19 @@ int kvz_quant_cbcr_residual_generic(
}
}
if (d1 < best_cost) {
best_cbf_mask = cbf_mask;
best_cbf_mask = i;
best_cost = d1;
}
}
kvz_transform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
kvz_transform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
if (state->encoder_control->cfg.rdoq_enable &&
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
{
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
kvz_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
kvz_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, tr_depth, cur_cu->cbf);
}
else if (state->encoder_control->cfg.rdoq_enable && false) {
@ -290,7 +291,7 @@ int kvz_quant_cbcr_residual_generic(
scan_order);
}
else {
kvz_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
kvz_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
}
@ -309,10 +310,10 @@ int kvz_quant_cbcr_residual_generic(
int y, x;
// Get quantized residual. (coeff_out -> coeff -> residual)
kvz_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
kvz_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
kvz_itransform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
kvz_itransform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
//if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
@ -333,32 +334,32 @@ int kvz_quant_cbcr_residual_generic(
// }
// }
//}
const int temp = best_cbf_mask * (state->frame->jccr_sign ? -1 : 1);
// Get quantized reconstruction. (residual + pred_in -> rec_out)
for (int y = 0; y < width; y++) {
for (int x = 0; x < width; x++) {
if (best_cbf_mask == 1) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
if (temp == 2) {
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
}
else if (best_cbf_mask == -1) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
else if (temp == -2) {
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
}
else if (best_cbf_mask == 3) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
else if (temp == 3) {
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
}
else if (best_cbf_mask == -3) {
else if (temp == -3) {
// non-normative clipping to prevent 16-bit overflow
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width];
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width];
}
else if (best_cbf_mask == 2) {
else if (temp == 1) {
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
v_residual[x + y * width] = v1_residual[x + y * width];
}
else if (best_cbf_mask == -2) {
else if (temp == -1) {
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
v_residual[x + y * width] = -v1_residual[x + y * width];
}