mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Improve jccr search
This commit is contained in:
parent
d41103385a
commit
b413aa5c43
|
@ -507,8 +507,8 @@ static void encode_transform_coeff(encoder_state_t * const state,
|
|||
|
||||
|
||||
const int cb_flag_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y);
|
||||
const int cb_flag_u = cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U);
|
||||
const int cb_flag_v = cur_pu->joint_cb_cr ? ((cur_pu->joint_cb_cr & 2) >> 1) : cbf_is_set(cur_cu->cbf, depth, COLOR_V);
|
||||
const int cb_flag_u = cur_pu->joint_cb_cr ? (cur_pu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U);
|
||||
const int cb_flag_v = cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_V);
|
||||
|
||||
// The split_transform_flag is not signaled when:
|
||||
// - transform size is greater than 32 (depth == 0)
|
||||
|
|
|
@ -1125,7 +1125,7 @@ static void kvz_encoder_state_write_bitstream_picture_header(
|
|||
}
|
||||
|
||||
if (encoder->cfg.jccr) {
|
||||
WRITE_U(stream, 0, 1, "ph_joint_cbcr_sign_flag");
|
||||
WRITE_U(stream, state->frame->jccr_sign, 1, "ph_joint_cbcr_sign_flag");
|
||||
}
|
||||
// END PICTURE HEADER
|
||||
|
||||
|
|
|
@ -634,6 +634,38 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
static void set_joint_cb_cr_modes(encoder_state_t* state, kvz_picture* pic)
|
||||
{
|
||||
bool sgnFlag = true;
|
||||
|
||||
if (state->encoder_control->chroma_format != KVZ_CSP_400)
|
||||
{
|
||||
const int x1 = pic->width / 2 - 1;
|
||||
const int y1 = pic->height / 2 - 1;
|
||||
const int cbs = pic->stride / 2;
|
||||
const int crs = pic->stride / 2;
|
||||
const kvz_pixel* p_cb = pic->u + 1 * cbs;
|
||||
const kvz_pixel* p_cr = pic->v + 1 * crs;
|
||||
int64_t sum_cb_cr = 0;
|
||||
|
||||
// determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes
|
||||
for (int y = 1; y < y1; y++, p_cb += cbs, p_cr += crs)
|
||||
{
|
||||
for (int x = 1; x < x1; x++)
|
||||
{
|
||||
int cb = (12 * (int)p_cb[x] - 2 * ((int)p_cb[x - 1] + (int)p_cb[x + 1] + (int)p_cb[x - cbs] + (int)p_cb[x + cbs]) - ((int)p_cb[x - 1 - cbs] + (int)p_cb[x + 1 - cbs] + (int)p_cb[x - 1 + cbs] + (int)p_cb[x + 1 + cbs]));
|
||||
int cr = (12 * (int)p_cr[x] - 2 * ((int)p_cr[x - 1] + (int)p_cr[x + 1] + (int)p_cr[x - crs] + (int)p_cr[x + crs]) - ((int)p_cr[x - 1 - crs] + (int)p_cr[x + 1 - crs] + (int)p_cr[x - 1 + crs] + (int)p_cr[x + 1 + crs]));
|
||||
sum_cb_cr += cb * cr;
|
||||
}
|
||||
}
|
||||
|
||||
sgnFlag = (sum_cb_cr < 0);
|
||||
}
|
||||
|
||||
state->frame->jccr_sign = sgnFlag;
|
||||
}
|
||||
|
||||
static void encoder_state_worker_encode_lcu_bitstream(void* opaque);
|
||||
|
||||
static void encoder_state_worker_encode_lcu_search(void * opaque)
|
||||
|
@ -1870,6 +1902,7 @@ void kvz_encode_one_frame(encoder_state_t * const state, kvz_picture* frame)
|
|||
|
||||
|
||||
encoder_state_init_new_frame(state, frame);
|
||||
if(state->encoder_control->cfg.jccr) set_joint_cb_cr_modes(state, frame);
|
||||
|
||||
// Create a separate job for ALF done after everything else, and only then do final bitstream writing (for ALF parameters)
|
||||
if (state->encoder_control->cfg.alf_type && state->encoder_control->cfg.wpp) {
|
||||
|
|
|
@ -195,6 +195,7 @@ typedef struct encoder_state_config_frame_t {
|
|||
|
||||
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
|
||||
uint8_t* hmvp_size; //!< \brief HMVP LUT size
|
||||
bool jccr_sign;
|
||||
|
||||
} encoder_state_config_frame_t;
|
||||
|
||||
|
|
25
src/search.c
25
src/search.c
|
@ -637,16 +637,17 @@ void kvz_select_jccr_mode(
|
|||
int v_is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_V);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, v_is_set, tr_tree_bits, "cbf_cr_search");
|
||||
|
||||
int cbf_mask = cbf_is_set(pred_cu->cbf, depth, COLOR_U) * 2 + cbf_is_set(pred_cu->cbf, depth, COLOR_V) - 1;
|
||||
int cbf_mask = u_is_set * 2 + v_is_set - 1;
|
||||
if((cbf_mask != -1 && pred_cu->type == CU_INTRA) || cbf_mask == 2)
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 0, tr_tree_bits, "jccr_flag");
|
||||
|
||||
if(pred_cu->joint_cb_cr) {
|
||||
const int u_jccr = (pred_cu->joint_cb_cr >> 1) & 1;
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cb[0]);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, pred_cu->joint_cb_cr & 1, joint_cbcr_tr_tree_bits, "cbf_cb_search");
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, (pred_cu->joint_cb_cr & 2) >> 1, joint_cbcr_tr_tree_bits, "cbf_cr_search");
|
||||
cbf_mask = (pred_cu->joint_cb_cr & 1) * 2 + ((pred_cu->joint_cb_cr & 2) >> 1) - 1;
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, u_jccr, joint_cbcr_tr_tree_bits, "cbf_cb_search");
|
||||
ctx = &(cabac->ctx.qt_cbf_model_cr[u_jccr]);
|
||||
CABAC_FBITS_UPDATE(cabac, ctx, pred_cu->joint_cb_cr & 1, joint_cbcr_tr_tree_bits, "cbf_cr_search");
|
||||
cbf_mask = pred_cu->joint_cb_cr - 1;
|
||||
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.joint_cb_cr[cbf_mask]), 1, joint_cbcr_tr_tree_bits, "jccr_flag");
|
||||
}
|
||||
int ssd = 0;
|
||||
|
@ -695,10 +696,10 @@ void kvz_select_jccr_mode(
|
|||
}
|
||||
cbf_clear(&pred_cu->cbf, depth, COLOR_U);
|
||||
cbf_clear(&pred_cu->cbf, depth, COLOR_V);
|
||||
if (pred_cu->joint_cb_cr & 1) {
|
||||
if (pred_cu->joint_cb_cr & 2) {
|
||||
cbf_set(&pred_cu->cbf, depth, COLOR_U);
|
||||
}
|
||||
if (pred_cu->joint_cb_cr & 2) {
|
||||
if (pred_cu->joint_cb_cr & 1) {
|
||||
cbf_set(&pred_cu->cbf, depth, COLOR_V);
|
||||
}
|
||||
int lcu_width = LCU_WIDTH_C;
|
||||
|
@ -989,6 +990,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
);
|
||||
cur_cu->joint_cb_cr = 0;
|
||||
|
||||
// TODO: This relies heavily on square CUs
|
||||
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) {
|
||||
|
@ -996,7 +998,6 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// rd2. Possibly because the luma mode search already takes chroma
|
||||
// into account, so there is less of a chance of luma mode being
|
||||
// really bad for chroma.
|
||||
cur_cu->joint_cb_cr = 0;
|
||||
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; // skip luma
|
||||
if (ctrl->cfg.rdo >= 3 && !cur_cu->intra.mip_flag) {
|
||||
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search);
|
||||
|
@ -1022,6 +1023,14 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
}
|
||||
else if(depth != 0 && state->encoder_control->cfg.jccr && cur_cu->joint_cb_cr & 3) {
|
||||
assert(cur_cu->joint_cb_cr < 4);
|
||||
cbf_clear(&cur_cu->cbf, depth, COLOR_U);
|
||||
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
|
||||
if (cur_cu->joint_cb_cr & 2) {
|
||||
cbf_set(&cur_cu->cbf, depth, COLOR_U);
|
||||
}
|
||||
if (cur_cu->joint_cb_cr & 1) {
|
||||
cbf_set(&cur_cu->cbf, depth, COLOR_V);
|
||||
}
|
||||
const vector2d_t lcu_px = { (x_local & ~7) / 2, (y_local & ~7) / 2 };
|
||||
int lcu_width = LCU_WIDTH_C;
|
||||
const int index = lcu_px.x + lcu_px.y * lcu_width;
|
||||
|
|
|
@ -320,6 +320,7 @@ static double search_intra_trdepth(
|
|||
num_transforms = MAX(num_transforms, 2);
|
||||
}
|
||||
pred_cu->intra.mode_chroma = -1;
|
||||
pred_cu->joint_cb_cr = 4;
|
||||
for (; trafo < num_transforms; trafo++) {
|
||||
pred_cu->tr_idx = trafo;
|
||||
if (mts_enabled)
|
||||
|
|
|
@ -225,39 +225,40 @@ int kvz_quant_cbcr_residual_generic(
|
|||
int64_t best_cost = INT64_MAX;
|
||||
|
||||
// This changes the order of the cbf_masks so 2 and 3 are swapped compared with VTM
|
||||
for(int cbf_mask = cur_cu->type == CU_INTRA ? 1 : 3; cbf_mask < 4; cbf_mask++) {
|
||||
for(int i = cur_cu->type == CU_INTRA ? 1 : 3; i < 4; i++) {
|
||||
int64_t d1 = 0;
|
||||
const int cbf_mask = i * (state->frame->jccr_sign ? -1 : 1);
|
||||
for (int y = 0; y < width; y++)
|
||||
{
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
int cbx = u_residual[x + y * width], crx = v_residual[x + y * width];
|
||||
if (cbf_mask == 1)
|
||||
if (cbf_mask == 2)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (u1_residual[cbf_mask / 2][x + y * width] >> 1));
|
||||
u1_residual[i - 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (u1_residual[i - 2][x + y * width] >> 1));
|
||||
}
|
||||
else if (cbf_mask == -1)
|
||||
else if (cbf_mask == -2)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (-u1_residual[cbf_mask / 2][x + y * width] >> 1));
|
||||
u1_residual[i - 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - (-u1_residual[i - 2][x + y * width] >> 1));
|
||||
}
|
||||
else if (cbf_mask == 3)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((cbx + crx) / 2);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - u1_residual[cbf_mask / 2][x + y * width]);
|
||||
u1_residual[i - 2][x + y * width] = ((cbx + crx) / 2);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx - u1_residual[i - 2][x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == -3)
|
||||
{
|
||||
u1_residual[cbf_mask / 2][x + y * width] = ((cbx - crx) / 2);
|
||||
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx + u1_residual[cbf_mask / 2][x + y * width]);
|
||||
u1_residual[i - 2][x + y * width] = ((cbx - crx) / 2);
|
||||
d1 += square(cbx - u1_residual[i - 2][x + y * width]) + square(crx + u1_residual[i - 2][x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == 2)
|
||||
else if (cbf_mask == 1)
|
||||
{
|
||||
v1_residual[x + y * width] = ((4 * crx + 2 * cbx) / 5);
|
||||
d1 += square(cbx - (v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
|
||||
}
|
||||
else if (cbf_mask == -2)
|
||||
else if (cbf_mask == -1)
|
||||
{
|
||||
v1_residual[x + y * width] = ((4 * crx - 2 * cbx) / 5);
|
||||
d1 += square(cbx - (-v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
|
||||
|
@ -270,19 +271,19 @@ int kvz_quant_cbcr_residual_generic(
|
|||
}
|
||||
}
|
||||
if (d1 < best_cost) {
|
||||
best_cbf_mask = cbf_mask;
|
||||
best_cbf_mask = i;
|
||||
best_cost = d1;
|
||||
}
|
||||
}
|
||||
|
||||
kvz_transform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
|
||||
kvz_transform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
if (state->encoder_control->cfg.rdoq_enable &&
|
||||
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
|
||||
{
|
||||
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
|
||||
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
|
||||
kvz_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
|
||||
kvz_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, tr_depth, cur_cu->cbf);
|
||||
}
|
||||
else if (state->encoder_control->cfg.rdoq_enable && false) {
|
||||
|
@ -290,7 +291,7 @@ int kvz_quant_cbcr_residual_generic(
|
|||
scan_order);
|
||||
}
|
||||
else {
|
||||
kvz_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
|
||||
kvz_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
}
|
||||
|
||||
|
@ -309,10 +310,10 @@ int kvz_quant_cbcr_residual_generic(
|
|||
int y, x;
|
||||
|
||||
// Get quantized residual. (coeff_out -> coeff -> residual)
|
||||
kvz_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
|
||||
kvz_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U,
|
||||
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
|
||||
|
||||
kvz_itransform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
|
||||
kvz_itransform2d(state->encoder_control, best_cbf_mask == 1 ? v1_residual : u1_residual[best_cbf_mask - 2], coeff, width, best_cbf_mask == 1 ? COLOR_V : COLOR_U, cur_cu);
|
||||
|
||||
|
||||
//if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
|
||||
|
@ -333,32 +334,32 @@ int kvz_quant_cbcr_residual_generic(
|
|||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
const int temp = best_cbf_mask * (state->frame->jccr_sign ? -1 : 1);
|
||||
// Get quantized reconstruction. (residual + pred_in -> rec_out)
|
||||
for (int y = 0; y < width; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
if (best_cbf_mask == 1) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
|
||||
if (temp == 2) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
|
||||
}
|
||||
else if (best_cbf_mask == -1) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
|
||||
else if (temp == -2) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width] >> 1;
|
||||
}
|
||||
else if (best_cbf_mask == 3) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
else if (temp == 3) {
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
v_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
}
|
||||
else if (best_cbf_mask == -3) {
|
||||
else if (temp == -3) {
|
||||
// non-normative clipping to prevent 16-bit overflow
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width];
|
||||
u_residual[x + y * width] = u1_residual[best_cbf_mask - 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
|
||||
v_residual[x + y * width] = -u1_residual[best_cbf_mask - 2][x + y * width];
|
||||
}
|
||||
else if (best_cbf_mask == 2) {
|
||||
else if (temp == 1) {
|
||||
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = v1_residual[x + y * width];
|
||||
}
|
||||
else if (best_cbf_mask == -2) {
|
||||
else if (temp == -1) {
|
||||
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
|
||||
v_residual[x + y * width] = -v1_residual[x + y * width];
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue