diff --git a/configure.ac b/configure.ac index 4447e5bf..dbbb1b72 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c]) # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html ver_major=6 -ver_minor=5 +ver_minor=6 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS diff --git a/src/cabac.h b/src/cabac.h index f86a633a..9b946ab1 100644 --- a/src/cabac.h +++ b/src/cabac.h @@ -95,7 +95,7 @@ typedef struct cabac_ctx_t luma_planar_model[2]; cabac_ctx_t multi_ref_line[2]; cabac_ctx_t bdpcm_mode[4]; - cabac_ctx_t joint_bc_br[3]; + cabac_ctx_t joint_cb_cr[3]; cabac_ctx_t transform_skip_model_luma; cabac_ctx_t transform_skip_model_chroma; cabac_ctx_t transform_skip_sig_coeff[3]; diff --git a/src/cfg.c b/src/cfg.c index 2d06a718..37cf6b04 100644 --- a/src/cfg.c +++ b/src/cfg.c @@ -180,10 +180,7 @@ int kvz_config_init(kvz_config *cfg) cfg->fastrd_sampling_on = 0; cfg->fastrd_accuracy_check_on = 0; cfg->fastrd_learning_outdir_fn = NULL; - - int8_t in[] = { 17, 27, 32, 44 }; - int8_t out[] = { 17, 29, 34, 41 }; - + cfg->chroma_scale_out[0][0] = cfg->chroma_scale_in[0][0] = 17; cfg->chroma_scale_out[0][1] = cfg->chroma_scale_in[0][1] = 27; cfg->chroma_scale_out[0][2] = cfg->chroma_scale_in[0][2] = 32; @@ -195,6 +192,8 @@ int kvz_config_init(kvz_config *cfg) parse_qp_map(cfg, 0); + cfg->jccr = 0; + return 1; } @@ -1466,6 +1465,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value) parse_qp_map(cfg, 0); return success; } + else if OPT("jccr") { + cfg->jccr = (bool)atobool(value); + } else { return 0; } diff --git a/src/cli.c b/src/cli.c index 2ec02f8b..24cf22c4 100644 --- a/src/cli.c +++ b/src/cli.c @@ -162,6 +162,8 @@ static const struct option long_options[] = { { "fastrd-outdir", required_argument, NULL, 0 }, { "chroma-qp-in", required_argument, NULL, 0 }, { "chroma-qp-out", required_argument, NULL, 0 }, + { "jccr", no_argument, NULL, 0 }, + { "no-jccr", no_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -613,6 +615,8 @@ void print_help(void) " - both: MTS applied for both intra and inter blocks.\n" " - implicit: uses implicit MTS. Applies DST7 instead \n" " of DCT2 to certain intra blocks.\n" + " --(no-)jccr : Joint coding of chroma residual. " + " Requires rdo> = 2. [disabled]\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") *************/ "Parallel processing:\n" diff --git a/src/context.c b/src/context.c index dd7853a2..ace5f46c 100644 --- a/src/context.c +++ b/src/context.c @@ -455,7 +455,7 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice) for (i = 0; i < 3; i++) { kvz_ctx_init(&cabac->ctx.cu_skip_flag_model[i], QP, INIT_SKIP_FLAG[slice][i], INIT_SKIP_FLAG[3][i]); - kvz_ctx_init(&cabac->ctx.joint_bc_br[i], QP, INIT_JOINT_CB_CR_FLAG[slice][i], INIT_JOINT_CB_CR_FLAG[3][i]); + kvz_ctx_init(&cabac->ctx.joint_cb_cr[i], QP, INIT_JOINT_CB_CR_FLAG[slice][i], INIT_JOINT_CB_CR_FLAG[3][i]); kvz_ctx_init(&cabac->ctx.transform_skip_sig_coeff[i], QP, INIT_TRANSFORM_SKIP_SIG_COEFF[slice][i], INIT_TRANSFORM_SKIP_SIG_COEFF[3][i]); kvz_ctx_init(&cabac->ctx.transform_skip_sig[i], QP, INIT_TRANSFORM_SKIP_SIG[slice][i], INIT_TRANSFORM_SKIP_SIG[3][i]); } diff --git a/src/cu.h b/src/cu.h index 093840dc..c0f395db 100644 --- a/src/cu.h +++ b/src/cu.h @@ -127,15 +127,16 @@ typedef struct { */ typedef struct { - uint8_t type : 2; //!< \brief block type, one of cu_type_t values - uint8_t depth : 3; //!< \brief depth / size of this block - uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values - uint8_t tr_depth : 3; //!< \brief transform depth - uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped - uint8_t merged : 1; //!< \brief flag to indicate this block is merged - uint8_t merge_idx : 3; //!< \brief merge index - uint8_t tr_skip : 1; //!< \brief transform skip flag - uint8_t tr_idx : 3; //!< \brief transform index + uint8_t type : 2; //!< \brief block type, one of cu_type_t values + uint8_t depth : 3; //!< \brief depth / size of this block + uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values + uint8_t tr_depth : 3; //!< \brief transform depth + uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped + uint8_t merged : 1; //!< \brief flag to indicate this block is merged + uint8_t merge_idx : 3; //!< \brief merge index + uint8_t tr_skip : 1; //!< \brief transform skip flag + uint8_t tr_idx : 3; //!< \brief transform index + uint8_t joint_cb_cr : 2; //!< \brief joint chroma residual coding uint16_t cbf; @@ -299,6 +300,7 @@ typedef ALIGNED(8) struct { coeff_t y[LCU_LUMA_SIZE]; coeff_t u[LCU_CHROMA_SIZE]; coeff_t v[LCU_CHROMA_SIZE]; + coeff_t joint_uv[LCU_CHROMA_SIZE]; } lcu_coeff_t; diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 799941e6..a25304db 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -328,31 +328,41 @@ void kvz_encode_last_significant_xy(cabac_data_t * const cabac, } } -static void encode_chroma_tu(encoder_state_t* const state, int x, int y, int depth, const uint8_t width_c, const cu_info_t* cur_pu, int8_t* scan_idx, lcu_coeff_t* coeff) { +static void encode_chroma_tu(encoder_state_t* const state, int x, int y, int depth, const uint8_t width_c, const cu_info_t* cur_pu, int8_t* scan_idx, lcu_coeff_t* coeff, uint8_t joint_chroma) { int x_local = (x >> 1) % LCU_WIDTH_C; int y_local = (y >> 1) % LCU_WIDTH_C; cabac_data_t* const cabac = &state->cabac; *scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode_chroma, depth); + if(!joint_chroma){ + const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; + const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; - const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; - const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; - - if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) { - if(state->encoder_control->cfg.trskip_enable && width_c == 4){ - cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; - // HEVC only supports transform_skip for Luma - // TODO: transform skip for chroma blocks - CABAC_BIN(cabac, 0, "transform_skip_flag"); + if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) { + if(state->encoder_control->cfg.trskip_enable && width_c == 4){ + cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; + // HEVC only supports transform_skip for Luma + // TODO: transform skip for chroma blocks + CABAC_BIN(cabac, 0, "transform_skip_flag"); + } + kvz_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, 1, *scan_idx, NULL, false); } - kvz_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, 1, *scan_idx, NULL, false); - } - if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) { + if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) { + if (state->encoder_control->cfg.trskip_enable && width_c == 4) { + cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; + CABAC_BIN(cabac, 0, "transform_skip_flag"); + } + kvz_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, 2, *scan_idx, NULL, false); + } + } + else { + const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; if (state->encoder_control->cfg.trskip_enable && width_c == 4) { cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; CABAC_BIN(cabac, 0, "transform_skip_flag"); } - kvz_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, 2, *scan_idx, NULL, false); + kvz_encode_coeff_nxn(state, &state->cabac, coeff_uv, width_c, 2, *scan_idx, NULL, false); + } } @@ -370,16 +380,6 @@ static void encode_transform_unit(encoder_state_t * const state, int8_t scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth); - if (state->encoder_control->chroma_format != KVZ_CSP_400) { - // joint_cb_cr - /* - if (type == 2 && cbf_mask) { - cabac->cur_ctx = &(cabac->ctx.joint_bc_br[0]); - CABAC_BIN(cabac, 0, "joint_cb_cr"); - } - */ - } - int cbf_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y); if (cbf_y && !only_chroma) { @@ -410,6 +410,7 @@ static void encode_transform_unit(encoder_state_t * const state, } } + bool joint_chroma = cur_pu->joint_cb_cr != 0; if (depth == MAX_DEPTH) { // For size 4x4 luma transform the corresponding chroma transforms are // also of size 4x4 covering 8x8 luma pixels. The residual is coded in @@ -428,8 +429,8 @@ static void encode_transform_unit(encoder_state_t * const state, bool chroma_cbf_set = cbf_is_set(cur_pu->cbf, depth, COLOR_U) || cbf_is_set(cur_pu->cbf, depth, COLOR_V); - if (chroma_cbf_set) { - encode_chroma_tu(state, x, y, depth, width_c, cur_pu, &scan_idx, coeff); + if (chroma_cbf_set || joint_chroma) { + encode_chroma_tu(state, x, y, depth, width_c, cur_pu, &scan_idx, coeff, joint_chroma); } } @@ -483,8 +484,8 @@ static void encode_transform_coeff(encoder_state_t * const state, const int cb_flag_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y); - const int cb_flag_u = cbf_is_set(cur_cu->cbf, depth, COLOR_U); - const int cb_flag_v = cbf_is_set(cur_cu->cbf, depth, COLOR_V); + const int cb_flag_u = cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U); + const int cb_flag_v = cur_pu->joint_cb_cr ? ((cur_pu->joint_cb_cr & 2) >> 1) : cbf_is_set(cur_cu->cbf, depth, COLOR_V); // The split_transform_flag is not signaled when: // - transform size is greater than 32 (depth == 0) @@ -519,7 +520,7 @@ static void encode_transform_coeff(encoder_state_t * const state, } if (true) { cabac->cur_ctx = &(cabac->ctx.qt_cbf_model_cr[cb_flag_u ? 1 : 0]); - CABAC_BIN(cabac, cb_flag_v, "cbf_cr"); + CABAC_BIN(cabac, cb_flag_v, "cbf_cr"); } } } @@ -570,7 +571,10 @@ static void encode_transform_coeff(encoder_state_t * const state, state->must_code_qp_delta = false; } - + if((cb_flag_u || cb_flag_v ) && (depth != 4 || only_chroma) && state->encoder_control->cfg.jccr) { + cabac->cur_ctx = &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1]; + CABAC_BIN(cabac, cur_pu->joint_cb_cr != 0, "tu_joint_cbcr_residual_flag"); + } encode_transform_unit(state, x, y, depth, only_chroma, coeff); } } diff --git a/src/encoder_state-bitstream.c b/src/encoder_state-bitstream.c index 2552cc58..641f48b9 100644 --- a/src/encoder_state-bitstream.c +++ b/src/encoder_state-bitstream.c @@ -614,7 +614,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream, if (encoder->chroma_format != KVZ_CSP_400) { - WRITE_U(stream, 0, 1, "sps_joint_cbcr_enabled_flag"); + WRITE_U(stream, encoder->cfg.jccr, 1, "sps_joint_cbcr_enabled_flag"); WRITE_U(stream, 1, 1, "same_qp_table_for_chroma"); for (int i = 0; i < encoder->cfg.num_used_table; i++) { @@ -1265,6 +1265,11 @@ void kvz_encoder_state_write_bitstream_slice_header( WRITE_UE(stream, state->frame->slicetype, "sh_slice_type"); } + + if (encoder->cfg.jccr) { + WRITE_U(stream, 0, 1, "ph_joint_cbcr_sign_flag"); + } + if (state->frame->pictype == KVZ_NAL_CRA_NUT || state->frame->pictype == KVZ_NAL_IDR_N_LP || state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_GDR_NUT) { WRITE_U(stream, 0, 1, "sh_no_output_of_prior_pics_flag"); @@ -1322,7 +1327,6 @@ void kvz_encoder_state_write_bitstream_slice_header( int slice_qp_delta = state->frame->QP - encoder->cfg.qp; WRITE_SE(stream, slice_qp_delta, "sh_qp_delta"); - if (encoder->cfg.sao_type) { WRITE_U(stream, 1, 1, "sh_sao_luma_flag"); if (encoder->chroma_format != KVZ_CSP_400) { diff --git a/src/global.h b/src/global.h index 97954fad..b3d24048 100644 --- a/src/global.h +++ b/src/global.h @@ -65,6 +65,7 @@ #define RESHAPE_SIGNAL_HLG 2 #define RESHAPE_SIGNAL_NULL 100 + /** * \defgroup Bitstream * HEVC bitstream coding @@ -327,7 +328,7 @@ typedef int16_t coeff_t; #define MAX_TR_DYNAMIC_RANGE 15 //Constants -typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V } color_t; +typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, COLOR_UV } color_t; // Hardware data (abstraction of defines). Extend for other compilers diff --git a/src/image.c b/src/image.c index c0a9eb0f..44d1ee45 100644 --- a/src/image.c +++ b/src/image.c @@ -220,6 +220,8 @@ hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size) yuv->y = (int16_t *)malloc(luma_size * sizeof(*yuv->y)); yuv->u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u)); yuv->v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v)); + yuv->joint_u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u)); + yuv->joint_v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v)); yuv->size = luma_size; return yuv; @@ -230,6 +232,8 @@ void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv) free(yuv->y); free(yuv->u); free(yuv->v); + free(yuv->joint_v); + free(yuv->joint_u); free(yuv); } diff --git a/src/image.h b/src/image.h index 950066bf..11e6452f 100644 --- a/src/image.h +++ b/src/image.h @@ -36,6 +36,8 @@ typedef struct { kvz_pixel y[LCU_LUMA_SIZE]; kvz_pixel u[LCU_CHROMA_SIZE]; kvz_pixel v[LCU_CHROMA_SIZE]; + kvz_pixel joint_u[LCU_CHROMA_SIZE]; + kvz_pixel joint_v[LCU_CHROMA_SIZE]; enum kvz_chroma_format chroma_format; } lcu_yuv_t; @@ -44,6 +46,8 @@ typedef struct { int16_t *y; int16_t *u; int16_t *v; + int16_t *joint_u; + int16_t *joint_v; } hi_prec_buf_t; typedef struct { diff --git a/src/intra.c b/src/intra.c index 65113441..d5924c01 100644 --- a/src/intra.c +++ b/src/intra.c @@ -603,19 +603,25 @@ static void intra_recon_tb_leaf( const int index = lcu_px.x + lcu_px.y * lcu_width; kvz_pixel *block = NULL; + kvz_pixel *block2 = NULL; switch (color) { case COLOR_Y: block = &lcu->rec.y[index]; break; case COLOR_U: block = &lcu->rec.u[index]; + block2 = &lcu->rec.joint_u[index]; break; case COLOR_V: block = &lcu->rec.v[index]; + block2 = &lcu->rec.joint_v[index]; break; } kvz_pixels_blit(pred, block , width, width, width, lcu_width); + if(color != COLOR_Y && cfg->jccr) { + kvz_pixels_blit(pred, block2, width, width, width, lcu_width); + } } /** @@ -683,7 +689,7 @@ void kvz_intra_recon_cu( } } else { const bool has_luma = mode_luma != -1; - const bool has_chroma = mode_chroma != -1 && x % 8 == 0 && y % 8 == 0; + const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0); // Process a leaf TU. if (has_luma) { intra_recon_tb_leaf(state, x, y, depth, mode_luma, lcu, COLOR_Y); diff --git a/src/kvazaar.h b/src/kvazaar.h index 7b9abc9b..f5d62baa 100644 --- a/src/kvazaar.h +++ b/src/kvazaar.h @@ -29,7 +29,6 @@ #include #include - #ifdef __cplusplus extern "C" { #endif @@ -490,6 +489,8 @@ typedef struct kvz_config int8_t chroma_scale_in[3][17]; int8_t chroma_scale_out[3][17]; + + int8_t jccr; } kvz_config; /** diff --git a/src/search.c b/src/search.c index c874f6c2..b207b3e5 100644 --- a/src/search.c +++ b/src/search.c @@ -80,7 +80,7 @@ static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *fr } } -static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) +static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to, bool joint) { const int luma_z = xy_to_zorder(LCU_WIDTH, x_local, y_local); copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], width); @@ -89,18 +89,22 @@ static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *fr const int chroma_z = xy_to_zorder(LCU_WIDTH_C, x_local >> 1, y_local >> 1); copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], width >> 1); copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], width >> 1); + if (joint) { + copy_coeffs(&from->coeff.joint_uv[chroma_z], &to->coeff.joint_uv[chroma_z], width >> 1); + } } } /** * Copy all non-reference CU data from next level to current level. */ -static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree) +static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree, bool joint) { const int width = LCU_WIDTH >> depth; copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); - copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); + copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth], joint); + } @@ -298,7 +302,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, - const cu_info_t *const pred_cu, + cu_info_t * pred_cu, lcu_t *const lcu) { const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 }; @@ -306,7 +310,9 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); double tr_tree_bits = 0; + double joint_cbcr_tr_tree_bits = 0; double coeff_bits = 0; + double joint_coeff_bits = 0; assert(x_px >= 0 && x_px < LCU_WIDTH); assert(y_px >= 0 && y_px < LCU_WIDTH); @@ -323,13 +329,21 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U)); } + if(state->encoder_control->cfg.jccr) { + joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, pred_cu->joint_cb_cr & 1); + } int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U); ctx = &(state->cabac.ctx.qt_cbf_model_cr[is_set]); if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V)); } + if(state->encoder_control->cfg.jccr) { + ctx = &(state->cabac.ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]); + joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, (pred_cu->joint_cb_cr & 2) >> 1); + } } + if (tr_cu->tr_depth > depth) { int offset = LCU_WIDTH >> (depth + 1); int sum = 0; @@ -342,8 +356,22 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, return sum + tr_tree_bits * state->lambda; } + if (state->encoder_control->cfg.jccr) { + int cbf_mask = cbf_is_set(pred_cu->cbf, depth, COLOR_U) * 2 + cbf_is_set(pred_cu->cbf, depth, COLOR_V) - 1; + const cabac_ctx_t* ctx = NULL; + if (cbf_mask != -1) { + ctx = &(state->cabac.ctx.joint_cb_cr[cbf_mask]); + tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 0); + } + if(pred_cu->joint_cb_cr) { + ctx = &(state->cabac.ctx.joint_cb_cr[(pred_cu->joint_cb_cr & 1) * 2 + ((pred_cu->joint_cb_cr & 2) >> 1) - 1]); + joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 1); + } + } + // Chroma SSD int ssd = 0; + int joint_ssd = 0; if (!state->encoder_control->cfg.lossless) { int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x; int ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index], @@ -353,6 +381,16 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, LCU_WIDTH_C, LCU_WIDTH_C, width); ssd = ssd_u + ssd_v; + + if(state->encoder_control->cfg.jccr) { + int ssd_u_joint = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index], + LCU_WIDTH_C, LCU_WIDTH_C, + width); + int ssd_v_joint = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index], + LCU_WIDTH_C, LCU_WIDTH_C, + width); + joint_ssd = ssd_u_joint + ssd_v_joint; + } } { @@ -361,10 +399,35 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order, 0); coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order, 0); + + if(state->encoder_control->cfg.jccr) { + joint_coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0); + } } + double bits = tr_tree_bits + coeff_bits; - return (double)ssd + bits * state->c_lambda; + double joint_bits = joint_cbcr_tr_tree_bits + joint_coeff_bits; + + double cost = (double)ssd + bits * state->c_lambda; + double joint_cost = (double)joint_ssd + joint_bits * state->c_lambda; + if ((cost < joint_cost || !pred_cu->joint_cb_cr) || !state->encoder_control->cfg.jccr) { + pred_cu->joint_cb_cr = 0; + return cost; + } + cbf_clear(&pred_cu->cbf, depth, COLOR_U); + cbf_clear(&pred_cu->cbf, depth, COLOR_V); + if (pred_cu->joint_cb_cr & 1) { + cbf_set(&pred_cu->cbf, depth, COLOR_U); + } + if (pred_cu->joint_cb_cr & 2) { + cbf_set(&pred_cu->cbf, depth, COLOR_V); + } + int lcu_width = LCU_WIDTH_C; + const int index = lcu_px.x + lcu_px.y * lcu_width; + kvz_pixels_blit(&lcu->rec.joint_u[index], &lcu->rec.u[index], width, width, lcu_width, lcu_width); + kvz_pixels_blit(&lcu->rec.joint_v[index], &lcu->rec.v[index], width, width, lcu_width, lcu_width); + return joint_cost; } @@ -518,6 +581,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, cur_cu->tr_idx = 0; cur_cu->violates_mts_coeff_constraint = 0; cur_cu->mts_last_scan_pos = 0; + cur_cu->joint_cb_cr = 0; // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. @@ -814,7 +878,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth, if (split_cost < cost) { // Copy split modes to this depth. cost = split_cost; - work_tree_copy_up(x_local, y_local, depth, work_tree); + work_tree_copy_up(x_local, y_local, depth, work_tree, state->encoder_control->cfg.jccr); #if KVZ_DEBUG //debug_split = 1; #endif @@ -1027,4 +1091,7 @@ void kvz_search_lcu(encoder_state_t * const state, const int x, const int y, con copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH); copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C); copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C); + if (state->encoder_control->cfg.jccr) { + copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C); + } } diff --git a/src/search.h b/src/search.h index e1225099..a53cbf9c 100644 --- a/src/search.h +++ b/src/search.h @@ -43,7 +43,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state, lcu_t *const lcu); double kvz_cu_rd_cost_chroma(const encoder_state_t *const state, const int x_px, const int y_px, const int depth, - const cu_info_t *const pred_cu, + cu_info_t * pred_cu, lcu_t *const lcu); void kvz_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth); diff --git a/src/search_intra.c b/src/search_intra.c index 30623aa4..09b2b49b 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -319,7 +319,7 @@ static double search_intra_trdepth(encoder_state_t * const state, kvz_intra_recon_cu(state, x_px, y_px, depth, - intra_mode, chroma_mode, + intra_mode, -1, pred_cu, lcu); // TODO: Not sure if this should be 0 or 1 but at least seems to work with 1 @@ -334,15 +334,23 @@ static double search_intra_trdepth(encoder_state_t * const state, } double rd_cost = kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); - if (reconstruct_chroma) { - rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); - } + //if (reconstruct_chroma) { + // rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + //} if (rd_cost < best_rd_cost) { best_rd_cost = rd_cost; best_tr_idx = pred_cu->tr_idx; } } + if(reconstruct_chroma) { + kvz_intra_recon_cu(state, + x_px, y_px, + depth, + -1, chroma_mode, + pred_cu, lcu); + best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + } pred_cu->tr_skip = best_tr_idx == MTS_SKIP; pred_cu->tr_idx = best_tr_idx; nosplit_cost += best_rd_cost; @@ -718,6 +726,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state, pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N); pred_cu.intra.mode = modes[rdo_mode]; pred_cu.intra.mode_chroma = modes[rdo_mode]; + pred_cu.joint_cb_cr = 0; FILL(pred_cu.cbf, 0); // Reset transform split data in lcu.cu for this area. diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index d731eef4..7a6e8e62 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -364,17 +364,17 @@ static INLINE unsigned kvz_math_floor_log2(unsigned value) * */ void kvz_quant_avx2(const encoder_state_t * const state, const coeff_t * __restrict coef, coeff_t * __restrict q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip) + int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip) { const encoder_control_t * const encoder = state->encoder_control; const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; - int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]); + int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; uint32_t log2_tr_width = kvz_math_floor_log2(height); uint32_t log2_tr_height = kvz_math_floor_log2(width); - const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[color]); const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6]; const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); //!< Represents scaling through forward transform const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift); @@ -721,7 +721,7 @@ int kvz_quantize_residual_avx2(encoder_state_t *const state, if (has_coeffs && !early_skip) { // Get quantized residual. (coeff_out -> coeff -> residual) - kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), + kvz_dequant(state, coeff_out, coeff, width, width, color, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y); if (use_trskip) { kvz_itransformskip(state->encoder_control, residual, coeff, width); @@ -771,7 +771,7 @@ int kvz_quantize_residual_avx2(encoder_state_t *const state, * \brief inverse quantize transformed and quantized coefficents * */ -void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type, int8_t transform_skip) +void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,color_t color, int8_t block_type, int8_t transform_skip) { const encoder_control_t * const encoder = state->encoder_control; int32_t shift,add,coeff_q; @@ -779,7 +779,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform - int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]); + int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift); @@ -788,7 +788,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef { uint32_t log2_tr_width = kvz_math_floor_log2(height) + 2; uint32_t log2_tr_height = kvz_math_floor_log2(width) + 2; - int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)(color); const int32_t* dequant_coef = encoder->scaling_list.de_quant_coeff[log2_tr_width - 2][log2_tr_height - 2][scalinglist_type][qp_scaled % 6]; shift += 4; diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index db2ea83c..7d8a6829 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -38,17 +38,17 @@ * */ void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip) + int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip) { const encoder_control_t * const encoder = state->encoder_control; const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2; const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1]; - int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]); + int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; uint32_t log2_tr_width = kvz_math_floor_log2(height); uint32_t log2_tr_height = kvz_math_floor_log2(width); - const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[color]); const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6]; const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); //!< Represents scaling through forward transform const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift); @@ -172,6 +172,214 @@ void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff } } +static INLINE int64_t square(int x) { + return x * (int64_t)x; +} + + +int kvz_quant_cbcr_residual_generic( + encoder_state_t* const state, + const cu_info_t* const cur_cu, + const int width, + const coeff_scan_order_t scan_order, + const int in_stride, const int out_stride, + const kvz_pixel* const u_ref_in, + const kvz_pixel* const v_ref_in, + const kvz_pixel* const u_pred_in, + const kvz_pixel* const v_pred_in, + kvz_pixel* u_rec_out, + kvz_pixel* v_rec_out, + coeff_t* coeff_out, + bool early_skip, + int lmcs_chroma_adj + ) { + ALIGNED(64) int16_t u_residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; + ALIGNED(64) int16_t v_residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; + ALIGNED(64) int16_t u1_residual[2][TR_MAX_WIDTH * TR_MAX_WIDTH]; + ALIGNED(64) int16_t v1_residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; + ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; + + { + int y, x; + for (y = 0; y < width; ++y) { + for (x = 0; x < width; ++x) { + u_residual[x + y * width] = (int16_t)(u_ref_in[x + y * in_stride] - u_pred_in[x + y * in_stride]); + v_residual[x + y * width] = (int16_t)(v_ref_in[x + y * in_stride] - v_pred_in[x + y * in_stride]); + } + } + } + + int best_cbf_mask = -1; + int64_t best_cost = INT64_MAX; + + // This changes the order of the cbf_masks so 2 and 3 are swapped compared with VTM + for(int cbf_mask = cur_cu->type == CU_INTRA ? 1 : 3; cbf_mask < 4; cbf_mask++) { + int64_t d1 = 0; + for (int y = 0; y < width; y++) + { + for (int x = 0; x < width; x++) + { + int cbx = u_residual[x + y * width], crx = v_residual[x + y * width]; + if (cbf_mask == 1) + { + u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx + 2 * crx) / 5); + d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (u1_residual[cbf_mask / 2][x + y * width] >> 1)); + } + else if (cbf_mask == -1) + { + u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx - 2 * crx) / 5); + d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (-u1_residual[cbf_mask / 2][x + y * width] >> 1)); + } + else if (cbf_mask == 3) + { + u1_residual[cbf_mask / 2][x + y * width] = ((cbx + crx) / 2); + d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - u1_residual[cbf_mask / 2][x + y * width]); + } + else if (cbf_mask == -3) + { + u1_residual[cbf_mask / 2][x + y * width] = ((cbx - crx) / 2); + d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx + u1_residual[cbf_mask / 2][x + y * width]); + } + else if (cbf_mask == 2) + { + v1_residual[x + y * width] = ((4 * crx + 2 * cbx) / 5); + d1 += square(cbx - (v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]); + } + else if (cbf_mask == -2) + { + v1_residual[x + y * width] = ((4 * crx - 2 * cbx) / 5); + d1 += square(cbx - (-v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]); + } + else + { + d1 += square(cbx); + //d2 += square(crx); + } + } + } + if (d1 < best_cost) { + best_cbf_mask = cbf_mask; + best_cost = d1; + } + } + + kvz_transform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu); + + if (state->encoder_control->cfg.rdoq_enable && + (width > 4 || !state->encoder_control->cfg.rdoq_skip)) + { + int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; + tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0); + kvz_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, + scan_order, cur_cu->type, tr_depth, cur_cu->cbf); + } + else if (state->encoder_control->cfg.rdoq_enable && false) { + kvz_ts_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, + scan_order); + } + else { + kvz_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, + scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false); + } + + int8_t has_coeffs = 0; + { + int i; + for (i = 0; i < width * width; ++i) { + if (coeff_out[i] != 0) { + has_coeffs = 1; + break; + } + } + } + + if (has_coeffs && !early_skip) { + int y, x; + + // Get quantized residual. (coeff_out -> coeff -> residual) + kvz_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, + cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false); + + kvz_itransform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu); + + + //if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) { + // int y, x; + // int sign, absval; + // int maxAbsclipBD = (1 << KVZ_BIT_DEPTH) - 1; + // for (y = 0; y < width; ++y) { + // for (x = 0; x < width; ++x) { + // residual[x + y * width] = (int16_t)CLIP((int16_t)(-maxAbsclipBD - 1), (int16_t)maxAbsclipBD, residual[x + y * width]); + // sign = residual[x + y * width] >= 0 ? 1 : -1; + // absval = sign * residual[x + y * width]; + // int val = sign * ((absval * lmcs_chroma_adj + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC); + // if (sizeof(kvz_pixel) == 2) // avoid overflow when storing data + // { + // val = CLIP(-32768, 32767, val); + // } + // residual[x + y * width] = (int16_t)val; + // } + // } + //} + + // Get quantized reconstruction. (residual + pred_in -> rec_out) + for (int y = 0; y < width; y++) { + for (int x = 0; x < width; x++) { + if (best_cbf_mask == 1) { + u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; + v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width] >> 1; + } + else if (best_cbf_mask == -1) { + u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; + v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width] >> 1; + } + else if (best_cbf_mask == 3) { + u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; + v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; + } + else if (best_cbf_mask == -3) { + // non-normative clipping to prevent 16-bit overflow + u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x]; + v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width]; + } + else if (best_cbf_mask == 2) { + u_residual[x + y * width] = v1_residual[x + y * width] >> 1; + v_residual[x + y * width] = v1_residual[x + y * width]; + } + else if (best_cbf_mask == -2) { + u_residual[x + y * width] = v1_residual[x + y * width] >> 1; + v_residual[x + y * width] = -v1_residual[x + y * width]; + } + } + } + for (y = 0; y < width; ++y) { + for (x = 0; x < width; ++x) { + int16_t u_val = u_residual[x + y * width] + u_pred_in[x + y * in_stride]; + u_rec_out[x + y * out_stride] = (kvz_pixel)CLIP(0, PIXEL_MAX, u_val); + int16_t v_val = v_residual[x + y * width] + v_pred_in[x + y * in_stride]; + v_rec_out[x + y * out_stride] = (kvz_pixel)CLIP(0, PIXEL_MAX, v_val); + } + } + } + else/* if (rec_out != pred_in)*/ { + // With no coeffs and rec_out == pred_int we skip copying the coefficients + // because the reconstruction is just the prediction. + int y, x; + + for (y = 0; y < width; ++y) { + for (x = 0; x < width; ++x) { + u_rec_out[x + y * out_stride] = u_pred_in[x + y * in_stride]; + v_rec_out[x + y * out_stride] = v_pred_in[x + y * in_stride]; + } + } + } + + + + + return has_coeffs ? best_cbf_mask : 0; +} + /** * \brief Quantize residual and get both the reconstruction and coeffs. * @@ -271,7 +479,7 @@ int kvz_quantize_residual_generic(encoder_state_t *const state, int y, x; // Get quantized residual. (coeff_out -> coeff -> residual) - kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), + kvz_dequant(state, coeff_out, coeff, width, width, color, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y); if (use_trskip) { kvz_itransformskip(state->encoder_control, residual, coeff, width); @@ -326,7 +534,7 @@ int kvz_quantize_residual_generic(encoder_state_t *const state, * \brief inverse quantize transformed and quantized coefficents * */ -void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type, int8_t transform_skip) +void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,color_t color, int8_t block_type, int8_t transform_skip) { const encoder_control_t * const encoder = state->encoder_control; int32_t shift,add,coeff_q; @@ -334,7 +542,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform - int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]); + int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]); qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled; shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift); @@ -343,7 +551,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c { uint32_t log2_tr_width = kvz_math_floor_log2(height) + 2; uint32_t log2_tr_height = kvz_math_floor_log2(width) + 2; - int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]); + int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)(color); const int32_t *dequant_coef = encoder->scaling_list.de_quant_coeff[log2_tr_width -2][log2_tr_height -2][scalinglist_type][qp_scaled%6]; shift += 4; @@ -413,6 +621,7 @@ int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth) bool success = true; success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic); + success &= kvz_strategyselector_register(opaque, "quant_cbcr_residual", "generic", 0, &kvz_quant_cbcr_residual_generic); success &= kvz_strategyselector_register(opaque, "quantize_residual", "generic", 0, &kvz_quantize_residual_generic); success &= kvz_strategyselector_register(opaque, "dequant", "generic", 0, &kvz_dequant_generic); success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "generic", 0, &coeff_abs_sum_generic); diff --git a/src/strategies/generic/quant-generic.h b/src/strategies/generic/quant-generic.h index 442e8238..34269522 100644 --- a/src/strategies/generic/quant-generic.h +++ b/src/strategies/generic/quant-generic.h @@ -36,7 +36,7 @@ int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth); void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip); + int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip); int kvz_quantize_residual_generic(encoder_state_t *const state, const cu_info_t *const cur_cu, const int width, const color_t color, @@ -46,4 +46,21 @@ int kvz_quantize_residual_generic(encoder_state_t *const state, kvz_pixel *rec_out, coeff_t *coeff_out, bool early_skip, int lmcs_chroma_adj); +int kvz_quant_cbcr_residual_generic( + encoder_state_t* const state, + const cu_info_t* const cur_cu, + const int width, + const coeff_scan_order_t scan_order, + const int in_stride, const int out_stride, + const kvz_pixel* const u_ref_in, + const kvz_pixel* const v_ref_in, + const kvz_pixel* const u_pred_in, + const kvz_pixel* const v_pred_in, + kvz_pixel* u_rec_out, + kvz_pixel* v_rec_out, + coeff_t* coeff_out, + bool early_skip, + int lmcs_chroma_adj +); + #endif //STRATEGIES_QUANT_GENERIC_H_ diff --git a/src/strategies/strategies-quant.c b/src/strategies/strategies-quant.c index 0a7e8f91..5a45fdb0 100644 --- a/src/strategies/strategies-quant.c +++ b/src/strategies/strategies-quant.c @@ -27,6 +27,7 @@ // Define function pointers. quant_func *kvz_quant; +quant_cbcr_func *kvz_quant_cbcr_residual; quant_residual_func *kvz_quantize_residual; dequant_func *kvz_dequant; coeff_abs_sum_func *kvz_coeff_abs_sum; diff --git a/src/strategies/strategies-quant.h b/src/strategies/strategies-quant.h index 83dc48eb..55c0bed2 100644 --- a/src/strategies/strategies-quant.h +++ b/src/strategies/strategies-quant.h @@ -34,7 +34,22 @@ // Declare function pointers. typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width, - int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip); + int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip); +typedef unsigned (quant_cbcr_func)( + encoder_state_t* const state, + const cu_info_t* const cur_cu, + const int width, + const coeff_scan_order_t scan_order, + const int in_stride, const int out_stride, + const kvz_pixel* const u_ref_in, + const kvz_pixel* const v_ref_in, + const kvz_pixel* const u_pred_in, + const kvz_pixel* const v_pred_in, + kvz_pixel* u_rec_out, + kvz_pixel* v_rec_out, + coeff_t* coeff_out, + bool early_skip, + int lmcs_chroma_adj); typedef unsigned (quant_residual_func)(encoder_state_t *const state, const cu_info_t *const cur_cu, const int width, const color_t color, const coeff_scan_order_t scan_order, const int use_trskip, @@ -43,13 +58,14 @@ typedef unsigned (quant_residual_func)(encoder_state_t *const state, kvz_pixel *rec_out, coeff_t *coeff_out, bool early_skip, int lmcs_chroma_adj); typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, - int32_t height, int8_t type, int8_t block_type, int8_t transform_skip); + int32_t height, color_t color, int8_t block_type, int8_t transform_skip); typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights); typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length); // Declare function pointers. extern quant_func * kvz_quant; +extern quant_cbcr_func* kvz_quant_cbcr_residual; extern quant_residual_func * kvz_quantize_residual; extern dequant_func *kvz_dequant; extern coeff_abs_sum_func *kvz_coeff_abs_sum; @@ -60,6 +76,7 @@ int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth); #define STRATEGIES_QUANT_EXPORTS \ {"quant", (void**) &kvz_quant}, \ + {"quant_cbcr_residual", (void**) &kvz_quant_cbcr_residual}, \ {"quantize_residual", (void**) &kvz_quantize_residual}, \ {"dequant", (void**) &kvz_dequant}, \ {"coeff_abs_sum", (void**) &kvz_coeff_abs_sum}, \ diff --git a/src/transform.c b/src/transform.c index 79ce6ef0..bb768218 100644 --- a/src/transform.c +++ b/src/transform.c @@ -127,10 +127,10 @@ static void rdpcm(const int width, * \brief Get scaled QP used in quantization * */ -int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const * const chroma_scale) +int32_t kvz_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t const * const chroma_scale) { int32_t qp_scaled = 0; - if(type == 0) { + if(color == 0) { qp_scaled = qp + qp_offset; } else { qp_scaled = CLIP(-qp_offset, 57, qp); @@ -306,13 +306,13 @@ static void quantize_tr_residual(encoder_state_t * const state, { const kvz_config *cfg = &state->encoder_control->cfg; const int32_t shift = color == COLOR_Y ? 0 : 1; - const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift }; + const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift}; // If luma is 4x4, do chroma for the 8x8 luma area when handling the top // left PU because the coordinates are correct. bool handled_elsewhere = color != COLOR_Y && - depth > MAX_DEPTH && - (lcu_px.x % 4 != 0 || lcu_px.y % 4 != 0); + depth == MAX_DEPTH && + (x % 4 != 0 || y % 4 != 0); if (handled_elsewhere) { return; } @@ -367,7 +367,7 @@ static void quantize_tr_residual(encoder_state_t * const state, cfg->trskip_enable && cur_pu->tr_idx == 1; - bool has_coeffs; + uint8_t has_coeffs; int lmcs_chroma_adj = 0; @@ -411,6 +411,25 @@ static void quantize_tr_residual(encoder_state_t * const state, lmcs_chroma_adj); cur_pu->tr_skip = tr_skip; } else { + if(color == COLOR_UV) { + has_coeffs = kvz_quant_cbcr_residual( + state, + cur_pu, + tr_width, + scan_idx, + lcu_width, + lcu_width, + &lcu->ref.u[offset], &lcu->ref.v[offset], + &lcu->rec.joint_u[offset], &lcu->rec.joint_v[offset], + &lcu->rec.joint_u[offset], &lcu->rec.joint_v[offset], + &lcu->coeff.joint_uv[z_index], + early_skip, + lmcs_chroma_adj + ); + cur_pu->joint_cb_cr = has_coeffs; + return; + } + has_coeffs = kvz_quantize_residual(state, cur_pu, tr_width, @@ -425,6 +444,7 @@ static void quantize_tr_residual(encoder_state_t * const state, coeff, early_skip, lmcs_chroma_adj); + } cbf_clear(&cur_pu->cbf, depth, color); @@ -519,6 +539,9 @@ void kvz_quantize_lcu_residual(encoder_state_t * const state, if (chroma) { quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu, early_skip); quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip); + if(state->encoder_control->cfg.jccr && cur_pu->tr_depth == cur_pu->depth){ + quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip); + } } } } diff --git a/src/transform.h b/src/transform.h index 27cbdf91..77bc2607 100644 --- a/src/transform.h +++ b/src/transform.h @@ -53,7 +53,7 @@ void kvz_itransform2d(const encoder_control_t * const encoder, const cu_info_t *tu); -int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale); +int32_t kvz_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale); void kvz_quantize_lcu_residual(encoder_state_t *state, bool luma, diff --git a/tests/test_intra.sh b/tests/test_intra.sh index e7b7c4c9..722fe9fc 100755 --- a/tests/test_intra.sh +++ b/tests/test_intra.sh @@ -12,4 +12,6 @@ valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0 valgrind_test $common_args --alf=full --wpp --threads=1 +valgrind_test $common_args --jccr +valgrind_test $common_args --jccr --rdoq --rd=2 --mts=intra