Merge branch 'joint_cbcr' into 'master'

[jccr] Add joint coding of chroma residual

See merge request cs/ultravideo/vvc/uvg266!6
Joose Sainio 2021-09-06 11:43:06 +03:00
commit 450cbd356c
24 changed files with 463 additions and 86 deletions
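
The commit adds the encoder side of VVC joint coding of chroma residual (JCCR): instead of two chroma coefficient blocks, a single down-mixed block is coded and both chroma residuals are derived from it at reconstruction time. A minimal sketch of that derivation follows; the function name is illustrative, the mode numbering follows the VVC spec (1: only the Cb cbf set, 2: both set, 3: only the Cr cbf set), and csign is effectively +1 here because the commit always writes ph_joint_cbcr_sign_flag as 0.

#include <stdint.h>

/* Derive the Cb and Cr residuals of one block from the joint residual.
 * The ">> 1" is the spec's arithmetic shift (assumed here, as on common
 * compilers, also for negative values). */
static void jccr_derive_chroma_residual(const int16_t *res_joint,
                                        int16_t *res_cb, int16_t *res_cr,
                                        int num_samples, int mode, int csign)
{
  for (int i = 0; i < num_samples; ++i) {
    const int r = res_joint[i];
    switch (mode) {
      case 1:  res_cb[i] = (int16_t)r;                  res_cr[i] = (int16_t)((csign * r) >> 1); break;
      case 2:  res_cb[i] = (int16_t)r;                  res_cr[i] = (int16_t)(csign * r);        break;
      case 3:  res_cb[i] = (int16_t)((csign * r) >> 1); res_cr[i] = (int16_t)r;                  break;
      default: res_cb[i] = 0;                           res_cr[i] = 0;                           break;
    }
  }
}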

View file

@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
#
# Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
ver_major=6
ver_minor=5
ver_minor=6
ver_release=0
# Prevents configure from adding a lot of defines to the CFLAGS

View file

@ -95,7 +95,7 @@ typedef struct
cabac_ctx_t luma_planar_model[2];
cabac_ctx_t multi_ref_line[2];
cabac_ctx_t bdpcm_mode[4];
cabac_ctx_t joint_bc_br[3];
cabac_ctx_t joint_cb_cr[3];
cabac_ctx_t transform_skip_model_luma;
cabac_ctx_t transform_skip_model_chroma;
cabac_ctx_t transform_skip_sig_coeff[3];

View file

@ -180,10 +180,7 @@ int kvz_config_init(kvz_config *cfg)
cfg->fastrd_sampling_on = 0;
cfg->fastrd_accuracy_check_on = 0;
cfg->fastrd_learning_outdir_fn = NULL;
int8_t in[] = { 17, 27, 32, 44 };
int8_t out[] = { 17, 29, 34, 41 };
cfg->chroma_scale_out[0][0] = cfg->chroma_scale_in[0][0] = 17;
cfg->chroma_scale_out[0][1] = cfg->chroma_scale_in[0][1] = 27;
cfg->chroma_scale_out[0][2] = cfg->chroma_scale_in[0][2] = 32;
@ -195,6 +192,8 @@ int kvz_config_init(kvz_config *cfg)
parse_qp_map(cfg, 0);
cfg->jccr = 0;
return 1;
}
@ -1466,6 +1465,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
parse_qp_map(cfg, 0);
return success;
}
else if OPT("jccr") {
cfg->jccr = (bool)atobool(value);
}
else {
return 0;
}
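
Library users reach the new option through the same parser the --jccr flag uses. Below is a small sketch under the assumption that the config is driven through the public kvazaar API (kvz_api_get, config_alloc, config_init, config_parse); the helper name is made up.

#include "kvazaar.h"

/* Build a config with joint chroma residual coding enabled, mirroring --jccr. */
static kvz_config *make_jccr_config(void)
{
  const kvz_api *api = kvz_api_get(8);   /* 8-bit build */
  kvz_config *cfg = api->config_alloc();
  api->config_init(cfg);
  /* "1" goes through atobool() in the parser hunk above and sets cfg->jccr. */
  api->config_parse(cfg, "jccr", "1");
  return cfg;
}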

View file

@ -162,6 +162,8 @@ static const struct option long_options[] = {
{ "fastrd-outdir", required_argument, NULL, 0 },
{ "chroma-qp-in", required_argument, NULL, 0 },
{ "chroma-qp-out", required_argument, NULL, 0 },
{ "jccr", no_argument, NULL, 0 },
{ "no-jccr", no_argument, NULL, 0 },
{0, 0, 0, 0}
};
@ -613,6 +615,8 @@ void print_help(void)
" - both: MTS applied for both intra and inter blocks.\n"
" - implicit: uses implicit MTS. Applies DST7 instead \n"
" of DCT2 to certain intra blocks.\n"
" --(no-)jccr : Joint coding of chroma residual. "
" Requires rdo> = 2. [disabled]\n"
"\n"
/* Word wrap to this width to stay under 80 characters (including ") *************/
"Parallel processing:\n"

View file

@ -455,7 +455,7 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
for (i = 0; i < 3; i++) {
kvz_ctx_init(&cabac->ctx.cu_skip_flag_model[i], QP, INIT_SKIP_FLAG[slice][i], INIT_SKIP_FLAG[3][i]);
kvz_ctx_init(&cabac->ctx.joint_bc_br[i], QP, INIT_JOINT_CB_CR_FLAG[slice][i], INIT_JOINT_CB_CR_FLAG[3][i]);
kvz_ctx_init(&cabac->ctx.joint_cb_cr[i], QP, INIT_JOINT_CB_CR_FLAG[slice][i], INIT_JOINT_CB_CR_FLAG[3][i]);
kvz_ctx_init(&cabac->ctx.transform_skip_sig_coeff[i], QP, INIT_TRANSFORM_SKIP_SIG_COEFF[slice][i], INIT_TRANSFORM_SKIP_SIG_COEFF[3][i]);
kvz_ctx_init(&cabac->ctx.transform_skip_sig[i], QP, INIT_TRANSFORM_SKIP_SIG[slice][i], INIT_TRANSFORM_SKIP_SIG[3][i]);
}

View file

@ -127,15 +127,16 @@ typedef struct {
*/
typedef struct
{
uint8_t type : 2; //!< \brief block type, one of cu_type_t values
uint8_t depth : 3; //!< \brief depth / size of this block
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
uint8_t tr_depth : 3; //!< \brief transform depth
uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped
uint8_t merged : 1; //!< \brief flag to indicate this block is merged
uint8_t merge_idx : 3; //!< \brief merge index
uint8_t tr_skip : 1; //!< \brief transform skip flag
uint8_t tr_idx : 3; //!< \brief transform index
uint8_t type : 2; //!< \brief block type, one of cu_type_t values
uint8_t depth : 3; //!< \brief depth / size of this block
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
uint8_t tr_depth : 3; //!< \brief transform depth
uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped
uint8_t merged : 1; //!< \brief flag to indicate this block is merged
uint8_t merge_idx : 3; //!< \brief merge index
uint8_t tr_skip : 1; //!< \brief transform skip flag
uint8_t tr_idx : 3; //!< \brief transform index
uint8_t joint_cb_cr : 2; //!< \brief joint chroma residual coding
uint16_t cbf;
@ -299,6 +300,7 @@ typedef ALIGNED(8) struct {
coeff_t y[LCU_LUMA_SIZE];
coeff_t u[LCU_CHROMA_SIZE];
coeff_t v[LCU_CHROMA_SIZE];
coeff_t joint_uv[LCU_CHROMA_SIZE];
} lcu_coeff_t;
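
The new 2-bit joint_cb_cr field doubles as the chroma cbf mask of the joint mode: bit 0 stands in for the Cb cbf and bit 1 for the Cr cbf, which is how encode_transform_coeff derives cb_flag_u and cb_flag_v later in this diff. A sketch of that reading, with made-up helper names:

#include <stdint.h>

/* Chroma cbf flags implied by a non-zero joint_cb_cr mask
 * (0 means joint coding was not chosen for this TU). */
static inline int jccr_cbf_cb(uint8_t joint_cb_cr) { return joint_cb_cr & 1; }
static inline int jccr_cbf_cr(uint8_t joint_cb_cr) { return (joint_cb_cr & 2) >> 1; }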

View file

@ -328,31 +328,41 @@ void kvz_encode_last_significant_xy(cabac_data_t * const cabac,
}
}
static void encode_chroma_tu(encoder_state_t* const state, int x, int y, int depth, const uint8_t width_c, const cu_info_t* cur_pu, int8_t* scan_idx, lcu_coeff_t* coeff) {
static void encode_chroma_tu(encoder_state_t* const state, int x, int y, int depth, const uint8_t width_c, const cu_info_t* cur_pu, int8_t* scan_idx, lcu_coeff_t* coeff, uint8_t joint_chroma) {
int x_local = (x >> 1) % LCU_WIDTH_C;
int y_local = (y >> 1) % LCU_WIDTH_C;
cabac_data_t* const cabac = &state->cabac;
*scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode_chroma, depth);
if(!joint_chroma){
const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) {
if(state->encoder_control->cfg.trskip_enable && width_c == 4){
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
// HEVC only supports transform_skip for Luma
// TODO: transform skip for chroma blocks
CABAC_BIN(cabac, 0, "transform_skip_flag");
if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) {
if(state->encoder_control->cfg.trskip_enable && width_c == 4){
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
// HEVC only supports transform_skip for Luma
// TODO: transform skip for chroma blocks
CABAC_BIN(cabac, 0, "transform_skip_flag");
}
kvz_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, 1, *scan_idx, NULL, false);
}
kvz_encode_coeff_nxn(state, &state->cabac, coeff_u, width_c, 1, *scan_idx, NULL, false);
}
if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) {
if (cbf_is_set(cur_pu->cbf, depth, COLOR_V)) {
if (state->encoder_control->cfg.trskip_enable && width_c == 4) {
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
CABAC_BIN(cabac, 0, "transform_skip_flag");
}
kvz_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, 2, *scan_idx, NULL, false);
}
}
else {
const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
if (state->encoder_control->cfg.trskip_enable && width_c == 4) {
cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
CABAC_BIN(cabac, 0, "transform_skip_flag");
}
kvz_encode_coeff_nxn(state, &state->cabac, coeff_v, width_c, 2, *scan_idx, NULL, false);
kvz_encode_coeff_nxn(state, &state->cabac, coeff_uv, width_c, 2, *scan_idx, NULL, false);
}
}
@ -370,16 +380,6 @@ static void encode_transform_unit(encoder_state_t * const state,
int8_t scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth);
if (state->encoder_control->chroma_format != KVZ_CSP_400) {
// joint_cb_cr
/*
if (type == 2 && cbf_mask) {
cabac->cur_ctx = &(cabac->ctx.joint_bc_br[0]);
CABAC_BIN(cabac, 0, "joint_cb_cr");
}
*/
}
int cbf_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y);
if (cbf_y && !only_chroma) {
@ -410,6 +410,7 @@ static void encode_transform_unit(encoder_state_t * const state,
}
}
bool joint_chroma = cur_pu->joint_cb_cr != 0;
if (depth == MAX_DEPTH) {
// For size 4x4 luma transform the corresponding chroma transforms are
// also of size 4x4 covering 8x8 luma pixels. The residual is coded in
@ -428,8 +429,8 @@ static void encode_transform_unit(encoder_state_t * const state,
bool chroma_cbf_set = cbf_is_set(cur_pu->cbf, depth, COLOR_U) ||
cbf_is_set(cur_pu->cbf, depth, COLOR_V);
if (chroma_cbf_set) {
encode_chroma_tu(state, x, y, depth, width_c, cur_pu, &scan_idx, coeff);
if (chroma_cbf_set || joint_chroma) {
encode_chroma_tu(state, x, y, depth, width_c, cur_pu, &scan_idx, coeff, joint_chroma);
}
}
@ -483,8 +484,8 @@ static void encode_transform_coeff(encoder_state_t * const state,
const int cb_flag_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y);
const int cb_flag_u = cbf_is_set(cur_cu->cbf, depth, COLOR_U);
const int cb_flag_v = cbf_is_set(cur_cu->cbf, depth, COLOR_V);
const int cb_flag_u = cur_pu->joint_cb_cr ? cur_pu->joint_cb_cr & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U);
const int cb_flag_v = cur_pu->joint_cb_cr ? ((cur_pu->joint_cb_cr & 2) >> 1) : cbf_is_set(cur_cu->cbf, depth, COLOR_V);
// The split_transform_flag is not signaled when:
// - transform size is greater than 32 (depth == 0)
@ -519,7 +520,7 @@ static void encode_transform_coeff(encoder_state_t * const state,
}
if (true) {
cabac->cur_ctx = &(cabac->ctx.qt_cbf_model_cr[cb_flag_u ? 1 : 0]);
CABAC_BIN(cabac, cb_flag_v, "cbf_cr");
CABAC_BIN(cabac, cb_flag_v, "cbf_cr");
}
}
}
@ -570,7 +571,10 @@ static void encode_transform_coeff(encoder_state_t * const state,
state->must_code_qp_delta = false;
}
if((cb_flag_u || cb_flag_v ) && (depth != 4 || only_chroma) && state->encoder_control->cfg.jccr) {
cabac->cur_ctx = &cabac->ctx.joint_cb_cr[cb_flag_u * 2 + cb_flag_v - 1];
CABAC_BIN(cabac, cur_pu->joint_cb_cr != 0, "tu_joint_cbcr_residual_flag");
}
encode_transform_unit(state, x, y, depth, only_chroma, coeff);
}
}
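
The context for tu_joint_cbcr_residual_flag is picked from the signalled chroma cbf pair, so the three joint_cb_cr contexts added to the CABAC struct correspond to (cbf_cb, cbf_cr) = (0,1), (1,0) and (1,1); the flag is not written when both cbfs are zero, which keeps the index non-negative. A sketch of the index computation, with an illustrative name:

/* Context index for tu_joint_cbcr_residual_flag; at least one of the two
 * chroma cbf flags must be set before this is evaluated. */
static inline int joint_cbcr_ctx(int cbf_cb, int cbf_cr)
{
  return cbf_cb * 2 + cbf_cr - 1;   /* (0,1) -> 0, (1,0) -> 1, (1,1) -> 2 */
}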

View file

@ -614,7 +614,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
if (encoder->chroma_format != KVZ_CSP_400) {
WRITE_U(stream, 0, 1, "sps_joint_cbcr_enabled_flag");
WRITE_U(stream, encoder->cfg.jccr, 1, "sps_joint_cbcr_enabled_flag");
WRITE_U(stream, 1, 1, "same_qp_table_for_chroma");
for (int i = 0; i < encoder->cfg.num_used_table; i++) {
@ -1265,6 +1265,11 @@ void kvz_encoder_state_write_bitstream_slice_header(
WRITE_UE(stream, state->frame->slicetype, "sh_slice_type");
}
if (encoder->cfg.jccr) {
WRITE_U(stream, 0, 1, "ph_joint_cbcr_sign_flag");
}
if (state->frame->pictype == KVZ_NAL_CRA_NUT || state->frame->pictype == KVZ_NAL_IDR_N_LP || state->frame->pictype == KVZ_NAL_IDR_W_RADL || state->frame->pictype == KVZ_NAL_GDR_NUT)
{
WRITE_U(stream, 0, 1, "sh_no_output_of_prior_pics_flag");
@ -1322,7 +1327,6 @@ void kvz_encoder_state_write_bitstream_slice_header(
int slice_qp_delta = state->frame->QP - encoder->cfg.qp;
WRITE_SE(stream, slice_qp_delta, "sh_qp_delta");
if (encoder->cfg.sao_type) {
WRITE_U(stream, 1, 1, "sh_sao_luma_flag");
if (encoder->chroma_format != KVZ_CSP_400) {

View file

@ -65,6 +65,7 @@
#define RESHAPE_SIGNAL_HLG 2
#define RESHAPE_SIGNAL_NULL 100
/**
* \defgroup Bitstream
* HEVC bitstream coding
@ -327,7 +328,7 @@ typedef int16_t coeff_t;
#define MAX_TR_DYNAMIC_RANGE 15
//Constants
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V } color_t;
typedef enum { COLOR_Y = 0, COLOR_U, COLOR_V, COLOR_UV } color_t;
// Hardware data (abstraction of defines). Extend for other compilers

View file

@ -220,6 +220,8 @@ hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size)
yuv->y = (int16_t *)malloc(luma_size * sizeof(*yuv->y));
yuv->u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
yuv->v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
yuv->joint_u = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->u));
yuv->joint_v = (int16_t *)malloc(luma_size / 2 * sizeof(*yuv->v));
yuv->size = luma_size;
return yuv;
@ -230,6 +232,8 @@ void kvz_hi_prec_buf_t_free(hi_prec_buf_t * yuv)
free(yuv->y);
free(yuv->u);
free(yuv->v);
free(yuv->joint_v);
free(yuv->joint_u);
free(yuv);
}

View file

@ -36,6 +36,8 @@ typedef struct {
kvz_pixel y[LCU_LUMA_SIZE];
kvz_pixel u[LCU_CHROMA_SIZE];
kvz_pixel v[LCU_CHROMA_SIZE];
kvz_pixel joint_u[LCU_CHROMA_SIZE];
kvz_pixel joint_v[LCU_CHROMA_SIZE];
enum kvz_chroma_format chroma_format;
} lcu_yuv_t;
@ -44,6 +46,8 @@ typedef struct {
int16_t *y;
int16_t *u;
int16_t *v;
int16_t *joint_u;
int16_t *joint_v;
} hi_prec_buf_t;
typedef struct {

View file

@ -603,19 +603,25 @@ static void intra_recon_tb_leaf(
const int index = lcu_px.x + lcu_px.y * lcu_width;
kvz_pixel *block = NULL;
kvz_pixel *block2 = NULL;
switch (color) {
case COLOR_Y:
block = &lcu->rec.y[index];
break;
case COLOR_U:
block = &lcu->rec.u[index];
block2 = &lcu->rec.joint_u[index];
break;
case COLOR_V:
block = &lcu->rec.v[index];
block2 = &lcu->rec.joint_v[index];
break;
}
kvz_pixels_blit(pred, block , width, width, width, lcu_width);
if(color != COLOR_Y && cfg->jccr) {
kvz_pixels_blit(pred, block2, width, width, width, lcu_width);
}
}
/**
@ -683,7 +689,7 @@ void kvz_intra_recon_cu(
}
} else {
const bool has_luma = mode_luma != -1;
const bool has_chroma = mode_chroma != -1 && x % 8 == 0 && y % 8 == 0;
const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0);
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, x, y, depth, mode_luma, lcu, COLOR_Y);

View file

@ -29,7 +29,6 @@
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
@ -490,6 +489,8 @@ typedef struct kvz_config
int8_t chroma_scale_in[3][17];
int8_t chroma_scale_out[3][17];
int8_t jccr;
} kvz_config;
/**

View file

@ -80,7 +80,7 @@ static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *fr
}
}
static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to)
static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to, bool joint)
{
const int luma_z = xy_to_zorder(LCU_WIDTH, x_local, y_local);
copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], width);
@ -89,18 +89,22 @@ static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *fr
const int chroma_z = xy_to_zorder(LCU_WIDTH_C, x_local >> 1, y_local >> 1);
copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], width >> 1);
copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], width >> 1);
if (joint) {
copy_coeffs(&from->coeff.joint_uv[chroma_z], &to->coeff.joint_uv[chroma_z], width >> 1);
}
}
}
/**
* Copy all non-reference CU data from next level to current level.
*/
static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree)
static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree, bool joint)
{
const int width = LCU_WIDTH >> depth;
copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]);
copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth], joint);
}
@ -298,7 +302,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
cu_info_t * pred_cu,
lcu_t *const lcu)
{
const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 };
@ -306,7 +310,9 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
double tr_tree_bits = 0;
double joint_cbcr_tr_tree_bits = 0;
double coeff_bits = 0;
double joint_coeff_bits = 0;
assert(x_px >= 0 && x_px < LCU_WIDTH);
assert(y_px >= 0 && y_px < LCU_WIDTH);
@ -323,13 +329,21 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
}
if(state->encoder_control->cfg.jccr) {
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, pred_cu->joint_cb_cr & 1);
}
int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_U);
ctx = &(state->cabac.ctx.qt_cbf_model_cr[is_set]);
if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V));
}
if(state->encoder_control->cfg.jccr) {
ctx = &(state->cabac.ctx.qt_cbf_model_cr[pred_cu->joint_cb_cr & 1]);
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, (pred_cu->joint_cb_cr & 2) >> 1);
}
}
if (tr_cu->tr_depth > depth) {
int offset = LCU_WIDTH >> (depth + 1);
int sum = 0;
@ -342,8 +356,22 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
return sum + tr_tree_bits * state->lambda;
}
if (state->encoder_control->cfg.jccr) {
int cbf_mask = cbf_is_set(pred_cu->cbf, depth, COLOR_U) * 2 + cbf_is_set(pred_cu->cbf, depth, COLOR_V) - 1;
const cabac_ctx_t* ctx = NULL;
if (cbf_mask != -1) {
ctx = &(state->cabac.ctx.joint_cb_cr[cbf_mask]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 0);
}
if(pred_cu->joint_cb_cr) {
ctx = &(state->cabac.ctx.joint_cb_cr[(pred_cu->joint_cb_cr & 1) * 2 + ((pred_cu->joint_cb_cr & 2) >> 1) - 1]);
joint_cbcr_tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 1);
}
}
// Chroma SSD
int ssd = 0;
int joint_ssd = 0;
if (!state->encoder_control->cfg.lossless) {
int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x;
int ssd_u = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index],
@ -353,6 +381,16 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
LCU_WIDTH_C, LCU_WIDTH_C,
width);
ssd = ssd_u + ssd_v;
if(state->encoder_control->cfg.jccr) {
int ssd_u_joint = kvz_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.joint_u[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
int ssd_v_joint = kvz_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.joint_v[index],
LCU_WIDTH_C, LCU_WIDTH_C,
width);
joint_ssd = ssd_u_joint + ssd_v_joint;
}
}
{
@ -361,10 +399,35 @@ double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.u[index], width, 2, scan_order, 0);
coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.v[index], width, 2, scan_order, 0);
if(state->encoder_control->cfg.jccr) {
joint_coeff_bits += kvz_get_coeff_cost(state, &lcu->coeff.joint_uv[index], width, 2, scan_order, 0);
}
}
double bits = tr_tree_bits + coeff_bits;
return (double)ssd + bits * state->c_lambda;
double joint_bits = joint_cbcr_tr_tree_bits + joint_coeff_bits;
double cost = (double)ssd + bits * state->c_lambda;
double joint_cost = (double)joint_ssd + joint_bits * state->c_lambda;
if ((cost < joint_cost || !pred_cu->joint_cb_cr) || !state->encoder_control->cfg.jccr) {
pred_cu->joint_cb_cr = 0;
return cost;
}
cbf_clear(&pred_cu->cbf, depth, COLOR_U);
cbf_clear(&pred_cu->cbf, depth, COLOR_V);
if (pred_cu->joint_cb_cr & 1) {
cbf_set(&pred_cu->cbf, depth, COLOR_U);
}
if (pred_cu->joint_cb_cr & 2) {
cbf_set(&pred_cu->cbf, depth, COLOR_V);
}
int lcu_width = LCU_WIDTH_C;
const int index = lcu_px.x + lcu_px.y * lcu_width;
kvz_pixels_blit(&lcu->rec.joint_u[index], &lcu->rec.u[index], width, width, lcu_width, lcu_width);
kvz_pixels_blit(&lcu->rec.joint_v[index], &lcu->rec.v[index], width, width, lcu_width, lcu_width);
return joint_cost;
}
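
kvz_cu_rd_cost_chroma now prices both chroma codings and keeps the cheaper one: the separate-plane candidate and the joint candidate each get an SSD plus lambda-weighted bit cost, and when the joint one wins the cbf flags are rewritten from joint_cb_cr and the joint reconstruction is blitted over the regular one. A reduced sketch of the comparison itself (types and names illustrative):

/* RD terms of one chroma coding candidate. */
typedef struct {
  double ssd;    /* chroma SSD of the reconstruction */
  double bits;   /* cbf, flag and coefficient bits */
} chroma_rd_t;

/* Return the cheaper cost and report whether the joint candidate won. */
static double cheaper_chroma_coding(chroma_rd_t separate, chroma_rd_t joint,
                                    double c_lambda, int *use_joint)
{
  const double cost       = separate.ssd + separate.bits * c_lambda;
  const double joint_cost = joint.ssd + joint.bits * c_lambda;
  *use_joint = joint_cost < cost;
  return *use_joint ? joint_cost : cost;
}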
@ -518,6 +581,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->tr_idx = 0;
cur_cu->violates_mts_coeff_constraint = 0;
cur_cu->mts_last_scan_pos = 0;
cur_cu->joint_cb_cr = 0;
// If the CU is completely inside the frame at this depth, search for
// prediction modes at this depth.
@ -814,7 +878,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
if (split_cost < cost) {
// Copy split modes to this depth.
cost = split_cost;
work_tree_copy_up(x_local, y_local, depth, work_tree);
work_tree_copy_up(x_local, y_local, depth, work_tree, state->encoder_control->cfg.jccr);
#if KVZ_DEBUG
//debug_split = 1;
#endif
@ -1027,4 +1091,7 @@ void kvz_search_lcu(encoder_state_t * const state, const int x, const int y, con
copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH);
copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C);
copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C);
if (state->encoder_control->cfg.jccr) {
copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C);
}
}

View file

@ -43,7 +43,7 @@ double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
lcu_t *const lcu);
double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
const int x_px, const int y_px, const int depth,
const cu_info_t *const pred_cu,
cu_info_t * pred_cu,
lcu_t *const lcu);
void kvz_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth);

View file

@ -319,7 +319,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
kvz_intra_recon_cu(state,
x_px, y_px,
depth,
intra_mode, chroma_mode,
intra_mode, -1,
pred_cu, lcu);
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
@ -334,15 +334,23 @@ static double search_intra_trdepth(encoder_state_t * const state,
}
double rd_cost = kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
if (reconstruct_chroma) {
rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
}
//if (reconstruct_chroma) {
// rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
//}
if (rd_cost < best_rd_cost) {
best_rd_cost = rd_cost;
best_tr_idx = pred_cu->tr_idx;
}
}
if(reconstruct_chroma) {
kvz_intra_recon_cu(state,
x_px, y_px,
depth,
-1, chroma_mode,
pred_cu, lcu);
best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
}
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
pred_cu->tr_idx = best_tr_idx;
nosplit_cost += best_rd_cost;
@ -718,6 +726,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N);
pred_cu.intra.mode = modes[rdo_mode];
pred_cu.intra.mode_chroma = modes[rdo_mode];
pred_cu.joint_cb_cr = 0;
FILL(pred_cu.cbf, 0);
// Reset transform split data in lcu.cu for this area.

View file

@ -364,17 +364,17 @@ static INLINE unsigned kvz_math_floor_log2(unsigned value)
*
*/
void kvz_quant_avx2(const encoder_state_t * const state, const coeff_t * __restrict coef, coeff_t * __restrict q_coef, int32_t width,
int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip)
int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip)
{
const encoder_control_t * const encoder = state->encoder_control;
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
uint32_t log2_tr_width = kvz_math_floor_log2(height);
uint32_t log2_tr_height = kvz_math_floor_log2(width);
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[color]);
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_width + log2_tr_height) >> 1); //!< Represents scaling through forward transform
const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift);
@ -721,7 +721,7 @@ int kvz_quantize_residual_avx2(encoder_state_t *const state,
if (has_coeffs && !early_skip) {
// Get quantized residual. (coeff_out -> coeff -> residual)
kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)),
kvz_dequant(state, coeff_out, coeff, width, width, color,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
if (use_trskip) {
kvz_itransformskip(state->encoder_control, residual, coeff, width);
@ -771,7 +771,7 @@ int kvz_quantize_residual_avx2(encoder_state_t *const state,
* \brief inverse quantize transformed and quantized coefficents
*
*/
void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type, int8_t transform_skip)
void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,color_t color, int8_t block_type, int8_t transform_skip)
{
const encoder_control_t * const encoder = state->encoder_control;
int32_t shift,add,coeff_q;
@ -779,7 +779,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift);
@ -788,7 +788,7 @@ void kvz_dequant_avx2(const encoder_state_t * const state, coeff_t *q_coef, coef
{
uint32_t log2_tr_width = kvz_math_floor_log2(height) + 2;
uint32_t log2_tr_height = kvz_math_floor_log2(width) + 2;
int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)(color);
const int32_t* dequant_coef = encoder->scaling_list.de_quant_coeff[log2_tr_width - 2][log2_tr_height - 2][scalinglist_type][qp_scaled % 6];
shift += 4;

View file

@ -38,17 +38,17 @@
*
*/
void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip)
int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip)
{
const encoder_control_t * const encoder = state->encoder_control;
const uint32_t log2_block_size = kvz_g_convert_to_bit[width] + 2;
const uint32_t * const scan = kvz_g_sig_last_scan[scan_idx][log2_block_size - 1];
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth - 8) * 6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
uint32_t log2_tr_width = kvz_math_floor_log2(height);
uint32_t log2_tr_height = kvz_math_floor_log2(width);
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
const int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[color]);
const int32_t *quant_coeff = encoder->scaling_list.quant_coeff[log2_tr_width][log2_tr_height][scalinglist_type][qp_scaled % 6];
const int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((log2_tr_height + log2_tr_width) >> 1); //!< Represents scaling through forward transform
const int32_t q_bits = QUANT_SHIFT + qp_scaled / 6 + (transform_skip ? 0 : transform_shift);
@ -172,6 +172,214 @@ void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff
}
}
static INLINE int64_t square(int x) {
return x * (int64_t)x;
}
int kvz_quant_cbcr_residual_generic(
encoder_state_t* const state,
const cu_info_t* const cur_cu,
const int width,
const coeff_scan_order_t scan_order,
const int in_stride, const int out_stride,
const kvz_pixel* const u_ref_in,
const kvz_pixel* const v_ref_in,
const kvz_pixel* const u_pred_in,
const kvz_pixel* const v_pred_in,
kvz_pixel* u_rec_out,
kvz_pixel* v_rec_out,
coeff_t* coeff_out,
bool early_skip,
int lmcs_chroma_adj
) {
ALIGNED(64) int16_t u_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) int16_t v_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) int16_t u1_residual[2][TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) int16_t v1_residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
ALIGNED(64) coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
{
int y, x;
for (y = 0; y < width; ++y) {
for (x = 0; x < width; ++x) {
u_residual[x + y * width] = (int16_t)(u_ref_in[x + y * in_stride] - u_pred_in[x + y * in_stride]);
v_residual[x + y * width] = (int16_t)(v_ref_in[x + y * in_stride] - v_pred_in[x + y * in_stride]);
}
}
}
int best_cbf_mask = -1;
int64_t best_cost = INT64_MAX;
// This changes the order of the cbf_masks so 2 and 3 are swapped compared with VTM
for(int cbf_mask = cur_cu->type == CU_INTRA ? 1 : 3; cbf_mask < 4; cbf_mask++) {
int64_t d1 = 0;
for (int y = 0; y < width; y++)
{
for (int x = 0; x < width; x++)
{
int cbx = u_residual[x + y * width], crx = v_residual[x + y * width];
if (cbf_mask == 1)
{
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx + 2 * crx) / 5);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (u1_residual[cbf_mask / 2][x + y * width] >> 1));
}
else if (cbf_mask == -1)
{
u1_residual[cbf_mask / 2][x + y * width] = ((4 * cbx - 2 * crx) / 5);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - (-u1_residual[cbf_mask / 2][x + y * width] >> 1));
}
else if (cbf_mask == 3)
{
u1_residual[cbf_mask / 2][x + y * width] = ((cbx + crx) / 2);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx - u1_residual[cbf_mask / 2][x + y * width]);
}
else if (cbf_mask == -3)
{
u1_residual[cbf_mask / 2][x + y * width] = ((cbx - crx) / 2);
d1 += square(cbx - u1_residual[cbf_mask / 2][x + y * width]) + square(crx + u1_residual[cbf_mask / 2][x + y * width]);
}
else if (cbf_mask == 2)
{
v1_residual[x + y * width] = ((4 * crx + 2 * cbx) / 5);
d1 += square(cbx - (v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
}
else if (cbf_mask == -2)
{
v1_residual[x + y * width] = ((4 * crx - 2 * cbx) / 5);
d1 += square(cbx - (-v1_residual[x + y * width] >> 1)) + square(crx - v1_residual[x + y * width]);
}
else
{
d1 += square(cbx);
//d2 += square(crx);
}
}
}
if (d1 < best_cost) {
best_cbf_mask = cbf_mask;
best_cost = d1;
}
}
kvz_transform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
if (state->encoder_control->cfg.rdoq_enable &&
(width > 4 || !state->encoder_control->cfg.rdoq_skip))
{
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
kvz_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, tr_depth, cur_cu->cbf);
}
else if (state->encoder_control->cfg.rdoq_enable && false) {
kvz_ts_rdoq(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
scan_order);
}
else {
kvz_quant(state, coeff, coeff_out, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
}
int8_t has_coeffs = 0;
{
int i;
for (i = 0; i < width * width; ++i) {
if (coeff_out[i] != 0) {
has_coeffs = 1;
break;
}
}
}
if (has_coeffs && !early_skip) {
int y, x;
// Get quantized residual. (coeff_out -> coeff -> residual)
kvz_dequant(state, coeff_out, coeff, width, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && false);
kvz_itransform2d(state->encoder_control, best_cbf_mask == 2 ? v1_residual : u1_residual[best_cbf_mask / 2], coeff, width, best_cbf_mask == 2 ? COLOR_V : COLOR_U, cur_cu);
//if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.enableChromaAdj && color != COLOR_Y) {
// int y, x;
// int sign, absval;
// int maxAbsclipBD = (1 << KVZ_BIT_DEPTH) - 1;
// for (y = 0; y < width; ++y) {
// for (x = 0; x < width; ++x) {
// residual[x + y * width] = (int16_t)CLIP((int16_t)(-maxAbsclipBD - 1), (int16_t)maxAbsclipBD, residual[x + y * width]);
// sign = residual[x + y * width] >= 0 ? 1 : -1;
// absval = sign * residual[x + y * width];
// int val = sign * ((absval * lmcs_chroma_adj + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
// if (sizeof(kvz_pixel) == 2) // avoid overflow when storing data
// {
// val = CLIP(-32768, 32767, val);
// }
// residual[x + y * width] = (int16_t)val;
// }
// }
//}
// Get quantized reconstruction. (residual + pred_in -> rec_out)
for (int y = 0; y < width; y++) {
for (int x = 0; x < width; x++) {
if (best_cbf_mask == 1) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
}
else if (best_cbf_mask == -1) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width] >> 1;
}
else if (best_cbf_mask == 3) {
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
v_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width];
}
else if (best_cbf_mask == -3) {
// non-normative clipping to prevent 16-bit overflow
u_residual[x + y * width] = u1_residual[best_cbf_mask / 2][x + y * width]; // == -32768 && sizeof(Pel) == 2) ? 32767 : -v1_residual[best_cbf_mask][x];
v_residual[x + y * width] = -u1_residual[best_cbf_mask / 2][x + y * width];
}
else if (best_cbf_mask == 2) {
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
v_residual[x + y * width] = v1_residual[x + y * width];
}
else if (best_cbf_mask == -2) {
u_residual[x + y * width] = v1_residual[x + y * width] >> 1;
v_residual[x + y * width] = -v1_residual[x + y * width];
}
}
}
for (y = 0; y < width; ++y) {
for (x = 0; x < width; ++x) {
int16_t u_val = u_residual[x + y * width] + u_pred_in[x + y * in_stride];
u_rec_out[x + y * out_stride] = (kvz_pixel)CLIP(0, PIXEL_MAX, u_val);
int16_t v_val = v_residual[x + y * width] + v_pred_in[x + y * in_stride];
v_rec_out[x + y * out_stride] = (kvz_pixel)CLIP(0, PIXEL_MAX, v_val);
}
}
}
else/* if (rec_out != pred_in)*/ {
// With no coeffs and rec_out == pred_int we skip copying the coefficients
// because the reconstruction is just the prediction.
int y, x;
for (y = 0; y < width; ++y) {
for (x = 0; x < width; ++x) {
u_rec_out[x + y * out_stride] = u_pred_in[x + y * in_stride];
v_rec_out[x + y * out_stride] = v_pred_in[x + y * in_stride];
}
}
}
return has_coeffs ? best_cbf_mask : 0;
}
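
For every candidate cbf_mask the loop above down-mixes the Cb and Cr residuals into one block and measures the squared error of rebuilding both planes from it; since ph_joint_cbcr_sign_flag is always written as 0, only the positive-sign variants are reachable. A per-sample sketch of that search using the same formulas; the function name is made up and the mask-0 branch is shown only for completeness, it is not tried by the loop.

#include <stdint.h>

static int64_t sq(int64_t v) { return v * v; }

/* Distortion of reconstructing one (cb, cr) residual pair from the joint
 * residual of the given cbf_mask. */
static int64_t jccr_mask_distortion(int cb, int cr, int cbf_mask)
{
  int joint, rec_cb, rec_cr;
  switch (cbf_mask) {
    case 1:  joint = (4 * cb + 2 * cr) / 5; rec_cb = joint;      rec_cr = joint >> 1; break;
    case 2:  joint = (4 * cr + 2 * cb) / 5; rec_cb = joint >> 1; rec_cr = joint;      break;
    case 3:  joint = (cb + cr) / 2;         rec_cb = joint;      rec_cr = joint;      break;
    default: return sq(cb) + sq(cr);        /* cost of coding no joint residual */
  }
  return sq(cb - rec_cb) + sq(cr - rec_cr);
}

The mask with the smallest summed distortion is the only one that then gets transformed and quantized.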
/**
* \brief Quantize residual and get both the reconstruction and coeffs.
*
@ -271,7 +479,7 @@ int kvz_quantize_residual_generic(encoder_state_t *const state,
int y, x;
// Get quantized residual. (coeff_out -> coeff -> residual)
kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)),
kvz_dequant(state, coeff_out, coeff, width, width, color,
cur_cu->type, cur_cu->tr_idx == MTS_SKIP && color == COLOR_Y);
if (use_trskip) {
kvz_itransformskip(state->encoder_control, residual, coeff, width);
@ -326,7 +534,7 @@ int kvz_quantize_residual_generic(encoder_state_t *const state,
* \brief inverse quantize transformed and quantized coefficents
*
*/
void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,int8_t type, int8_t block_type, int8_t transform_skip)
void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height,color_t color, int8_t block_type, int8_t transform_skip)
{
const encoder_control_t * const encoder = state->encoder_control;
int32_t shift,add,coeff_q;
@ -334,7 +542,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
int32_t transform_shift = MAX_TR_DYNAMIC_RANGE - encoder->bitdepth - ((kvz_math_floor_log2(width) + kvz_math_floor_log2(height)) >> 1); // Represents scaling through forward transform
int32_t qp_scaled = kvz_get_scaled_qp(type, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
int32_t qp_scaled = kvz_get_scaled_qp(color, state->qp, (encoder->bitdepth-8)*6, encoder->qp_map[0]);
qp_scaled = transform_skip ? MAX(qp_scaled, 4 + 6 * MIN_QP_PRIME_TS) : qp_scaled;
shift = 20 - QUANT_SHIFT - (transform_skip ? 0 : transform_shift);
@ -343,7 +551,7 @@ void kvz_dequant_generic(const encoder_state_t * const state, coeff_t *q_coef, c
{
uint32_t log2_tr_width = kvz_math_floor_log2(height) + 2;
uint32_t log2_tr_height = kvz_math_floor_log2(width) + 2;
int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)("\0\3\1\2"[type]);
int32_t scalinglist_type = (block_type == CU_INTRA ? 0 : 3) + (int8_t)(color);
const int32_t *dequant_coef = encoder->scaling_list.de_quant_coeff[log2_tr_width -2][log2_tr_height -2][scalinglist_type][qp_scaled%6];
shift += 4;
@ -413,6 +621,7 @@ int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth)
bool success = true;
success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic);
success &= kvz_strategyselector_register(opaque, "quant_cbcr_residual", "generic", 0, &kvz_quant_cbcr_residual_generic);
success &= kvz_strategyselector_register(opaque, "quantize_residual", "generic", 0, &kvz_quantize_residual_generic);
success &= kvz_strategyselector_register(opaque, "dequant", "generic", 0, &kvz_dequant_generic);
success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "generic", 0, &coeff_abs_sum_generic);

View file

@ -36,7 +36,7 @@
int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth);
void kvz_quant_generic(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip);
int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip);
int kvz_quantize_residual_generic(encoder_state_t *const state,
const cu_info_t *const cur_cu, const int width, const color_t color,
@ -46,4 +46,21 @@ int kvz_quantize_residual_generic(encoder_state_t *const state,
kvz_pixel *rec_out, coeff_t *coeff_out,
bool early_skip, int lmcs_chroma_adj);
int kvz_quant_cbcr_residual_generic(
encoder_state_t* const state,
const cu_info_t* const cur_cu,
const int width,
const coeff_scan_order_t scan_order,
const int in_stride, const int out_stride,
const kvz_pixel* const u_ref_in,
const kvz_pixel* const v_ref_in,
const kvz_pixel* const u_pred_in,
const kvz_pixel* const v_pred_in,
kvz_pixel* u_rec_out,
kvz_pixel* v_rec_out,
coeff_t* coeff_out,
bool early_skip,
int lmcs_chroma_adj
);
#endif //STRATEGIES_QUANT_GENERIC_H_

View file

@ -27,6 +27,7 @@
// Define function pointers.
quant_func *kvz_quant;
quant_cbcr_func *kvz_quant_cbcr_residual;
quant_residual_func *kvz_quantize_residual;
dequant_func *kvz_dequant;
coeff_abs_sum_func *kvz_coeff_abs_sum;

View file

@ -34,7 +34,22 @@
// Declare function pointers.
typedef unsigned (quant_func)(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
int32_t height, int8_t type, int8_t scan_idx, int8_t block_type, int8_t transform_skip);
int32_t height, color_t color, int8_t scan_idx, int8_t block_type, int8_t transform_skip);
typedef unsigned (quant_cbcr_func)(
encoder_state_t* const state,
const cu_info_t* const cur_cu,
const int width,
const coeff_scan_order_t scan_order,
const int in_stride, const int out_stride,
const kvz_pixel* const u_ref_in,
const kvz_pixel* const v_ref_in,
const kvz_pixel* const u_pred_in,
const kvz_pixel* const v_pred_in,
kvz_pixel* u_rec_out,
kvz_pixel* v_rec_out,
coeff_t* coeff_out,
bool early_skip,
int lmcs_chroma_adj);
typedef unsigned (quant_residual_func)(encoder_state_t *const state,
const cu_info_t *const cur_cu, const int width, const color_t color,
const coeff_scan_order_t scan_order, const int use_trskip,
@ -43,13 +58,14 @@ typedef unsigned (quant_residual_func)(encoder_state_t *const state,
kvz_pixel *rec_out, coeff_t *coeff_out,
bool early_skip, int lmcs_chroma_adj);
typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width,
int32_t height, int8_t type, int8_t block_type, int8_t transform_skip);
int32_t height, color_t color, int8_t block_type, int8_t transform_skip);
typedef uint32_t (fast_coeff_cost_func)(const coeff_t *coeff, int32_t width, uint64_t weights);
typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length);
// Declare function pointers.
extern quant_func * kvz_quant;
extern quant_cbcr_func* kvz_quant_cbcr_residual;
extern quant_residual_func * kvz_quantize_residual;
extern dequant_func *kvz_dequant;
extern coeff_abs_sum_func *kvz_coeff_abs_sum;
@ -60,6 +76,7 @@ int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth);
#define STRATEGIES_QUANT_EXPORTS \
{"quant", (void**) &kvz_quant}, \
{"quant_cbcr_residual", (void**) &kvz_quant_cbcr_residual}, \
{"quantize_residual", (void**) &kvz_quantize_residual}, \
{"dequant", (void**) &kvz_dequant}, \
{"coeff_abs_sum", (void**) &kvz_coeff_abs_sum}, \

View file

@ -127,10 +127,10 @@ static void rdpcm(const int width,
* \brief Get scaled QP used in quantization
*
*/
int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const * const chroma_scale)
int32_t kvz_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t const * const chroma_scale)
{
int32_t qp_scaled = 0;
if(type == 0) {
if(color == 0) {
qp_scaled = qp + qp_offset;
} else {
qp_scaled = CLIP(-qp_offset, 57, qp);
@ -306,13 +306,13 @@ static void quantize_tr_residual(encoder_state_t * const state,
{
const kvz_config *cfg = &state->encoder_control->cfg;
const int32_t shift = color == COLOR_Y ? 0 : 1;
const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift };
const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift};
// If luma is 4x4, do chroma for the 8x8 luma area when handling the top
// left PU because the coordinates are correct.
bool handled_elsewhere = color != COLOR_Y &&
depth > MAX_DEPTH &&
(lcu_px.x % 4 != 0 || lcu_px.y % 4 != 0);
depth == MAX_DEPTH &&
(x % 4 != 0 || y % 4 != 0);
if (handled_elsewhere) {
return;
}
@ -367,7 +367,7 @@ static void quantize_tr_residual(encoder_state_t * const state,
cfg->trskip_enable &&
cur_pu->tr_idx == 1;
bool has_coeffs;
uint8_t has_coeffs;
int lmcs_chroma_adj = 0;
@ -411,6 +411,25 @@ static void quantize_tr_residual(encoder_state_t * const state,
lmcs_chroma_adj);
cur_pu->tr_skip = tr_skip;
} else {
if(color == COLOR_UV) {
has_coeffs = kvz_quant_cbcr_residual(
state,
cur_pu,
tr_width,
scan_idx,
lcu_width,
lcu_width,
&lcu->ref.u[offset], &lcu->ref.v[offset],
&lcu->rec.joint_u[offset], &lcu->rec.joint_v[offset],
&lcu->rec.joint_u[offset], &lcu->rec.joint_v[offset],
&lcu->coeff.joint_uv[z_index],
early_skip,
lmcs_chroma_adj
);
cur_pu->joint_cb_cr = has_coeffs;
return;
}
has_coeffs = kvz_quantize_residual(state,
cur_pu,
tr_width,
@ -425,6 +444,7 @@ static void quantize_tr_residual(encoder_state_t * const state,
coeff,
early_skip,
lmcs_chroma_adj);
}
cbf_clear(&cur_pu->cbf, depth, color);
@ -519,6 +539,9 @@ void kvz_quantize_lcu_residual(encoder_state_t * const state,
if (chroma) {
quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu, early_skip);
quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip);
if(state->encoder_control->cfg.jccr && cur_pu->tr_depth == cur_pu->depth){
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip);
}
}
}
}

View file

@ -53,7 +53,7 @@ void kvz_itransform2d(const encoder_control_t * const encoder,
const cu_info_t *tu);
int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale);
int32_t kvz_get_scaled_qp(color_t color, int8_t qp, int8_t qp_offset, int8_t const* const chroma_scale);
void kvz_quantize_lcu_residual(encoder_state_t *state,
bool luma,

View file

@ -12,4 +12,6 @@ valgrind_test $common_args --rd=2 --no-transform-skip --qp 37
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq
valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0
valgrind_test $common_args --alf=full --wpp --threads=1
valgrind_test $common_args --jccr
valgrind_test $common_args --jccr --rdoq --rd=2 --mts=intra