Merge branch 'CCLM' into 'master'

CCLM implementation

See merge request cs/ultravideo/vvc/uvg266!7
This commit is contained in:
Joose Sainio 2021-11-26 08:54:24 +02:00
commit 7aeef8e9b8
18 changed files with 596 additions and 64 deletions

View file

@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
# #
# Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
ver_major=6 ver_major=6
ver_minor=6 ver_minor=7
ver_release=0 ver_release=0
# Prevents configure from adding a lot of defines to the CFLAGS # Prevents configure from adding a lot of defines to the CFLAGS

View file

@ -117,6 +117,8 @@ typedef struct
cabac_ctx_t transform_skip_gt1[4]; cabac_ctx_t transform_skip_gt1[4];
cabac_ctx_t transform_skip_par; cabac_ctx_t transform_skip_par;
cabac_ctx_t transform_skip_gt2[5]; cabac_ctx_t transform_skip_gt2[5];
cabac_ctx_t cclm_flag;
cabac_ctx_t cclm_model;
} ctx; } ctx;
} cabac_data_t; } cabac_data_t;

View file

@ -209,6 +209,8 @@ int kvz_config_init(kvz_config *cfg)
cfg->amvr = 0; cfg->amvr = 0;
cfg->cclm = 0;
return 1; return 1;
} }
@ -1486,6 +1488,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
else if OPT("amvr") { else if OPT("amvr") {
cfg->amvr = (bool)atobool(value); cfg->amvr = (bool)atobool(value);
} }
else if OPT("cclm") {
cfg->cclm = (bool)atobool(value);
}
else { else {
return 0; return 0;
} }

View file

@ -178,6 +178,8 @@ static const struct option long_options[] = {
{ "no-jccr", no_argument, NULL, 0 }, { "no-jccr", no_argument, NULL, 0 },
{ "amvr", no_argument, NULL, 0 }, { "amvr", no_argument, NULL, 0 },
{ "no-amvr", no_argument, NULL, 0 }, { "no-amvr", no_argument, NULL, 0 },
{ "cclm", no_argument, NULL, 0 },
{ "no-cclm", no_argument, NULL, 0 },
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
@ -550,8 +552,8 @@ void print_help(void)
" - 0: Skip intra if inter is good enough.\n" " - 0: Skip intra if inter is good enough.\n"
" - 1: Rough intra mode search with SATD.\n" " - 1: Rough intra mode search with SATD.\n"
" - 2: Refine intra mode search with SSE.\n" " - 2: Refine intra mode search with SSE.\n"
" - 3: Try all intra modes and enable intra\n" " - 3: Enable intra chroma mode search.\n"
" chroma mode search.\n" " - 4: Try all intra modes.\n"
" --(no-)mv-rdo : Rate-distortion optimized motion vector costs\n" " --(no-)mv-rdo : Rate-distortion optimized motion vector costs\n"
" [disabled]\n" " [disabled]\n"
" --(no-)zero-coeff-rdo : If a CU is set inter, check if forcing zero\n" " --(no-)zero-coeff-rdo : If a CU is set inter, check if forcing zero\n"
@ -629,8 +631,12 @@ void print_help(void)
" - both: MTS applied for both intra and inter blocks.\n" " - both: MTS applied for both intra and inter blocks.\n"
" - implicit: uses implicit MTS. Applies DST7 instead \n" " - implicit: uses implicit MTS. Applies DST7 instead \n"
" of DCT2 to certain intra blocks.\n" " of DCT2 to certain intra blocks.\n"
" --(no-)jccr : Joint coding of chroma residual.\n" " --(no-)jccr : Joint coding of chroma residual. "
" Requires rdo> = 2. [disabled]\n" " Requires rdo> = 2. [disabled]\n"
" --(no-)cclm : Cross component linear model. \n"
" Extra chroma prediction modes that are formed\n"
" via linear transformation from the luma\n"
" prediction. Requires rdo >=3. [disabled]\n"
" --(no-)amvr : Adaptive Motion Vector Resolution.\n" " --(no-)amvr : Adaptive Motion Vector Resolution.\n"
" Code some mv's with reduced resolution [disabled]\n" " Code some mv's with reduced resolution [disabled]\n"
"\n" "\n"

View file

@ -395,6 +395,20 @@ static const uint8_t INIT_IMV_FLAG[4][5] = {
{ 0, 5, 0, 0, 4, }, { 0, 5, 0, 0, 4, },
}; };
/*
 * CABAC initialization values for the cclm_flag context.
 * kvz_init_contexts passes entry [slice] (slice = 0..2) together with
 * entry [3] to kvz_ctx_init.
 */
static const uint8_t INIT_CCLM_FLAG[4] = { 26, 34, 59, 4 };
/*
 * CABAC initialization values for the cclm_model context.
 * kvz_init_contexts passes entry [slice] (slice = 0..2) together with
 * entry [3] to kvz_ctx_init.
 */
static const uint8_t INIT_CCLM_MODEL[4] = { 27, 27, 27, 9 };
/* /*
static const uint16_t g_inistateToCount[128] = { static const uint16_t g_inistateToCount[128] = {
614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342, 614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342,
@ -471,6 +485,9 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
kvz_ctx_init(&cabac->ctx.chroma_pred_model, QP, INIT_CHROMA_PRED_MODE[slice], INIT_CHROMA_PRED_MODE[3]); kvz_ctx_init(&cabac->ctx.chroma_pred_model, QP, INIT_CHROMA_PRED_MODE[slice], INIT_CHROMA_PRED_MODE[3]);
kvz_ctx_init(&cabac->ctx.cclm_flag, QP, INIT_CCLM_FLAG[slice], INIT_CCLM_FLAG[3]);
kvz_ctx_init(&cabac->ctx.cclm_model, QP, INIT_CCLM_MODEL[slice], INIT_CCLM_MODEL[3]);
for (i = 0; i < 3; i++) { for (i = 0; i < 3; i++) {
kvz_ctx_init(&cabac->ctx.cu_skip_flag_model[i], QP, INIT_SKIP_FLAG[slice][i], INIT_SKIP_FLAG[3][i]); kvz_ctx_init(&cabac->ctx.cu_skip_flag_model[i], QP, INIT_SKIP_FLAG[slice][i], INIT_SKIP_FLAG[3][i]);

View file

@ -701,7 +701,7 @@ static bool encode_inter_prediction_unit(encoder_state_t * const state,
return non_zero_mvd; return non_zero_mvd;
} }
static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* const cur_cu, int x, int y, const videoframe_t* const frame, const int cu_width) { static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* const cur_cu, int x, int y, const videoframe_t* const frame, const int cu_width, const int cclm_enabled) {
unsigned pred_mode = 0; unsigned pred_mode = 0;
unsigned chroma_pred_modes[8] = {0, 50, 18, 1, 67, 81, 82, 83}; unsigned chroma_pred_modes[8] = {0, 50, 18, 1, 67, 81, 82, 83};
const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, 0); const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, 0);
@ -710,7 +710,23 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c
int8_t chroma_intra_dir = first_pu->intra.mode_chroma; int8_t chroma_intra_dir = first_pu->intra.mode_chroma;
int8_t luma_intra_dir = first_pu->intra.mode; int8_t luma_intra_dir = first_pu->intra.mode;
bool derived_mode = chroma_intra_dir == luma_intra_dir; bool derived_mode = chroma_intra_dir == luma_intra_dir;
bool cclm_mode = chroma_intra_dir > 67;
if (cclm_enabled) {
cabac->cur_ctx = &cabac->ctx.cclm_flag;
CABAC_BIN(cabac, cclm_mode, "cclm_flag");
if(cclm_mode) {
cabac->cur_ctx = &cabac->ctx.cclm_model;
CABAC_BIN(cabac, chroma_intra_dir != 81, "cclm_model_1");
if(chroma_intra_dir != 81) {
CABAC_BIN_EP(cabac, chroma_intra_dir == 83, "cclm_model_2");
}
return;
}
}
cabac->cur_ctx = &(cabac->ctx.chroma_pred_model); cabac->cur_ctx = &(cabac->ctx.chroma_pred_model);
CABAC_BIN(cabac, derived_mode ? 0 : 1, "intra_chroma_pred_mode"); CABAC_BIN(cabac, derived_mode ? 0 : 1, "intra_chroma_pred_mode");
@ -722,7 +738,7 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c
break; break;
} }
}*/ }*/
for (; pred_mode < 8; pred_mode++) { for (; pred_mode < 5; pred_mode++) {
if (chroma_intra_dir == chroma_pred_modes[pred_mode]) { if (chroma_intra_dir == chroma_pred_modes[pred_mode]) {
break; break;
} }
@ -983,7 +999,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
// Code chroma prediction mode. // Code chroma prediction mode.
if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth != 4) { if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth != 4) {
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width); encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm);
} }
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 0, coeff); encode_transform_coeff(state, x, y, depth, 0, 0, 0, 0, coeff);
@ -991,7 +1007,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
encode_mts_idx(state, cabac, cur_cu); encode_mts_idx(state, cabac, cur_cu);
if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth == 4 && x % 8 && y % 8) { if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth == 4 && x % 8 && y % 8) {
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width); encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm);
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 1, coeff); encode_transform_coeff(state, x, y, depth, 0, 0, 0, 1, coeff);
} }

View file

@ -722,7 +722,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
WRITE_U(stream, 0, 1, "sps_mip_enabled_flag"); WRITE_U(stream, 0, 1, "sps_mip_enabled_flag");
// if(!no_cclm_constraint_flag) // if(!no_cclm_constraint_flag)
if(encoder->chroma_format != KVZ_CSP_400) { if(encoder->chroma_format != KVZ_CSP_400) {
WRITE_U(stream, 0, 1, "sps_cclm_enabled_flag"); WRITE_U(stream, encoder->cfg.cclm, 1, "sps_cclm_enabled_flag");
} }
if (encoder->chroma_format == KVZ_CSP_420) { if (encoder->chroma_format == KVZ_CSP_420) {
WRITE_U(stream, 0, 1, "sps_chroma_horizontal_collocated_flag"); WRITE_U(stream, 0, 1, "sps_chroma_horizontal_collocated_flag");

View file

@ -122,7 +122,7 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
const int width, const int height, const int width_in_lcu, const int height_in_lcu) { const int width, const int height, const int width_in_lcu, const int height_in_lcu) {
const encoder_control_t * const encoder = state->encoder_control; const encoder_control_t * const encoder = state->encoder_control;
state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type); state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type, encoder->cfg.cclm);
state->tile->frame->rec = NULL; state->tile->frame->rec = NULL;

View file

@ -248,6 +248,300 @@ static void intra_pred_dc(
} }
// Chroma intra mode numbers of the three cross-component linear model (CCLM)
// prediction modes. get_cclm_parameters selects its reference samples based
// on which of these is passed as the mode.
enum lm_mode
{
// Both the above and the left references may be used.
LM_CHROMA_IDX = 81,
// Left references only: the above references are marked unavailable.
LM_CHROMA_L_IDX = 82,
// Above references only: the left references are marked unavailable.
LM_CHROMA_T_IDX = 83,
};
/**
 * \brief Derive the CCLM linear model (a, b, shift) for one chroma block.
 *
 * Picks up to four (luma, chroma) reference sample pairs, splits them into a
 * lower-luma and a higher-luma group, averages each group and fits a line
 * through the two resulting points using integer arithmetic.
 * linear_transform_cclm later evaluates the model as
 * ((luma * a) >> shift) + b.
 *
 * \param state   encoder state; only the bit depth is read from it here
 * \param width   block width, treated as chroma samples (doubled internally)
 * \param height  block height, treated as chroma samples (doubled internally)
 * \param mode    LM_CHROMA_IDX, LM_CHROMA_L_IDX or LM_CHROMA_T_IDX
 * \param x0      horizontal block position; zero means no left references
 * \param y0      vertical block position; zero means no above references
 * \param avai_above_right_units  number of usable above-right units (T mode)
 * \param avai_left_below_units   number of usable left-below units (L mode)
 * \param luma_src    downsampled luma references (top / left arrays)
 * \param chroma_ref  chroma references; element 0 of ref.top / ref.left is skipped
 * \param a      output: scale
 * \param b      output: offset
 * \param shift  output: right shift applied after the multiplication
 */
static void get_cclm_parameters(
encoder_state_t const* const state,
int8_t width, int8_t height, int8_t mode,
int x0, int y0, int avai_above_right_units, int avai_left_below_units,
kvz_intra_ref* luma_src, kvz_intra_references*chroma_ref,
int16_t *a, int16_t*b, int16_t*shift) {
const int base_unit_size = 1 << (6 - PU_DEPTH_INTRA_MAX);
// TODO: take into account YUV422
// Chroma units are half the size of the luma base unit in both directions.
const int unit_w = base_unit_size >> 1;
const int unit_h = base_unit_size >> 1;
// Keep the incoming (chroma) dimensions; width/height are doubled below.
const int c_height = height;
const int c_width = width;
height *= 2;
width *= 2;
const int tu_width_in_units = c_width / unit_w;
const int tu_height_in_units = c_height / unit_h;
// NOTE(review): the two template sample counts below are only referenced by
// the commented-out code that follows.
int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H;
int left_template_samp_num = height;
// These are used for calculating some stuff for non-square CUs
//int total_above_units = (top_template_samp_num + (unit_w - 1)) / unit_w;
//int total_left_units = (left_template_samp_num + (unit_h - 1)) / unit_h;
//int total_units = total_left_units + total_above_units + 1;
//int above_right_units = total_above_units - tu_width_in_units;
//int left_below_units = total_left_units - tu_height_in_units;
//int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs
//int avai_left_below_units = 0;
// NOTE(review): the above count is clipped with the height in units and the
// left count with the width in units; this looks swapped but gives the same
// result for square blocks -- confirm before supporting non-square blocks.
int avai_above_units = CLIP(0, tu_height_in_units, y0/base_unit_size);
int avai_left_units = CLIP(0, tu_width_in_units, x0 / base_unit_size);
// A side is usable only if at least one unit exists in that direction,
// i.e. y0 (above) or x0 (left) is at least one base unit.
bool above_available = avai_above_units != 0;
bool left_available = avai_left_units != 0;
char internal_bit_depth = state->encoder_control->bitdepth;
// Despite the names, index 0 holds a luma value and index 1 the matching
// chroma value of the lower (min_luma) and higher (max_luma) point.
int min_luma[2] = { MAX_INT, 0 };
int max_luma[2] = { -MAX_INT, 0 };
kvz_pixel* src;
int actualTopTemplateSampNum = 0;
int actualLeftTemplateSampNum = 0;
if (mode == LM_CHROMA_T_IDX)
{
// Top-only mode: ignore the left side, extend the template to the above-right.
left_available = 0;
avai_above_right_units = avai_above_right_units > (c_height / unit_w) ? c_height / unit_w : avai_above_right_units;
actualTopTemplateSampNum = unit_w * (avai_above_units + avai_above_right_units);
}
else if (mode == LM_CHROMA_L_IDX)
{
// Left-only mode: ignore the above side, extend the template to the left-below.
above_available = 0;
avai_left_below_units = avai_left_below_units > (c_width / unit_h) ? c_width / unit_h : avai_left_below_units;
actualLeftTemplateSampNum = unit_h * (avai_left_units + avai_left_below_units);
}
else if (mode == LM_CHROMA_IDX)
{
// Regular mode: the templates are exactly the block's own width and height.
actualTopTemplateSampNum = c_width;
actualLeftTemplateSampNum = c_height;
}
int startPos[2]; //0:Above, 1: Left
int pickStep[2];
// When only one side is available, four samples are taken from that side
// instead of two from each; this halves the start offset and the step.
int aboveIs4 = left_available ? 0 : 1;
int leftIs4 = above_available ? 0 : 1;
startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
pickStep[0] = MAX(1, actualTopTemplateSampNum >> (1 + aboveIs4));
startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
pickStep[1] = MAX(1, actualLeftTemplateSampNum >> (1 + leftIs4));
kvz_pixel selectLumaPix[4] = { 0, 0, 0, 0 };
kvz_pixel selectChromaPix[4] = { 0, 0, 0, 0 };
int cntT, cntL;
cntT = cntL = 0;
int cnt = 0;
if (above_available)
{
// Pick 2 (or 4) evenly spaced pairs from the above references.
cntT = MIN(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
src = luma_src->top;
const kvz_pixel* cur = chroma_ref->ref.top + 1;
for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
{
selectLumaPix[cnt] = src[pos];
selectChromaPix[cnt] = cur[pos];
}
}
if (left_available)
{
// Pick 2 (or 4) evenly spaced pairs from the left references and append
// them after the above ones. Note that this loop declares its own cnt,
// which shadows the outer one.
cntL = MIN(actualLeftTemplateSampNum, (1 + leftIs4) << 1);
src = luma_src->left;
const kvz_pixel* cur = chroma_ref->ref.left + 1;
for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
{
selectLumaPix[cnt + cntT] = src[pos];
selectChromaPix[cnt + cntT] = cur[pos];
}
}
cnt = cntL + cntT;
if (cnt == 2)
{
// Only two pairs were picked: replicate them so that all four slots are
// populated (result is [p1, p0, p1, p0]).
selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
}
// Compare-and-swap sequence on the indices that leaves the two pairs with
// the smaller luma values in tmpMinGrp and the two with the larger luma
// values in tmpMaxGrp.
int minGrpIdx[2] = { 0, 2 };
int maxGrpIdx[2] = { 1, 3 };
int* tmpMinGrp = minGrpIdx;
int* tmpMaxGrp = maxGrpIdx;
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]])
{
SWAP(tmpMinGrp[0], tmpMinGrp[1], int);
}
if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]])
{
SWAP(tmpMaxGrp[0], tmpMaxGrp[1], int);
}
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]])
{
SWAP(tmpMinGrp, tmpMaxGrp, int*);
}
if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]])
{
SWAP(tmpMinGrp[1], tmpMaxGrp[0], int);
}
// Rounded averages of each group give the two points of the line.
min_luma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1) >> 1;
min_luma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
max_luma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1) >> 1;
max_luma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
if (left_available || above_available)
{
int diff = max_luma[0] - min_luma[0];
if (diff > 0)
{
// Slope = diffC / diff, computed without a division: diff is reduced to
// its log2 plus a 4-bit normalized remainder that indexes DivSigTable.
int diffC = max_luma[1] - min_luma[1];
int x = kvz_math_floor_log2(diff);
static const uint8_t DivSigTable[1 << 4] = {
// 4bit significands - 8 ( MSB is omitted )
0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0
};
int normDiff = (diff << 4 >> x) & 15;
int v = DivSigTable[normDiff] | 8;
x += normDiff != 0;
int y = diffC ? kvz_math_floor_log2(abs(diffC)) + 1 : 0;
int add = 1 << y >> 1;
*a = (diffC * v + add) >> y;
*shift = 3 + x - y;
if (*shift < 1)
{
// Keep the shift at least 1 and replace the scale with +/-15 (or 0).
*shift = 1;
*a = ((*a == 0) ? 0 : (*a < 0) ? -15 : 15); // a=Sign(a)*15
}
// Make the line pass through the lower point.
*b = min_luma[1] - ((*a * min_luma[0]) >> *shift);
}
else
{
// Both points have the same luma: predict the constant lower chroma value.
*a = 0;
*b = min_luma[1];
*shift = 0;
}
}
else
{
// No references at all: predict the middle of the sample range.
*a = 0;
*b = 1 << (internal_bit_depth - 1);
*shift = 0;
}
}
/**
 * \brief Apply a CCLM linear model to a block of samples.
 *
 * Every sample is mapped to ((sample * a) >> shift) + b and clipped with
 * CLIP_TO_PIXEL. The block is `stride` samples wide and `height` rows tall,
 * and the same `stride` is used as the row pitch of both src and dst.
 * Each sample is read before the same index is written, so src and dst may
 * point to the same buffer.
 *
 * \param cclm_params  model parameters (a, shift, b)
 * \param src          input samples
 * \param dst          output samples
 * \param stride       block width, also the row pitch of src and dst
 * \param height       number of rows
 */
static void linear_transform_cclm(cclm_parameters_t* cclm_params, kvz_pixel * src, kvz_pixel * dst, int stride, int height) {
  const int a = cclm_params->a;
  const int b = cclm_params->b;
  const int right_shift = cclm_params->shift;

  for (int row = 0; row < height; ++row) {
    const int row_start = row * stride;
    for (int col = 0; col < stride; ++col) {
      const int idx = row_start + col;
      int predicted = src[idx] * a;
      predicted >>= right_shift;
      predicted += b;
      predicted = CLIP_TO_PIXEL(predicted);
      dst[idx] = predicted;
    }
  }
}
/**
 * \brief Derive the CCLM parameters of a chroma block and optionally predict it.
 *
 * Gathers the downsampled luma references above and to the left of the
 * block, copies the block's own downsampled luma from the frame's
 * cclm_luma_rec buffer, calls get_cclm_parameters and, if dst is given,
 * applies the resulting linear model to produce the prediction.
 *
 * \param state   encoder state; cfg.cclm must be enabled (asserted)
 * \param color   not referenced inside this function
 * \param width   block width; luma extents are computed as width * 2
 * \param height  block height; luma extents are computed as height * 2
 * \param x0      horizontal position used to index the frame luma buffers
 * \param y0      vertical position used to index the frame luma buffers
 * \param stride  row pitch of the frame luma buffer (halved for cclm_luma_rec)
 * \param mode    one of LM_CHROMA_IDX, LM_CHROMA_L_IDX, LM_CHROMA_T_IDX (asserted)
 * \param lcu     LCU whose luma reconstruction and CU info are read
 * \param chroma_ref  chroma references, passed on to get_cclm_parameters
 * \param dst     output prediction, width x height with row pitch width; may
 *                be NULL, in which case only cclm_params is filled
 * \param cclm_params  output: derived model parameters
 */
void kvz_predict_cclm(
encoder_state_t const* const state,
const color_t color,
const int8_t width,
const int8_t height,
const int16_t x0,
const int16_t y0,
const int16_t stride,
const int8_t mode,
lcu_t* const lcu,
kvz_intra_references* chroma_ref,
kvz_pixel* dst,
cclm_parameters_t* cclm_params
)
{
assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX);
assert(state->encoder_control->cfg.cclm);
// Downsampled luma references; .top is only filled when y0 != 0 and .left
// only when x0 != 0.
kvz_intra_ref sampled_luma_ref;
// Downsampled luma of the block itself, stored with row pitch width.
kvz_pixel sampled_luma[LCU_CHROMA_SIZE];
// Position of the block inside the LCU.
int x_scu = SUB_SCU(x0);
int y_scu = SUB_SCU(y0);
int available_above_right = 0;
int available_left_below = 0;
kvz_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH;
// Essentially what this does is that it uses 6-tap filtering to downsample
// the luma intra references down to match the resolution of the chroma channel.
// The luma reference is only needed when we are not on the edge of the picture.
// Because the reference pixels that are needed on the edge of the ctu this code
// is kinda messy but what can you do
if (y0) {
// Count how many 4-pixel luma steps to the above-right are inside the LCU
// and already have a CU type set.
for (; available_above_right < width / 2; available_above_right++) {
int x_extension = x_scu + width * 2 + 4 * available_above_right;
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4);
if (x_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
}
if(y_scu == 0) {
// Block is on the top row of the LCU: the above references come from the
// line stored in cclm_luma_rec_top_line. Without WPP the above-right
// count is recomputed from the remaining frame width instead.
if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4);
memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride / 2)], sizeof(kvz_pixel) * (width + available_above_right * 2));
}
else {
// Block is inside the LCU: filter the two luma rows directly above it.
// y_scu is nonzero in this branch, so the plain `y_scu ?` selections
// always take the LCU buffer; only the left tap can fall back to the
// frame buffer, when the tap lies left of the LCU.
for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) {
// The left tap is one pixel to the left, except at the very first
// column of the frame where the centre pixel is reused.
bool left_padding = x0 || x;
int s = 4;
s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 2) * stride] * 2;
s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride];
s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride];
s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2;
s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride];
s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride];
sampled_luma_ref.top[x / 2] = s >> 3;
}
}
}
if(x0) {
// Count how many 4-pixel luma steps to the left-below are inside the LCU
// and already have a CU type set.
for (; available_left_below < height / 2; available_left_below++) {
int y_extension = y_scu + height * 2 + 4 * available_left_below;
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension);
if (y_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
// NOTE(review): special case whose rationale is not evident from this
// code -- confirm why a depth-0 CU at (32, 0) must stop the extension.
if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break;
}
// The left references are the column just left of the block in the
// already downsampled cclm_luma_rec buffer.
for(int i = 0; i < height + available_left_below * 2; i++) {
sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride/2) + x0 / 2 - 1];
}
}
// Fetch the block's own downsampled luma into sampled_luma.
kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride) / 4], sampled_luma, width, height, stride / 2, width);
int16_t a, b, shift;
get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift);
cclm_params->shift = shift;
cclm_params->a = a;
cclm_params->b = b;
// The prediction itself is optional; callers may only want the parameters.
if(dst)
linear_transform_cclm(cclm_params, sampled_luma, dst, width, height);
}
void kvz_intra_predict( void kvz_intra_predict(
encoder_state_t *const state, encoder_state_t *const state,
kvz_intra_references *refs, kvz_intra_references *refs,
@ -573,6 +867,7 @@ static void intra_recon_tb_leaf(
int y, int y,
int depth, int depth,
int8_t intra_mode, int8_t intra_mode,
cclm_parameters_t *cclm_params,
lcu_t *lcu, lcu_t *lcu,
color_t color) color_t color)
{ {
@ -592,14 +887,29 @@ static void intra_recon_tb_leaf(
state->tile->frame->width, state->tile->frame->width,
state->tile->frame->height, state->tile->frame->height,
}; };
const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift}; int x_scu = SUB_SCU(x);
int y_scu = SUB_SCU(y);
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
kvz_intra_references refs; kvz_intra_references refs;
kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp); kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp);
kvz_pixel pred[32 * 32]; kvz_pixel pred[32 * 32];
int stride = state->tile->frame->source->stride;
const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm); const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
if(intra_mode < 68) {
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary); kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary);
} else {
kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width);
if(cclm_params == NULL) {
cclm_parameters_t temp_params;
kvz_predict_cclm(
state, color, width, width, x, y, stride, intra_mode, lcu, &refs, pred, &temp_params);
}
else {
linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width);
}
}
const int index = lcu_px.x + lcu_px.y * lcu_width; const int index = lcu_px.x + lcu_px.y * lcu_width;
kvz_pixel *block = NULL; kvz_pixel *block = NULL;
@ -634,6 +944,7 @@ static void intra_recon_tb_leaf(
* \param mode_luma intra mode for luma, or -1 to skip luma recon * \param mode_luma intra mode for luma, or -1 to skip luma recon
* \param mode_chroma intra mode for chroma, or -1 to skip chroma recon * \param mode_chroma intra mode for chroma, or -1 to skip chroma recon
* \param cur_cu pointer to the CU, or NULL to fetch CU from LCU * \param cur_cu pointer to the CU, or NULL to fetch CU from LCU
* \param cclm_params pointer for the cclm_parameters, can be NULL if the mode is not cclm mode
* \param lcu containing LCU * \param lcu containing LCU
*/ */
void kvz_intra_recon_cu( void kvz_intra_recon_cu(
@ -644,6 +955,7 @@ void kvz_intra_recon_cu(
int8_t mode_luma, int8_t mode_luma,
int8_t mode_chroma, int8_t mode_chroma,
cu_info_t *cur_cu, cu_info_t *cur_cu,
cclm_parameters_t *cclm_params,
lcu_t *lcu) lcu_t *lcu)
{ {
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
@ -668,10 +980,10 @@ void kvz_intra_recon_cu(
const int32_t x2 = x + offset; const int32_t x2 = x + offset;
const int32_t y2 = y + offset; const int32_t y2 = y + offset;
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu); kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu); kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
// Propagate coded block flags from child CUs to parent CU. // Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = { uint16_t child_cbfs[3] = {
@ -692,11 +1004,11 @@ void kvz_intra_recon_cu(
const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0); const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0);
// Process a leaf TU. // Process a leaf TU.
if (has_luma) { if (has_luma) {
intra_recon_tb_leaf(state, x, y, depth, mode_luma, lcu, COLOR_Y); intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y);
} }
if (has_chroma) { if (has_chroma) {
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_U); intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U);
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_V); intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V);
} }
kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false); kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false);

View file

@ -54,6 +54,12 @@ typedef struct
bool filtered_initialized; bool filtered_initialized;
} kvz_intra_references; } kvz_intra_references;
/**
 * \brief Linear model parameters for CCLM prediction.
 *
 * linear_transform_cclm evaluates the model for each downsampled luma
 * sample as ((luma * a) >> shift) + b.
 */
typedef struct
{
int16_t a; // scale applied to the luma sample
int16_t shift; // right shift applied after the multiplication
int16_t b; // offset added after the shift
} cclm_parameters_t;
/** /**
* \brief Function for deriving intra luma predictions * \brief Function for deriving intra luma predictions
@ -118,5 +124,21 @@ void kvz_intra_recon_cu(
int8_t mode_luma, int8_t mode_luma,
int8_t mode_chroma, int8_t mode_chroma,
cu_info_t *cur_cu, cu_info_t *cur_cu,
cclm_parameters_t* cclm_params,
lcu_t *lcu); lcu_t *lcu);
void kvz_predict_cclm(
encoder_state_t const* const state,
const color_t color,
const int8_t width,
const int8_t height,
const int16_t x0,
const int16_t y0,
const int16_t stride,
const int8_t mode,
lcu_t* const lcu,
kvz_intra_references* chroma_ref,
kvz_pixel* dst,
cclm_parameters_t* cclm_params
);

View file

@ -516,6 +516,8 @@ typedef struct kvz_config
int8_t jccr; int8_t jccr;
int8_t cclm;
int8_t amvr; /* \brief Adaptive motion vector resolution parameter */ int8_t amvr; /* \brief Adaptive motion vector resolution parameter */
} kvz_config; } kvz_config;

View file

@ -241,6 +241,44 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
} }
/**
 * \brief Downsample reconstructed luma of a block into the frame's CCLM buffers.
 *
 * Each output sample is a 6-tap average over a 3x2 luma neighbourhood and is
 * written to frame->cclm_luma_rec. When the block ends on a multiple of 64
 * luma rows, a 3-tap horizontally filtered copy of a single luma row is
 * additionally written to frame->cclm_luma_rec_top_line.
 * Returns immediately when cfg.cclm is disabled.
 *
 * \param state   encoder state
 * \param x       horizontal block position, used to index the frame buffers
 * \param y       vertical block position, used to index the frame buffers
 * \param width   number of output (downsampled) samples per row
 * \param height  number of output (downsampled) rows
 * \param y_rec   luma reconstruction with row pitch LCU_WIDTH; the block
 *                offset inside the LCU is added here
 * \param extra_pixel  left neighbour used for the first sample of the bottom
 *                     line when the block starts at the left edge of the LCU
 *                     and x is not zero
 */
static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec, kvz_pixel extra_pixel) {
if (!state->encoder_control->cfg.cclm) return;
int x_scu = SUB_SCU(x);
int y_scu = SUB_SCU(y);
y_rec += x_scu + y_scu * LCU_WIDTH;
int stride = state->tile->frame->source->stride;
// Stop early at the bottom of the picture (compared against cfg.height).
for (int y_ = 0; y_ < height && y_ * 2 + y < state->encoder_control->cfg.height; y_++) {
for (int x_ = 0; x_ < width; x_++) {
// Weights are 1-2-1 on each of the two luma rows, plus 4 for rounding.
int s = 4;
s += y_rec[2 * x_] * 2;
s += y_rec[2 * x_ + 1];
// If we are at the edge of the CTU read the pixel from the frame reconstruct buffer,
// *except* when we are also at the edge of the frame, in which case we want to duplicate
// the edge pixel
s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2) * stride] : y_rec[2 * x_ - ((x_ + x) > 0)];
s += y_rec[2 * x_ + LCU_WIDTH] * 2;
s += y_rec[2 * x_ + 1 + LCU_WIDTH];
s += !x_scu && !x_ && x ? state->tile->frame->rec->y[x - 1 + (y + y_ * 2 + 1) * stride] : y_rec[2 * x_ - ((x_ + x) > 0) + LCU_WIDTH];
// NOTE(review): the row offset is ((y / 2 + y_) * stride) / 2, whereas
// kvz_predict_cclm reads with (row) * (stride / 2); these agree only for
// even strides -- confirm stride is always even.
int index = x / 2 + x_ + (y / 2 + y_ )* stride / 2;
state->tile->frame->cclm_luma_rec[index] = s >> 3;
}
// Advance two luma rows per output row.
y_rec += LCU_WIDTH * 2;
}
// Block ends on a multiple of 64 luma rows: store a horizontally filtered
// line for use as above reference by blocks below.
if((y + height * 2) % 64 == 0) {
int line = y / 64 * stride / 2;
// Step back one luma row; this is the block's last luma row provided the
// loop above was not cut short by the picture height check.
y_rec -= LCU_WIDTH;
for (int i = 0; i < width; ++i) {
// Single-row 1-2-1 filter, plus 2 for rounding.
int s = 2;
s += y_rec[i * 2] * 2;
s += y_rec[i * 2 + 1];
s += !x_scu && !i && x ? extra_pixel : y_rec[i * 2 - ((i + x) > 0)] ;
state->tile->frame->cclm_luma_rec_top_line[i + x / 2 + line] = s >> 2;
}
}
}
/** /**
* Calculate RD cost for a Coding Unit. * Calculate RD cost for a Coding Unit.
* \return Cost of block * \return Cost of block
@ -709,7 +747,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
x, y, x, y,
depth, depth,
cur_cu->intra.mode, -1, // skip chroma cur_cu->intra.mode, -1, // skip chroma
NULL, lcu); NULL, NULL, lcu);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
);
// TODO: This heavily relies to square CUs // TODO: This heavily relies to square CUs
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) { if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) {
@ -717,8 +759,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// rd2. Possibly because the luma mode search already takes chroma // rd2. Possibly because the luma mode search already takes chroma
// into account, so there is less of a chanse of luma mode being // into account, so there is less of a chanse of luma mode being
// really bad for chroma. // really bad for chroma.
if (ctrl->cfg.rdo == 3) { cclm_parameters_t cclm_params[2];
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu); if (ctrl->cfg.rdo >= 3) {
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
} }
@ -726,7 +769,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
x & ~7, y & ~7, // TODO: as does this x & ~7, y & ~7, // TODO: as does this
depth, depth,
-1, cur_cu->intra.mode_chroma, // skip luma -1, cur_cu->intra.mode_chroma, // skip luma
NULL, lcu); NULL, cclm_params, lcu);
} }
} else if (cur_cu->type == CU_INTER) { } else if (cur_cu->type == CU_INTER) {
@ -862,7 +905,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// gets used, at least in the most obvious cases, while avoiding any // gets used, at least in the most obvious cases, while avoiding any
// searching. // searching.
if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH
&& x + cu_width <= frame->width && y + cu_width <= frame->height) && x + cu_width <= frame->width && y + cu_width <= frame->height && 0)
{ {
cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local); cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local);
@ -883,7 +926,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
x, y, x, y,
depth, depth,
cur_cu->intra.mode, mode_chroma, cur_cu->intra.mode, mode_chroma,
NULL, lcu); NULL,NULL, lcu);
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu); cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
if (has_chroma) { if (has_chroma) {
@ -912,6 +955,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// Copy this CU's mode all the way down for use in adjacent CUs mode // Copy this CU's mode all the way down for use in adjacent CUs mode
// search. // search.
work_tree_copy_down(x_local, y_local, depth, work_tree); work_tree_copy_down(x_local, y_local, depth, work_tree);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
);
if (state->frame->slicetype != KVZ_SLICE_I) { if (state->frame->slicetype != KVZ_SLICE_I) {
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp // Reset HMVP to the beginning of this CU level search and add this CU as the mvp
@ -924,6 +970,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// Need to copy modes down since the lower level of the work tree is used // Need to copy modes down since the lower level of the work tree is used
// when searching SMP and AMP blocks. // when searching SMP and AMP blocks.
work_tree_copy_down(x_local, y_local, depth, work_tree); work_tree_copy_down(x_local, y_local, depth, work_tree);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
);
if (state->frame->slicetype != KVZ_SLICE_I) { if (state->frame->slicetype != KVZ_SLICE_I) {
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp // Reset HMVP to the beginning of this CU level search and add this CU as the mvp

View file

@ -1937,7 +1937,7 @@ static void search_pu_inter(encoder_state_t * const state,
} }
// TODO: this probably should have a separate command line option // TODO: this probably should have a separate command line option
if (cfg->rdo == 3) { if (cfg->rdo >= 3) {
search_pu_inter_bipred(&info, depth, lcu, cur_cu, inter_cost, inter_bitcost); search_pu_inter_bipred(&info, depth, lcu, cur_cu, inter_cost, inter_bitcost);
} }
} }

View file

@ -258,6 +258,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
int intra_mode, int cost_treshold, int intra_mode, int cost_treshold,
cu_info_t *const pred_cu, cu_info_t *const pred_cu,
lcu_t *const lcu, lcu_t *const lcu,
cclm_parameters_t *cclm_params,
const int mts_mode) const int mts_mode)
{ {
assert(depth >= 0 && depth <= MAX_PU_DEPTH); assert(depth >= 0 && depth <= MAX_PU_DEPTH);
@ -332,7 +333,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
x_px, y_px, x_px, y_px,
depth, depth,
intra_mode, -1, intra_mode, -1,
pred_cu, lcu); pred_cu, cclm_params, lcu);
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1 // TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
if (pred_cu->tr_idx > 1) if (pred_cu->tr_idx > 1)
@ -360,7 +361,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
x_px, y_px, x_px, y_px,
depth, depth,
-1, chroma_mode, -1, chroma_mode,
pred_cu, lcu); pred_cu, cclm_params, lcu);
best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
} }
pred_cu->tr_skip = best_tr_idx == MTS_SKIP; pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
@ -391,15 +392,15 @@ static double search_intra_trdepth(encoder_state_t * const state,
if (depth < max_depth && depth < MAX_PU_DEPTH) { if (depth < max_depth && depth < MAX_PU_DEPTH) {
split_cost = 3 * state->lambda; split_cost = 3 * state->lambda;
split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
if (split_cost < nosplit_cost) { if (split_cost < nosplit_cost) {
split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
} }
if (split_cost < nosplit_cost) { if (split_cost < nosplit_cost) {
split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
} }
if (split_cost < nosplit_cost) { if (split_cost < nosplit_cost) {
split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1); split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
} }
double cbf_bits = 0.0; double cbf_bits = 0.0;
@ -454,20 +455,22 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride, const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride,
kvz_intra_references *refs_u, kvz_intra_references *refs_v, kvz_intra_references *refs_u, kvz_intra_references *refs_v,
int8_t luma_mode, int8_t luma_mode,
int8_t modes[5], double costs[5]) int8_t modes[8], double costs[8], lcu_t* lcu)
{ {
assert(!(x_px & 4 || y_px & 4)); assert(!(x_px & 4 || y_px & 4));
const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH); const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH);
const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2); const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 8; ++i) {
costs[i] = 0; costs[i] = 0;
} }
cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width); cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width);
//cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width); //cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width);
cclm_parameters_t cclm_params;
kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT]; kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT];
kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT); kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);
@ -476,19 +479,31 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width); kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
if (modes[i] == luma_mode) continue; if (modes[i] == -1) continue;
kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false); kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
costs[i] += satd_func(pred, orig_block); costs[i] += satd_func(pred, orig_block);
} }
for (int i = 5; i < 8; i++) {
assert(state->encoder_control->cfg.cclm);
kvz_predict_cclm(
state,
COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params);
}
kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width); kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
if (modes[i] == luma_mode) continue; if (modes[i] == -1) continue;
kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false); kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
costs[i] += satd_func(pred, orig_block); costs[i] += satd_func(pred, orig_block);
} }
for (int i = 5; i < 8; i++) {
assert(state->encoder_control->cfg.cclm);
kvz_predict_cclm(
state,
COLOR_V, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params);
}
kvz_sort_modes(modes, costs, 5); kvz_sort_modes(modes, costs, 5);
} }
@ -744,7 +759,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
// Reset transform split data in lcu.cu for this area. // Reset transform split data in lcu.cu for this area.
kvz_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth); kvz_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth);
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu, -1); double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu, NULL, -1);
costs[rdo_mode] += mode_cost; costs[rdo_mode] += mode_cost;
trafo[rdo_mode] = pred_cu.tr_idx; trafo[rdo_mode] = pred_cu.tr_idx;
@ -769,7 +784,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
pred_cu.intra.mode = modes[0]; pred_cu.intra.mode = modes[0];
pred_cu.intra.mode_chroma = modes[0]; pred_cu.intra.mode_chroma = modes[0];
FILL(pred_cu.cbf, 0); FILL(pred_cu.cbf, 0);
search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, trafo[0]); search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, NULL, trafo[0]);
} }
return modes_to_check; return modes_to_check;
@ -810,8 +825,20 @@ double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
if (chroma_mode == luma_mode) { if (chroma_mode == luma_mode) {
mode_bits = CTX_ENTROPY_FBITS(ctx, 0); mode_bits = CTX_ENTROPY_FBITS(ctx, 0);
} else { } else {
if(chroma_mode > 67) {
mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1); mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1);
} }
else {
ctx = &(state->cabac.ctx.cclm_model);
mode_bits = CTX_ENTROPY_FBITS(ctx, chroma_mode != 81);
if (chroma_mode != 81) mode_bits += 1;
}
}
// Technically this is encoded first but for this method of counting bits it does not matter
if(state->encoder_control->cfg.cclm) {
ctx = &(state->cabac.ctx.cclm_flag);
mode_bits += CTX_ENTROPY_FBITS(ctx, chroma_mode > 67);
}
return mode_bits; return mode_bits;
} }
@ -820,31 +847,87 @@ double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state, int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
int x_px, int y_px, int depth, int x_px, int y_px, int depth,
int8_t intra_mode, int8_t intra_mode,
int8_t modes[5], int8_t num_modes, int8_t modes[8], int8_t num_modes,
lcu_t *const lcu) lcu_t *const lcu, cclm_parameters_t *best_cclm)
{ {
const bool reconstruct_chroma = (depth != 4) || (x_px & 4 && y_px & 4); const bool reconstruct_chroma = (depth != 4) || (x_px & 4 && y_px & 4);
kvz_intra_references refs[2];
const vector2d_t luma_px = { x_px & ~7, y_px & ~7 };
const vector2d_t pic_px = {
state->tile->frame->width,
state->tile->frame->height,
};
if (reconstruct_chroma) { if (reconstruct_chroma) {
int c_width = MAX(32 >> (depth), 4);
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp);
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp);
cclm_parameters_t cclm_params[2] = { 0 };
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
struct { struct {
double cost; double cost;
int8_t mode; int8_t mode;
cclm_parameters_t cclm[2];
} chroma, best_chroma; } chroma, best_chroma;
// chroma.cclm = cclm_params;
best_chroma.mode = 0; best_chroma.mode = 0;
best_chroma.cost = MAX_INT; best_chroma.cost = MAX_INT;
for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) { for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) {
chroma.mode = modes[chroma_mode_i]; chroma.mode = modes[chroma_mode_i];
if (chroma.mode == -1) continue;
if(chroma.mode < 67 || depth == 0) {
kvz_intra_recon_cu(state, kvz_intra_recon_cu(state,
x_px, y_px, x_px, y_px,
depth, depth,
-1, chroma.mode, // skip luma -1, chroma.mode, // skip luma
NULL, lcu); NULL, NULL, lcu);
}
else {
kvz_predict_cclm(
state, COLOR_U,
c_width, c_width,
x_px & ~7, y_px & ~7,
state->tile->frame->source->stride,
chroma.mode,
lcu,
&refs[0], NULL,
&cclm_params[0]);
chroma.cclm[0] = cclm_params[0];
kvz_predict_cclm(
state, COLOR_V,
c_width, c_width,
x_px & ~7, y_px & ~7,
state->tile->frame->source->stride,
chroma.mode,
lcu,
&refs[1], NULL,
&cclm_params[1]);
chroma.cclm[1] = cclm_params[1];
kvz_intra_recon_cu(
state,
x_px, y_px,
depth,
-1, chroma.mode, // skip luma
NULL, cclm_params, lcu
);
}
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode); double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode);
@ -854,6 +937,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
best_chroma = chroma; best_chroma = chroma;
} }
} }
best_cclm[0] = best_chroma.cclm[0];
best_cclm[1] = best_chroma.cclm[1];
return best_chroma.mode; return best_chroma.mode;
} }
@ -864,15 +949,15 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
const int x_px, const int y_px, const int x_px, const int y_px,
const int depth, lcu_t *lcu) const int depth, lcu_t *lcu, cclm_parameters_t *best_cclm)
{ {
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
int8_t intra_mode = cur_pu->intra.mode; int8_t intra_mode = cur_pu->intra.mode;
double costs[5]; double costs[8];
int8_t modes[5] = { 0, 50, 18, 1, 67 }; int8_t modes[8] = { 0, 50, 18, 1, -1, 81, 82, 83 };
if (intra_mode != 0 && intra_mode != 50 && intra_mode != 18 && intra_mode != 1) { if (intra_mode != 0 && intra_mode != 50 && intra_mode != 18 && intra_mode != 1) {
modes[4] = intra_mode; modes[4] = intra_mode;
} }
@ -884,14 +969,14 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
const int8_t modes_in_depth[5] = { 1, 1, 1, 1, 2 }; const int8_t modes_in_depth[5] = { 1, 1, 1, 1, 2 };
int num_modes = modes_in_depth[depth]; int num_modes = modes_in_depth[depth];
if (state->encoder_control->cfg.rdo == 3) { if (state->encoder_control->cfg.rdo >= 3) {
num_modes = modes[4] == intra_mode ? 5 : 4; num_modes = state->encoder_control->cfg.cclm ? 8 : 5;
} }
// Don't do rough mode search if all modes are selected. // Don't do rough mode search if all modes are selected.
// FIXME: It might make more sense to only disable rough search if // FIXME: It might make more sense to only disable rough search if
// num_modes is 0.is 0. // num_modes is 0.is 0.
if (num_modes != 1 && num_modes != 5 && num_modes != 4) { if (num_modes != 1 && num_modes != 5 && num_modes != 4 && num_modes != 8) {
const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2); const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2);
const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
const vector2d_t luma_px = { x_px, y_px }; const vector2d_t luma_px = { x_px, y_px };
@ -909,12 +994,12 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
search_intra_chroma_rough(state, x_px, y_px, depth, search_intra_chroma_rough(state, x_px, y_px, depth,
ref_u, ref_v, LCU_WIDTH_C, ref_u, ref_v, LCU_WIDTH_C,
&refs_u, &refs_v, &refs_u, &refs_v,
intra_mode, modes, costs); intra_mode, modes, costs, lcu);
} }
int8_t intra_mode_chroma = intra_mode; int8_t intra_mode_chroma = intra_mode;
if (num_modes > 1) { if (num_modes > 1) {
intra_mode_chroma = kvz_search_intra_chroma_rdo(state, x_px, y_px, depth, intra_mode, modes, num_modes, lcu); intra_mode_chroma = kvz_search_intra_chroma_rdo(state, x_px, y_px, depth, intra_mode, modes, num_modes, lcu, best_cclm);
} }
return intra_mode_chroma; return intra_mode_chroma;
@ -969,7 +1054,7 @@ void kvz_search_cu_intra(encoder_state_t * const state,
kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
int8_t number_of_modes = 0; int8_t number_of_modes = 0;
bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 3); bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4);
if (!skip_rough_search) { if (!skip_rough_search) {
number_of_modes = search_intra_rough(state, number_of_modes = search_intra_rough(state,
ref_pixels, LCU_WIDTH, ref_pixels, LCU_WIDTH,
@ -990,9 +1075,9 @@ void kvz_search_cu_intra(encoder_state_t * const state,
const int32_t rdo_level = state->encoder_control->cfg.rdo; const int32_t rdo_level = state->encoder_control->cfg.rdo;
if (rdo_level >= 2 || skip_rough_search) { if (rdo_level >= 2 || skip_rough_search) {
int number_of_modes_to_search; int number_of_modes_to_search;
if (rdo_level == 3) { if (rdo_level == 4) {
number_of_modes_to_search = 67; number_of_modes_to_search = 67;
} else if (rdo_level == 2) { } else if (rdo_level == 2 || rdo_level == 3) {
number_of_modes_to_search = (cu_width == 4) ? 3 : 2; number_of_modes_to_search = (cu_width == 4) ? 3 : 2;
} else { } else {
// Check only the predicted modes. // Check only the predicted modes.

View file

@ -41,6 +41,7 @@
#include "cu.h" #include "cu.h"
#include "encoderstate.h" #include "encoderstate.h"
#include "global.h" // IWYU pragma: keep #include "global.h" // IWYU pragma: keep
#include "intra.h"
double kvz_luma_mode_bits(const encoder_state_t *state, double kvz_luma_mode_bits(const encoder_state_t *state,
@ -51,7 +52,7 @@ double kvz_chroma_mode_bits(const encoder_state_t *state,
int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state, int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
const int x_px, const int y_px, const int x_px, const int y_px,
const int depth, lcu_t *lcu); const int depth, lcu_t *lcu, cclm_parameters_t* best_cclm);
void kvz_search_cu_intra(encoder_state_t * const state, void kvz_search_cu_intra(encoder_state_t * const state,
const int x_px, const int y_px, const int x_px, const int y_px,

View file

@ -46,7 +46,7 @@
videoframe_t * kvz_videoframe_alloc(int32_t width, videoframe_t * kvz_videoframe_alloc(int32_t width,
int32_t height, int32_t height,
enum kvz_chroma_format chroma_format, enum kvz_chroma_format chroma_format,
enum kvz_alf alf_type) enum kvz_alf alf_type, bool cclm)
{ {
videoframe_t *frame = calloc(1, sizeof(videoframe_t)); videoframe_t *frame = calloc(1, sizeof(videoframe_t));
if (!frame) return 0; if (!frame) return 0;
@ -59,6 +59,11 @@ videoframe_t * kvz_videoframe_alloc(int32_t width,
frame->sao_luma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); frame->sao_luma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu);
if (chroma_format != KVZ_CSP_400) { if (chroma_format != KVZ_CSP_400) {
frame->sao_chroma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); frame->sao_chroma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu);
if (cclm) {
assert(chroma_format == KVZ_CSP_420);
frame->cclm_luma_rec = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 7) & ~7) + FRAME_PADDING_LUMA) / 4);
frame->cclm_luma_rec_top_line = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) / 2 * CEILDIV(height, 64));
}
} }
return frame; return frame;
@ -76,6 +81,12 @@ int kvz_videoframe_free(videoframe_t * const frame)
kvz_image_free(frame->rec_lmcs); kvz_image_free(frame->rec_lmcs);
frame->source_lmcs_mapped = false; frame->source_lmcs_mapped = false;
} }
if(frame->cclm_luma_rec) {
FREE_POINTER(frame->cclm_luma_rec);
}
if(frame->cclm_luma_rec_top_line) {
FREE_POINTER(frame->cclm_luma_rec_top_line);
}
kvz_image_free(frame->source); kvz_image_free(frame->source);
frame->source = NULL; frame->source = NULL;

View file

@ -53,6 +53,9 @@ typedef struct videoframe
kvz_picture *rec; //!< \brief Reconstructed image. kvz_picture *rec; //!< \brief Reconstructed image.
kvz_picture *rec_lmcs; //!< \brief LMCS mapped reconstructed image, if available, otherwise points to source. kvz_picture *rec_lmcs; //!< \brief LMCS mapped reconstructed image, if available, otherwise points to source.
kvz_pixel *cclm_luma_rec; //!< \brief buffer for the downsampled luma reconstruction for cclm
kvz_pixel *cclm_luma_rec_top_line; //!< \brief buffer for the downsampled luma reconstruction for cclm
uint8_t* lmcs_avg_processed; //!< \brief For each LCU, indicates if already calculated average of border pixels is available uint8_t* lmcs_avg_processed; //!< \brief For each LCU, indicates if already calculated average of border pixels is available
int32_t* lmcs_avg; //!< \brief Average of LCU border pixels int32_t* lmcs_avg; //!< \brief Average of LCU border pixels
@ -78,7 +81,7 @@ typedef struct videoframe
} videoframe_t; } videoframe_t;
videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type); videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type, bool cclm);
int kvz_videoframe_free(videoframe_t * const frame); int kvz_videoframe_free(videoframe_t * const frame);
void kvz_videoframe_set_poc(videoframe_t * frame, int32_t poc); void kvz_videoframe_set_poc(videoframe_t * frame, int32_t poc);

View file

@ -10,8 +10,9 @@ common_args='256x128 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-wpp --no
valgrind_test $common_args --rd=1 valgrind_test $common_args --rd=1
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 valgrind_test $common_args --rd=2 --no-transform-skip --qp 37
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq
valgrind_test $common_args --rd=3
valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0 valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0
valgrind_test $common_args --alf=full --wpp --threads=1 valgrind_test $common_args --alf=full --wpp --threads=1
valgrind_test $common_args --jccr
valgrind_test $common_args --jccr --rdoq --rd=2 --mts=intra valgrind_test $common_args --jccr --rdoq --rd=2 --mts=intra
valgrind_test $common_args --rd=3 --cclm --jccr