mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
Merge branch 'CCLM' into 'master'
CCLM implementation See merge request cs/ultravideo/vvc/uvg266!7
This commit is contained in:
commit
7aeef8e9b8
|
@ -23,7 +23,7 @@ AC_CONFIG_SRCDIR([src/encmain.c])
|
|||
#
|
||||
# Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html
|
||||
ver_major=6
|
||||
ver_minor=6
|
||||
ver_minor=7
|
||||
ver_release=0
|
||||
|
||||
# Prevents configure from adding a lot of defines to the CFLAGS
|
||||
|
|
|
@ -117,6 +117,8 @@ typedef struct
|
|||
cabac_ctx_t transform_skip_gt1[4];
|
||||
cabac_ctx_t transform_skip_par;
|
||||
cabac_ctx_t transform_skip_gt2[5];
|
||||
cabac_ctx_t cclm_flag;
|
||||
cabac_ctx_t cclm_model;
|
||||
|
||||
} ctx;
|
||||
} cabac_data_t;
|
||||
|
|
|
@ -209,6 +209,8 @@ int kvz_config_init(kvz_config *cfg)
|
|||
|
||||
cfg->amvr = 0;
|
||||
|
||||
cfg->cclm = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -1486,6 +1488,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
|||
else if OPT("amvr") {
|
||||
cfg->amvr = (bool)atobool(value);
|
||||
}
|
||||
else if OPT("cclm") {
|
||||
cfg->cclm = (bool)atobool(value);
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
|
|
14
src/cli.c
14
src/cli.c
|
@ -178,6 +178,8 @@ static const struct option long_options[] = {
|
|||
{ "no-jccr", no_argument, NULL, 0 },
|
||||
{ "amvr", no_argument, NULL, 0 },
|
||||
{ "no-amvr", no_argument, NULL, 0 },
|
||||
{ "cclm", no_argument, NULL, 0 },
|
||||
{ "no-cclm", no_argument, NULL, 0 },
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
|
||||
|
@ -550,8 +552,8 @@ void print_help(void)
|
|||
" - 0: Skip intra if inter is good enough.\n"
|
||||
" - 1: Rough intra mode search with SATD.\n"
|
||||
" - 2: Refine intra mode search with SSE.\n"
|
||||
" - 3: Try all intra modes and enable intra\n"
|
||||
" chroma mode search.\n"
|
||||
" - 3: Enable intra chroma mode search.\n"
|
||||
" - 4: Try all intra modes.\n"
|
||||
" --(no-)mv-rdo : Rate-distortion optimized motion vector costs\n"
|
||||
" [disabled]\n"
|
||||
" --(no-)zero-coeff-rdo : If a CU is set inter, check if forcing zero\n"
|
||||
|
@ -629,8 +631,12 @@ void print_help(void)
|
|||
" - both: MTS applied for both intra and inter blocks.\n"
|
||||
" - implicit: uses implicit MTS. Applies DST7 instead \n"
|
||||
" of DCT2 to certain intra blocks.\n"
|
||||
" --(no-)jccr : Joint coding of chroma residual.\n"
|
||||
" Requires rdo> = 2. [disabled]\n"
|
||||
" --(no-)jccr : Joint coding of chroma residual. "
|
||||
" Requires rdo> = 2. [disabled]\n"
|
||||
" --(no-)cclm : Cross component linear model. \n"
|
||||
" Extra chroma prediction modes that are formed\n"
|
||||
" via linear transformation from the luma\n"
|
||||
" prediction. Requires rdo >=3. [disabled\n"
|
||||
" --(no-)amvr : Adaptive Motion Vector Resolution.\n"
|
||||
" Code some mv's with reduced resolution [disabled]\n"
|
||||
"\n"
|
||||
|
|
|
@ -395,6 +395,20 @@ static const uint8_t INIT_IMV_FLAG[4][5] = {
|
|||
{ 0, 5, 0, 0, 4, },
|
||||
};
|
||||
|
||||
/*
 * Initialisation values for the cclm_flag CABAC context.
 * kvz_init_contexts() selects one of entries 0..2 with the slice index and
 * always passes entry 3 as the last argument of kvz_ctx_init()
 * (presumably the adaptation rate of the context -- confirm against kvz_ctx_init).
 * The table is one-dimensional, so the values are listed without per-element braces.
 */
static const uint8_t INIT_CCLM_FLAG[4] = { 26, 34, 59, 4 };
|
||||
|
||||
/*
 * Initialisation values for the cclm_model CABAC context.
 * kvz_init_contexts() selects one of entries 0..2 with the slice index and
 * always passes entry 3 as the last argument of kvz_ctx_init()
 * (presumably the adaptation rate of the context -- confirm against kvz_ctx_init).
 * The table is one-dimensional, so the values are listed without per-element braces.
 */
static const uint8_t INIT_CCLM_MODEL[4] = { 27, 27, 27, 9 };
|
||||
|
||||
/*
|
||||
static const uint16_t g_inistateToCount[128] = {
|
||||
614, 647, 681, 718, 756, 797, 839, 884, 932, 982, 1034, 1089, 1148, 1209, 1274, 1342,
|
||||
|
@ -471,6 +485,9 @@ void kvz_init_contexts(encoder_state_t *state, int8_t QP, int8_t slice)
|
|||
|
||||
kvz_ctx_init(&cabac->ctx.chroma_pred_model, QP, INIT_CHROMA_PRED_MODE[slice], INIT_CHROMA_PRED_MODE[3]);
|
||||
|
||||
kvz_ctx_init(&cabac->ctx.cclm_flag, QP, INIT_CCLM_FLAG[slice], INIT_CCLM_FLAG[3]);
|
||||
kvz_ctx_init(&cabac->ctx.cclm_model, QP, INIT_CCLM_MODEL[slice], INIT_CCLM_MODEL[3]);
|
||||
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
kvz_ctx_init(&cabac->ctx.cu_skip_flag_model[i], QP, INIT_SKIP_FLAG[slice][i], INIT_SKIP_FLAG[3][i]);
|
||||
|
|
|
@ -701,7 +701,7 @@ static bool encode_inter_prediction_unit(encoder_state_t * const state,
|
|||
return non_zero_mvd;
|
||||
}
|
||||
|
||||
static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* const cur_cu, int x, int y, const videoframe_t* const frame, const int cu_width) {
|
||||
static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* const cur_cu, int x, int y, const videoframe_t* const frame, const int cu_width, const int cclm_enabled) {
|
||||
unsigned pred_mode = 0;
|
||||
unsigned chroma_pred_modes[8] = {0, 50, 18, 1, 67, 81, 82, 83};
|
||||
const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, 0);
|
||||
|
@ -710,7 +710,23 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c
|
|||
int8_t chroma_intra_dir = first_pu->intra.mode_chroma;
|
||||
int8_t luma_intra_dir = first_pu->intra.mode;
|
||||
|
||||
|
||||
bool derived_mode = chroma_intra_dir == luma_intra_dir;
|
||||
bool cclm_mode = chroma_intra_dir > 67;
|
||||
|
||||
if (cclm_enabled) {
|
||||
cabac->cur_ctx = &cabac->ctx.cclm_flag;
|
||||
CABAC_BIN(cabac, cclm_mode, "cclm_flag");
|
||||
if(cclm_mode) {
|
||||
cabac->cur_ctx = &cabac->ctx.cclm_model;
|
||||
CABAC_BIN(cabac, chroma_intra_dir != 81, "cclm_model_1");
|
||||
if(chroma_intra_dir != 81) {
|
||||
CABAC_BIN_EP(cabac, chroma_intra_dir == 83, "cclm_model_2");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
cabac->cur_ctx = &(cabac->ctx.chroma_pred_model);
|
||||
CABAC_BIN(cabac, derived_mode ? 0 : 1, "intra_chroma_pred_mode");
|
||||
|
||||
|
@ -722,7 +738,7 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c
|
|||
break;
|
||||
}
|
||||
}*/
|
||||
for (; pred_mode < 8; pred_mode++) {
|
||||
for (; pred_mode < 5; pred_mode++) {
|
||||
if (chroma_intra_dir == chroma_pred_modes[pred_mode]) {
|
||||
break;
|
||||
}
|
||||
|
@ -983,7 +999,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
|
|||
|
||||
// Code chroma prediction mode.
|
||||
if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth != 4) {
|
||||
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width);
|
||||
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm);
|
||||
}
|
||||
|
||||
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 0, coeff);
|
||||
|
@ -991,7 +1007,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
|
|||
encode_mts_idx(state, cabac, cur_cu);
|
||||
|
||||
if (state->encoder_control->chroma_format != KVZ_CSP_400 && depth == 4 && x % 8 && y % 8) {
|
||||
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width);
|
||||
encode_chroma_intra_cu(cabac, cur_cu, x, y, frame, cu_width, state->encoder_control->cfg.cclm);
|
||||
encode_transform_coeff(state, x, y, depth, 0, 0, 0, 1, coeff);
|
||||
}
|
||||
|
||||
|
|
|
@ -722,7 +722,7 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
|
|||
WRITE_U(stream, 0, 1, "sps_mip_enabled_flag");
|
||||
// if(!no_cclm_constraint_flag)
|
||||
if(encoder->chroma_format != KVZ_CSP_400) {
|
||||
WRITE_U(stream, 0, 1, "sps_cclm_enabled_flag");
|
||||
WRITE_U(stream, encoder->cfg.cclm, 1, "sps_cclm_enabled_flag");
|
||||
}
|
||||
if (encoder->chroma_format == KVZ_CSP_420) {
|
||||
WRITE_U(stream, 0, 1, "sps_chroma_horizontal_collocated_flag");
|
||||
|
|
|
@ -122,7 +122,7 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
|
|||
const int width, const int height, const int width_in_lcu, const int height_in_lcu) {
|
||||
|
||||
const encoder_control_t * const encoder = state->encoder_control;
|
||||
state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type);
|
||||
state->tile->frame = kvz_videoframe_alloc(width, height, state->encoder_control->chroma_format, encoder->cfg.alf_type, encoder->cfg.cclm);
|
||||
|
||||
state->tile->frame->rec = NULL;
|
||||
|
||||
|
|
330
src/intra.c
330
src/intra.c
|
@ -248,6 +248,300 @@ static void intra_pred_dc(
|
|||
}
|
||||
|
||||
|
||||
/**
 * \brief Chroma intra mode numbers of the cross-component linear model (CCLM) modes.
 *
 * kvz_predict_cclm() asserts that its mode argument is one of these values.
 */
enum lm_mode
|
||||
{
|
||||
LM_CHROMA_IDX = 81, // model derived from both the above and the left reference samples
|
||||
LM_CHROMA_L_IDX = 82, // get_cclm_parameters() ignores the above samples for this mode
|
||||
LM_CHROMA_T_IDX = 83, // get_cclm_parameters() ignores the left samples for this mode
|
||||
};
|
||||
|
||||
|
||||
static void get_cclm_parameters(
|
||||
encoder_state_t const* const state,
|
||||
int8_t width, int8_t height, int8_t mode,
|
||||
int x0, int y0, int avai_above_right_units, int avai_left_below_units,
|
||||
kvz_intra_ref* luma_src, kvz_intra_references*chroma_ref,
|
||||
int16_t *a, int16_t*b, int16_t*shift) {
|
||||
|
||||
const int base_unit_size = 1 << (6 - PU_DEPTH_INTRA_MAX);
|
||||
|
||||
// TODO: take into account YUV422
|
||||
const int unit_w = base_unit_size >> 1;
|
||||
const int unit_h = base_unit_size >> 1;
|
||||
|
||||
const int c_height = height;
|
||||
const int c_width = width;
|
||||
height *= 2;
|
||||
width *= 2;
|
||||
|
||||
const int tu_width_in_units = c_width / unit_w;
|
||||
const int tu_height_in_units = c_height / unit_h;
|
||||
|
||||
|
||||
int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H;
|
||||
int left_template_samp_num = height;
|
||||
|
||||
// These are used for calculating some stuff for non-square CUs
|
||||
//int total_above_units = (top_template_samp_num + (unit_w - 1)) / unit_w;
|
||||
//int total_left_units = (left_template_samp_num + (unit_h - 1)) / unit_h;
|
||||
//int total_units = total_left_units + total_above_units + 1;
|
||||
//int above_right_units = total_above_units - tu_width_in_units;
|
||||
//int left_below_units = total_left_units - tu_height_in_units;
|
||||
//int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs
|
||||
//int avai_left_below_units = 0;
|
||||
int avai_above_units = CLIP(0, tu_height_in_units, y0/base_unit_size);
|
||||
int avai_left_units = CLIP(0, tu_width_in_units, x0 / base_unit_size);
|
||||
|
||||
bool above_available = avai_above_units != 0;
|
||||
bool left_available = avai_left_units != 0;
|
||||
|
||||
char internal_bit_depth = state->encoder_control->bitdepth;
|
||||
|
||||
int min_luma[2] = { MAX_INT, 0 };
|
||||
int max_luma[2] = { -MAX_INT, 0 };
|
||||
|
||||
kvz_pixel* src;
|
||||
int actualTopTemplateSampNum = 0;
|
||||
int actualLeftTemplateSampNum = 0;
|
||||
if (mode == LM_CHROMA_T_IDX)
|
||||
{
|
||||
left_available = 0;
|
||||
avai_above_right_units = avai_above_right_units > (c_height / unit_w) ? c_height / unit_w : avai_above_right_units;
|
||||
actualTopTemplateSampNum = unit_w * (avai_above_units + avai_above_right_units);
|
||||
}
|
||||
else if (mode == LM_CHROMA_L_IDX)
|
||||
{
|
||||
above_available = 0;
|
||||
avai_left_below_units = avai_left_below_units > (c_width / unit_h) ? c_width / unit_h : avai_left_below_units;
|
||||
actualLeftTemplateSampNum = unit_h * (avai_left_units + avai_left_below_units);
|
||||
}
|
||||
else if (mode == LM_CHROMA_IDX)
|
||||
{
|
||||
actualTopTemplateSampNum = c_width;
|
||||
actualLeftTemplateSampNum = c_height;
|
||||
}
|
||||
int startPos[2]; //0:Above, 1: Left
|
||||
int pickStep[2];
|
||||
|
||||
int aboveIs4 = left_available ? 0 : 1;
|
||||
int leftIs4 = above_available ? 0 : 1;
|
||||
|
||||
startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
|
||||
pickStep[0] = MAX(1, actualTopTemplateSampNum >> (1 + aboveIs4));
|
||||
|
||||
startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
|
||||
pickStep[1] = MAX(1, actualLeftTemplateSampNum >> (1 + leftIs4));
|
||||
|
||||
kvz_pixel selectLumaPix[4] = { 0, 0, 0, 0 };
|
||||
kvz_pixel selectChromaPix[4] = { 0, 0, 0, 0 };
|
||||
|
||||
int cntT, cntL;
|
||||
cntT = cntL = 0;
|
||||
int cnt = 0;
|
||||
if (above_available)
|
||||
{
|
||||
cntT = MIN(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
|
||||
src = luma_src->top;
|
||||
const kvz_pixel* cur = chroma_ref->ref.top + 1;
|
||||
for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
|
||||
{
|
||||
selectLumaPix[cnt] = src[pos];
|
||||
selectChromaPix[cnt] = cur[pos];
|
||||
}
|
||||
}
|
||||
|
||||
if (left_available)
|
||||
{
|
||||
cntL = MIN(actualLeftTemplateSampNum, (1 + leftIs4) << 1);
|
||||
src = luma_src->left;
|
||||
const kvz_pixel* cur = chroma_ref->ref.left + 1;
|
||||
for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
|
||||
{
|
||||
selectLumaPix[cnt + cntT] = src[pos];
|
||||
selectChromaPix[cnt + cntT] = cur[pos];
|
||||
}
|
||||
}
|
||||
cnt = cntL + cntT;
|
||||
|
||||
if (cnt == 2)
|
||||
{
|
||||
selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
|
||||
selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
|
||||
selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
|
||||
selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
|
||||
}
|
||||
|
||||
int minGrpIdx[2] = { 0, 2 };
|
||||
int maxGrpIdx[2] = { 1, 3 };
|
||||
int* tmpMinGrp = minGrpIdx;
|
||||
int* tmpMaxGrp = maxGrpIdx;
|
||||
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]])
|
||||
{
|
||||
SWAP(tmpMinGrp[0], tmpMinGrp[1], int);
|
||||
}
|
||||
if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]])
|
||||
{
|
||||
SWAP(tmpMaxGrp[0], tmpMaxGrp[1], int);
|
||||
}
|
||||
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]])
|
||||
{
|
||||
SWAP(tmpMinGrp, tmpMaxGrp, int*);
|
||||
}
|
||||
if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]])
|
||||
{
|
||||
SWAP(tmpMinGrp[1], tmpMaxGrp[0], int);
|
||||
}
|
||||
|
||||
min_luma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1) >> 1;
|
||||
min_luma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
|
||||
max_luma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1) >> 1;
|
||||
max_luma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
|
||||
|
||||
if (left_available || above_available)
|
||||
{
|
||||
int diff = max_luma[0] - min_luma[0];
|
||||
if (diff > 0)
|
||||
{
|
||||
int diffC = max_luma[1] - min_luma[1];
|
||||
int x = kvz_math_floor_log2(diff);
|
||||
static const uint8_t DivSigTable[1 << 4] = {
|
||||
// 4bit significands - 8 ( MSB is omitted )
|
||||
0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0
|
||||
};
|
||||
int normDiff = (diff << 4 >> x) & 15;
|
||||
int v = DivSigTable[normDiff] | 8;
|
||||
x += normDiff != 0;
|
||||
|
||||
int y = diffC ? kvz_math_floor_log2(abs(diffC)) + 1 : 0;
|
||||
int add = 1 << y >> 1;
|
||||
*a = (diffC * v + add) >> y;
|
||||
*shift = 3 + x - y;
|
||||
if (*shift < 1)
|
||||
{
|
||||
*shift = 1;
|
||||
*a = ((*a == 0) ? 0 : (*a < 0) ? -15 : 15); // a=Sign(a)*15
|
||||
}
|
||||
*b = min_luma[1] - ((*a * min_luma[0]) >> *shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
*a = 0;
|
||||
*b = min_luma[1];
|
||||
*shift = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
*a = 0;
|
||||
|
||||
*b = 1 << (internal_bit_depth - 1);
|
||||
|
||||
*shift = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Apply a CCLM linear model to a block of samples.
 *
 * Every sample is mapped to ((sample * a) >> shift) + b and clipped with
 * CLIP_TO_PIXEL. Each output sample depends only on the input sample at the
 * same position, so src and dst may point to the same buffer.
 *
 * \param cclm_params  model parameters (a, shift, b)
 * \param src          input samples, `stride` samples per row
 * \param dst          output samples, same layout as src
 * \param stride       row length; also used as the number of samples processed per row
 * \param height       number of rows
 */
static void linear_transform_cclm(cclm_parameters_t* cclm_params, kvz_pixel * src, kvz_pixel * dst, int stride, int height) {
  const int a = cclm_params->a;
  const int b = cclm_params->b;
  const int down_shift = cclm_params->shift;

  for (int row = 0; row < height; ++row) {
    const kvz_pixel *in = src + row * stride;
    kvz_pixel *out = dst + row * stride;

    for (int col = 0; col < stride; ++col) {
      const int mapped = ((in[col] * a) >> down_shift) + b;
      out[col] = CLIP_TO_PIXEL(mapped);
    }
  }
}
|
||||
|
||||
|
||||
void kvz_predict_cclm(
|
||||
encoder_state_t const* const state,
|
||||
const color_t color,
|
||||
const int8_t width,
|
||||
const int8_t height,
|
||||
const int16_t x0,
|
||||
const int16_t y0,
|
||||
const int16_t stride,
|
||||
const int8_t mode,
|
||||
lcu_t* const lcu,
|
||||
kvz_intra_references* chroma_ref,
|
||||
kvz_pixel* dst,
|
||||
cclm_parameters_t* cclm_params
|
||||
)
|
||||
{
|
||||
assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX);
|
||||
assert(state->encoder_control->cfg.cclm);
|
||||
|
||||
|
||||
kvz_intra_ref sampled_luma_ref;
|
||||
kvz_pixel sampled_luma[LCU_CHROMA_SIZE];
|
||||
|
||||
int x_scu = SUB_SCU(x0);
|
||||
int y_scu = SUB_SCU(y0);
|
||||
|
||||
int available_above_right = 0;
|
||||
int available_left_below = 0;
|
||||
|
||||
|
||||
kvz_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH;
|
||||
|
||||
// Essentially what this does is that it uses 6-tap filtering to downsample
|
||||
// the luma intra references down to match the resolution of the chroma channel.
|
||||
// The luma reference is only needed when we are not on the edge of the picture.
|
||||
// Because the reference pixels that are needed on the edge of the ctu this code
|
||||
// is kinda messy but what can you do
|
||||
|
||||
if (y0) {
|
||||
for (; available_above_right < width / 2; available_above_right++) {
|
||||
int x_extension = x_scu + width * 2 + 4 * available_above_right;
|
||||
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4);
|
||||
if (x_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
|
||||
}
|
||||
if(y_scu == 0) {
|
||||
if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4);
|
||||
memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride / 2)], sizeof(kvz_pixel) * (width + available_above_right * 2));
|
||||
}
|
||||
else {
|
||||
for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) {
|
||||
bool left_padding = x0 || x;
|
||||
int s = 4;
|
||||
s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 2) * stride] * 2;
|
||||
s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride];
|
||||
s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride];
|
||||
s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2;
|
||||
s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride];
|
||||
s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride];
|
||||
sampled_luma_ref.top[x / 2] = s >> 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(x0) {
|
||||
for (; available_left_below < height / 2; available_left_below++) {
|
||||
int y_extension = y_scu + height * 2 + 4 * available_left_below;
|
||||
cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_scu - 4, y_extension);
|
||||
if (y_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
|
||||
if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break;
|
||||
}
|
||||
for(int i = 0; i < height + available_left_below * 2; i++) {
|
||||
sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride/2) + x0 / 2 - 1];
|
||||
}
|
||||
}
|
||||
|
||||
kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride) / 4], sampled_luma, width, height, stride / 2, width);
|
||||
|
||||
int16_t a, b, shift;
|
||||
get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift);
|
||||
cclm_params->shift = shift;
|
||||
cclm_params->a = a;
|
||||
cclm_params->b = b;
|
||||
|
||||
if(dst)
|
||||
linear_transform_cclm(cclm_params, sampled_luma, dst, width, height);
|
||||
}
|
||||
|
||||
void kvz_intra_predict(
|
||||
encoder_state_t *const state,
|
||||
kvz_intra_references *refs,
|
||||
|
@ -573,6 +867,7 @@ static void intra_recon_tb_leaf(
|
|||
int y,
|
||||
int depth,
|
||||
int8_t intra_mode,
|
||||
cclm_parameters_t *cclm_params,
|
||||
lcu_t *lcu,
|
||||
color_t color)
|
||||
{
|
||||
|
@ -592,14 +887,29 @@ static void intra_recon_tb_leaf(
|
|||
state->tile->frame->width,
|
||||
state->tile->frame->height,
|
||||
};
|
||||
const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift};
|
||||
int x_scu = SUB_SCU(x);
|
||||
int y_scu = SUB_SCU(y);
|
||||
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
|
||||
|
||||
kvz_intra_references refs;
|
||||
kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp);
|
||||
|
||||
kvz_pixel pred[32 * 32];
|
||||
int stride = state->tile->frame->source->stride;
|
||||
const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
|
||||
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary);
|
||||
if(intra_mode < 68) {
|
||||
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary);
|
||||
} else {
|
||||
kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width);
|
||||
if(cclm_params == NULL) {
|
||||
cclm_parameters_t temp_params;
|
||||
kvz_predict_cclm(
|
||||
state, color, width, width, x, y, stride, intra_mode, lcu, &refs, pred, &temp_params);
|
||||
}
|
||||
else {
|
||||
linear_transform_cclm(&cclm_params[color == COLOR_U ? 0 : 1], pred, pred, width, width);
|
||||
}
|
||||
}
|
||||
|
||||
const int index = lcu_px.x + lcu_px.y * lcu_width;
|
||||
kvz_pixel *block = NULL;
|
||||
|
@ -634,6 +944,7 @@ static void intra_recon_tb_leaf(
|
|||
* \param mode_luma intra mode for luma, or -1 to skip luma recon
|
||||
* \param mode_chroma intra mode for chroma, or -1 to skip chroma recon
|
||||
* \param cur_cu pointer to the CU, or NULL to fetch CU from LCU
|
||||
* \param cclm_params pointer for the cclm_parameters, can be NULL if the mode is not cclm mode
|
||||
* \param lcu containing LCU
|
||||
*/
|
||||
void kvz_intra_recon_cu(
|
||||
|
@ -644,6 +955,7 @@ void kvz_intra_recon_cu(
|
|||
int8_t mode_luma,
|
||||
int8_t mode_chroma,
|
||||
cu_info_t *cur_cu,
|
||||
cclm_parameters_t *cclm_params,
|
||||
lcu_t *lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
|
@ -668,10 +980,10 @@ void kvz_intra_recon_cu(
|
|||
const int32_t x2 = x + offset;
|
||||
const int32_t y2 = y + offset;
|
||||
|
||||
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
|
||||
// Propagate coded block flags from child CUs to parent CU.
|
||||
uint16_t child_cbfs[3] = {
|
||||
|
@ -692,11 +1004,11 @@ void kvz_intra_recon_cu(
|
|||
const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0);
|
||||
// Process a leaf TU.
|
||||
if (has_luma) {
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_luma, lcu, COLOR_Y);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y);
|
||||
}
|
||||
if (has_chroma) {
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_U);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, lcu, COLOR_V);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V);
|
||||
}
|
||||
|
||||
kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false);
|
||||
|
|
22
src/intra.h
22
src/intra.h
|
@ -54,6 +54,12 @@ typedef struct
|
|||
bool filtered_initialized;
|
||||
} kvz_intra_references;
|
||||
|
||||
/**
 * \brief Parameters of one CCLM linear model.
 *
 * linear_transform_cclm() computes each predicted sample as
 * ((luma * a) >> shift) + b.
 */
typedef struct
|
||||
{
|
||||
int16_t a; // scale applied to the luma sample
|
||||
int16_t shift; // right shift applied after scaling
|
||||
int16_t b; // offset added after the shift
|
||||
} cclm_parameters_t;
|
||||
|
||||
/**
|
||||
* \brief Function for deriving intra luma predictions
|
||||
|
@ -118,5 +124,21 @@ void kvz_intra_recon_cu(
|
|||
int8_t mode_luma,
|
||||
int8_t mode_chroma,
|
||||
cu_info_t *cur_cu,
|
||||
cclm_parameters_t* cclm_params,
|
||||
lcu_t *lcu);
|
||||
|
||||
|
||||
/**
 * \brief Derive CCLM parameters for a chroma block and optionally predict it.
 *
 * \param state        encoder state
 * \param color        colour component
 * \param width        block width in chroma samples
 * \param height       block height in chroma samples
 * \param x0           block x position
 * \param y0           block y position
 * \param stride       stride of the frame buffers
 * \param mode         CCLM chroma mode (81, 82 or 83)
 * \param lcu          LCU containing the block
 * \param chroma_ref   chroma reference samples of the block
 * \param dst          destination for the prediction; may be NULL to only derive the parameters
 * \param cclm_params  output: derived model parameters
 */
void kvz_predict_cclm(
|
||||
encoder_state_t const* const state,
|
||||
const color_t color,
|
||||
const int8_t width,
|
||||
const int8_t height,
|
||||
const int16_t x0,
|
||||
const int16_t y0,
|
||||
const int16_t stride,
|
||||
const int8_t mode,
|
||||
lcu_t* const lcu,
|
||||
kvz_intra_references* chroma_ref,
|
||||
kvz_pixel* dst,
|
||||
cclm_parameters_t* cclm_params
|
||||
);
|
|
@ -516,6 +516,8 @@ typedef struct kvz_config
|
|||
|
||||
int8_t jccr;
|
||||
|
||||
int8_t cclm;
|
||||
|
||||
int8_t amvr; /* \brief Adaptive motion vector resolution parameter */
|
||||
} kvz_config;
|
||||
|
||||
|
|
61
src/search.c
61
src/search.c
|
@ -241,6 +241,44 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
|
|||
}
|
||||
|
||||
|
||||
/**
 * \brief Downsample reconstructed luma of a block into the CCLM buffers.
 *
 * Does nothing unless CCLM is enabled. Each output sample is a six-tap
 * weighted sum over two luma rows, stored in frame->cclm_luma_rec. When the
 * block ends on a 64-row boundary, its last luma row is additionally filtered
 * with three taps into frame->cclm_luma_rec_top_line.
 *
 * \param state        encoder state
 * \param x            block x position in luma samples
 * \param y            block y position in luma samples
 * \param width        number of output samples per row
 * \param height       number of output rows
 * \param y_rec        luma reconstruction buffer of the LCU (row length LCU_WIDTH)
 * \param extra_pixel  left neighbour used for the first top-line sample when
 *                     the block starts at the left edge of an LCU
 */
static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec, kvz_pixel extra_pixel) {
  if (!state->encoder_control->cfg.cclm) {
    return;
  }

  const int x_scu = SUB_SCU(x);
  const int y_scu = SUB_SCU(y);
  const int stride = state->tile->frame->source->stride;

  // True when the pixel left of the first column is in the previous LCU,
  // i.e. not in y_rec but in the frame reconstruction.
  const int left_in_frame_buffer = !x_scu && x;

  kvz_pixel *row = y_rec + x_scu + y_scu * LCU_WIDTH;

  for (int cy = 0; cy < height && cy * 2 + y < state->encoder_control->cfg.height; cy++, row += LCU_WIDTH * 2) {
    for (int cx = 0; cx < width; cx++) {
      const kvz_pixel *upper = &row[2 * cx];
      const kvz_pixel *lower = upper + LCU_WIDTH;
      int left_upper;
      int left_lower;

      if (left_in_frame_buffer && !cx) {
        left_upper = state->tile->frame->rec->y[x - 1 + (y + cy * 2) * stride];
        left_lower = state->tile->frame->rec->y[x - 1 + (y + cy * 2 + 1) * stride];
      } else {
        // At the left edge of the frame the edge pixel itself is duplicated.
        const int back = (cx + x) > 0;
        left_upper = upper[-back];
        left_lower = lower[-back];
      }

      const int sum = 4
        + upper[0] * 2 + upper[1] + left_upper
        + lower[0] * 2 + lower[1] + left_lower;

      state->tile->frame->cclm_luma_rec[x / 2 + cx + (y / 2 + cy) * stride / 2] = sum >> 3;
    }
  }

  if ((y + height * 2) % 64 == 0) {
    const int line = y / 64 * stride / 2;
    // Step back to the last luma row that the loop above reached.
    const kvz_pixel *last = row - LCU_WIDTH;

    for (int i = 0; i < width; ++i) {
      int left;
      if (left_in_frame_buffer && !i) {
        left = extra_pixel;
      } else {
        left = last[i * 2 - ((i + x) > 0)];
      }

      const int sum = 2 + last[i * 2] * 2 + last[i * 2 + 1] + left;
      state->tile->frame->cclm_luma_rec_top_line[i + x / 2 + line] = sum >> 2;
    }
  }
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate RD cost for a Coding Unit.
|
||||
* \return Cost of block
|
||||
|
@ -709,7 +747,11 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
x, y,
|
||||
depth,
|
||||
cur_cu->intra.mode, -1, // skip chroma
|
||||
NULL, lcu);
|
||||
NULL, NULL, lcu);
|
||||
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
);
|
||||
|
||||
// TODO: This heavily relies on square CUs
|
||||
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != KVZ_CSP_400) {
|
||||
|
@ -717,8 +759,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// rd2. Possibly because the luma mode search already takes chroma
|
||||
// into account, so there is less of a chance of luma mode being
|
||||
// really bad for chroma.
|
||||
if (ctrl->cfg.rdo == 3) {
|
||||
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu);
|
||||
cclm_parameters_t cclm_params[2];
|
||||
if (ctrl->cfg.rdo >= 3) {
|
||||
cur_cu->intra.mode_chroma = kvz_search_cu_intra_chroma(state, x, y, depth, lcu, cclm_params);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
}
|
||||
|
||||
|
@ -726,7 +769,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
x & ~7, y & ~7, // TODO: as does this
|
||||
depth,
|
||||
-1, cur_cu->intra.mode_chroma, // skip luma
|
||||
NULL, lcu);
|
||||
NULL, cclm_params, lcu);
|
||||
}
|
||||
} else if (cur_cu->type == CU_INTER) {
|
||||
|
||||
|
@ -862,7 +905,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// gets used, at least in the most obvious cases, while avoiding any
|
||||
// searching.
|
||||
if (cur_cu->type == CU_NOTSET && depth < MAX_PU_DEPTH
|
||||
&& x + cu_width <= frame->width && y + cu_width <= frame->height)
|
||||
&& x + cu_width <= frame->width && y + cu_width <= frame->height && 0)
|
||||
{
|
||||
cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x_local, y_local);
|
||||
|
||||
|
@ -883,7 +926,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
x, y,
|
||||
depth,
|
||||
cur_cu->intra.mode, mode_chroma,
|
||||
NULL, lcu);
|
||||
NULL,NULL, lcu);
|
||||
|
||||
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
if (has_chroma) {
|
||||
|
@ -912,6 +955,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// Copy this CU's mode all the way down for use in adjacent CUs mode
|
||||
// search.
|
||||
work_tree_copy_down(x_local, y_local, depth, work_tree);
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
);
|
||||
|
||||
if (state->frame->slicetype != KVZ_SLICE_I) {
|
||||
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
|
||||
|
@ -924,6 +970,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
// Need to copy modes down since the lower level of the work tree is used
|
||||
// when searching SMP and AMP blocks.
|
||||
work_tree_copy_down(x_local, y_local, depth, work_tree);
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
);
|
||||
|
||||
if (state->frame->slicetype != KVZ_SLICE_I) {
|
||||
// Reset HMVP to the beginning of this CU level search and add this CU as the mvp
|
||||
|
|
|
@ -1937,7 +1937,7 @@ static void search_pu_inter(encoder_state_t * const state,
|
|||
}
|
||||
|
||||
// TODO: this probably should have a separate command line option
|
||||
if (cfg->rdo == 3) {
|
||||
if (cfg->rdo >= 3) {
|
||||
search_pu_inter_bipred(&info, depth, lcu, cur_cu, inter_cost, inter_bitcost);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -258,6 +258,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
|
|||
int intra_mode, int cost_treshold,
|
||||
cu_info_t *const pred_cu,
|
||||
lcu_t *const lcu,
|
||||
cclm_parameters_t *cclm_params,
|
||||
const int mts_mode)
|
||||
{
|
||||
assert(depth >= 0 && depth <= MAX_PU_DEPTH);
|
||||
|
@ -332,7 +333,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
|
|||
x_px, y_px,
|
||||
depth,
|
||||
intra_mode, -1,
|
||||
pred_cu, lcu);
|
||||
pred_cu, cclm_params, lcu);
|
||||
|
||||
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
|
||||
if (pred_cu->tr_idx > 1)
|
||||
|
@ -360,7 +361,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
|
|||
x_px, y_px,
|
||||
depth,
|
||||
-1, chroma_mode,
|
||||
pred_cu, lcu);
|
||||
pred_cu, cclm_params, lcu);
|
||||
best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
|
||||
}
|
||||
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
|
||||
|
@ -391,15 +392,15 @@ static double search_intra_trdepth(encoder_state_t * const state,
|
|||
if (depth < max_depth && depth < MAX_PU_DEPTH) {
|
||||
split_cost = 3 * state->lambda;
|
||||
|
||||
split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1);
|
||||
split_cost += search_intra_trdepth(state, x_px, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
|
||||
if (split_cost < nosplit_cost) {
|
||||
split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1);
|
||||
split_cost += search_intra_trdepth(state, x_px + offset, y_px, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
|
||||
}
|
||||
if (split_cost < nosplit_cost) {
|
||||
split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1);
|
||||
split_cost += search_intra_trdepth(state, x_px, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
|
||||
}
|
||||
if (split_cost < nosplit_cost) {
|
||||
split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, -1);
|
||||
split_cost += search_intra_trdepth(state, x_px + offset, y_px + offset, depth + 1, max_depth, intra_mode, nosplit_cost, pred_cu, lcu, cclm_params, -1);
|
||||
}
|
||||
|
||||
double cbf_bits = 0.0;
|
||||
|
@ -454,20 +455,22 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
|
|||
const kvz_pixel *orig_u, const kvz_pixel *orig_v, int16_t origstride,
|
||||
kvz_intra_references *refs_u, kvz_intra_references *refs_v,
|
||||
int8_t luma_mode,
|
||||
int8_t modes[5], double costs[5])
|
||||
int8_t modes[8], double costs[8], lcu_t* lcu)
|
||||
{
|
||||
assert(!(x_px & 4 || y_px & 4));
|
||||
|
||||
const unsigned width = MAX(LCU_WIDTH_C >> depth, TR_MIN_WIDTH);
|
||||
const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - (depth + 1), 2);
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
costs[i] = 0;
|
||||
}
|
||||
|
||||
cost_pixel_nxn_func *const satd_func = kvz_pixels_get_satd_func(width);
|
||||
//cost_pixel_nxn_func *const sad_func = kvz_pixels_get_sad_func(width);
|
||||
|
||||
cclm_parameters_t cclm_params;
|
||||
|
||||
kvz_pixel _pred[32 * 32 + SIMD_ALIGNMENT];
|
||||
kvz_pixel *pred = ALIGNED_POINTER(_pred, SIMD_ALIGNMENT);
|
||||
|
||||
|
@ -476,19 +479,31 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
|
|||
|
||||
kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width);
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
if (modes[i] == luma_mode) continue;
|
||||
if (modes[i] == -1) continue;
|
||||
kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false);
|
||||
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
|
||||
costs[i] += satd_func(pred, orig_block);
|
||||
}
|
||||
for (int i = 5; i < 8; i++) {
|
||||
assert(state->encoder_control->cfg.cclm);
|
||||
kvz_predict_cclm(
|
||||
state,
|
||||
COLOR_U, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params);
|
||||
}
|
||||
|
||||
kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width);
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
if (modes[i] == luma_mode) continue;
|
||||
if (modes[i] == -1) continue;
|
||||
kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false);
|
||||
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
|
||||
costs[i] += satd_func(pred, orig_block);
|
||||
}
|
||||
for (int i = 5; i < 8; i++) {
|
||||
assert(state->encoder_control->cfg.cclm);
|
||||
kvz_predict_cclm(
|
||||
state,
|
||||
COLOR_V, width, width, x_px, y_px, state->tile->frame->source->stride, modes[i], lcu, refs_u, pred, &cclm_params);
|
||||
}
|
||||
|
||||
kvz_sort_modes(modes, costs, 5);
|
||||
}
|
||||
|
@ -744,7 +759,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
|
|||
// Reset transform split data in lcu.cu for this area.
|
||||
kvz_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth);
|
||||
|
||||
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu, -1);
|
||||
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[rdo_mode], MAX_INT, &pred_cu, lcu, NULL, -1);
|
||||
costs[rdo_mode] += mode_cost;
|
||||
trafo[rdo_mode] = pred_cu.tr_idx;
|
||||
|
||||
|
@ -769,7 +784,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
|
|||
pred_cu.intra.mode = modes[0];
|
||||
pred_cu.intra.mode_chroma = modes[0];
|
||||
FILL(pred_cu.cbf, 0);
|
||||
search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, trafo[0]);
|
||||
search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, NULL, trafo[0]);
|
||||
}
|
||||
|
||||
return modes_to_check;
|
||||
|
@ -810,7 +825,19 @@ double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
|
|||
if (chroma_mode == luma_mode) {
|
||||
mode_bits = CTX_ENTROPY_FBITS(ctx, 0);
|
||||
} else {
|
||||
mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1);
|
||||
if(chroma_mode > 67) {
|
||||
mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1);
|
||||
}
|
||||
else {
|
||||
ctx = &(state->cabac.ctx.cclm_model);
|
||||
mode_bits = CTX_ENTROPY_FBITS(ctx, chroma_mode != 81);
|
||||
if (chroma_mode != 81) mode_bits += 1;
|
||||
}
|
||||
}
|
||||
// Technically this is encoded first but for this method of counting bits it does not matter
|
||||
if(state->encoder_control->cfg.cclm) {
|
||||
ctx = &(state->cabac.ctx.cclm_flag);
|
||||
mode_bits += CTX_ENTROPY_FBITS(ctx, chroma_mode > 67);
|
||||
}
|
||||
|
||||
return mode_bits;
|
||||
|
@ -820,31 +847,87 @@ double kvz_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
|
|||
int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
|
||||
int x_px, int y_px, int depth,
|
||||
int8_t intra_mode,
|
||||
int8_t modes[5], int8_t num_modes,
|
||||
lcu_t *const lcu)
|
||||
int8_t modes[8], int8_t num_modes,
|
||||
lcu_t *const lcu, cclm_parameters_t *best_cclm)
|
||||
{
|
||||
const bool reconstruct_chroma = (depth != 4) || (x_px & 4 && y_px & 4);
|
||||
|
||||
|
||||
kvz_intra_references refs[2];
|
||||
const vector2d_t luma_px = { x_px & ~7, y_px & ~7 };
|
||||
const vector2d_t pic_px = {
|
||||
state->tile->frame->width,
|
||||
state->tile->frame->height,
|
||||
};
|
||||
|
||||
|
||||
if (reconstruct_chroma) {
|
||||
|
||||
int c_width = MAX(32 >> (depth), 4);
|
||||
|
||||
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp);
|
||||
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp);
|
||||
|
||||
cclm_parameters_t cclm_params[2] = { 0 };
|
||||
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
|
||||
struct {
|
||||
double cost;
|
||||
int8_t mode;
|
||||
cclm_parameters_t cclm[2];
|
||||
} chroma, best_chroma;
|
||||
|
||||
// chroma.cclm = cclm_params;
|
||||
|
||||
best_chroma.mode = 0;
|
||||
best_chroma.cost = MAX_INT;
|
||||
|
||||
for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) {
|
||||
chroma.mode = modes[chroma_mode_i];
|
||||
if (chroma.mode == -1) continue;
|
||||
if(chroma.mode < 67 || depth == 0) {
|
||||
kvz_intra_recon_cu(state,
|
||||
x_px, y_px,
|
||||
depth,
|
||||
-1, chroma.mode, // skip luma
|
||||
NULL, NULL, lcu);
|
||||
}
|
||||
else {
|
||||
|
||||
kvz_intra_recon_cu(state,
|
||||
x_px, y_px,
|
||||
depth,
|
||||
-1, chroma.mode, // skip luma
|
||||
NULL, lcu);
|
||||
kvz_predict_cclm(
|
||||
state, COLOR_U,
|
||||
c_width, c_width,
|
||||
x_px & ~7, y_px & ~7,
|
||||
state->tile->frame->source->stride,
|
||||
chroma.mode,
|
||||
lcu,
|
||||
&refs[0], NULL,
|
||||
&cclm_params[0]);
|
||||
|
||||
chroma.cclm[0] = cclm_params[0];
|
||||
|
||||
kvz_predict_cclm(
|
||||
state, COLOR_V,
|
||||
c_width, c_width,
|
||||
x_px & ~7, y_px & ~7,
|
||||
state->tile->frame->source->stride,
|
||||
chroma.mode,
|
||||
lcu,
|
||||
&refs[1], NULL,
|
||||
&cclm_params[1]);
|
||||
|
||||
chroma.cclm[1] = cclm_params[1];
|
||||
|
||||
kvz_intra_recon_cu(
|
||||
state,
|
||||
x_px, y_px,
|
||||
depth,
|
||||
-1, chroma.mode, // skip luma
|
||||
NULL, cclm_params, lcu
|
||||
);
|
||||
}
|
||||
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
|
||||
|
||||
double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode);
|
||||
|
@ -854,6 +937,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
|
|||
best_chroma = chroma;
|
||||
}
|
||||
}
|
||||
best_cclm[0] = best_chroma.cclm[0];
|
||||
best_cclm[1] = best_chroma.cclm[1];
|
||||
|
||||
return best_chroma.mode;
|
||||
}
|
||||
|
@ -864,15 +949,15 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
|
|||
|
||||
int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
|
||||
const int x_px, const int y_px,
|
||||
const int depth, lcu_t *lcu)
|
||||
const int depth, lcu_t *lcu, cclm_parameters_t *best_cclm)
|
||||
{
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
|
||||
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
int8_t intra_mode = cur_pu->intra.mode;
|
||||
|
||||
double costs[5];
|
||||
int8_t modes[5] = { 0, 50, 18, 1, 67 };
|
||||
double costs[8];
|
||||
int8_t modes[8] = { 0, 50, 18, 1, -1, 81, 82, 83 };
|
||||
if (intra_mode != 0 && intra_mode != 50 && intra_mode != 18 && intra_mode != 1) {
|
||||
modes[4] = intra_mode;
|
||||
}
|
||||
|
@ -884,14 +969,14 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
|
|||
const int8_t modes_in_depth[5] = { 1, 1, 1, 1, 2 };
|
||||
int num_modes = modes_in_depth[depth];
|
||||
|
||||
if (state->encoder_control->cfg.rdo == 3) {
|
||||
num_modes = modes[4] == intra_mode ? 5 : 4;
|
||||
if (state->encoder_control->cfg.rdo >= 3) {
|
||||
num_modes = state->encoder_control->cfg.cclm ? 8 : 5;
|
||||
}
|
||||
|
||||
// Don't do rough mode search if all modes are selected.
|
||||
// FIXME: It might make more sense to only disable rough search if
|
||||
// num_modes is 0.
|
||||
if (num_modes != 1 && num_modes != 5 && num_modes != 4) {
|
||||
if (num_modes != 1 && num_modes != 5 && num_modes != 4 && num_modes != 8) {
|
||||
const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2);
|
||||
const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
|
||||
const vector2d_t luma_px = { x_px, y_px };
|
||||
|
@ -909,12 +994,12 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
|
|||
search_intra_chroma_rough(state, x_px, y_px, depth,
|
||||
ref_u, ref_v, LCU_WIDTH_C,
|
||||
&refs_u, &refs_v,
|
||||
intra_mode, modes, costs);
|
||||
intra_mode, modes, costs, lcu);
|
||||
}
|
||||
|
||||
int8_t intra_mode_chroma = intra_mode;
|
||||
if (num_modes > 1) {
|
||||
intra_mode_chroma = kvz_search_intra_chroma_rdo(state, x_px, y_px, depth, intra_mode, modes, num_modes, lcu);
|
||||
intra_mode_chroma = kvz_search_intra_chroma_rdo(state, x_px, y_px, depth, intra_mode, modes, num_modes, lcu, best_cclm);
|
||||
}
|
||||
|
||||
return intra_mode_chroma;
|
||||
|
@ -969,7 +1054,7 @@ void kvz_search_cu_intra(encoder_state_t * const state,
|
|||
kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
|
||||
|
||||
int8_t number_of_modes = 0;
|
||||
bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 3);
|
||||
bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4);
|
||||
if (!skip_rough_search) {
|
||||
number_of_modes = search_intra_rough(state,
|
||||
ref_pixels, LCU_WIDTH,
|
||||
|
@ -990,9 +1075,9 @@ void kvz_search_cu_intra(encoder_state_t * const state,
|
|||
const int32_t rdo_level = state->encoder_control->cfg.rdo;
|
||||
if (rdo_level >= 2 || skip_rough_search) {
|
||||
int number_of_modes_to_search;
|
||||
if (rdo_level == 3) {
|
||||
if (rdo_level == 4) {
|
||||
number_of_modes_to_search = 67;
|
||||
} else if (rdo_level == 2) {
|
||||
} else if (rdo_level == 2 || rdo_level == 3) {
|
||||
number_of_modes_to_search = (cu_width == 4) ? 3 : 2;
|
||||
} else {
|
||||
// Check only the predicted modes.
|
||||
|
|
|
@ -41,17 +41,18 @@
|
|||
#include "cu.h"
|
||||
#include "encoderstate.h"
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
#include "intra.h"
|
||||
|
||||
|
||||
double kvz_luma_mode_bits(const encoder_state_t *state,
|
||||
int8_t luma_mode, const int8_t *intra_preds);
|
||||
int8_t luma_mode, const int8_t *intra_preds);
|
||||
|
||||
double kvz_chroma_mode_bits(const encoder_state_t *state,
|
||||
int8_t chroma_mode, int8_t luma_mode);
|
||||
|
||||
int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
|
||||
const int x_px, const int y_px,
|
||||
const int depth, lcu_t *lcu);
|
||||
const int depth, lcu_t *lcu, cclm_parameters_t* best_cclm);
|
||||
|
||||
void kvz_search_cu_intra(encoder_state_t * const state,
|
||||
const int x_px, const int y_px,
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
videoframe_t * kvz_videoframe_alloc(int32_t width,
|
||||
int32_t height,
|
||||
enum kvz_chroma_format chroma_format,
|
||||
enum kvz_alf alf_type)
|
||||
enum kvz_alf alf_type, bool cclm)
|
||||
{
|
||||
videoframe_t *frame = calloc(1, sizeof(videoframe_t));
|
||||
if (!frame) return 0;
|
||||
|
@ -59,8 +59,13 @@ videoframe_t * kvz_videoframe_alloc(int32_t width,
|
|||
frame->sao_luma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu);
|
||||
if (chroma_format != KVZ_CSP_400) {
|
||||
frame->sao_chroma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu);
|
||||
if (cclm) {
|
||||
assert(chroma_format == KVZ_CSP_420);
|
||||
frame->cclm_luma_rec = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 7) & ~7) + FRAME_PADDING_LUMA) / 4);
|
||||
frame->cclm_luma_rec_top_line = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) / 2 * CEILDIV(height, 64));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return frame;
|
||||
}
|
||||
|
||||
|
@ -76,6 +81,12 @@ int kvz_videoframe_free(videoframe_t * const frame)
|
|||
kvz_image_free(frame->rec_lmcs);
|
||||
frame->source_lmcs_mapped = false;
|
||||
}
|
||||
if(frame->cclm_luma_rec) {
|
||||
FREE_POINTER(frame->cclm_luma_rec);
|
||||
}
|
||||
if(frame->cclm_luma_rec_top_line) {
|
||||
FREE_POINTER(frame->cclm_luma_rec_top_line);
|
||||
}
|
||||
|
||||
kvz_image_free(frame->source);
|
||||
frame->source = NULL;
|
||||
|
|
|
@ -53,6 +53,9 @@ typedef struct videoframe
|
|||
kvz_picture *rec; //!< \brief Reconstructed image.
|
||||
kvz_picture *rec_lmcs; //!< \brief LMCS mapped reconstructed image, if available, otherwise points to source.
|
||||
|
||||
kvz_pixel *cclm_luma_rec; //!< \brief buffer for the downsampled luma reconstruction for cclm
|
||||
kvz_pixel *cclm_luma_rec_top_line; //!< \brief buffer for the downsampled luma reconstruction for cclm
|
||||
|
||||
uint8_t* lmcs_avg_processed; //!< \brief For each LCU, indicates if already calculated average of border pixels is available
|
||||
int32_t* lmcs_avg; //!< \brief Average of LCU border pixels
|
||||
|
||||
|
@ -78,7 +81,7 @@ typedef struct videoframe
|
|||
} videoframe_t;
|
||||
|
||||
|
||||
videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type);
|
||||
videoframe_t *kvz_videoframe_alloc(int32_t width, int32_t height, enum kvz_chroma_format chroma_format, enum kvz_alf alf_type, bool cclm);
|
||||
int kvz_videoframe_free(videoframe_t * const frame);
|
||||
|
||||
void kvz_videoframe_set_poc(videoframe_t * frame, int32_t poc);
|
||||
|
|
|
@ -10,8 +10,9 @@ common_args='256x128 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-wpp --no
|
|||
valgrind_test $common_args --rd=1
|
||||
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37
|
||||
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq
|
||||
valgrind_test $common_args --rd=3
|
||||
valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0
|
||||
valgrind_test $common_args --alf=full --wpp --threads=1
|
||||
valgrind_test $common_args --jccr
|
||||
valgrind_test $common_args --jccr --rdoq --rd=2 --mts=intra
|
||||
valgrind_test $common_args --rd=3 --cclm --jccr
|
||||
|
||||
|
|
Loading…
Reference in a new issue