Merge branch 'intra_mrl'

This commit is contained in:
siivonek 2021-12-08 15:51:50 +02:00
commit dea3ca12aa
15 changed files with 513 additions and 224 deletions

View file

@ -202,6 +202,7 @@ int kvz_config_init(kvz_config *cfg)
cfg->chroma_scale_out[1][0] = cfg->chroma_scale_in[1][0] = -1;
cfg->chroma_scale_out[2][0] = cfg->chroma_scale_in[2][0] = -1;
cfg->mrl = false;
parse_qp_map(cfg, 0);
@ -1484,6 +1485,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
parse_qp_map(cfg, 0);
return success;
}
else if OPT("mrl") {
cfg->mrl = atobool(value);
}
else if OPT("jccr") {
cfg->jccr = (bool)atobool(value);
}

View file

@ -173,6 +173,8 @@ static const struct option long_options[] = {
{ "fastrd-outdir", required_argument, NULL, 0 },
{ "chroma-qp-in", required_argument, NULL, 0 },
{ "chroma-qp-out", required_argument, NULL, 0 },
{ "mrl", no_argument, NULL, 0 },
{ "no-mrl", no_argument, NULL, 0 },
{ "jccr", no_argument, NULL, 0 },
{ "no-jccr", no_argument, NULL, 0 },
{ "amvr", no_argument, NULL, 0 },
@ -622,6 +624,8 @@ void print_help(void)
" --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported\n"
" with lossless coding. [disabled]\n"
" --(no-)tmvp : Temporal motion vector prediction [enabled]\n"
" --(no-)mrl : Enable use of multiple reference lines in intra\n"
" predictions.\n"
" --mts <string> : Multiple Transform Selection [off].\n"
" (Currently only implemented for intra\n"
" and has effect only when rd >= 2)\n"

View file

@ -842,28 +842,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
*/
const int num_pred_units = kvz_part_mode_num_parts[cur_cu->part_size];
//ToDo: update multi_ref_lines variable when it's something else than constant 3
//int multi_ref_lines = 3;
/*
if(isp_enable_flag){ //ToDo: implement flag value to be something else than constant zero
for (int i = 0; i < num_pred_units; i++) {
if (multi_ref_lines > 1) {
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[0]);
CABAC_BIN(cabac, cur_cu->intra.multi_ref_idx != 0, "multi_ref_line_0");
if (multi_ref_lines > 2 && cur_cu->intra.multi_ref_idx != 0) {
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[1]);
CABAC_BIN(cabac, cur_cu->intra.multi_ref_idx != 1, "multi_ref_line_1");
if (multi_ref_lines > 3 && cur_cu->intra.multi_ref_idx != 1) {
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[2]);
CABAC_BIN(cabac, cur_cu->intra.multi_ref_idx != 3, "multi_ref_line_2");
}
}
}
}
}
*/
// Intra Subpartition mode
uint32_t width = (LCU_WIDTH >> depth);
uint32_t height = (LCU_WIDTH >> depth);
@ -875,14 +854,25 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
//isp_mode += ((height > TR_MAX_WIDTH) || !enough_samples) ? 2 : 0;
bool allow_isp = enough_samples;
if (0 && cur_cu->type == 1/*intra*/ && (y % LCU_WIDTH) != 0) {
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[0]);
CABAC_BIN(cabac, 0, "multi_ref_line");
// Code MRL related bits
bool enable_mrl = state->encoder_control->cfg.mrl;
int multi_ref_idx = enable_mrl ? cur_cu->intra.multi_ref_idx : 0;
if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl) {
if (MAX_REF_LINE_IDX > 1) {
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[0]);
CABAC_BIN(cabac, multi_ref_idx != 0, "multi_ref_line");
if (MAX_REF_LINE_IDX > 2 && multi_ref_idx != 0) {
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[1]);
CABAC_BIN(cabac, multi_ref_idx != 1, "multi_ref_line")
}
}
}
// ToDo: update real usage, these if clauses as such don't make any sense
if (isp_mode != 0) {
if (isp_mode != 0 && multi_ref_idx == 0) {
if (isp_mode) {
cabac->cur_ctx = &(cabac->ctx.intra_subpart_model[0]);
CABAC_BIN(cabac, 0, "intra_subPartitions");
@ -940,14 +930,11 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
}
// Is the mode in the MPM array or not
flag[j] = (mpm_preds[j] == -1) ? 0 : 1;
if (true||!(cur_pu->intra.multi_ref_idx || (isp_mode))) {
if (!(cur_pu->intra.multi_ref_idx || (isp_mode))) {
CABAC_BIN(cabac, flag[j], "prev_intra_luma_pred_flag");
}
}
for (int j = 0; j < num_pred_units; ++j) {
// Signal index of the prediction mode in the prediction list, if it is there
if (flag[j]) {
@ -956,7 +943,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, j);
const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y);
cabac->cur_ctx = &(cabac->ctx.luma_planar_model[(isp_mode ? 0 : 1)]);
if (true||cur_pu->intra.multi_ref_idx == 0) {
if (cur_pu->intra.multi_ref_idx == 0) {
CABAC_BIN(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx_luma_planar");
}
//CABAC_BIN_EP(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx");

View file

@ -661,7 +661,13 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
WRITE_UE(stream, encoder->cfg.log2_parallel_merge_level-2, "log2_parallel_merge_level_minus2");
WRITE_U(stream, 0, 1, "sps_isp_enabled_flag");
WRITE_U(stream, 0, 1, "sps_mrl_enabled_flag");
if (state->encoder_control->cfg.mrl) {
WRITE_U(stream, 1, 1, "sps_mrl_enabled_flag");
} else {
WRITE_U(stream, 0, 1, "sps_mrl_enabled_flag");
}
WRITE_U(stream, 0, 1, "sps_mip_enabled_flag");
// if(!no_cclm_constraint_flag)
if(encoder->chroma_format != KVZ_CSP_400) {

View file

@ -211,6 +211,11 @@ typedef int16_t mv_t;
*/
#define INTRA_MPM_COUNT 6
/**
* \brief Number of reference lines available for MRL (multiple reference
*        line) intra prediction.
*
* Used as an exclusive upper bound: the reference-building code asserts
* that the multi reference line index is strictly less than this value,
* so valid indices are 0 .. MAX_REF_LINE_IDX - 1. Index 0 means MRL is
* not in use (only the adjacent reference line).
*/
#define MAX_REF_LINE_IDX 3
/**
* \brief Number of pixels to delay deblocking.
*

View file

@ -212,24 +212,26 @@ static void intra_filter_reference(
/**
* \brief Generage planar prediction.
* \brief Generate dc prediction.
* \param log2_width Log2 of width, range 2..5.
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
* \param ref_top Pointer to -1 index of above reference, length=width*2+1.
* \param ref_left Pointer to -1 index of left reference, length=width*2+1.
* \param out_block Buffer of size width*width.
* \param multi_ref_idx Multi reference line index for use with MRL.
*/
static void intra_pred_dc(
const int_fast8_t log2_width,
const kvz_pixel *const ref_top,
const kvz_pixel *const ref_left,
kvz_pixel *const out_block)
kvz_pixel *const out_block,
const uint8_t multi_ref_idx)
{
int_fast8_t width = 1 << log2_width;
int_fast16_t sum = 0;
for (int_fast8_t i = 0; i < width; ++i) {
sum += ref_top[i + 1];
sum += ref_left[i + 1];
sum += ref_top[i + 1 + multi_ref_idx];
sum += ref_left[i + 1 + multi_ref_idx];
}
// JVET_K0122
@ -549,13 +551,17 @@ void kvz_intra_predict(
int_fast8_t mode,
color_t color,
kvz_pixel *dst,
bool filter_boundary)
bool filter_boundary,
const uint8_t multi_ref_idx)
{
const int_fast8_t width = 1 << log2_width;
const kvz_config *cfg = &state->encoder_control->cfg;
// MRL only for luma
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
const kvz_intra_ref *used_ref = &refs->ref;
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || width == 4) {
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || width == 4 || multi_ref_index) {
// For chroma, DC and 4x4 blocks, always use unfiltered reference.
} else if (mode == 0) {
// Otherwise, use filtered for planar.
@ -586,15 +592,15 @@ void kvz_intra_predict(
if (mode == 0) {
kvz_intra_pred_planar(log2_width, used_ref->top, used_ref->left, dst);
} else if (mode == 1) {
intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst);
intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst, multi_ref_index);
} else {
kvz_angular_pred(log2_width, mode, color, used_ref->top, used_ref->left, dst);
kvz_angular_pred(log2_width, mode, color, used_ref->top, used_ref->left, dst, multi_ref_index);
}
// pdpc
// bool pdpcCondition = (mode == 0 || mode == 1 || mode == 18 || mode == 50);
bool pdpcCondition = (mode == 0 || mode == 1); // Planar and DC
if (pdpcCondition)
if (pdpcCondition && multi_ref_index == 0) // Cannot be used with MRL.
{
kvz_pdpc_planar_dc(mode, width, log2_width, used_ref, dst);
}
@ -607,7 +613,9 @@ void kvz_intra_build_reference_any(
const vector2d_t *const luma_px,
const vector2d_t *const pic_px,
const lcu_t *const lcu,
kvz_intra_references *const refs)
kvz_intra_references *const refs,
const uint8_t multi_ref_idx,
kvz_pixel *extra_ref_lines)
{
assert(log2_width >= 2 && log2_width <= 5);
@ -617,8 +625,13 @@ void kvz_intra_build_reference_any(
const kvz_pixel dc_val = 1 << (KVZ_BIT_DEPTH - 1); //TODO: add used bitdepth as a variable
const int is_chroma = color != COLOR_Y ? 1 : 0;
// TODO: height for non-square blocks
const int_fast8_t width = 1 << log2_width;
// Get multi ref index from CU under prediction or reconstruction. Do not use MRL if not luma.
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
assert(multi_ref_index < MAX_REF_LINE_IDX);
// Convert luma coordinates to chroma coordinates for chroma.
const vector2d_t lcu_px = {
luma_px->x % LCU_WIDTH,
@ -630,26 +643,42 @@ void kvz_intra_build_reference_any(
};
// Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
const kvz_pixel *left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
const kvz_pixel *left_ref;
bool extra_ref = false;
// On the left LCU edge, if left neighboring LCU is available,
// left_ref needs to point to correct extra reference line if MRL is used.
if (luma_px->x > 0 && lcu_px.x == 0 && multi_ref_index != 0) {
left_ref = &extra_ref_lines[multi_ref_index * 128];
extra_ref = true;
}
else {
left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
}
const kvz_pixel *top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
const kvz_pixel *rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;
// Init top borders pointer to point to the correct place in the correct reference array.
const kvz_pixel *top_border;
if (px.y) {
top_border = &rec_ref[px.x + (px.y - 1) * (LCU_WIDTH >> is_chroma)];
top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
} else {
top_border = &top_ref[px.x];
top_border = &top_ref[px.x]; // Top row, no need for multi_ref_index
}
// Init left borders pointer to point to the correct place in the correct reference array.
const kvz_pixel *left_border;
int left_stride; // Distance between reference samples.
if (px.x) {
left_border = &rec_ref[px.x - 1 + px.y * (LCU_WIDTH >> is_chroma)];
left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
left_stride = LCU_WIDTH >> is_chroma;
} else {
left_border = &left_ref[px.y];
if (extra_ref) {
left_border = &left_ref[MAX_REF_LINE_IDX];
}
else {
left_border = &left_ref[px.y];
}
left_stride = 1;
}
@ -660,41 +689,103 @@ void kvz_intra_build_reference_any(
// Limit the number of available pixels based on block size and dimensions
// of the picture.
px_available_left = MIN(px_available_left, width * 2);
// TODO: height for non-square blocks
px_available_left = MIN(px_available_left, width * 2 + multi_ref_index);
px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);
// Copy pixels from coded CUs.
for (int i = 0; i < px_available_left; ++i) {
out_left_ref[i + 1] = left_border[i * left_stride];
// Reserve space for top left reference
out_left_ref[i + 1 + multi_ref_index] = left_border[i * left_stride];
}
// Extend the last pixel for the rest of the reference values.
kvz_pixel nearest_pixel = out_left_ref[px_available_left];
for (int i = px_available_left; i < width * 2; ++i) {
out_left_ref[i + 1] = nearest_pixel;
kvz_pixel nearest_pixel = left_border[(px_available_left - 1) * left_stride];
for (int i = px_available_left; i < width * 2 + multi_ref_index * 2; ++i) {
out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
}
} else {
// If we are on the left edge, extend the first pixel of the top row.
kvz_pixel nearest_pixel = luma_px->y > 0 ? top_border[0] : dc_val;
for (int i = 0; i < width * 2; i++) {
out_left_ref[i + 1] = nearest_pixel;
for (int i = 0; i < width * 2 + multi_ref_index; i++) {
// Reserve space for top left reference
out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
}
}
// Generate top-left reference.
if (luma_px->x > 0 && luma_px->y > 0) {
// If the block is at an LCU border, the top-left must be copied from
// the border that points to the LCUs 1D reference buffer.
if (px.x == 0) {
out_left_ref[0] = left_border[-1 * left_stride];
out_top_ref[0] = left_border[-1 * left_stride];
} else {
out_left_ref[0] = top_border[-1];
out_top_ref[0] = top_border[-1];
// Generate top-left reference
if (multi_ref_index)
{
if (luma_px->x > 0 && luma_px->y > 0) {
// If the block is at an LCU border, the top-left must be copied from
// the border that points to the LCUs 1D reference buffer.
// Inner picture cases
if (px.x == 0 && px.y == 0) {
// LCU top left corner case. Multi ref will be 0.
out_left_ref[0] = out_left_ref[1];
out_top_ref[0] = out_left_ref[1];
}
else if (px.x == 0) {
// LCU left border case
kvz_pixel *top_left_corner = &extra_ref_lines[multi_ref_index * 128];
for (int i = 0; i <= multi_ref_index; ++i) {
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
}
}
else if (px.y == 0) {
// LCU top border case. Multi ref will be 0.
out_left_ref[0] = top_border[-1];
out_top_ref[0] = top_border[-1];
}
else {
// Inner case
for (int i = 0; i <= multi_ref_index; ++i) {
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
out_top_ref[i] = top_border[i - 1 - multi_ref_index];
}
}
}
else {
// Picture border cases
if (px.x == 0 && px.y == 0) {
// Top left picture corner case. Multi ref will be 0.
out_left_ref[0] = out_left_ref[1];
out_top_ref[0] = out_left_ref[1];
}
else if (px.x == 0) {
// Picture left border case. Reference pixel cannot be taken from outside LCU border
kvz_pixel nearest = out_left_ref[1 + multi_ref_index];
for (int i = 0; i <= multi_ref_index; ++i) {
out_left_ref[i] = nearest;
out_top_ref[i] = nearest;
}
}
else {
// Picture top border case. Multi ref will be 0.
out_left_ref[0] = top_border[-1];
out_top_ref[0] = top_border[-1];
}
}
}
else {
if (luma_px->x > 0 && luma_px->y > 0) {
// If the block is at an LCU border, the top-left must be copied from
// the border that points to the LCUs 1D reference buffer.
if (px.x == 0) {
out_left_ref[0] = left_border[-1 * left_stride];
out_top_ref[0] = left_border[-1 * left_stride];
}
else {
out_left_ref[0] = top_border[-1];
out_top_ref[0] = top_border[-1];
}
}
else {
// Copy reference clockwise.
out_left_ref[0] = out_left_ref[1];
out_top_ref[0] = out_left_ref[1];
}
} else {
// Copy reference clockwise.
out_left_ref[0] = out_left_ref[1];
out_top_ref[0] = out_left_ref[1];
}
// Generate top reference.
@ -704,22 +795,22 @@ void kvz_intra_build_reference_any(
// Limit the number of available pixels based on block size and dimensions
// of the picture.
px_available_top = MIN(px_available_top, width * 2);
px_available_top = MIN(px_available_top, width * 2 + multi_ref_index);
px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);
// Copy all the pixels we can.
for (int i = 0; i < px_available_top; ++i) {
out_top_ref[i + 1] = top_border[i];
out_top_ref[i + 1 + multi_ref_index] = top_border[i];
}
// Extend the last pixel for the rest of the reference values.
kvz_pixel nearest_pixel = top_border[px_available_top - 1];
for (int i = px_available_top; i < width * 2; ++i) {
out_top_ref[i + 1] = nearest_pixel;
for (int i = px_available_top; i < width * 2 + multi_ref_index * 2; ++i) {
out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
}
} else {
// Extend nearest pixel.
kvz_pixel nearest_pixel = luma_px->x > 0 ? left_border[0] : dc_val;
for (int i = 0; i < width * 2; i++) {
for (int i = 0; i < width * 2 + multi_ref_index; i++) {
out_top_ref[i + 1] = nearest_pixel;
}
}
@ -732,7 +823,9 @@ void kvz_intra_build_reference_inner(
const vector2d_t *const pic_px,
const lcu_t *const lcu,
kvz_intra_references *const refs,
bool entropy_sync)
bool entropy_sync,
const uint8_t multi_ref_idx,
kvz_pixel* extra_ref_lines)
{
assert(log2_width >= 2 && log2_width <= 5);
@ -741,8 +834,13 @@ void kvz_intra_build_reference_inner(
kvz_pixel * __restrict out_top_ref = &refs->ref.top[0];
const int is_chroma = color != COLOR_Y ? 1 : 0;
// TODO: height for non-square blocks
const int_fast8_t width = 1 << log2_width;
// Get multiRefIdx from CU under prediction. Do not use MRL if not luma
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
assert(multi_ref_index < MAX_REF_LINE_IDX);
// Convert luma coordinates to chroma coordinates for chroma.
const vector2d_t lcu_px = {
luma_px->x % LCU_WIDTH,
@ -754,41 +852,90 @@ void kvz_intra_build_reference_inner(
};
// Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
const kvz_pixel * __restrict left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
const kvz_pixel* left_ref;
bool extra_ref = false;
// On the left LCU edge, if left neighboring LCU is available,
// left_ref needs to point to correct extra reference line if MRL is used.
if (lcu_px.x == 0 && multi_ref_index != 0) {
left_ref = &extra_ref_lines[multi_ref_index * 128];
extra_ref = true;
}
else {
left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
}
const kvz_pixel * __restrict top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
const kvz_pixel * __restrict rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;
// Init top borders pointer to point to the correct place in the correct reference array.
const kvz_pixel * __restrict top_border;
if (px.y) {
top_border = &rec_ref[px.x + (px.y - 1) * (LCU_WIDTH >> is_chroma)];
top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
} else {
top_border = &top_ref[px.x];
top_border = &top_ref[px.x]; // At the top line. No need for multi_ref_index
}
// Init left borders pointer to point to the correct place in the correct reference array.
const kvz_pixel * __restrict left_border;
int left_stride; // Distance between reference samples.
// Generate top-left reference.
// If the block is at an LCU border, the top-left must be copied from
// the border that points to the LCUs 1D reference buffer.
if (px.x) {
left_border = &rec_ref[px.x - 1 + px.y * (LCU_WIDTH >> is_chroma)];
left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
left_stride = LCU_WIDTH >> is_chroma;
out_left_ref[0] = top_border[-1];
out_top_ref[0] = top_border[-1];
} else {
left_border = &left_ref[px.y];
if (extra_ref) {
left_border = &left_ref[MAX_REF_LINE_IDX];
}
else {
left_border = &left_ref[px.y];
}
left_stride = 1;
out_left_ref[0] = left_border[-1 * left_stride];
out_top_ref[0] = left_border[-1 * left_stride];
}
// Generate top-left reference
if (multi_ref_index)
{
// Inner picture cases
if (px.x == 0 && px.y == 0) {
// LCU top left corner case. Multi ref will be 0.
out_left_ref[0] = out_left_ref[1];
out_top_ref[0] = out_left_ref[1];
}
else if (px.x == 0) {
// LCU left border case
kvz_pixel* top_left_corner = &extra_ref_lines[multi_ref_index * 128];
for (int i = 0; i <= multi_ref_index; ++i) {
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
}
}
else if (px.y == 0) {
// LCU top border case. Multi ref will be 0.
out_left_ref[0] = top_border[-1];
out_top_ref[0] = top_border[-1];
}
else {
// Inner case
for (int i = 0; i <= multi_ref_index; ++i) {
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
out_top_ref[i] = top_border[i - 1 - multi_ref_index];
}
}
}
else {
// If the block is at an LCU border, the top-left must be copied from
// the border that points to the LCUs 1D reference buffer.
if (px.x == 0) {
out_left_ref[0] = left_border[-1 * left_stride];
out_top_ref[0] = left_border[-1 * left_stride];
}
else {
out_left_ref[0] = top_border[-1];
out_top_ref[0] = top_border[-1];
}
}
// Generate left reference.
// Get the number of reference pixels based on the PU coordinate within the LCU.
// Get the number of reference pixels based on the PU coordinate within the LCU.
int px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
// Limit the number of available pixels based on block size and dimensions
@ -797,12 +944,12 @@ void kvz_intra_build_reference_inner(
px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);
// Copy pixels from coded CUs.
int i = 0;
int i = multi_ref_index; // Offset by multi_ref_index
do {
out_left_ref[i + 1] = left_border[(i + 0) * left_stride];
out_left_ref[i + 2] = left_border[(i + 1) * left_stride];
out_left_ref[i + 3] = left_border[(i + 2) * left_stride];
out_left_ref[i + 4] = left_border[(i + 3) * left_stride];
out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride];
out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride];
out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride];
out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride];
i += 4;
} while (i < px_available_left);
@ -815,6 +962,13 @@ void kvz_intra_build_reference_inner(
out_left_ref[i + 4] = nearest_pixel;
}
// Extend for MRL
if (multi_ref_index) {
for (; i < width * 2 + multi_ref_index; ++i) {
out_left_ref[i + 1] = nearest_pixel;
}
}
// Generate top reference.
// Get the number of reference pixels based on the PU coordinate within the LCU.
@ -822,7 +976,7 @@ void kvz_intra_build_reference_inner(
// Limit the number of available pixels based on block size and dimensions
// of the picture.
px_available_top = MIN(px_available_top, width * 2);
px_available_top = MIN(px_available_top, width * 2 + multi_ref_index);
px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);
if (entropy_sync && px.y == 0) px_available_top = MIN(px_available_top, ((LCU_WIDTH >> is_chroma) - px.x) -1);
@ -830,17 +984,17 @@ void kvz_intra_build_reference_inner(
// Copy all the pixels we can.
i = 0;
do {
memcpy(out_top_ref + i + 1, top_border + i, 4 * sizeof(kvz_pixel));
memcpy(out_top_ref + i + 1 + multi_ref_index, top_border + i, 4 * sizeof(kvz_pixel));
i += 4;
} while (i < px_available_top);
// Extend the last pixel for the rest of the reference values.
nearest_pixel = out_top_ref[i];
for (; i < width * 2; i += 4) {
out_top_ref[i + 1] = nearest_pixel;
out_top_ref[i + 2] = nearest_pixel;
out_top_ref[i + 3] = nearest_pixel;
out_top_ref[i + 4] = nearest_pixel;
nearest_pixel = out_top_ref[i + multi_ref_index];
for (; i < (width + multi_ref_index) * 2; i += 4) {
out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
out_top_ref[i + 2 + multi_ref_index] = nearest_pixel;
out_top_ref[i + 3 + multi_ref_index] = nearest_pixel;
out_top_ref[i + 4 + multi_ref_index] = nearest_pixel;
}
}
@ -851,13 +1005,17 @@ void kvz_intra_build_reference(
const vector2d_t *const pic_px,
const lcu_t *const lcu,
kvz_intra_references *const refs,
bool entropy_sync)
bool entropy_sync,
kvz_pixel *extra_ref_lines,
uint8_t multi_ref_idx)
{
assert(!(extra_ref_lines == NULL && multi_ref_idx != 0) && "Trying to use MRL with NULL extra references.");
// Much logic can be discarded if not on the edge
if (luma_px->x > 0 && luma_px->y > 0) {
kvz_intra_build_reference_inner(log2_width, color, luma_px, pic_px, lcu, refs, entropy_sync);
kvz_intra_build_reference_inner(log2_width, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines);
} else {
kvz_intra_build_reference_any(log2_width, color, luma_px, pic_px, lcu, refs);
kvz_intra_build_reference_any(log2_width, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines);
}
}
@ -869,7 +1027,8 @@ static void intra_recon_tb_leaf(
int8_t intra_mode,
cclm_parameters_t *cclm_params,
lcu_t *lcu,
color_t color)
color_t color,
uint8_t multi_ref_idx)
{
const kvz_config *cfg = &state->encoder_control->cfg;
const int shift = color == COLOR_Y ? 0 : 1;
@ -890,15 +1049,33 @@ static void intra_recon_tb_leaf(
int x_scu = SUB_SCU(x);
int y_scu = SUB_SCU(y);
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
kvz_intra_references refs;
kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp);
// Extra reference lines for use with MRL. Extra lines needed only for left edge.
kvz_pixel extra_refs[128 * MAX_REF_LINE_IDX] = { 0 };
if (luma_px.x > 0 && lcu_px.x == 0 && lcu_px.y > 0 && multi_ref_index != 0) {
videoframe_t* const frame = state->tile->frame;
// Copy extra ref lines, including ref line 1 and top left corner.
for (int i = 0; i < MAX_REF_LINE_IDX; ++i) {
int height = (LCU_WIDTH >> depth) * 2 + MAX_REF_LINE_IDX;
height = MIN(height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
height = MIN(height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
kvz_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)],
&extra_refs[i * 128],
1, height,
frame->rec->stride, 1);
}
}
kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index);
kvz_pixel pred[32 * 32];
int stride = state->tile->frame->source->stride;
const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
if(intra_mode < 68) {
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary);
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary, multi_ref_index);
} else {
kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width);
if(cclm_params == NULL) {
@ -957,6 +1134,7 @@ void kvz_intra_recon_cu(
int8_t mode_chroma,
cu_info_t *cur_cu,
cclm_parameters_t *cclm_params,
uint8_t multi_ref_idx,
lcu_t *lcu)
{
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
@ -964,6 +1142,7 @@ void kvz_intra_recon_cu(
if (cur_cu == NULL) {
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
}
uint8_t multi_ref_index = multi_ref_idx;
// Reset CBFs because CBFs might have been set
// for depth earlier
@ -981,10 +1160,10 @@ void kvz_intra_recon_cu(
const int32_t x2 = x + offset;
const int32_t y2 = y + offset;
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
// Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = {
@ -1005,11 +1184,11 @@ void kvz_intra_recon_cu(
const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0);
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y);
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y, multi_ref_index);
}
if (has_chroma) {
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U);
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V);
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U, 0);
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V, 0);
}
kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false);

View file

@ -42,10 +42,9 @@
#include "global.h" // IWYU pragma: keep
#include "kvazaar.h"
typedef struct {
kvz_pixel left[2 * 128 + 3];
kvz_pixel top[2 * 128 + 3];
kvz_pixel left[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX];
kvz_pixel top[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX];
} kvz_intra_ref;
typedef struct
{
@ -80,14 +79,16 @@ int8_t kvz_intra_get_dir_luma_predictor(
const cu_info_t *const above_pu);
/**
* \brief Generage angular predictions.
* \param width Width in pixels, range 4..32.
* \param color What color pixels to use.
* \param luma_px Luma coordinates of the prediction block.
* \param pic_px Picture dimensions in luma pixels.
* \param lcu LCU struct.
* \param out_left_ref Left reference pixels, index 0 is the top-left.
* \param out_top_ref Top reference pixels, index 0 is the top-left.
* \brief Build intra prediction reference buffers.
* \param log2_width Log2 of width, range 2..5.
* \param color What color pixels to use.
* \param luma_px Luma coordinates of the prediction block.
* \param pic_px Picture dimensions in luma pixels.
* \param lcu LCU struct.
* \param refs Pointer to top and left references.
* \param entropy_sync Indicate that top right is not available if WPP is enabled.
* \param extra_refs Additional left edge reference lines for use with MRL.
* \param multi_ref_idx Multi reference line index for the prediction block.
*/
void kvz_intra_build_reference(
const int_fast8_t log2_width,
@ -96,7 +97,9 @@ void kvz_intra_build_reference(
const vector2d_t *const pic_px,
const lcu_t *const lcu,
kvz_intra_references *const refs,
bool entropy_sync);
bool entropy_sync,
kvz_pixel *extra_refs,
uint8_t multi_ref_idx);
/**
* \brief Generate intra predictions.
@ -114,7 +117,8 @@ void kvz_intra_predict(
int_fast8_t mode,
color_t color,
kvz_pixel *dst,
bool filter_boundary);
bool filter_boundary,
const uint8_t multi_ref_idx);
void kvz_intra_recon_cu(
encoder_state_t *const state,
@ -125,6 +129,7 @@ void kvz_intra_recon_cu(
int8_t mode_chroma,
cu_info_t *cur_cu,
cclm_parameters_t* cclm_params,
uint8_t multi_ref_idx,
lcu_t *lcu);

View file

@ -514,6 +514,10 @@ typedef struct kvz_config
int8_t chroma_scale_in[3][17];
int8_t chroma_scale_out[3][17];
/** \brief enable use of multiple reference lines in intra prediction */
int8_t mrl;
int8_t jccr;
int8_t cclm;

View file

@ -160,6 +160,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
if (cu->type == CU_INTRA) {
to->intra.mode = cu->intra.mode;
to->intra.mode_chroma = cu->intra.mode_chroma;
to->intra.multi_ref_idx = cu->intra.multi_ref_idx;
} else {
to->skipped = cu->skipped;
to->merged = cu->merged;
@ -635,7 +636,6 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->type = CU_NOTSET;
cur_cu->part_size = SIZE_2Nx2N;
cur_cu->qp = state->qp;
cur_cu->intra.multi_ref_idx = 0;
cur_cu->bdpcmMode = 0;
cur_cu->tr_idx = 0;
cur_cu->violates_mts_coeff_constraint = 0;
@ -724,13 +724,15 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
int8_t intra_mode;
int8_t intra_trafo;
double intra_cost;
uint8_t multi_ref_index = 0;
kvz_search_cu_intra(state, x, y, depth, lcu,
&intra_mode, &intra_trafo, &intra_cost);
&intra_mode, &intra_trafo, &intra_cost, &multi_ref_index);
if (intra_cost < cost) {
cost = intra_cost;
cur_cu->type = CU_INTRA;
cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N;
cur_cu->intra.mode = intra_mode;
cur_cu->intra.multi_ref_idx = multi_ref_index;
//If the CU is not split from 64x64 block, the MTS is disabled for that CU.
cur_cu->tr_idx = (depth > 0) ? intra_trafo : 0;
@ -747,7 +749,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
x, y,
depth,
cur_cu->intra.mode, -1, // skip chroma
NULL, NULL, lcu);
NULL, NULL, cur_cu->intra.multi_ref_idx, lcu);
downsample_cclm_rec(
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
@ -769,7 +771,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
x & ~7, y & ~7, // TODO: as does this
depth,
-1, cur_cu->intra.mode_chroma, // skip luma
NULL, cclm_params, lcu);
NULL, cclm_params, 0, lcu);
}
} else if (cur_cu->type == CU_INTER) {
@ -917,6 +919,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
cur_cu->type = CU_INTRA;
cur_cu->part_size = SIZE_2Nx2N;
// Disable MRL in this case
cur_cu->intra.multi_ref_idx = 0;
kvz_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth);
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
@ -926,7 +931,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
x, y,
depth,
cur_cu->intra.mode, mode_chroma,
NULL,NULL, lcu);
NULL,NULL, 0, lcu);
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
if (has_chroma) {

View file

@ -333,7 +333,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
x_px, y_px,
depth,
intra_mode, -1,
pred_cu, cclm_params, lcu);
pred_cu, cclm_params, pred_cu->intra.multi_ref_idx, lcu);
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
if (pred_cu->tr_idx > 1)
@ -361,7 +361,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
x_px, y_px,
depth,
-1, chroma_mode,
pred_cu, cclm_params, lcu);
pred_cu, cclm_params, 0, lcu);
best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
}
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
@ -480,7 +480,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width);
for (int i = 0; i < 5; ++i) {
if (modes[i] == -1) continue;
kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false);
kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false, 0);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
costs[i] += satd_func(pred, orig_block);
}
@ -494,7 +494,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width);
for (int i = 0; i < 5; ++i) {
if (modes[i] == -1) continue;
kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false);
kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false, 0);
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
costs[i] += satd_func(pred, orig_block);
}
@ -588,7 +588,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
double costs_out[PARALLEL_BLKS] = { 0 };
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) {
kvz_intra_predict(state, refs, log2_width, mode + i * offset, COLOR_Y, preds[i], filter_boundary);
kvz_intra_predict(state, refs, log2_width, mode + i * offset, COLOR_Y, preds[i], filter_boundary, 0);
}
}
@ -627,7 +627,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
if (mode_in_range) {
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (test_modes[i] >= 2 && test_modes[i] <= 66) {
kvz_intra_predict(state, refs, log2_width, test_modes[i], COLOR_Y, preds[i], filter_boundary);
kvz_intra_predict(state, refs, log2_width, test_modes[i], COLOR_Y, preds[i], filter_boundary, 0);
}
}
@ -664,7 +664,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
}
if (!has_mode) {
kvz_intra_predict(state, refs, log2_width, mode, COLOR_Y, preds[0], filter_boundary);
kvz_intra_predict(state, refs, log2_width, mode, COLOR_Y, preds[0], filter_boundary, 0);
costs[modes_selected] = get_cost(state, preds[0], orig_block, satd_func, sad_func, width);
modes[modes_selected] = mode;
++modes_selected;
@ -715,18 +715,25 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
int8_t *intra_preds,
int modes_to_check,
int8_t modes[67], int8_t trafo[67], double costs[67],
lcu_t *lcu)
lcu_t *lcu,
uint8_t multi_ref_idx)
{
const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra);
const int width = LCU_WIDTH >> depth;
kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1];
// TODO: height for non-square blocks
kvz_pixels_blit(orig, orig_block, width, width, origstride, width);
// Check that the predicted modes are in the RDO mode list
if (modes_to_check < 67) {
for (int pred_mode = 0; pred_mode < 6; pred_mode++) {
int pred_mode = 0;
// Skip planar if searching modes for MRL
if (multi_ref_idx != 0) {
pred_mode = 1;
}
for (; pred_mode < 6; pred_mode++) {
int mode_found = 0;
for (int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) {
if (intra_preds[pred_mode] == modes[rdo_mode]) {
@ -753,6 +760,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N);
pred_cu.intra.mode = modes[rdo_mode];
pred_cu.intra.mode_chroma = modes[rdo_mode];
pred_cu.intra.multi_ref_idx = multi_ref_idx;
pred_cu.joint_cb_cr = 0;
FILL(pred_cu.cbf, 0);
@ -783,6 +791,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N);
pred_cu.intra.mode = modes[0];
pred_cu.intra.mode_chroma = modes[0];
pred_cu.intra.multi_ref_idx = multi_ref_idx;
FILL(pred_cu.cbf, 0);
search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, NULL, trafo[0]);
}
@ -865,8 +874,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
int c_width = MAX(32 >> (depth), 4);
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp);
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp);
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0);
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0);
cclm_parameters_t cclm_params[2] = { 0 };
@ -892,7 +901,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
x_px, y_px,
depth,
-1, chroma.mode, // skip luma
NULL, NULL, lcu);
NULL, NULL, 0, lcu);
}
else {
@ -925,8 +934,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
x_px, y_px,
depth,
-1, chroma.mode, // skip luma
NULL, cclm_params, lcu
);
NULL, cclm_params, 0, lcu);
}
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
@ -982,10 +990,10 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
const vector2d_t luma_px = { x_px, y_px };
kvz_intra_references refs_u;
kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp);
kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp, NULL, 0);
kvz_intra_references refs_v;
kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp);
kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0);
vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 };
kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
@ -1015,7 +1023,8 @@ void kvz_search_cu_intra(encoder_state_t * const state,
const int depth, lcu_t *lcu,
int8_t *mode_out,
int8_t *trafo_out,
double *cost_out)
double *cost_out,
uint8_t *multi_ref_idx_out)
{
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
const int8_t cu_width = LCU_WIDTH >> depth;
@ -1043,30 +1052,48 @@ void kvz_search_cu_intra(encoder_state_t * const state,
if (depth > 0) {
const vector2d_t luma_px = { x_px, y_px };
const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs, state->encoder_control->cfg.wpp);
// These references will only be used with rough search. No need for MRL stuff here.
kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs, state->encoder_control->cfg.wpp, NULL, 0);
}
int8_t modes[67];
int8_t trafo[67] = { 0 };
double costs[67];
int8_t modes[MAX_REF_LINE_IDX][67];
int8_t trafo[MAX_REF_LINE_IDX][67] = { 0 };
double costs[MAX_REF_LINE_IDX][67];
// Find best intra mode for 2Nx2N.
kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
int8_t number_of_modes = 0;
int8_t number_of_modes[MAX_REF_LINE_IDX] = { 0 };
bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4);
if (!skip_rough_search) {
number_of_modes = search_intra_rough(state,
number_of_modes[0] = search_intra_rough(state,
ref_pixels, LCU_WIDTH,
&refs,
log2_width, candidate_modes,
modes, costs);
} else {
number_of_modes = 67;
for (int i = 0; i < number_of_modes; ++i) {
modes[i] = i;
costs[i] = MAX_INT;
modes[0], costs[0]);
// Copy rough results for other reference lines
for (int line = 1; line < MAX_REF_LINE_IDX; ++line) {
number_of_modes[line] = number_of_modes[0];
for (int i = 0; i < number_of_modes[line]; ++i) {
modes[line][i] = modes[0][i];
costs[line][i] = costs[0][i];
}
}
} else {
for(int line = 0; line < MAX_REF_LINE_IDX; ++line) {
number_of_modes[line] = 67;
for (int i = 0; i < number_of_modes[line]; ++i) {
modes[line][i] = i;
costs[line][i] = MAX_INT;
}
}
}
uint8_t lines = 1;
// Find modes with multiple reference lines if in use. Do not use if the CU is on the top row of its LCU.
if (state->encoder_control->cfg.mrl && (y_px % LCU_WIDTH) != 0) {
lines = MAX_REF_LINE_IDX;
}
// Set transform depth to current depth, meaning no transform splits.
@ -1083,20 +1110,36 @@ void kvz_search_cu_intra(encoder_state_t * const state,
// Check only the predicted modes.
number_of_modes_to_search = 0;
}
int num_modes_to_check = MIN(number_of_modes, number_of_modes_to_search);
kvz_sort_modes(modes, costs, number_of_modes);
number_of_modes = search_intra_rdo(state,
x_px, y_px, depth,
ref_pixels, LCU_WIDTH,
candidate_modes,
num_modes_to_check,
modes, trafo, costs, lcu);
for(int8_t line = 0; line < lines; ++line) {
// For extra reference lines, only check predicted modes
if (line != 0) {
number_of_modes_to_search = 0;
}
int num_modes_to_check = MIN(number_of_modes[line], number_of_modes_to_search);
kvz_sort_modes(modes[line], costs[line], number_of_modes[line]);
number_of_modes[line] = search_intra_rdo(state,
x_px, y_px, depth,
ref_pixels, LCU_WIDTH,
candidate_modes,
num_modes_to_check,
modes[line], trafo[line], costs[line], lcu, line);
}
}
uint8_t best_line = 0;
double best_line_mode_cost = costs[0][0];
uint8_t best_mode_indices[MAX_REF_LINE_IDX];
for (int line = 0; line < lines; ++line) {
best_mode_indices[line] = select_best_mode_index(modes[line], costs[line], number_of_modes[line]);
if (best_line_mode_cost > costs[line][best_mode_indices[line]]) {
best_line_mode_cost = costs[line][best_mode_indices[line]];
best_line = line;
}
}
uint8_t best_mode_i = select_best_mode_index(modes, costs, number_of_modes);
*mode_out = modes[best_mode_i];
*trafo_out = trafo[best_mode_i];
*cost_out = costs[best_mode_i];
*mode_out = modes[best_line][best_mode_indices[best_line]];
*trafo_out = trafo[best_line][best_mode_indices[best_line]];
*cost_out = costs[best_line][best_mode_indices[best_line]];
*multi_ref_idx_out = best_line;
}

View file

@ -59,6 +59,7 @@ void kvz_search_cu_intra(encoder_state_t * const state,
const int depth, lcu_t *lcu,
int8_t *mode_out,
int8_t *trafo_out,
double *cost_out);
double *cost_out,
uint8_t *multi_ref_idx_out);
#endif // SEARCH_INTRA_H_

View file

@ -44,12 +44,14 @@
/**
* \brief Generage angular predictions.
* \brief Generate angular predictions.
* \param log2_width Log2 of width, range 2..5.
* \param intra_mode Angular mode in range 2..66.
* \param channel_type Color channel.
* \param in_ref_above Pointer to -1 index of above reference, length of at least width*2+1+multi_ref_idx.
* \param in_ref_left Pointer to -1 index of left reference, length of at least width*2+1+multi_ref_idx.
* \param dst Buffer of size width*width.
* \param multi_ref_idx Reference line index for use with MRL.
*/
static void kvz_angular_pred_avx2(
const int_fast8_t log2_width,
@ -57,12 +59,16 @@ static void kvz_angular_pred_avx2(
const int_fast8_t channel_type,
const kvz_pixel *const in_ref_above,
const kvz_pixel *const in_ref_left,
kvz_pixel *const dst)
kvz_pixel *const dst,
const uint8_t multi_ref_idx)
{
assert(log2_width >= 2 && log2_width <= 5);
assert(intra_mode >= 2 && intra_mode <= 66);
// TODO: implement handling of MRL
uint8_t multi_ref_index = channel_type == COLOR_Y ? multi_ref_idx : 0;
__m256i p_shuf_01 = _mm256_setr_epi8(
0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04,
0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c,
@ -133,9 +139,9 @@ static void kvz_angular_pred_avx2(
// Temporary buffer for modes 11-25.
// It only needs to be big enough to hold indices from -width to width-1.
//kvz_pixel tmp_ref[2 * 128] = { 0 };
kvz_pixel temp_main[2 * 128] = { 0 };
kvz_pixel temp_side[2 * 128] = { 0 };
//kvz_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
kvz_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
kvz_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
const int_fast32_t width = 1 << log2_width;
uint32_t pred_mode = intra_mode; // ToDo: handle WAIP
@ -160,7 +166,7 @@ static void kvz_angular_pred_avx2(
// Set ref_main and ref_side such that, when indexed with 0, they point to
// index 0 in block coordinates.
if (sample_disp < 0) {
for (int i = 0; i <= width + 1; i++) {
for (int i = 0; i <= width + 1 + multi_ref_index; i++) {
temp_main[width + i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
temp_side[width + i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
}
@ -208,17 +214,17 @@ static void kvz_angular_pred_avx2(
}
else {
for (int i = 0; i <= (width << 1); i++) {
for (int i = 0; i <= (width << 1) + multi_ref_index; i++) {
temp_main[i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
temp_side[i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
}
const int s = 0;
const int max_index = (0 << s) + 2;
const int max_index = (multi_ref_index << s) + 2;
const int ref_length = width << 1;
const kvz_pixel val = temp_main[ref_length];
const kvz_pixel val = temp_main[ref_length + multi_ref_index];
for (int j = 0; j <= max_index; j++) {
temp_main[ref_length + j] = val;
temp_main[ref_length + multi_ref_index + j] = val;
}
ref_main = temp_main;
@ -235,10 +241,14 @@ static void kvz_angular_pred_avx2(
//tmp_ref[width + last_index] = tmp_ref[width + last_index - 1];
}
// compensate for line offset in reference line buffers
ref_main += multi_ref_index;
ref_side += multi_ref_index;
if (sample_disp != 0) {
// The mode is not horizontal or vertical, we have to do interpolation.
int_fast32_t delta_pos = 0;
int_fast32_t delta_pos = sample_disp * multi_ref_index;
int_fast32_t delta_int[4] = { 0 };
int_fast32_t delta_fract[4] = { 0 };
for (int_fast32_t y = 0; y + 3 < width; y += 4) {
@ -273,6 +283,10 @@ static void kvz_angular_pred_avx2(
use_cubic = false;
}
}
// Cubic must be used if ref line != 0
if (multi_ref_index) {
use_cubic = true;
}
const int16_t filter_coeff[4] = { 16 - (delta_fract[yy] >> 1), 32 - (delta_fract[yy] >> 1), 16 + (delta_fract[yy] >> 1), delta_fract[yy] >> 1 };
const int16_t *temp_f = use_cubic ? cubic_filter[delta_fract[yy]] : filter_coeff;
memcpy(f[yy], temp_f, 4 * sizeof(*temp_f));
@ -334,7 +348,7 @@ static void kvz_angular_pred_avx2(
// PDPC
bool PDPC_filter = (width >= 4 || channel_type != 0);
if (pred_mode > 1 && pred_mode < 67) {
if (mode_disp < 0) {
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
PDPC_filter = false;
}
else if (mode_disp > 0) {
@ -433,7 +447,7 @@ static void kvz_angular_pred_avx2(
for (int_fast32_t x = 0; x < width; ++x) {
dst[y * width + x] = ref_main[x + 1];
}
if ((width >= 4 || channel_type != 0) && sample_disp >= 0) {
if ((width >= 4 || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) {
int scale = (log2_width + log2_width - 2) >> 2;
const kvz_pixel top_left = ref_main[0];
const kvz_pixel left = ref_side[1 + y];
@ -590,7 +604,8 @@ static void kvz_intra_pred_planar_avx2(
// addends etc can be preinitialized for each position.
static void pred_filtered_dc_4x4(const uint8_t *ref_top,
const uint8_t *ref_left,
uint8_t *out_block)
uint8_t *out_block,
const uint8_t multi_ref_idx)
{
const uint32_t rt_u32 = *(const uint32_t *)(ref_top + 1);
const uint32_t rl_u32 = *(const uint32_t *)(ref_left + 1);
@ -651,7 +666,8 @@ static void pred_filtered_dc_4x4(const uint8_t *ref_top,
static void pred_filtered_dc_8x8(const uint8_t *ref_top,
const uint8_t *ref_left,
uint8_t *out_block)
uint8_t *out_block,
const uint8_t multi_ref_idx)
{
const uint64_t rt_u64 = *(const uint64_t *)(ref_top + 1);
const uint64_t rl_u64 = *(const uint64_t *)(ref_left + 1);
@ -755,7 +771,8 @@ static INLINE __m256i cvt_u32_si256(const uint32_t u)
static void pred_filtered_dc_16x16(const uint8_t *ref_top,
const uint8_t *ref_left,
uint8_t *out_block)
uint8_t *out_block,
const uint8_t multi_ref_idx)
{
const __m128i rt_128 = _mm_loadu_si128((const __m128i *)(ref_top + 1));
const __m128i rl_128 = _mm_loadu_si128((const __m128i *)(ref_left + 1));
@ -831,7 +848,8 @@ static void pred_filtered_dc_16x16(const uint8_t *ref_top,
static void pred_filtered_dc_32x32(const uint8_t *ref_top,
const uint8_t *ref_left,
uint8_t *out_block)
uint8_t *out_block,
const uint8_t multi_ref_idx)
{
const __m256i rt = _mm256_loadu_si256((const __m256i *)(ref_top + 1));
const __m256i rl = _mm256_loadu_si256((const __m256i *)(ref_left + 1));
@ -913,23 +931,26 @@ static void pred_filtered_dc_32x32(const uint8_t *ref_top,
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
* \param dst Buffer of size width*width.
* \param multi_ref_idx Reference line index. May be non-zero when MRL is used.
*/
static void kvz_intra_pred_filtered_dc_avx2(
const int_fast8_t log2_width,
const uint8_t *ref_top,
const uint8_t *ref_left,
uint8_t *out_block)
uint8_t *out_block,
const uint8_t multi_ref_idx)
{
assert(log2_width >= 2 && log2_width <= 5);
// TODO: implement multi reference index for all subfunctions
if (log2_width == 2) {
pred_filtered_dc_4x4(ref_top, ref_left, out_block);
pred_filtered_dc_4x4(ref_top, ref_left, out_block, multi_ref_idx);
} else if (log2_width == 3) {
pred_filtered_dc_8x8(ref_top, ref_left, out_block);
pred_filtered_dc_8x8(ref_top, ref_left, out_block, multi_ref_idx);
} else if (log2_width == 4) {
pred_filtered_dc_16x16(ref_top, ref_left, out_block);
pred_filtered_dc_16x16(ref_top, ref_left, out_block, multi_ref_idx);
} else if (log2_width == 5) {
pred_filtered_dc_32x32(ref_top, ref_left, out_block);
pred_filtered_dc_32x32(ref_top, ref_left, out_block, multi_ref_idx);
}
}

View file

@ -41,12 +41,13 @@
/**
* \brief Generage angular predictions.
* \brief Generate angular predictions.
* \param log2_width Log2 of width, range 2..5.
* \param intra_mode Angular mode in range 2..66.
* \param in_ref_above Pointer to -1 index of above reference, length of at least width*2+1+multi_ref_idx.
* \param in_ref_left Pointer to -1 index of left reference, length of at least width*2+1+multi_ref_idx.
* \param dst Buffer of size width*width.
* \param multi_ref_idx Multi reference line index for use with MRL.
*/
static void kvz_angular_pred_generic(
const int_fast8_t log2_width,
@ -54,7 +55,8 @@ static void kvz_angular_pred_generic(
const int_fast8_t channel_type,
const kvz_pixel *const in_ref_above,
const kvz_pixel *const in_ref_left,
kvz_pixel *const dst)
kvz_pixel *const dst,
const uint8_t multi_ref_idx)
{
assert(log2_width >= 2 && log2_width <= 5);
@ -102,12 +104,17 @@ static void kvz_angular_pred_generic(
// Temporary buffer for modes 11-25.
// It only needs to be big enough to hold indices from -width to width-1.
kvz_pixel temp_main[2 * 128] = { 0 };
kvz_pixel temp_side[2 * 128] = { 0 };
// TODO: check the correct size for these arrays when MRL is used
//kvz_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
kvz_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
kvz_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
const int_fast32_t width = 1 << log2_width;
uint32_t pred_mode = intra_mode; // ToDo: handle WAIP
uint8_t multi_ref_index = multi_ref_idx;
// Whether to swap references to always project on the left reference row.
const bool vertical_mode = intra_mode >= 34;
// Modes distance to horizontal or vertical mode.
@ -128,20 +135,22 @@ static void kvz_angular_pred_generic(
// Set ref_main and ref_side such that, when indexed with 0, they point to
// index 0 in block coordinates.
if (sample_disp < 0) {
for (int i = 0; i <= width + 1; i++) {
// TODO: for non-square blocks, separate loops for x and y is needed
for (int i = 0; i <= width + 1 + multi_ref_index; i++) {
temp_main[width + i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
temp_side[width + i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
}
// TODO: take into account non-square blocks
ref_main = temp_main + width;
ref_side = temp_side + width;
// TODO: for non square blocks, need to check if width or height is used for reference extension
for (int i = -width; i <= -1; i++) {
ref_main[i] = ref_side[MIN((-i * modedisp2invsampledisp[abs(mode_disp)] + 256) >> 9, width)];
}
//const uint32_t index_offset = width + 1;
//const int32_t last_index = width;
//const int_fast32_t most_negative_index = (width * sample_disp) >> 5;
@ -176,17 +185,20 @@ static void kvz_angular_pred_generic(
}
else {
for (int i = 0; i <= (width << 1); i++) {
// TODO: again, separate loop needed for non-square blocks
for (int i = 0; i <= (width << 1) + multi_ref_index; i++) {
temp_main[i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
temp_side[i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
}
// TODO: this code block will need to change also when non-square blocks are used
// const int log2_ratio = 0;
const int s = 0;
const int max_index = (0 << s) + 2;
const int max_index = (multi_ref_index << s) + 2;
const int ref_length = width << 1;
const kvz_pixel val = temp_main[ref_length];
for (int j = 0; j <= max_index; j++) {
temp_main[ref_length + j] = val;
const kvz_pixel val = temp_main[ref_length + multi_ref_index];
for (int j = 1; j <= max_index; j++) {
temp_main[ref_length + multi_ref_index + j] = val;
}
ref_main = temp_main;
@ -203,12 +215,14 @@ static void kvz_angular_pred_generic(
//tmp_ref[width + last_index] = tmp_ref[width + last_index - 1];
}
// compensate for line offset in reference line buffers
ref_main += multi_ref_index;
ref_side += multi_ref_index;
if (sample_disp != 0) {
// The mode is not horizontal or vertical, we have to do interpolation.
int_fast32_t delta_pos = 0;
for (int_fast32_t y = 0; y < width; ++y) {
delta_pos += sample_disp;
for (int_fast32_t y = 0, delta_pos = sample_disp * (1 + multi_ref_index); y < width; ++y, delta_pos += sample_disp) {
int_fast32_t delta_int = delta_pos >> 5;
int_fast32_t delta_fract = delta_pos & (32 - 1);
@ -231,6 +245,10 @@ static void kvz_angular_pred_generic(
use_cubic = false;
}
}
// Cubic must be used if ref line != 0
if (multi_ref_index) {
use_cubic = true;
}
const int16_t filter_coeff[4] = { 16 - (delta_fract >> 1), 32 - (delta_fract >> 1), 16 + (delta_fract >> 1), delta_fract >> 1 };
int16_t const * const f = use_cubic ? cubic_filter[delta_fract] : filter_coeff;
// Do 4-tap intra interpolation filtering
@ -265,7 +283,7 @@ static void kvz_angular_pred_generic(
// PDPC
bool PDPC_filter = (width >= 4 || channel_type != 0);
if (pred_mode > 1 && pred_mode < 67) {
if (mode_disp < 0) {
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
PDPC_filter = false;
}
else if (mode_disp > 0) {
@ -324,7 +342,8 @@ static void kvz_angular_pred_generic(
for (int_fast32_t x = 0; x < width; ++x) {
dst[y * width + x] = ref_main[x + 1];
}
if ((width >= 4 || channel_type != 0) && sample_disp >= 0) {
// Do not apply PDPC if multi ref line index is other than 0
if ((width >= 4 || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) {
int scale = (log2_width + log2_width - 2) >> 2;
const kvz_pixel top_left = ref_main[0];
const kvz_pixel left = ref_side[1 + y];
@ -396,26 +415,29 @@ static void kvz_intra_pred_planar_generic(
}
/**
* \brief Generage intra DC prediction with post filtering applied.
* \brief Generate intra DC prediction with post filtering applied.
* \param log2_width Log2 of width, range 2..5.
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
* \param dst Buffer of size width*width.
* \param multi_ref_idx Reference line index. May be non-zero when MRL is used.
*/
static void kvz_intra_pred_filtered_dc_generic(
const int_fast8_t log2_width,
const kvz_pixel *const ref_top,
const kvz_pixel *const ref_left,
kvz_pixel *const out_block)
kvz_pixel *const out_block,
const uint8_t multi_ref_idx)
{
assert(log2_width >= 2 && log2_width <= 5);
// TODO: height for non-square block sizes
const int_fast8_t width = 1 << log2_width;
int_fast16_t sum = 0;
for (int_fast8_t i = 0; i < width; ++i) {
sum += ref_top[i + 1];
sum += ref_left[i + 1];
sum += ref_top[i + 1 + multi_ref_idx];
sum += ref_left[i + 1 + multi_ref_idx];
}
const kvz_pixel dc_val = (sum + width) >> (log2_width + 1);

View file

@ -49,7 +49,8 @@ typedef void (angular_pred_func)(
const int_fast8_t channel_type,
const kvz_pixel *const in_ref_above,
const kvz_pixel *const in_ref_left,
kvz_pixel *const dst);
kvz_pixel *const dst,
const uint8_t multi_ref_idx);
typedef void (intra_pred_planar_func)(
const int_fast8_t log2_width,
@ -61,7 +62,8 @@ typedef void (intra_pred_filtered_dc_func)(
const int_fast8_t log2_width,
const kvz_pixel *const ref_top,
const kvz_pixel *const ref_left,
kvz_pixel *const out_block);
kvz_pixel *const out_block,
const uint8_t multi_ref_idx);
typedef void (pdpc_planar_dc_func)(
const int mode,

View file

@ -10,6 +10,7 @@ common_args='256x128 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-wpp --no
valgrind_test $common_args --rd=1
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq
valgrind_test $common_args --rd=2 --mrl
valgrind_test $common_args --rd=3
valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0
valgrind_test $common_args --alf=full --wpp --threads=1