mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
Merge branch 'intra_mrl'
This commit is contained in:
commit
dea3ca12aa
|
@ -202,6 +202,7 @@ int kvz_config_init(kvz_config *cfg)
|
|||
cfg->chroma_scale_out[1][0] = cfg->chroma_scale_in[1][0] = -1;
|
||||
cfg->chroma_scale_out[2][0] = cfg->chroma_scale_in[2][0] = -1;
|
||||
|
||||
cfg->mrl = false;
|
||||
|
||||
parse_qp_map(cfg, 0);
|
||||
|
||||
|
@ -1484,6 +1485,9 @@ int kvz_config_parse(kvz_config *cfg, const char *name, const char *value)
|
|||
parse_qp_map(cfg, 0);
|
||||
return success;
|
||||
}
|
||||
else if OPT("mrl") {
|
||||
cfg->mrl = atobool(value);
|
||||
}
|
||||
else if OPT("jccr") {
|
||||
cfg->jccr = (bool)atobool(value);
|
||||
}
|
||||
|
|
|
@ -173,6 +173,8 @@ static const struct option long_options[] = {
|
|||
{ "fastrd-outdir", required_argument, NULL, 0 },
|
||||
{ "chroma-qp-in", required_argument, NULL, 0 },
|
||||
{ "chroma-qp-out", required_argument, NULL, 0 },
|
||||
{ "mrl", no_argument, NULL, 0 },
|
||||
{ "no-mrl", no_argument, NULL, 0 },
|
||||
{ "jccr", no_argument, NULL, 0 },
|
||||
{ "no-jccr", no_argument, NULL, 0 },
|
||||
{ "amvr", no_argument, NULL, 0 },
|
||||
|
@ -622,6 +624,8 @@ void print_help(void)
|
|||
" --(no-)implicit-rdpcm : Implicit residual DPCM. Currently only supported\n"
|
||||
" with lossless coding. [disabled]\n"
|
||||
" --(no-)tmvp : Temporal motion vector prediction [enabled]\n"
|
||||
" --(no-)mrl : Enable use of multiple reference lines in intra\n"
|
||||
" predictions.\n"
|
||||
" --mts <string> : Multiple Transform Selection [off].\n"
|
||||
" (Currently only implemented for intra\n"
|
||||
" and has effect only when rd >= 2)\n"
|
||||
|
|
|
@ -842,28 +842,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
|
|||
*/
|
||||
|
||||
const int num_pred_units = kvz_part_mode_num_parts[cur_cu->part_size];
|
||||
|
||||
//ToDo: update multi_ref_lines variable when it's something else than constant 3
|
||||
//int multi_ref_lines = 3;
|
||||
/*
|
||||
if(isp_enable_flag){ //ToDo: implement flag value to be something else than constant zero
|
||||
for (int i = 0; i < num_pred_units; i++) {
|
||||
if (multi_ref_lines > 1) {
|
||||
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[0]);
|
||||
CABAC_BIN(cabac, cur_cu->intra.multi_ref_idx != 0, "multi_ref_line_0");
|
||||
if (multi_ref_lines > 2 && cur_cu->intra.multi_ref_idx != 0) {
|
||||
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[1]);
|
||||
CABAC_BIN(cabac, cur_cu->intra.multi_ref_idx != 1, "multi_ref_line_1");
|
||||
if (multi_ref_lines > 3 && cur_cu->intra.multi_ref_idx != 1) {
|
||||
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[2]);
|
||||
CABAC_BIN(cabac, cur_cu->intra.multi_ref_idx != 3, "multi_ref_line_2");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
// Intra Subpartition mode
|
||||
uint32_t width = (LCU_WIDTH >> depth);
|
||||
uint32_t height = (LCU_WIDTH >> depth);
|
||||
|
@ -875,14 +854,25 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
|
|||
//isp_mode += ((height > TR_MAX_WIDTH) || !enough_samples) ? 2 : 0;
|
||||
bool allow_isp = enough_samples;
|
||||
|
||||
if (0 && cur_cu->type == 1/*intra*/ && (y % LCU_WIDTH) != 0) {
|
||||
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[0]);
|
||||
CABAC_BIN(cabac, 0, "multi_ref_line");
|
||||
// Code MRL related bits
|
||||
bool enable_mrl = state->encoder_control->cfg.mrl;
|
||||
int multi_ref_idx = enable_mrl ? cur_cu->intra.multi_ref_idx : 0;
|
||||
|
||||
|
||||
if (cur_cu->type == CU_INTRA && (y % LCU_WIDTH) != 0 && !cur_cu->bdpcmMode && enable_mrl) {
|
||||
if (MAX_REF_LINE_IDX > 1) {
|
||||
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[0]);
|
||||
CABAC_BIN(cabac, multi_ref_idx != 0, "multi_ref_line");
|
||||
if (MAX_REF_LINE_IDX > 2 && multi_ref_idx != 0) {
|
||||
cabac->cur_ctx = &(cabac->ctx.multi_ref_line[1]);
|
||||
CABAC_BIN(cabac, multi_ref_idx != 1, "multi_ref_line")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ToDo: update real usage, these if clauses as such don't make any sense
|
||||
if (isp_mode != 0) {
|
||||
if (isp_mode != 0 && multi_ref_idx == 0) {
|
||||
if (isp_mode) {
|
||||
cabac->cur_ctx = &(cabac->ctx.intra_subpart_model[0]);
|
||||
CABAC_BIN(cabac, 0, "intra_subPartitions");
|
||||
|
@ -940,14 +930,11 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
|
|||
}
|
||||
// Is the mode in the MPM array or not
|
||||
flag[j] = (mpm_preds[j] == -1) ? 0 : 1;
|
||||
if (true||!(cur_pu->intra.multi_ref_idx || (isp_mode))) {
|
||||
if (!(cur_pu->intra.multi_ref_idx || (isp_mode))) {
|
||||
CABAC_BIN(cabac, flag[j], "prev_intra_luma_pred_flag");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
for (int j = 0; j < num_pred_units; ++j) {
|
||||
// Signal index of the prediction mode in the prediction list, if it is there
|
||||
if (flag[j]) {
|
||||
|
@ -956,7 +943,7 @@ static void encode_intra_coding_unit(encoder_state_t * const state,
|
|||
const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, j);
|
||||
const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, pu_x, pu_y);
|
||||
cabac->cur_ctx = &(cabac->ctx.luma_planar_model[(isp_mode ? 0 : 1)]);
|
||||
if (true||cur_pu->intra.multi_ref_idx == 0) {
|
||||
if (cur_pu->intra.multi_ref_idx == 0) {
|
||||
CABAC_BIN(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx_luma_planar");
|
||||
}
|
||||
//CABAC_BIN_EP(cabac, (mpm_preds[j] > 0 ? 1 : 0), "mpm_idx");
|
||||
|
|
|
@ -661,7 +661,13 @@ static void encoder_state_write_bitstream_seq_parameter_set(bitstream_t* stream,
|
|||
WRITE_UE(stream, encoder->cfg.log2_parallel_merge_level-2, "log2_parallel_merge_level_minus2");
|
||||
|
||||
WRITE_U(stream, 0, 1, "sps_isp_enabled_flag");
|
||||
WRITE_U(stream, 0, 1, "sps_mrl_enabled_flag");
|
||||
|
||||
if (state->encoder_control->cfg.mrl) {
|
||||
WRITE_U(stream, 1, 1, "sps_mrl_enabled_flag");
|
||||
} else {
|
||||
WRITE_U(stream, 0, 1, "sps_mrl_enabled_flag");
|
||||
}
|
||||
|
||||
WRITE_U(stream, 0, 1, "sps_mip_enabled_flag");
|
||||
// if(!no_cclm_constraint_flag)
|
||||
if(encoder->chroma_format != KVZ_CSP_400) {
|
||||
|
|
|
@ -211,6 +211,11 @@ typedef int16_t mv_t;
|
|||
*/
|
||||
#define INTRA_MPM_COUNT 6
|
||||
|
||||
/**
|
||||
* \brief Maximum reference lines for MRL intra
|
||||
*/
|
||||
#define MAX_REF_LINE_IDX 3
|
||||
|
||||
/**
|
||||
* \brief Number of pixels to delay deblocking.
|
||||
*
|
||||
|
|
349
src/intra.c
349
src/intra.c
|
@ -212,24 +212,26 @@ static void intra_filter_reference(
|
|||
|
||||
|
||||
/**
|
||||
* \brief Generage planar prediction.
|
||||
* \brief Generate dc prediction.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param ref_top Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
* \param multi_ref_idx Multi reference line index for use with MRL.
|
||||
*/
|
||||
static void intra_pred_dc(
|
||||
const int_fast8_t log2_width,
|
||||
const kvz_pixel *const ref_top,
|
||||
const kvz_pixel *const ref_left,
|
||||
kvz_pixel *const out_block)
|
||||
kvz_pixel *const out_block,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
int_fast8_t width = 1 << log2_width;
|
||||
|
||||
int_fast16_t sum = 0;
|
||||
for (int_fast8_t i = 0; i < width; ++i) {
|
||||
sum += ref_top[i + 1];
|
||||
sum += ref_left[i + 1];
|
||||
sum += ref_top[i + 1 + multi_ref_idx];
|
||||
sum += ref_left[i + 1 + multi_ref_idx];
|
||||
}
|
||||
|
||||
// JVET_K0122
|
||||
|
@ -549,13 +551,17 @@ void kvz_intra_predict(
|
|||
int_fast8_t mode,
|
||||
color_t color,
|
||||
kvz_pixel *dst,
|
||||
bool filter_boundary)
|
||||
bool filter_boundary,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
const int_fast8_t width = 1 << log2_width;
|
||||
const kvz_config *cfg = &state->encoder_control->cfg;
|
||||
|
||||
// MRL only for luma
|
||||
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
|
||||
|
||||
const kvz_intra_ref *used_ref = &refs->ref;
|
||||
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || width == 4) {
|
||||
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || width == 4 || multi_ref_index) {
|
||||
// For chroma, DC and 4x4 blocks, always use unfiltered reference.
|
||||
} else if (mode == 0) {
|
||||
// Otherwise, use filtered for planar.
|
||||
|
@ -586,15 +592,15 @@ void kvz_intra_predict(
|
|||
if (mode == 0) {
|
||||
kvz_intra_pred_planar(log2_width, used_ref->top, used_ref->left, dst);
|
||||
} else if (mode == 1) {
|
||||
intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst);
|
||||
intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst, multi_ref_index);
|
||||
} else {
|
||||
kvz_angular_pred(log2_width, mode, color, used_ref->top, used_ref->left, dst);
|
||||
kvz_angular_pred(log2_width, mode, color, used_ref->top, used_ref->left, dst, multi_ref_index);
|
||||
}
|
||||
|
||||
// pdpc
|
||||
// bool pdpcCondition = (mode == 0 || mode == 1 || mode == 18 || mode == 50);
|
||||
bool pdpcCondition = (mode == 0 || mode == 1); // Planar and DC
|
||||
if (pdpcCondition)
|
||||
if (pdpcCondition && multi_ref_index == 0) // Cannot be used with MRL.
|
||||
{
|
||||
kvz_pdpc_planar_dc(mode, width, log2_width, used_ref, dst);
|
||||
}
|
||||
|
@ -607,7 +613,9 @@ void kvz_intra_build_reference_any(
|
|||
const vector2d_t *const luma_px,
|
||||
const vector2d_t *const pic_px,
|
||||
const lcu_t *const lcu,
|
||||
kvz_intra_references *const refs)
|
||||
kvz_intra_references *const refs,
|
||||
const uint8_t multi_ref_idx,
|
||||
kvz_pixel *extra_ref_lines)
|
||||
{
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
|
||||
|
@ -617,8 +625,13 @@ void kvz_intra_build_reference_any(
|
|||
|
||||
const kvz_pixel dc_val = 1 << (KVZ_BIT_DEPTH - 1); //TODO: add used bitdepth as a variable
|
||||
const int is_chroma = color != COLOR_Y ? 1 : 0;
|
||||
// TODO: height for non-square blocks
|
||||
const int_fast8_t width = 1 << log2_width;
|
||||
|
||||
// Get multi ref index from CU under prediction or reconstruction. Do not use MRL if not luma
|
||||
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
|
||||
assert(multi_ref_index < MAX_REF_LINE_IDX);
|
||||
|
||||
// Convert luma coordinates to chroma coordinates for chroma.
|
||||
const vector2d_t lcu_px = {
|
||||
luma_px->x % LCU_WIDTH,
|
||||
|
@ -630,26 +643,42 @@ void kvz_intra_build_reference_any(
|
|||
};
|
||||
|
||||
// Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
|
||||
const kvz_pixel *left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
|
||||
const kvz_pixel *left_ref;
|
||||
bool extra_ref = false;
|
||||
// On the left LCU edge, if left neighboring LCU is available,
|
||||
// left_ref needs to point to correct extra reference line if MRL is used.
|
||||
if (luma_px->x > 0 && lcu_px.x == 0 && multi_ref_index != 0) {
|
||||
left_ref = &extra_ref_lines[multi_ref_index * 128];
|
||||
extra_ref = true;
|
||||
}
|
||||
else {
|
||||
left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
|
||||
}
|
||||
|
||||
const kvz_pixel *top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
|
||||
const kvz_pixel *rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;
|
||||
|
||||
// Init top borders pointer to point to the correct place in the correct reference array.
|
||||
const kvz_pixel *top_border;
|
||||
if (px.y) {
|
||||
top_border = &rec_ref[px.x + (px.y - 1) * (LCU_WIDTH >> is_chroma)];
|
||||
top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
|
||||
} else {
|
||||
top_border = &top_ref[px.x];
|
||||
top_border = &top_ref[px.x]; // Top row, no need for multi_ref_index
|
||||
}
|
||||
|
||||
// Init left borders pointer to point to the correct place in the correct reference array.
|
||||
const kvz_pixel *left_border;
|
||||
int left_stride; // Distance between reference samples.
|
||||
if (px.x) {
|
||||
left_border = &rec_ref[px.x - 1 + px.y * (LCU_WIDTH >> is_chroma)];
|
||||
left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
|
||||
left_stride = LCU_WIDTH >> is_chroma;
|
||||
} else {
|
||||
left_border = &left_ref[px.y];
|
||||
if (extra_ref) {
|
||||
left_border = &left_ref[MAX_REF_LINE_IDX];
|
||||
}
|
||||
else {
|
||||
left_border = &left_ref[px.y];
|
||||
}
|
||||
left_stride = 1;
|
||||
}
|
||||
|
||||
|
@ -660,41 +689,103 @@ void kvz_intra_build_reference_any(
|
|||
|
||||
// Limit the number of available pixels based on block size and dimensions
|
||||
// of the picture.
|
||||
px_available_left = MIN(px_available_left, width * 2);
|
||||
// TODO: height for non-square blocks
|
||||
px_available_left = MIN(px_available_left, width * 2 + multi_ref_index);
|
||||
px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);
|
||||
|
||||
// Copy pixels from coded CUs.
|
||||
for (int i = 0; i < px_available_left; ++i) {
|
||||
out_left_ref[i + 1] = left_border[i * left_stride];
|
||||
// Reserve space for top left reference
|
||||
out_left_ref[i + 1 + multi_ref_index] = left_border[i * left_stride];
|
||||
}
|
||||
// Extend the last pixel for the rest of the reference values.
|
||||
kvz_pixel nearest_pixel = out_left_ref[px_available_left];
|
||||
for (int i = px_available_left; i < width * 2; ++i) {
|
||||
out_left_ref[i + 1] = nearest_pixel;
|
||||
kvz_pixel nearest_pixel = left_border[(px_available_left - 1) * left_stride];
|
||||
for (int i = px_available_left; i < width * 2 + multi_ref_index * 2; ++i) {
|
||||
out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
||||
}
|
||||
} else {
|
||||
// If we are on the left edge, extend the first pixel of the top row.
|
||||
kvz_pixel nearest_pixel = luma_px->y > 0 ? top_border[0] : dc_val;
|
||||
for (int i = 0; i < width * 2; i++) {
|
||||
out_left_ref[i + 1] = nearest_pixel;
|
||||
for (int i = 0; i < width * 2 + multi_ref_index; i++) {
|
||||
// Reserve space for top left reference
|
||||
out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
||||
}
|
||||
}
|
||||
|
||||
// Generate top-left reference.
|
||||
if (luma_px->x > 0 && luma_px->y > 0) {
|
||||
// If the block is at an LCU border, the top-left must be copied from
|
||||
// the border that points to the LCUs 1D reference buffer.
|
||||
if (px.x == 0) {
|
||||
out_left_ref[0] = left_border[-1 * left_stride];
|
||||
out_top_ref[0] = left_border[-1 * left_stride];
|
||||
} else {
|
||||
out_left_ref[0] = top_border[-1];
|
||||
out_top_ref[0] = top_border[-1];
|
||||
// Generate top-left reference
|
||||
if (multi_ref_index)
|
||||
{
|
||||
if (luma_px->x > 0 && luma_px->y > 0) {
|
||||
// If the block is at an LCU border, the top-left must be copied from
|
||||
// the border that points to the LCUs 1D reference buffer.
|
||||
|
||||
// Inner picture cases
|
||||
if (px.x == 0 && px.y == 0) {
|
||||
// LCU top left corner case. Multi ref will be 0.
|
||||
out_left_ref[0] = out_left_ref[1];
|
||||
out_top_ref[0] = out_left_ref[1];
|
||||
}
|
||||
else if (px.x == 0) {
|
||||
// LCU left border case
|
||||
kvz_pixel *top_left_corner = &extra_ref_lines[multi_ref_index * 128];
|
||||
for (int i = 0; i <= multi_ref_index; ++i) {
|
||||
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
||||
out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
|
||||
}
|
||||
}
|
||||
else if (px.y == 0) {
|
||||
// LCU top border case. Multi ref will be 0.
|
||||
out_left_ref[0] = top_border[-1];
|
||||
out_top_ref[0] = top_border[-1];
|
||||
}
|
||||
else {
|
||||
// Inner case
|
||||
for (int i = 0; i <= multi_ref_index; ++i) {
|
||||
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
||||
out_top_ref[i] = top_border[i - 1 - multi_ref_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Picture border cases
|
||||
if (px.x == 0 && px.y == 0) {
|
||||
// Top left picture corner case. Multi ref will be 0.
|
||||
out_left_ref[0] = out_left_ref[1];
|
||||
out_top_ref[0] = out_left_ref[1];
|
||||
}
|
||||
else if (px.x == 0) {
|
||||
// Picture left border case. Reference pixel cannot be taken from outside LCU border
|
||||
kvz_pixel nearest = out_left_ref[1 + multi_ref_index];
|
||||
for (int i = 0; i <= multi_ref_index; ++i) {
|
||||
out_left_ref[i] = nearest;
|
||||
out_top_ref[i] = nearest;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Picture top border case. Multi ref will be 0.
|
||||
out_left_ref[0] = top_border[-1];
|
||||
out_top_ref[0] = top_border[-1];
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (luma_px->x > 0 && luma_px->y > 0) {
|
||||
// If the block is at an LCU border, the top-left must be copied from
|
||||
// the border that points to the LCUs 1D reference buffer.
|
||||
if (px.x == 0) {
|
||||
out_left_ref[0] = left_border[-1 * left_stride];
|
||||
out_top_ref[0] = left_border[-1 * left_stride];
|
||||
}
|
||||
else {
|
||||
out_left_ref[0] = top_border[-1];
|
||||
out_top_ref[0] = top_border[-1];
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Copy reference clockwise.
|
||||
out_left_ref[0] = out_left_ref[1];
|
||||
out_top_ref[0] = out_left_ref[1];
|
||||
}
|
||||
} else {
|
||||
// Copy reference clockwise.
|
||||
out_left_ref[0] = out_left_ref[1];
|
||||
out_top_ref[0] = out_left_ref[1];
|
||||
}
|
||||
|
||||
// Generate top reference.
|
||||
|
@ -704,22 +795,22 @@ void kvz_intra_build_reference_any(
|
|||
|
||||
// Limit the number of available pixels based on block size and dimensions
|
||||
// of the picture.
|
||||
px_available_top = MIN(px_available_top, width * 2);
|
||||
px_available_top = MIN(px_available_top, width * 2 + multi_ref_index);
|
||||
px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);
|
||||
|
||||
// Copy all the pixels we can.
|
||||
for (int i = 0; i < px_available_top; ++i) {
|
||||
out_top_ref[i + 1] = top_border[i];
|
||||
out_top_ref[i + 1 + multi_ref_index] = top_border[i];
|
||||
}
|
||||
// Extend the last pixel for the rest of the reference values.
|
||||
kvz_pixel nearest_pixel = top_border[px_available_top - 1];
|
||||
for (int i = px_available_top; i < width * 2; ++i) {
|
||||
out_top_ref[i + 1] = nearest_pixel;
|
||||
for (int i = px_available_top; i < width * 2 + multi_ref_index * 2; ++i) {
|
||||
out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
||||
}
|
||||
} else {
|
||||
// Extend nearest pixel.
|
||||
kvz_pixel nearest_pixel = luma_px->x > 0 ? left_border[0] : dc_val;
|
||||
for (int i = 0; i < width * 2; i++) {
|
||||
for (int i = 0; i < width * 2 + multi_ref_index; i++) {
|
||||
out_top_ref[i + 1] = nearest_pixel;
|
||||
}
|
||||
}
|
||||
|
@ -732,7 +823,9 @@ void kvz_intra_build_reference_inner(
|
|||
const vector2d_t *const pic_px,
|
||||
const lcu_t *const lcu,
|
||||
kvz_intra_references *const refs,
|
||||
bool entropy_sync)
|
||||
bool entropy_sync,
|
||||
const uint8_t multi_ref_idx,
|
||||
kvz_pixel* extra_ref_lines)
|
||||
{
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
|
||||
|
@ -741,8 +834,13 @@ void kvz_intra_build_reference_inner(
|
|||
kvz_pixel * __restrict out_top_ref = &refs->ref.top[0];
|
||||
|
||||
const int is_chroma = color != COLOR_Y ? 1 : 0;
|
||||
// TODO: height for non-square blocks
|
||||
const int_fast8_t width = 1 << log2_width;
|
||||
|
||||
// Get multiRefIdx from CU under prediction. Do not use MRL if not luma
|
||||
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
|
||||
assert(multi_ref_index < MAX_REF_LINE_IDX);
|
||||
|
||||
// Convert luma coordinates to chroma coordinates for chroma.
|
||||
const vector2d_t lcu_px = {
|
||||
luma_px->x % LCU_WIDTH,
|
||||
|
@ -754,41 +852,90 @@ void kvz_intra_build_reference_inner(
|
|||
};
|
||||
|
||||
// Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
|
||||
const kvz_pixel * __restrict left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
|
||||
const kvz_pixel* left_ref;
|
||||
bool extra_ref = false;
|
||||
// On the left LCU edge, if left neighboring LCU is available,
|
||||
// left_ref needs to point to correct extra reference line if MRL is used.
|
||||
if (lcu_px.x == 0 && multi_ref_index != 0) {
|
||||
left_ref = &extra_ref_lines[multi_ref_index * 128];
|
||||
extra_ref = true;
|
||||
}
|
||||
else {
|
||||
left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
|
||||
}
|
||||
|
||||
const kvz_pixel * __restrict top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
|
||||
const kvz_pixel * __restrict rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;
|
||||
|
||||
// Init top borders pointer to point to the correct place in the correct reference array.
|
||||
const kvz_pixel * __restrict top_border;
|
||||
if (px.y) {
|
||||
top_border = &rec_ref[px.x + (px.y - 1) * (LCU_WIDTH >> is_chroma)];
|
||||
top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
|
||||
} else {
|
||||
top_border = &top_ref[px.x];
|
||||
|
||||
top_border = &top_ref[px.x]; // At the top line. No need for multi_ref_index
|
||||
}
|
||||
|
||||
// Init left borders pointer to point to the correct place in the correct reference array.
|
||||
const kvz_pixel * __restrict left_border;
|
||||
int left_stride; // Distance between reference samples.
|
||||
|
||||
// Generate top-left reference.
|
||||
// If the block is at an LCU border, the top-left must be copied from
|
||||
// the border that points to the LCUs 1D reference buffer.
|
||||
if (px.x) {
|
||||
left_border = &rec_ref[px.x - 1 + px.y * (LCU_WIDTH >> is_chroma)];
|
||||
left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
|
||||
left_stride = LCU_WIDTH >> is_chroma;
|
||||
out_left_ref[0] = top_border[-1];
|
||||
out_top_ref[0] = top_border[-1];
|
||||
} else {
|
||||
left_border = &left_ref[px.y];
|
||||
if (extra_ref) {
|
||||
left_border = &left_ref[MAX_REF_LINE_IDX];
|
||||
}
|
||||
else {
|
||||
left_border = &left_ref[px.y];
|
||||
}
|
||||
left_stride = 1;
|
||||
out_left_ref[0] = left_border[-1 * left_stride];
|
||||
out_top_ref[0] = left_border[-1 * left_stride];
|
||||
}
|
||||
|
||||
// Generate top-left reference
|
||||
if (multi_ref_index)
|
||||
{
|
||||
// Inner picture cases
|
||||
if (px.x == 0 && px.y == 0) {
|
||||
// LCU top left corner case. Multi ref will be 0.
|
||||
out_left_ref[0] = out_left_ref[1];
|
||||
out_top_ref[0] = out_left_ref[1];
|
||||
}
|
||||
else if (px.x == 0) {
|
||||
// LCU left border case
|
||||
kvz_pixel* top_left_corner = &extra_ref_lines[multi_ref_index * 128];
|
||||
for (int i = 0; i <= multi_ref_index; ++i) {
|
||||
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
||||
out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
|
||||
}
|
||||
}
|
||||
else if (px.y == 0) {
|
||||
// LCU top border case. Multi ref will be 0.
|
||||
out_left_ref[0] = top_border[-1];
|
||||
out_top_ref[0] = top_border[-1];
|
||||
}
|
||||
else {
|
||||
// Inner case
|
||||
for (int i = 0; i <= multi_ref_index; ++i) {
|
||||
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
||||
out_top_ref[i] = top_border[i - 1 - multi_ref_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
// If the block is at an LCU border, the top-left must be copied from
|
||||
// the border that points to the LCUs 1D reference buffer.
|
||||
if (px.x == 0) {
|
||||
out_left_ref[0] = left_border[-1 * left_stride];
|
||||
out_top_ref[0] = left_border[-1 * left_stride];
|
||||
}
|
||||
else {
|
||||
out_left_ref[0] = top_border[-1];
|
||||
out_top_ref[0] = top_border[-1];
|
||||
}
|
||||
}
|
||||
// Generate left reference.
|
||||
|
||||
// Get the number of reference pixels based on the PU coordinate within the LCU.
|
||||
// Get the number of reference pixels based on the PU coordinate within the LCU.
|
||||
int px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
|
||||
|
||||
// Limit the number of available pixels based on block size and dimensions
|
||||
|
@ -797,12 +944,12 @@ void kvz_intra_build_reference_inner(
|
|||
px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);
|
||||
|
||||
// Copy pixels from coded CUs.
|
||||
int i = 0;
|
||||
int i = multi_ref_index; // Offset by multi_ref_index
|
||||
do {
|
||||
out_left_ref[i + 1] = left_border[(i + 0) * left_stride];
|
||||
out_left_ref[i + 2] = left_border[(i + 1) * left_stride];
|
||||
out_left_ref[i + 3] = left_border[(i + 2) * left_stride];
|
||||
out_left_ref[i + 4] = left_border[(i + 3) * left_stride];
|
||||
out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride];
|
||||
out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride];
|
||||
out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride];
|
||||
out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride];
|
||||
i += 4;
|
||||
} while (i < px_available_left);
|
||||
|
||||
|
@ -815,6 +962,13 @@ void kvz_intra_build_reference_inner(
|
|||
out_left_ref[i + 4] = nearest_pixel;
|
||||
}
|
||||
|
||||
// Extend for MRL
|
||||
if (multi_ref_index) {
|
||||
for (; i < width * 2 + multi_ref_index; ++i) {
|
||||
out_left_ref[i + 1] = nearest_pixel;
|
||||
}
|
||||
}
|
||||
|
||||
// Generate top reference.
|
||||
|
||||
// Get the number of reference pixels based on the PU coordinate within the LCU.
|
||||
|
@ -822,7 +976,7 @@ void kvz_intra_build_reference_inner(
|
|||
|
||||
// Limit the number of available pixels based on block size and dimensions
|
||||
// of the picture.
|
||||
px_available_top = MIN(px_available_top, width * 2);
|
||||
px_available_top = MIN(px_available_top, width * 2 + multi_ref_index);
|
||||
px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);
|
||||
|
||||
if (entropy_sync && px.y == 0) px_available_top = MIN(px_available_top, ((LCU_WIDTH >> is_chroma) - px.x) -1);
|
||||
|
@ -830,17 +984,17 @@ void kvz_intra_build_reference_inner(
|
|||
// Copy all the pixels we can.
|
||||
i = 0;
|
||||
do {
|
||||
memcpy(out_top_ref + i + 1, top_border + i, 4 * sizeof(kvz_pixel));
|
||||
memcpy(out_top_ref + i + 1 + multi_ref_index, top_border + i, 4 * sizeof(kvz_pixel));
|
||||
i += 4;
|
||||
} while (i < px_available_top);
|
||||
|
||||
// Extend the last pixel for the rest of the reference values.
|
||||
nearest_pixel = out_top_ref[i];
|
||||
for (; i < width * 2; i += 4) {
|
||||
out_top_ref[i + 1] = nearest_pixel;
|
||||
out_top_ref[i + 2] = nearest_pixel;
|
||||
out_top_ref[i + 3] = nearest_pixel;
|
||||
out_top_ref[i + 4] = nearest_pixel;
|
||||
nearest_pixel = out_top_ref[i + multi_ref_index];
|
||||
for (; i < (width + multi_ref_index) * 2; i += 4) {
|
||||
out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
||||
out_top_ref[i + 2 + multi_ref_index] = nearest_pixel;
|
||||
out_top_ref[i + 3 + multi_ref_index] = nearest_pixel;
|
||||
out_top_ref[i + 4 + multi_ref_index] = nearest_pixel;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -851,13 +1005,17 @@ void kvz_intra_build_reference(
|
|||
const vector2d_t *const pic_px,
|
||||
const lcu_t *const lcu,
|
||||
kvz_intra_references *const refs,
|
||||
bool entropy_sync)
|
||||
bool entropy_sync,
|
||||
kvz_pixel *extra_ref_lines,
|
||||
uint8_t multi_ref_idx)
|
||||
{
|
||||
assert(!(extra_ref_lines == NULL && multi_ref_idx != 0) && "Trying to use MRL with NULL extra references.");
|
||||
|
||||
// Much logic can be discarded if not on the edge
|
||||
if (luma_px->x > 0 && luma_px->y > 0) {
|
||||
kvz_intra_build_reference_inner(log2_width, color, luma_px, pic_px, lcu, refs, entropy_sync);
|
||||
kvz_intra_build_reference_inner(log2_width, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines);
|
||||
} else {
|
||||
kvz_intra_build_reference_any(log2_width, color, luma_px, pic_px, lcu, refs);
|
||||
kvz_intra_build_reference_any(log2_width, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -869,7 +1027,8 @@ static void intra_recon_tb_leaf(
|
|||
int8_t intra_mode,
|
||||
cclm_parameters_t *cclm_params,
|
||||
lcu_t *lcu,
|
||||
color_t color)
|
||||
color_t color,
|
||||
uint8_t multi_ref_idx)
|
||||
{
|
||||
const kvz_config *cfg = &state->encoder_control->cfg;
|
||||
const int shift = color == COLOR_Y ? 0 : 1;
|
||||
|
@ -890,15 +1049,33 @@ static void intra_recon_tb_leaf(
|
|||
int x_scu = SUB_SCU(x);
|
||||
int y_scu = SUB_SCU(y);
|
||||
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
|
||||
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
|
||||
|
||||
kvz_intra_references refs;
|
||||
kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp);
|
||||
// Extra reference lines for use with MRL. Extra lines needed only for left edge.
|
||||
kvz_pixel extra_refs[128 * MAX_REF_LINE_IDX] = { 0 };
|
||||
|
||||
if (luma_px.x > 0 && lcu_px.x == 0 && lcu_px.y > 0 && multi_ref_index != 0) {
|
||||
videoframe_t* const frame = state->tile->frame;
|
||||
|
||||
// Copy extra ref lines, including ref line 1 and top left corner.
|
||||
for (int i = 0; i < MAX_REF_LINE_IDX; ++i) {
|
||||
int height = (LCU_WIDTH >> depth) * 2 + MAX_REF_LINE_IDX;
|
||||
height = MIN(height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
|
||||
height = MIN(height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
|
||||
kvz_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)],
|
||||
&extra_refs[i * 128],
|
||||
1, height,
|
||||
frame->rec->stride, 1);
|
||||
}
|
||||
}
|
||||
kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index);
|
||||
|
||||
kvz_pixel pred[32 * 32];
|
||||
int stride = state->tile->frame->source->stride;
|
||||
const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
|
||||
if(intra_mode < 68) {
|
||||
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary);
|
||||
kvz_intra_predict(state, &refs, log2width, intra_mode, color, pred, filter_boundary, multi_ref_index);
|
||||
} else {
|
||||
kvz_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], pred, width, width, stride / 2, width);
|
||||
if(cclm_params == NULL) {
|
||||
|
@ -957,6 +1134,7 @@ void kvz_intra_recon_cu(
|
|||
int8_t mode_chroma,
|
||||
cu_info_t *cur_cu,
|
||||
cclm_parameters_t *cclm_params,
|
||||
uint8_t multi_ref_idx,
|
||||
lcu_t *lcu)
|
||||
{
|
||||
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
|
||||
|
@ -964,6 +1142,7 @@ void kvz_intra_recon_cu(
|
|||
if (cur_cu == NULL) {
|
||||
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
||||
}
|
||||
uint8_t multi_ref_index = multi_ref_idx;
|
||||
|
||||
// Reset CBFs because CBFs might have been set
|
||||
// for depth earlier
|
||||
|
@ -981,10 +1160,10 @@ void kvz_intra_recon_cu(
|
|||
const int32_t x2 = x + offset;
|
||||
const int32_t y2 = y + offset;
|
||||
|
||||
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, lcu);
|
||||
kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
|
||||
kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
|
||||
kvz_intra_recon_cu(state, x2, y2, depth + 1, mode_luma, mode_chroma, NULL, NULL, multi_ref_index, lcu);
|
||||
|
||||
// Propagate coded block flags from child CUs to parent CU.
|
||||
uint16_t child_cbfs[3] = {
|
||||
|
@ -1005,11 +1184,11 @@ void kvz_intra_recon_cu(
|
|||
const bool has_chroma = mode_chroma != -1 && (x % 8 == 0 && y % 8 == 0);
|
||||
// Process a leaf TU.
|
||||
if (has_luma) {
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_luma, cclm_params, lcu, COLOR_Y, multi_ref_index);
|
||||
}
|
||||
if (has_chroma) {
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_U, 0);
|
||||
intra_recon_tb_leaf(state, x, y, depth, mode_chroma, cclm_params, lcu, COLOR_V, 0);
|
||||
}
|
||||
|
||||
kvz_quantize_lcu_residual(state, has_luma, has_chroma, x, y, depth, cur_cu, lcu, false);
|
||||
|
|
31
src/intra.h
31
src/intra.h
|
@ -42,10 +42,9 @@
|
|||
#include "global.h" // IWYU pragma: keep
|
||||
#include "kvazaar.h"
|
||||
|
||||
|
||||
typedef struct {
|
||||
kvz_pixel left[2 * 128 + 3];
|
||||
kvz_pixel top[2 * 128 + 3];
|
||||
kvz_pixel left[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX];
|
||||
kvz_pixel top[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX];
|
||||
} kvz_intra_ref;
|
||||
typedef struct
|
||||
{
|
||||
|
@ -80,14 +79,16 @@ int8_t kvz_intra_get_dir_luma_predictor(
|
|||
const cu_info_t *const above_pu);
|
||||
|
||||
/**
|
||||
* \brief Generage angular predictions.
|
||||
* \param width Width in pixels, range 4..32.
|
||||
* \param color What color pixels to use.
|
||||
* \param luma_px Luma coordinates of the prediction block.
|
||||
* \param pic_px Picture dimensions in luma pixels.
|
||||
* \param lcu LCU struct.
|
||||
* \param out_left_ref Left reference pixels, index 0 is the top-left.
|
||||
* \param out_top_ref Top reference pixels, index 0 is the top-left.
|
||||
* \brief Build intra prediction reference buffers.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param color What color pixels to use.
|
||||
* \param luma_px Luma coordinates of the prediction block.
|
||||
* \param pic_px Picture dimensions in luma pixels.
|
||||
* \param lcu LCU struct.
|
||||
* \param refs Pointer to top and left references.
|
||||
* \param entropy_sync Indicate that top right is not available if WPP is enabled.
|
||||
* \param extra_refs Additional left edge reference lines for use with MRL.
|
||||
* \param multi_ref_idx Multi reference line index for the prediction block.
|
||||
*/
|
||||
void kvz_intra_build_reference(
|
||||
const int_fast8_t log2_width,
|
||||
|
@ -96,7 +97,9 @@ void kvz_intra_build_reference(
|
|||
const vector2d_t *const pic_px,
|
||||
const lcu_t *const lcu,
|
||||
kvz_intra_references *const refs,
|
||||
bool entropy_sync);
|
||||
bool entropy_sync,
|
||||
kvz_pixel *extra_refs,
|
||||
uint8_t multi_ref_idx);
|
||||
|
||||
/**
|
||||
* \brief Generate intra predictions.
|
||||
|
@ -114,7 +117,8 @@ void kvz_intra_predict(
|
|||
int_fast8_t mode,
|
||||
color_t color,
|
||||
kvz_pixel *dst,
|
||||
bool filter_boundary);
|
||||
bool filter_boundary,
|
||||
const uint8_t multi_ref_idx);
|
||||
|
||||
void kvz_intra_recon_cu(
|
||||
encoder_state_t *const state,
|
||||
|
@ -125,6 +129,7 @@ void kvz_intra_recon_cu(
|
|||
int8_t mode_chroma,
|
||||
cu_info_t *cur_cu,
|
||||
cclm_parameters_t* cclm_params,
|
||||
uint8_t multi_ref_idx,
|
||||
lcu_t *lcu);
|
||||
|
||||
|
||||
|
|
|
@ -514,6 +514,10 @@ typedef struct kvz_config
|
|||
int8_t chroma_scale_in[3][17];
|
||||
int8_t chroma_scale_out[3][17];
|
||||
|
||||
/** \brief enable use of multiple reference lines in intra prediction */
|
||||
int8_t mrl;
|
||||
|
||||
|
||||
int8_t jccr;
|
||||
|
||||
int8_t cclm;
|
||||
|
|
15
src/search.c
15
src/search.c
|
@ -160,6 +160,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
|
|||
if (cu->type == CU_INTRA) {
|
||||
to->intra.mode = cu->intra.mode;
|
||||
to->intra.mode_chroma = cu->intra.mode_chroma;
|
||||
to->intra.multi_ref_idx = cu->intra.multi_ref_idx;
|
||||
} else {
|
||||
to->skipped = cu->skipped;
|
||||
to->merged = cu->merged;
|
||||
|
@ -635,7 +636,6 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
cur_cu->type = CU_NOTSET;
|
||||
cur_cu->part_size = SIZE_2Nx2N;
|
||||
cur_cu->qp = state->qp;
|
||||
cur_cu->intra.multi_ref_idx = 0;
|
||||
cur_cu->bdpcmMode = 0;
|
||||
cur_cu->tr_idx = 0;
|
||||
cur_cu->violates_mts_coeff_constraint = 0;
|
||||
|
@ -724,13 +724,15 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
int8_t intra_mode;
|
||||
int8_t intra_trafo;
|
||||
double intra_cost;
|
||||
uint8_t multi_ref_index = 0;
|
||||
kvz_search_cu_intra(state, x, y, depth, lcu,
|
||||
&intra_mode, &intra_trafo, &intra_cost);
|
||||
&intra_mode, &intra_trafo, &intra_cost, &multi_ref_index);
|
||||
if (intra_cost < cost) {
|
||||
cost = intra_cost;
|
||||
cur_cu->type = CU_INTRA;
|
||||
cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N;
|
||||
cur_cu->intra.mode = intra_mode;
|
||||
cur_cu->intra.multi_ref_idx = multi_ref_index;
|
||||
|
||||
//If the CU is not split from 64x64 block, the MTS is disabled for that CU.
|
||||
cur_cu->tr_idx = (depth > 0) ? intra_trafo : 0;
|
||||
|
@ -747,7 +749,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
x, y,
|
||||
depth,
|
||||
cur_cu->intra.mode, -1, // skip chroma
|
||||
NULL, NULL, lcu);
|
||||
NULL, NULL, cur_cu->intra.multi_ref_idx, lcu);
|
||||
|
||||
downsample_cclm_rec(
|
||||
state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
|
||||
|
@ -769,7 +771,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
x & ~7, y & ~7, // TODO: as does this
|
||||
depth,
|
||||
-1, cur_cu->intra.mode_chroma, // skip luma
|
||||
NULL, cclm_params, lcu);
|
||||
NULL, cclm_params, 0, lcu);
|
||||
}
|
||||
} else if (cur_cu->type == CU_INTER) {
|
||||
|
||||
|
@ -917,6 +919,9 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
cur_cu->type = CU_INTRA;
|
||||
cur_cu->part_size = SIZE_2Nx2N;
|
||||
|
||||
// Disable MRL in this case
|
||||
cur_cu->intra.multi_ref_idx = 0;
|
||||
|
||||
kvz_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth);
|
||||
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
|
||||
|
||||
|
@ -926,7 +931,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
|
|||
x, y,
|
||||
depth,
|
||||
cur_cu->intra.mode, mode_chroma,
|
||||
NULL,NULL, lcu);
|
||||
NULL,NULL, 0, lcu);
|
||||
|
||||
cost += kvz_cu_rd_cost_luma(state, x_local, y_local, depth, cur_cu, lcu);
|
||||
if (has_chroma) {
|
||||
|
|
|
@ -333,7 +333,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
|
|||
x_px, y_px,
|
||||
depth,
|
||||
intra_mode, -1,
|
||||
pred_cu, cclm_params, lcu);
|
||||
pred_cu, cclm_params, pred_cu->intra.multi_ref_idx, lcu);
|
||||
|
||||
// TODO: Not sure if this should be 0 or 1 but at least seems to work with 1
|
||||
if (pred_cu->tr_idx > 1)
|
||||
|
@ -361,7 +361,7 @@ static double search_intra_trdepth(encoder_state_t * const state,
|
|||
x_px, y_px,
|
||||
depth,
|
||||
-1, chroma_mode,
|
||||
pred_cu, cclm_params, lcu);
|
||||
pred_cu, cclm_params, 0, lcu);
|
||||
best_rd_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu);
|
||||
}
|
||||
pred_cu->tr_skip = best_tr_idx == MTS_SKIP;
|
||||
|
@ -480,7 +480,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
|
|||
kvz_pixels_blit(orig_u, orig_block, width, width, origstride, width);
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
if (modes[i] == -1) continue;
|
||||
kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false);
|
||||
kvz_intra_predict(state, refs_u, log2_width_c, modes[i], COLOR_U, pred, false, 0);
|
||||
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
|
||||
costs[i] += satd_func(pred, orig_block);
|
||||
}
|
||||
|
@ -494,7 +494,7 @@ static void search_intra_chroma_rough(encoder_state_t * const state,
|
|||
kvz_pixels_blit(orig_v, orig_block, width, width, origstride, width);
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
if (modes[i] == -1) continue;
|
||||
kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false);
|
||||
kvz_intra_predict(state, refs_v, log2_width_c, modes[i], COLOR_V, pred, false, 0);
|
||||
//costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width);
|
||||
costs[i] += satd_func(pred, orig_block);
|
||||
}
|
||||
|
@ -588,7 +588,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
|
|||
double costs_out[PARALLEL_BLKS] = { 0 };
|
||||
for (int i = 0; i < PARALLEL_BLKS; ++i) {
|
||||
if (mode + i * offset <= 66) {
|
||||
kvz_intra_predict(state, refs, log2_width, mode + i * offset, COLOR_Y, preds[i], filter_boundary);
|
||||
kvz_intra_predict(state, refs, log2_width, mode + i * offset, COLOR_Y, preds[i], filter_boundary, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -627,7 +627,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
|
|||
if (mode_in_range) {
|
||||
for (int i = 0; i < PARALLEL_BLKS; ++i) {
|
||||
if (test_modes[i] >= 2 && test_modes[i] <= 66) {
|
||||
kvz_intra_predict(state, refs, log2_width, test_modes[i], COLOR_Y, preds[i], filter_boundary);
|
||||
kvz_intra_predict(state, refs, log2_width, test_modes[i], COLOR_Y, preds[i], filter_boundary, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -664,7 +664,7 @@ static int8_t search_intra_rough(encoder_state_t * const state,
|
|||
}
|
||||
|
||||
if (!has_mode) {
|
||||
kvz_intra_predict(state, refs, log2_width, mode, COLOR_Y, preds[0], filter_boundary);
|
||||
kvz_intra_predict(state, refs, log2_width, mode, COLOR_Y, preds[0], filter_boundary, 0);
|
||||
costs[modes_selected] = get_cost(state, preds[0], orig_block, satd_func, sad_func, width);
|
||||
modes[modes_selected] = mode;
|
||||
++modes_selected;
|
||||
|
@ -715,18 +715,25 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
|
|||
int8_t *intra_preds,
|
||||
int modes_to_check,
|
||||
int8_t modes[67], int8_t trafo[67], double costs[67],
|
||||
lcu_t *lcu)
|
||||
lcu_t *lcu,
|
||||
uint8_t multi_ref_idx)
|
||||
{
|
||||
const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra);
|
||||
const int width = LCU_WIDTH >> depth;
|
||||
|
||||
kvz_pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1];
|
||||
|
||||
// TODO: height for non-square blocks
|
||||
kvz_pixels_blit(orig, orig_block, width, width, origstride, width);
|
||||
|
||||
// Check that the predicted modes are in the RDO mode list
|
||||
if (modes_to_check < 67) {
|
||||
for (int pred_mode = 0; pred_mode < 6; pred_mode++) {
|
||||
int pred_mode = 0;
|
||||
// Skip planar if searching modes for MRL
|
||||
if (multi_ref_idx != 0) {
|
||||
pred_mode = 1;
|
||||
}
|
||||
for (; pred_mode < 6; pred_mode++) {
|
||||
int mode_found = 0;
|
||||
for (int rdo_mode = 0; rdo_mode < modes_to_check; rdo_mode++) {
|
||||
if (intra_preds[pred_mode] == modes[rdo_mode]) {
|
||||
|
@ -753,6 +760,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
|
|||
pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N);
|
||||
pred_cu.intra.mode = modes[rdo_mode];
|
||||
pred_cu.intra.mode_chroma = modes[rdo_mode];
|
||||
pred_cu.intra.multi_ref_idx = multi_ref_idx;
|
||||
pred_cu.joint_cb_cr = 0;
|
||||
FILL(pred_cu.cbf, 0);
|
||||
|
||||
|
@ -783,6 +791,7 @@ static int8_t search_intra_rdo(encoder_state_t * const state,
|
|||
pred_cu.part_size = ((depth == MAX_PU_DEPTH) ? SIZE_NxN : SIZE_2Nx2N);
|
||||
pred_cu.intra.mode = modes[0];
|
||||
pred_cu.intra.mode_chroma = modes[0];
|
||||
pred_cu.intra.multi_ref_idx = multi_ref_idx;
|
||||
FILL(pred_cu.cbf, 0);
|
||||
search_intra_trdepth(state, x_px, y_px, depth, tr_depth, modes[0], MAX_INT, &pred_cu, lcu, NULL, trafo[0]);
|
||||
}
|
||||
|
@ -865,8 +874,8 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
|
|||
|
||||
int c_width = MAX(32 >> (depth), 4);
|
||||
|
||||
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp);
|
||||
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp);
|
||||
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0);
|
||||
kvz_intra_build_reference(MAX(LOG2_LCU_WIDTH - depth - 1, 2), COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0);
|
||||
|
||||
cclm_parameters_t cclm_params[2] = { 0 };
|
||||
|
||||
|
@ -892,7 +901,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
|
|||
x_px, y_px,
|
||||
depth,
|
||||
-1, chroma.mode, // skip luma
|
||||
NULL, NULL, lcu);
|
||||
NULL, NULL, 0, lcu);
|
||||
}
|
||||
else {
|
||||
|
||||
|
@ -925,8 +934,7 @@ int8_t kvz_search_intra_chroma_rdo(encoder_state_t * const state,
|
|||
x_px, y_px,
|
||||
depth,
|
||||
-1, chroma.mode, // skip luma
|
||||
NULL, cclm_params, lcu
|
||||
);
|
||||
NULL, cclm_params, 0, lcu);
|
||||
}
|
||||
chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu);
|
||||
|
||||
|
@ -982,10 +990,10 @@ int8_t kvz_search_cu_intra_chroma(encoder_state_t * const state,
|
|||
const vector2d_t luma_px = { x_px, y_px };
|
||||
|
||||
kvz_intra_references refs_u;
|
||||
kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp);
|
||||
kvz_intra_build_reference(log2_width_c, COLOR_U, &luma_px, &pic_px, lcu, &refs_u, state->encoder_control->cfg.wpp, NULL, 0);
|
||||
|
||||
kvz_intra_references refs_v;
|
||||
kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp);
|
||||
kvz_intra_build_reference(log2_width_c, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0);
|
||||
|
||||
vector2d_t lcu_cpx = { lcu_px.x / 2, lcu_px.y / 2 };
|
||||
kvz_pixel *ref_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C];
|
||||
|
@ -1015,7 +1023,8 @@ void kvz_search_cu_intra(encoder_state_t * const state,
|
|||
const int depth, lcu_t *lcu,
|
||||
int8_t *mode_out,
|
||||
int8_t *trafo_out,
|
||||
double *cost_out)
|
||||
double *cost_out,
|
||||
uint8_t *multi_ref_idx_out)
|
||||
{
|
||||
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
|
||||
const int8_t cu_width = LCU_WIDTH >> depth;
|
||||
|
@ -1043,30 +1052,48 @@ void kvz_search_cu_intra(encoder_state_t * const state,
|
|||
if (depth > 0) {
|
||||
const vector2d_t luma_px = { x_px, y_px };
|
||||
const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height };
|
||||
kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs, state->encoder_control->cfg.wpp);
|
||||
|
||||
// These references will only be used with rough search. No need for MRL stuff here.
|
||||
kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs, state->encoder_control->cfg.wpp, NULL, 0);
|
||||
}
|
||||
|
||||
int8_t modes[67];
|
||||
int8_t trafo[67] = { 0 };
|
||||
double costs[67];
|
||||
int8_t modes[MAX_REF_LINE_IDX][67];
|
||||
int8_t trafo[MAX_REF_LINE_IDX][67] = { 0 };
|
||||
double costs[MAX_REF_LINE_IDX][67];
|
||||
|
||||
// Find best intra mode for 2Nx2N.
|
||||
kvz_pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
|
||||
|
||||
int8_t number_of_modes = 0;
|
||||
int8_t number_of_modes[MAX_REF_LINE_IDX] = { 0 };
|
||||
bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4);
|
||||
if (!skip_rough_search) {
|
||||
number_of_modes = search_intra_rough(state,
|
||||
number_of_modes[0] = search_intra_rough(state,
|
||||
ref_pixels, LCU_WIDTH,
|
||||
&refs,
|
||||
log2_width, candidate_modes,
|
||||
modes, costs);
|
||||
} else {
|
||||
number_of_modes = 67;
|
||||
for (int i = 0; i < number_of_modes; ++i) {
|
||||
modes[i] = i;
|
||||
costs[i] = MAX_INT;
|
||||
modes[0], costs[0]);
|
||||
// Copy rough results for other reference lines
|
||||
for (int line = 1; line < MAX_REF_LINE_IDX; ++line) {
|
||||
number_of_modes[line] = number_of_modes[0];
|
||||
for (int i = 0; i < number_of_modes[line]; ++i) {
|
||||
modes[line][i] = modes[0][i];
|
||||
costs[line][i] = costs[0][i];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for(int line = 0; line < MAX_REF_LINE_IDX; ++line) {
|
||||
number_of_modes[line] = 67;
|
||||
for (int i = 0; i < number_of_modes[line]; ++i) {
|
||||
modes[line][i] = i;
|
||||
costs[line][i] = MAX_INT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t lines = 1;
|
||||
// Search modes with multiple reference lines if MRL is enabled. MRL is not used when the CU is on the top row of its LCU (y_px % LCU_WIDTH == 0).
|
||||
if (state->encoder_control->cfg.mrl && (y_px % LCU_WIDTH) != 0) {
|
||||
lines = MAX_REF_LINE_IDX;
|
||||
}
|
||||
|
||||
// Set transform depth to current depth, meaning no transform splits.
|
||||
|
@ -1083,20 +1110,36 @@ void kvz_search_cu_intra(encoder_state_t * const state,
|
|||
// Check only the predicted modes.
|
||||
number_of_modes_to_search = 0;
|
||||
}
|
||||
int num_modes_to_check = MIN(number_of_modes, number_of_modes_to_search);
|
||||
|
||||
kvz_sort_modes(modes, costs, number_of_modes);
|
||||
number_of_modes = search_intra_rdo(state,
|
||||
x_px, y_px, depth,
|
||||
ref_pixels, LCU_WIDTH,
|
||||
candidate_modes,
|
||||
num_modes_to_check,
|
||||
modes, trafo, costs, lcu);
|
||||
|
||||
for(int8_t line = 0; line < lines; ++line) {
|
||||
// For extra reference lines, only check predicted modes
|
||||
if (line != 0) {
|
||||
number_of_modes_to_search = 0;
|
||||
}
|
||||
int num_modes_to_check = MIN(number_of_modes[line], number_of_modes_to_search);
|
||||
kvz_sort_modes(modes[line], costs[line], number_of_modes[line]);
|
||||
number_of_modes[line] = search_intra_rdo(state,
|
||||
x_px, y_px, depth,
|
||||
ref_pixels, LCU_WIDTH,
|
||||
candidate_modes,
|
||||
num_modes_to_check,
|
||||
modes[line], trafo[line], costs[line], lcu, line);
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t best_line = 0;
|
||||
double best_line_mode_cost = costs[0][0];
|
||||
uint8_t best_mode_indices[MAX_REF_LINE_IDX];
|
||||
for (int line = 0; line < lines; ++line) {
|
||||
best_mode_indices[line] = select_best_mode_index(modes[line], costs[line], number_of_modes[line]);
|
||||
if (best_line_mode_cost > costs[line][best_mode_indices[line]]) {
|
||||
best_line_mode_cost = costs[line][best_mode_indices[line]];
|
||||
best_line = line;
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t best_mode_i = select_best_mode_index(modes, costs, number_of_modes);
|
||||
|
||||
*mode_out = modes[best_mode_i];
|
||||
*trafo_out = trafo[best_mode_i];
|
||||
*cost_out = costs[best_mode_i];
|
||||
*mode_out = modes[best_line][best_mode_indices[best_line]];
|
||||
*trafo_out = trafo[best_line][best_mode_indices[best_line]];
|
||||
*cost_out = costs[best_line][best_mode_indices[best_line]];
|
||||
*multi_ref_idx_out = best_line;
|
||||
}
|
||||
|
|
|
@ -59,6 +59,7 @@ void kvz_search_cu_intra(encoder_state_t * const state,
|
|||
const int depth, lcu_t *lcu,
|
||||
int8_t *mode_out,
|
||||
int8_t *trafo_out,
|
||||
double *cost_out);
|
||||
double *cost_out,
|
||||
uint8_t *multi_ref_idx_out);
|
||||
|
||||
#endif // SEARCH_INTRA_H_
|
||||
|
|
|
@ -44,12 +44,14 @@
|
|||
|
||||
|
||||
/**
|
||||
* \brief Generage angular predictions.
|
||||
* \brief Generate angular predictions.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param intra_mode Angular mode in range 2..66.
|
||||
* \param channel_type Color channel.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
* \param multi_ref_idx Reference line index for use with MRL.
|
||||
*/
|
||||
static void kvz_angular_pred_avx2(
|
||||
const int_fast8_t log2_width,
|
||||
|
@ -57,12 +59,16 @@ static void kvz_angular_pred_avx2(
|
|||
const int_fast8_t channel_type,
|
||||
const kvz_pixel *const in_ref_above,
|
||||
const kvz_pixel *const in_ref_left,
|
||||
kvz_pixel *const dst)
|
||||
kvz_pixel *const dst,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
assert(intra_mode >= 2 && intra_mode <= 66);
|
||||
|
||||
// TODO: implement handling of MRL
|
||||
uint8_t multi_ref_index = channel_type == COLOR_Y ? multi_ref_idx : 0;
|
||||
|
||||
__m256i p_shuf_01 = _mm256_setr_epi8(
|
||||
0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04,
|
||||
0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c,
|
||||
|
@ -133,9 +139,9 @@ static void kvz_angular_pred_avx2(
|
|||
|
||||
// Temporary buffer for modes 11-25.
|
||||
// It only needs to be big enough to hold indices from -width to width-1.
|
||||
//kvz_pixel tmp_ref[2 * 128] = { 0 };
|
||||
kvz_pixel temp_main[2 * 128] = { 0 };
|
||||
kvz_pixel temp_side[2 * 128] = { 0 };
|
||||
//kvz_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
kvz_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
kvz_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
const int_fast32_t width = 1 << log2_width;
|
||||
|
||||
uint32_t pred_mode = intra_mode; // ToDo: handle WAIP
|
||||
|
@ -160,7 +166,7 @@ static void kvz_angular_pred_avx2(
|
|||
// Set ref_main and ref_side such that, when indexed with 0, they point to
|
||||
// index 0 in block coordinates.
|
||||
if (sample_disp < 0) {
|
||||
for (int i = 0; i <= width + 1; i++) {
|
||||
for (int i = 0; i <= width + 1 + multi_ref_index; i++) {
|
||||
temp_main[width + i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
|
||||
temp_side[width + i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
|
||||
}
|
||||
|
@ -208,17 +214,17 @@ static void kvz_angular_pred_avx2(
|
|||
}
|
||||
else {
|
||||
|
||||
for (int i = 0; i <= (width << 1); i++) {
|
||||
for (int i = 0; i <= (width << 1) + multi_ref_index; i++) {
|
||||
temp_main[i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
|
||||
temp_side[i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
|
||||
}
|
||||
|
||||
const int s = 0;
|
||||
const int max_index = (0 << s) + 2;
|
||||
const int max_index = (multi_ref_index << s) + 2;
|
||||
const int ref_length = width << 1;
|
||||
const kvz_pixel val = temp_main[ref_length];
|
||||
const kvz_pixel val = temp_main[ref_length + multi_ref_index];
|
||||
for (int j = 0; j <= max_index; j++) {
|
||||
temp_main[ref_length + j] = val;
|
||||
temp_main[ref_length + multi_ref_index + j] = val;
|
||||
}
|
||||
|
||||
ref_main = temp_main;
|
||||
|
@ -235,10 +241,14 @@ static void kvz_angular_pred_avx2(
|
|||
//tmp_ref[width + last_index] = tmp_ref[width + last_index - 1];
|
||||
}
|
||||
|
||||
// compensate for line offset in reference line buffers
|
||||
ref_main += multi_ref_index;
|
||||
ref_side += multi_ref_index;
|
||||
|
||||
if (sample_disp != 0) {
|
||||
// The mode is not horizontal or vertical, we have to do interpolation.
|
||||
|
||||
int_fast32_t delta_pos = 0;
|
||||
int_fast32_t delta_pos = sample_disp * multi_ref_index;
|
||||
int_fast32_t delta_int[4] = { 0 };
|
||||
int_fast32_t delta_fract[4] = { 0 };
|
||||
for (int_fast32_t y = 0; y + 3 < width; y += 4) {
|
||||
|
@ -273,6 +283,10 @@ static void kvz_angular_pred_avx2(
|
|||
use_cubic = false;
|
||||
}
|
||||
}
|
||||
// Cubic must be used if ref line != 0
|
||||
if (multi_ref_index) {
|
||||
use_cubic = true;
|
||||
}
|
||||
const int16_t filter_coeff[4] = { 16 - (delta_fract[yy] >> 1), 32 - (delta_fract[yy] >> 1), 16 + (delta_fract[yy] >> 1), delta_fract[yy] >> 1 };
|
||||
const int16_t *temp_f = use_cubic ? cubic_filter[delta_fract[yy]] : filter_coeff;
|
||||
memcpy(f[yy], temp_f, 4 * sizeof(*temp_f));
|
||||
|
@ -334,7 +348,7 @@ static void kvz_angular_pred_avx2(
|
|||
// PDPC
|
||||
bool PDPC_filter = (width >= 4 || channel_type != 0);
|
||||
if (pred_mode > 1 && pred_mode < 67) {
|
||||
if (mode_disp < 0) {
|
||||
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
|
||||
PDPC_filter = false;
|
||||
}
|
||||
else if (mode_disp > 0) {
|
||||
|
@ -433,7 +447,7 @@ static void kvz_angular_pred_avx2(
|
|||
for (int_fast32_t x = 0; x < width; ++x) {
|
||||
dst[y * width + x] = ref_main[x + 1];
|
||||
}
|
||||
if ((width >= 4 || channel_type != 0) && sample_disp >= 0) {
|
||||
if ((width >= 4 || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) {
|
||||
int scale = (log2_width + log2_width - 2) >> 2;
|
||||
const kvz_pixel top_left = ref_main[0];
|
||||
const kvz_pixel left = ref_side[1 + y];
|
||||
|
@ -590,7 +604,8 @@ static void kvz_intra_pred_planar_avx2(
|
|||
// addends etc can be preinitialized for each position.
|
||||
static void pred_filtered_dc_4x4(const uint8_t *ref_top,
|
||||
const uint8_t *ref_left,
|
||||
uint8_t *out_block)
|
||||
uint8_t *out_block,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
const uint32_t rt_u32 = *(const uint32_t *)(ref_top + 1);
|
||||
const uint32_t rl_u32 = *(const uint32_t *)(ref_left + 1);
|
||||
|
@ -651,7 +666,8 @@ static void pred_filtered_dc_4x4(const uint8_t *ref_top,
|
|||
|
||||
static void pred_filtered_dc_8x8(const uint8_t *ref_top,
|
||||
const uint8_t *ref_left,
|
||||
uint8_t *out_block)
|
||||
uint8_t *out_block,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
const uint64_t rt_u64 = *(const uint64_t *)(ref_top + 1);
|
||||
const uint64_t rl_u64 = *(const uint64_t *)(ref_left + 1);
|
||||
|
@ -755,7 +771,8 @@ static INLINE __m256i cvt_u32_si256(const uint32_t u)
|
|||
|
||||
static void pred_filtered_dc_16x16(const uint8_t *ref_top,
|
||||
const uint8_t *ref_left,
|
||||
uint8_t *out_block)
|
||||
uint8_t *out_block,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
const __m128i rt_128 = _mm_loadu_si128((const __m128i *)(ref_top + 1));
|
||||
const __m128i rl_128 = _mm_loadu_si128((const __m128i *)(ref_left + 1));
|
||||
|
@ -831,7 +848,8 @@ static void pred_filtered_dc_16x16(const uint8_t *ref_top,
|
|||
|
||||
static void pred_filtered_dc_32x32(const uint8_t *ref_top,
|
||||
const uint8_t *ref_left,
|
||||
uint8_t *out_block)
|
||||
uint8_t *out_block,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
const __m256i rt = _mm256_loadu_si256((const __m256i *)(ref_top + 1));
|
||||
const __m256i rl = _mm256_loadu_si256((const __m256i *)(ref_left + 1));
|
||||
|
@ -913,23 +931,26 @@ static void pred_filtered_dc_32x32(const uint8_t *ref_top,
|
|||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
* \param multi_ref_idx Reference line index. May be non-zero when MRL is used.
|
||||
*/
|
||||
static void kvz_intra_pred_filtered_dc_avx2(
|
||||
const int_fast8_t log2_width,
|
||||
const uint8_t *ref_top,
|
||||
const uint8_t *ref_left,
|
||||
uint8_t *out_block)
|
||||
uint8_t *out_block,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
|
||||
// TODO: implement multi reference index for all subfunctions
|
||||
if (log2_width == 2) {
|
||||
pred_filtered_dc_4x4(ref_top, ref_left, out_block);
|
||||
pred_filtered_dc_4x4(ref_top, ref_left, out_block, multi_ref_idx);
|
||||
} else if (log2_width == 3) {
|
||||
pred_filtered_dc_8x8(ref_top, ref_left, out_block);
|
||||
pred_filtered_dc_8x8(ref_top, ref_left, out_block, multi_ref_idx);
|
||||
} else if (log2_width == 4) {
|
||||
pred_filtered_dc_16x16(ref_top, ref_left, out_block);
|
||||
pred_filtered_dc_16x16(ref_top, ref_left, out_block, multi_ref_idx);
|
||||
} else if (log2_width == 5) {
|
||||
pred_filtered_dc_32x32(ref_top, ref_left, out_block);
|
||||
pred_filtered_dc_32x32(ref_top, ref_left, out_block, multi_ref_idx);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,12 +41,13 @@
|
|||
|
||||
|
||||
/**
|
||||
* \brief Generage angular predictions.
|
||||
* \brief Generate angular predictions.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param intra_mode Angular mode in range 2..66.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
* \param multi_ref_idx Multi reference line index for use with MRL.
|
||||
*/
|
||||
static void kvz_angular_pred_generic(
|
||||
const int_fast8_t log2_width,
|
||||
|
@ -54,7 +55,8 @@ static void kvz_angular_pred_generic(
|
|||
const int_fast8_t channel_type,
|
||||
const kvz_pixel *const in_ref_above,
|
||||
const kvz_pixel *const in_ref_left,
|
||||
kvz_pixel *const dst)
|
||||
kvz_pixel *const dst,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
|
@ -102,12 +104,17 @@ static void kvz_angular_pred_generic(
|
|||
|
||||
// Temporary buffer for modes 11-25.
|
||||
// It only needs to be big enough to hold indices from -width to width-1.
|
||||
kvz_pixel temp_main[2 * 128] = { 0 };
|
||||
kvz_pixel temp_side[2 * 128] = { 0 };
|
||||
|
||||
// TODO: check the correct size for these arrays when MRL is used
|
||||
//kvz_pixel tmp_ref[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
kvz_pixel temp_main[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
kvz_pixel temp_side[2 * 128 + 3 + 33 * MAX_REF_LINE_IDX] = { 0 };
|
||||
const int_fast32_t width = 1 << log2_width;
|
||||
|
||||
uint32_t pred_mode = intra_mode; // ToDo: handle WAIP
|
||||
|
||||
uint8_t multi_ref_index = multi_ref_idx;
|
||||
|
||||
// Whether to swap references to always project on the left reference row.
|
||||
const bool vertical_mode = intra_mode >= 34;
|
||||
// Modes distance to horizontal or vertical mode.
|
||||
|
@ -128,20 +135,22 @@ static void kvz_angular_pred_generic(
|
|||
// Set ref_main and ref_side such that, when indexed with 0, they point to
|
||||
// index 0 in block coordinates.
|
||||
if (sample_disp < 0) {
|
||||
for (int i = 0; i <= width + 1; i++) {
|
||||
|
||||
// TODO: for non-square blocks, separate loops for x and y is needed
|
||||
for (int i = 0; i <= width + 1 + multi_ref_index; i++) {
|
||||
temp_main[width + i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
|
||||
temp_side[width + i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
|
||||
}
|
||||
|
||||
// TODO: take into account non-square blocks
|
||||
ref_main = temp_main + width;
|
||||
ref_side = temp_side + width;
|
||||
|
||||
// TODO: for non square blocks, need to check if width or height is used for reference extension
|
||||
for (int i = -width; i <= -1; i++) {
|
||||
ref_main[i] = ref_side[MIN((-i * modedisp2invsampledisp[abs(mode_disp)] + 256) >> 9, width)];
|
||||
}
|
||||
|
||||
|
||||
|
||||
//const uint32_t index_offset = width + 1;
|
||||
//const int32_t last_index = width;
|
||||
//const int_fast32_t most_negative_index = (width * sample_disp) >> 5;
|
||||
|
@ -176,17 +185,20 @@ static void kvz_angular_pred_generic(
|
|||
}
|
||||
else {
|
||||
|
||||
for (int i = 0; i <= (width << 1); i++) {
|
||||
// TODO: again, separate loop needed for non-square blocks
|
||||
for (int i = 0; i <= (width << 1) + multi_ref_index; i++) {
|
||||
temp_main[i] = (vertical_mode ? in_ref_above[i] : in_ref_left[i]);
|
||||
temp_side[i] = (vertical_mode ? in_ref_left[i] : in_ref_above[i]);
|
||||
}
|
||||
|
||||
// TODO: this code block will need to change also when non-square blocks are used
|
||||
// const int log2_ratio = 0;
|
||||
const int s = 0;
|
||||
const int max_index = (0 << s) + 2;
|
||||
const int max_index = (multi_ref_index << s) + 2;
|
||||
const int ref_length = width << 1;
|
||||
const kvz_pixel val = temp_main[ref_length];
|
||||
for (int j = 0; j <= max_index; j++) {
|
||||
temp_main[ref_length + j] = val;
|
||||
const kvz_pixel val = temp_main[ref_length + multi_ref_index];
|
||||
for (int j = 1; j <= max_index; j++) {
|
||||
temp_main[ref_length + multi_ref_index + j] = val;
|
||||
}
|
||||
|
||||
ref_main = temp_main;
|
||||
|
@ -203,12 +215,14 @@ static void kvz_angular_pred_generic(
|
|||
//tmp_ref[width + last_index] = tmp_ref[width + last_index - 1];
|
||||
}
|
||||
|
||||
// compensate for line offset in reference line buffers
|
||||
ref_main += multi_ref_index;
|
||||
ref_side += multi_ref_index;
|
||||
|
||||
if (sample_disp != 0) {
|
||||
// The mode is not horizontal or vertical, we have to do interpolation.
|
||||
|
||||
int_fast32_t delta_pos = 0;
|
||||
for (int_fast32_t y = 0; y < width; ++y) {
|
||||
delta_pos += sample_disp;
|
||||
for (int_fast32_t y = 0, delta_pos = sample_disp * (1 + multi_ref_index); y < width; ++y, delta_pos += sample_disp) {
|
||||
int_fast32_t delta_int = delta_pos >> 5;
|
||||
int_fast32_t delta_fract = delta_pos & (32 - 1);
|
||||
|
||||
|
@ -231,6 +245,10 @@ static void kvz_angular_pred_generic(
|
|||
use_cubic = false;
|
||||
}
|
||||
}
|
||||
// Cubic must be used if ref line != 0
|
||||
if (multi_ref_index) {
|
||||
use_cubic = true;
|
||||
}
|
||||
const int16_t filter_coeff[4] = { 16 - (delta_fract >> 1), 32 - (delta_fract >> 1), 16 + (delta_fract >> 1), delta_fract >> 1 };
|
||||
int16_t const * const f = use_cubic ? cubic_filter[delta_fract] : filter_coeff;
|
||||
// Do 4-tap intra interpolation filtering
|
||||
|
@ -265,7 +283,7 @@ static void kvz_angular_pred_generic(
|
|||
// PDPC
|
||||
bool PDPC_filter = (width >= 4 || channel_type != 0);
|
||||
if (pred_mode > 1 && pred_mode < 67) {
|
||||
if (mode_disp < 0) {
|
||||
if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL.
|
||||
PDPC_filter = false;
|
||||
}
|
||||
else if (mode_disp > 0) {
|
||||
|
@ -324,7 +342,8 @@ static void kvz_angular_pred_generic(
|
|||
for (int_fast32_t x = 0; x < width; ++x) {
|
||||
dst[y * width + x] = ref_main[x + 1];
|
||||
}
|
||||
if ((width >= 4 || channel_type != 0) && sample_disp >= 0) {
|
||||
// Do not apply PDPC if multi ref line index is other than 0
|
||||
if ((width >= 4 || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0) {
|
||||
int scale = (log2_width + log2_width - 2) >> 2;
|
||||
const kvz_pixel top_left = ref_main[0];
|
||||
const kvz_pixel left = ref_side[1 + y];
|
||||
|
@ -396,26 +415,29 @@ static void kvz_intra_pred_planar_generic(
|
|||
}
|
||||
|
||||
/**
|
||||
* \brief Generage intra DC prediction with post filtering applied.
|
||||
* \brief Generate intra DC prediction with post filtering applied.
|
||||
* \param log2_width Log2 of width, range 2..5.
|
||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||
* \param dst Buffer of size width*width.
|
||||
* \param multi_ref_idx Reference line index. May be non-zero when MRL is used.
|
||||
*/
|
||||
static void kvz_intra_pred_filtered_dc_generic(
|
||||
const int_fast8_t log2_width,
|
||||
const kvz_pixel *const ref_top,
|
||||
const kvz_pixel *const ref_left,
|
||||
kvz_pixel *const out_block)
|
||||
kvz_pixel *const out_block,
|
||||
const uint8_t multi_ref_idx)
|
||||
{
|
||||
assert(log2_width >= 2 && log2_width <= 5);
|
||||
|
||||
// TODO: height for non-square block sizes
|
||||
const int_fast8_t width = 1 << log2_width;
|
||||
|
||||
int_fast16_t sum = 0;
|
||||
for (int_fast8_t i = 0; i < width; ++i) {
|
||||
sum += ref_top[i + 1];
|
||||
sum += ref_left[i + 1];
|
||||
sum += ref_top[i + 1 + multi_ref_idx];
|
||||
sum += ref_left[i + 1 + multi_ref_idx];
|
||||
}
|
||||
|
||||
const kvz_pixel dc_val = (sum + width) >> (log2_width + 1);
|
||||
|
|
|
@ -49,7 +49,8 @@ typedef void (angular_pred_func)(
|
|||
const int_fast8_t channel_type,
|
||||
const kvz_pixel *const in_ref_above,
|
||||
const kvz_pixel *const in_ref_left,
|
||||
kvz_pixel *const dst);
|
||||
kvz_pixel *const dst,
|
||||
const uint8_t multi_ref_idx);
|
||||
|
||||
typedef void (intra_pred_planar_func)(
|
||||
const int_fast8_t log2_width,
|
||||
|
@ -61,7 +62,8 @@ typedef void (intra_pred_filtered_dc_func)(
|
|||
const int_fast8_t log2_width,
|
||||
const kvz_pixel *const ref_top,
|
||||
const kvz_pixel *const ref_left,
|
||||
kvz_pixel *const out_block);
|
||||
kvz_pixel *const out_block,
|
||||
const uint8_t multi_ref_idx);
|
||||
|
||||
typedef void (pdpc_planar_dc_func)(
|
||||
const int mode,
|
||||
|
|
|
@ -10,6 +10,7 @@ common_args='256x128 10 yuv420p -p1 --preset=ultrafast --threads=0 --no-wpp --no
|
|||
valgrind_test $common_args --rd=1
|
||||
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37
|
||||
valgrind_test $common_args --rd=2 --no-transform-skip --qp 37 --signhide --rdoq
|
||||
valgrind_test $common_args --rd=2 --mrl
|
||||
valgrind_test $common_args --rd=3
|
||||
valgrind_test $common_args --alf=full --no-wpp --threads=0 --owf=0
|
||||
valgrind_test $common_args --alf=full --wpp --threads=1
|
||||
|
|
Loading…
Reference in a new issue