mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
1902 lines
68 KiB
C
1902 lines
68 KiB
C
/*****************************************************************************
|
|
* This file is part of uvg266 VVC encoder.
|
|
*
|
|
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without modification,
|
|
* are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer.
|
|
*
|
|
* * Redistributions in binary form must reproduce the above copyright notice, this
|
|
* list of conditions and the following disclaimer in the documentation and/or
|
|
* other materials provided with the distribution.
|
|
*
|
|
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
****************************************************************************/
|
|
|
|
#include "intra.h"
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include "image.h"
|
|
#include "uvg_math.h"
|
|
#include "mip_data.h"
|
|
#include "search.h"
|
|
#include "search_intra.h"
|
|
#include "strategies/strategies-intra.h"
|
|
#include "tables.h"
|
|
#include "transform.h"
|
|
#include "videoframe.h"
|
|
|
|
// Lookup table for the number of intra reference pixels above a prediction
// unit, based on the prediction unit's coordinates within an LCU.
// Generated by "tools/generate_ref_pixel_tables.py".
static const uint8_t num_ref_pixels_top[16][16] = {
  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 32, 28, 24, 20, 16, 12,  8,  4, 32, 28, 24, 20, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 64, 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 32, 28, 24, 20, 16, 12,  8,  4, 32, 28, 24, 20, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4, 16, 12,  8,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 }
};
|
|
// Lookup table for the number of intra reference pixels to the left of a
// prediction unit, based on the prediction unit's coordinates within an LCU.
static const uint8_t num_ref_pixels_left[16][16] = {
  { 64,  4,  8,  4, 16,  4,  8,  4, 32,  4,  8,  4, 16,  4,  8,  4 },
  { 60,  4,  4,  4, 12,  4,  4,  4, 28,  4,  4,  4, 12,  4,  4,  4 },
  { 56,  4,  8,  4,  8,  4,  8,  4, 24,  4,  8,  4,  8,  4,  8,  4 },
  { 52,  4,  4,  4,  4,  4,  4,  4, 20,  4,  4,  4,  4,  4,  4,  4 },
  { 48,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4 },
  { 44,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4 },
  { 40,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  { 36,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4 },
  { 32,  4,  8,  4, 16,  4,  8,  4, 32,  4,  8,  4, 16,  4,  8,  4 },
  { 28,  4,  4,  4, 12,  4,  4,  4, 28,  4,  4,  4, 12,  4,  4,  4 },
  { 24,  4,  8,  4,  8,  4,  8,  4, 24,  4,  8,  4,  8,  4,  8,  4 },
  { 20,  4,  4,  4,  4,  4,  4,  4, 20,  4,  4,  4,  4,  4,  4,  4 },
  { 16,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4, 16,  4,  8,  4 },
  { 12,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4, 12,  4,  4,  4 },
  {  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4,  8,  4 },
  {  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4 }
};
|
|
|
|
|
|
static void mip_predict(
|
|
const encoder_state_t* const state,
|
|
const uvg_intra_references* const refs,
|
|
const uint16_t pred_block_width,
|
|
const uint16_t pred_block_height,
|
|
uvg_pixel* dst,
|
|
const int mip_mode,
|
|
const bool mip_transp);
|
|
|
|
|
|
int8_t uvg_intra_get_dir_luma_predictor(
|
|
const uint32_t x,
|
|
const uint32_t y,
|
|
int8_t *preds,
|
|
const cu_info_t *const cur_pu,
|
|
const cu_info_t *const left_pu,
|
|
const cu_info_t *const above_pu)
|
|
{
|
|
enum {
|
|
PLANAR_IDX = 0,
|
|
DC_IDX = 1,
|
|
HOR_IDX = 18,
|
|
VER_IDX = 50,
|
|
};
|
|
|
|
int8_t number_of_candidates = 0;
|
|
|
|
// The default mode if block is not coded yet is INTRA_PLANAR.
|
|
// If the neighboring blocks were MIP blocks, intra mode is set to planar.
|
|
int8_t left_intra_dir = 0;
|
|
if (left_pu && left_pu->type == CU_INTRA) {
|
|
if (left_pu->intra.mip_flag) {
|
|
left_intra_dir = PLANAR_IDX;
|
|
} else {
|
|
left_intra_dir = left_pu->intra.mode;
|
|
}
|
|
}
|
|
|
|
int8_t above_intra_dir = 0;
|
|
if (above_pu && above_pu->type == CU_INTRA && y % LCU_WIDTH != 0) {
|
|
if (above_pu->intra.mip_flag) {
|
|
above_intra_dir = PLANAR_IDX;
|
|
} else {
|
|
above_intra_dir = above_pu->intra.mode;
|
|
}
|
|
}
|
|
|
|
const int offset = 61;
|
|
const int mod = 64;
|
|
|
|
preds[0] = PLANAR_IDX;
|
|
preds[1] = DC_IDX;
|
|
preds[2] = VER_IDX;
|
|
preds[3] = HOR_IDX;
|
|
preds[4] = VER_IDX - 4;
|
|
preds[5] = VER_IDX + 4;
|
|
|
|
// If the predictions are the same, add new predictions
|
|
if (left_intra_dir == above_intra_dir) {
|
|
number_of_candidates = 1;
|
|
if (left_intra_dir > DC_IDX) { // angular modes
|
|
preds[0] = PLANAR_IDX;
|
|
preds[1] = left_intra_dir;
|
|
preds[2] = ((left_intra_dir + offset) % mod) + 2;
|
|
preds[3] = ((left_intra_dir - 1) % mod) + 2;
|
|
preds[4] = ((left_intra_dir + offset - 1) % mod) + 2;
|
|
preds[5] = (left_intra_dir % mod) + 2;
|
|
}
|
|
} else { // If we have two distinct predictions
|
|
number_of_candidates = 2;
|
|
uint8_t max_cand_mode_idx = preds[0] > preds[1] ? 0 : 1;
|
|
|
|
if (left_intra_dir > DC_IDX && above_intra_dir > DC_IDX) {
|
|
preds[0] = PLANAR_IDX;
|
|
preds[1] = left_intra_dir;
|
|
preds[2] = above_intra_dir;
|
|
max_cand_mode_idx = preds[1] > preds[2] ? 1 : 2;
|
|
uint8_t min_cand_mode_idx = preds[1] > preds[2] ? 2 : 1;
|
|
|
|
if (preds[max_cand_mode_idx] - preds[min_cand_mode_idx] == 1) {
|
|
preds[3] = ((preds[min_cand_mode_idx] + offset) % mod) + 2;
|
|
preds[4] = ((preds[max_cand_mode_idx] - 1) % mod) + 2;
|
|
preds[5] = ((preds[min_cand_mode_idx] + offset - 1) % mod) + 2;
|
|
} else if (preds[max_cand_mode_idx] - preds[min_cand_mode_idx] >= 62) {
|
|
preds[3] = ((preds[min_cand_mode_idx] - 1) % mod) + 2;
|
|
preds[4] = ((preds[max_cand_mode_idx] + offset) % mod) + 2;
|
|
preds[5] = (preds[min_cand_mode_idx] % mod) + 2;
|
|
} else if (preds[max_cand_mode_idx] - preds[min_cand_mode_idx] == 2) {
|
|
preds[3] = ((preds[min_cand_mode_idx] - 1) % mod) + 2;
|
|
preds[4] = ((preds[min_cand_mode_idx] + offset) % mod) + 2;
|
|
preds[5] = ((preds[max_cand_mode_idx] - 1) % mod) + 2;
|
|
} else {
|
|
preds[3] = ((preds[min_cand_mode_idx] + offset) % mod) + 2;
|
|
preds[4] = ((preds[min_cand_mode_idx] - 1) % mod) + 2;
|
|
preds[5] = ((preds[max_cand_mode_idx] + offset) % mod) + 2;
|
|
}
|
|
} else if(left_intra_dir + above_intra_dir >= 2){ // Add DC mode if it's not present, otherwise VER_IDX.
|
|
preds[0] = PLANAR_IDX;
|
|
preds[1] = (left_intra_dir < above_intra_dir) ? above_intra_dir : left_intra_dir;
|
|
|
|
max_cand_mode_idx = 1;
|
|
|
|
preds[2] = ((preds[max_cand_mode_idx] + offset) % mod) + 2;
|
|
preds[3] = ((preds[max_cand_mode_idx] - 1) % mod) + 2;
|
|
preds[4] = ((preds[max_cand_mode_idx] +offset - 1) % mod) + 2;
|
|
preds[5] = ( preds[max_cand_mode_idx] % mod) + 2;
|
|
}
|
|
}
|
|
|
|
return number_of_candidates;
|
|
}
|
|
|
|
static void intra_filter_reference(
|
|
int_fast8_t log2_width,
|
|
int_fast8_t log2_height,
|
|
uvg_intra_references *refs)
|
|
{
|
|
if (refs->filtered_initialized) {
|
|
return;
|
|
} else {
|
|
refs->filtered_initialized = true;
|
|
}
|
|
|
|
const int_fast8_t ref_width = 2 * (1 << log2_width) + 1;
|
|
const int_fast8_t ref_height = 2 * (1 << log2_height) + 1;
|
|
uvg_intra_ref *ref = &refs->ref;
|
|
uvg_intra_ref *filtered_ref = &refs->filtered_ref;
|
|
|
|
// Starting point at top left for both iterations
|
|
filtered_ref->left[0] = (ref->left[1] + 2 * ref->left[0] + ref->top[1] + 2) >> 2;
|
|
filtered_ref->top[0] = filtered_ref->left[0];
|
|
|
|
// Top to bottom
|
|
for (int_fast8_t y = 1; y < ref_height - 1; ++y) {
|
|
uvg_pixel *p = &ref->left[y];
|
|
filtered_ref->left[y] = (p[-1] + 2 * p[0] + p[1] + 2) >> 2;
|
|
}
|
|
// Bottom left (not filtered)
|
|
filtered_ref->left[ref_height - 1] = ref->left[ref_height - 1];
|
|
|
|
// Left to right
|
|
for (int_fast8_t x = 1; x < ref_width - 1; ++x) {
|
|
uvg_pixel *p = &ref->top[x];
|
|
filtered_ref->top[x] = (p[-1] + 2 * p[0] + p[1] + 2) >> 2;
|
|
}
|
|
// Top right (not filtered)
|
|
filtered_ref->top[ref_width - 1] = ref->top[ref_width - 1];
|
|
}
|
|
|
|
|
|
/**
|
|
* \brief Generate dc prediction.
|
|
* \param cu_loc CU location and size data.
|
|
* \param color Color channel.
|
|
* \param ref_top Pointer to -1 index of above reference, length=width*2+1.
|
|
* \param ref_left Pointer to -1 index of left reference, length=width*2+1.
|
|
* \param dst Buffer of size width*width.
|
|
* \param multi_ref_idx Multi reference line index for use with MRL.
|
|
*/
|
|
static void intra_pred_dc(
|
|
const cu_loc_t* const cu_loc,
|
|
const color_t color,
|
|
const uvg_pixel *const ref_top,
|
|
const uvg_pixel *const ref_left,
|
|
uvg_pixel *const out_block,
|
|
const uint8_t multi_ref_idx)
|
|
{
|
|
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
|
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
|
|
|
int_fast16_t sum = 0;
|
|
// Only one loop is done for non-square blocks.
|
|
// In case of non-square blocks, only the longer reference is summed.
|
|
if (width >= height) {
|
|
for (int_fast8_t i = 0; i < width; ++i) {
|
|
sum += ref_top[i + 1 + multi_ref_idx];
|
|
}
|
|
}
|
|
if (width <= height) {
|
|
for (int_fast8_t j = 0; j < height; ++j) {
|
|
sum += ref_left[j + 1 + multi_ref_idx];
|
|
}
|
|
}
|
|
|
|
// JVET_K0122
|
|
const int denom = width == height ? width << 1 : MAX(width, height);
|
|
const int divShift = uvg_math_floor_log2(denom);
|
|
const int divOffset = denom >> 1;
|
|
|
|
const uvg_pixel dc_val = (sum + divOffset) >> divShift;
|
|
//const uvg_pixel dc_val = (sum + width) >> (log2_width + 1);
|
|
const int_fast16_t block_size = width * height;
|
|
|
|
for (int_fast16_t i = 0; i < block_size; ++i) {
|
|
out_block[i] = dc_val;
|
|
}
|
|
}
|
|
|
|
|
|
// Intra mode numbers of the cross-component linear model (CCLM) chroma modes.
enum lm_mode {
  LM_CHROMA_IDX   = 81, // model derived from both the above and the left neighbours
  LM_CHROMA_L_IDX = 82, // model derived from the left neighbours only
  LM_CHROMA_T_IDX = 83, // model derived from the above neighbours only
};
|
|
|
|
|
|
static void get_cclm_parameters(
|
|
encoder_state_t const* const state,
|
|
int8_t width, int8_t height, int8_t mode,
|
|
int x0, int y0, int avai_above_right_units, int avai_left_below_units,
|
|
uvg_intra_ref* luma_src, uvg_intra_references*chroma_ref,
|
|
int16_t *a, int16_t*b, int16_t*shift) {
|
|
|
|
const int base_unit_size = 1 << (6 - PU_DEPTH_INTRA_MAX);
|
|
|
|
// TODO: take into account YUV422
|
|
const int unit_w = base_unit_size >> 1;
|
|
const int unit_h = base_unit_size >> 1;
|
|
|
|
const int c_height = height;
|
|
const int c_width = width;
|
|
height *= 2;
|
|
width *= 2;
|
|
|
|
const int tu_width_in_units = c_width / unit_w;
|
|
const int tu_height_in_units = c_height / unit_h;
|
|
|
|
|
|
//int top_template_samp_num = width; // for MDLM, the template sample number is 2W or 2H;
|
|
//int left_template_samp_num = height;
|
|
|
|
// These are used for calculating some stuff for non-square CUs
|
|
//int total_above_units = (top_template_samp_num + (unit_w - 1)) / unit_w;
|
|
//int total_left_units = (left_template_samp_num + (unit_h - 1)) / unit_h;
|
|
//int total_units = total_left_units + total_above_units + 1;
|
|
//int above_right_units = total_above_units - tu_width_in_units;
|
|
//int left_below_units = total_left_units - tu_height_in_units;
|
|
//int avai_above_right_units = 0; // TODO these are non zero only with non-square CUs
|
|
//int avai_left_below_units = 0;
|
|
int avai_above_units = CLIP(0, tu_height_in_units, y0/base_unit_size);
|
|
int avai_left_units = CLIP(0, tu_width_in_units, x0 / base_unit_size);
|
|
|
|
bool above_available = avai_above_units != 0;
|
|
bool left_available = avai_left_units != 0;
|
|
|
|
char internal_bit_depth = state->encoder_control->bitdepth;
|
|
|
|
int min_luma[2] = { MAX_INT, 0 };
|
|
int max_luma[2] = { -MAX_INT, 0 };
|
|
|
|
uvg_pixel* src;
|
|
int actualTopTemplateSampNum = 0;
|
|
int actualLeftTemplateSampNum = 0;
|
|
if (mode == LM_CHROMA_T_IDX)
|
|
{
|
|
left_available = 0;
|
|
avai_above_right_units = avai_above_right_units > (c_height / unit_w) ? c_height / unit_w : avai_above_right_units;
|
|
actualTopTemplateSampNum = unit_w * (avai_above_units + avai_above_right_units);
|
|
}
|
|
else if (mode == LM_CHROMA_L_IDX)
|
|
{
|
|
above_available = 0;
|
|
avai_left_below_units = avai_left_below_units > (c_width / unit_h) ? c_width / unit_h : avai_left_below_units;
|
|
actualLeftTemplateSampNum = unit_h * (avai_left_units + avai_left_below_units);
|
|
}
|
|
else if (mode == LM_CHROMA_IDX)
|
|
{
|
|
actualTopTemplateSampNum = c_width;
|
|
actualLeftTemplateSampNum = c_height;
|
|
}
|
|
int startPos[2]; //0:Above, 1: Left
|
|
int pickStep[2];
|
|
|
|
int aboveIs4 = left_available ? 0 : 1;
|
|
int leftIs4 = above_available ? 0 : 1;
|
|
|
|
startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
|
|
pickStep[0] = MAX(1, actualTopTemplateSampNum >> (1 + aboveIs4));
|
|
|
|
startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
|
|
pickStep[1] = MAX(1, actualLeftTemplateSampNum >> (1 + leftIs4));
|
|
|
|
uvg_pixel selectLumaPix[4] = { 0, 0, 0, 0 };
|
|
uvg_pixel selectChromaPix[4] = { 0, 0, 0, 0 };
|
|
|
|
int cntT, cntL;
|
|
cntT = cntL = 0;
|
|
int cnt = 0;
|
|
if (above_available)
|
|
{
|
|
cntT = MIN(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
|
|
src = luma_src->top;
|
|
const uvg_pixel* cur = chroma_ref->ref.top + 1;
|
|
for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
|
|
{
|
|
selectLumaPix[cnt] = src[pos];
|
|
selectChromaPix[cnt] = cur[pos];
|
|
}
|
|
}
|
|
|
|
if (left_available)
|
|
{
|
|
cntL = MIN(actualLeftTemplateSampNum, (1 + leftIs4) << 1);
|
|
src = luma_src->left;
|
|
const uvg_pixel* cur = chroma_ref->ref.left + 1;
|
|
for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
|
|
{
|
|
selectLumaPix[cnt + cntT] = src[pos];
|
|
selectChromaPix[cnt + cntT] = cur[pos];
|
|
}
|
|
}
|
|
cnt = cntL + cntT;
|
|
|
|
if (cnt == 2)
|
|
{
|
|
selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
|
|
selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
|
|
selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
|
|
selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
|
|
}
|
|
|
|
int minGrpIdx[2] = { 0, 2 };
|
|
int maxGrpIdx[2] = { 1, 3 };
|
|
int* tmpMinGrp = minGrpIdx;
|
|
int* tmpMaxGrp = maxGrpIdx;
|
|
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]])
|
|
{
|
|
SWAP(tmpMinGrp[0], tmpMinGrp[1], int);
|
|
}
|
|
if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]])
|
|
{
|
|
SWAP(tmpMaxGrp[0], tmpMaxGrp[1], int);
|
|
}
|
|
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]])
|
|
{
|
|
SWAP(tmpMinGrp, tmpMaxGrp, int*);
|
|
}
|
|
if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]])
|
|
{
|
|
SWAP(tmpMinGrp[1], tmpMaxGrp[0], int);
|
|
}
|
|
|
|
min_luma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1) >> 1;
|
|
min_luma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
|
|
max_luma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1) >> 1;
|
|
max_luma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
|
|
|
|
if (left_available || above_available)
|
|
{
|
|
int diff = max_luma[0] - min_luma[0];
|
|
if (diff > 0)
|
|
{
|
|
int diffC = max_luma[1] - min_luma[1];
|
|
int x = uvg_math_floor_log2(diff);
|
|
static const uint8_t DivSigTable[1 << 4] = {
|
|
// 4bit significands - 8 ( MSB is omitted )
|
|
0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0
|
|
};
|
|
int normDiff = (diff << 4 >> x) & 15;
|
|
int v = DivSigTable[normDiff] | 8;
|
|
x += normDiff != 0;
|
|
|
|
int y = diffC ? uvg_math_floor_log2(abs(diffC)) + 1 : 0;
|
|
int add = 1 << y >> 1;
|
|
*a = (diffC * v + add) >> y;
|
|
*shift = 3 + x - y;
|
|
if (*shift < 1)
|
|
{
|
|
*shift = 1;
|
|
*a = ((*a == 0) ? 0 : (*a < 0) ? -15 : 15); // a=Sign(a)*15
|
|
}
|
|
*b = min_luma[1] - ((*a * min_luma[0]) >> *shift);
|
|
}
|
|
else
|
|
{
|
|
*a = 0;
|
|
*b = min_luma[1];
|
|
*shift = 0;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
*a = 0;
|
|
|
|
*b = 1 << (internal_bit_depth - 1);
|
|
|
|
*shift = 0;
|
|
}
|
|
}
|
|
|
|
/**
 * \brief Apply a CCLM linear model to a block of samples.
 *
 * Every output sample is ((src * a) >> shift) + b, clipped to the pixel range.
 *
 * \param cclm_params  Model parameters a, shift and b.
 * \param src          Input samples, stride * height entries.
 * \param dst          Output samples, stride * height entries.
 * \param stride       Number of samples per row; also the row pitch of both buffers.
 * \param height       Number of rows.
 */
static void linear_transform_cclm(const cclm_parameters_t* cclm_params, uvg_pixel * src, uvg_pixel * dst, int stride, int height) {
  const int scale = cclm_params->a;
  const int shift = cclm_params->shift;
  const int offset = cclm_params->b;

  for (int row = 0; row < height; ++row) {
    const int row_start = row * stride;
    for (int col = 0; col < stride; ++col) {
      const int idx = row_start + col;
      int val = (src[idx] * scale) >> shift;
      val += offset;
      dst[idx] = CLIP_TO_PIXEL(val);
    }
  }
}
|
|
|
|
|
|
static void predict_cclm(
|
|
encoder_state_t const* const state,
|
|
const color_t color,
|
|
const int8_t width,
|
|
const int8_t height,
|
|
const int16_t x0,
|
|
const int16_t y0,
|
|
const int16_t stride,
|
|
const int8_t mode,
|
|
const lcu_t* const lcu,
|
|
uvg_intra_references* chroma_ref,
|
|
uvg_pixel* dst,
|
|
cclm_parameters_t* cclm_params,
|
|
enum uvg_tree_type tree_type
|
|
)
|
|
{
|
|
assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX);
|
|
assert(state->encoder_control->cfg.cclm);
|
|
|
|
|
|
uvg_intra_ref sampled_luma_ref;
|
|
uvg_pixel sampled_luma[LCU_CHROMA_SIZE];
|
|
|
|
int x_scu = SUB_SCU(x0);
|
|
int y_scu = SUB_SCU(y0);
|
|
|
|
int available_above_right = 0;
|
|
int available_left_below = 0;
|
|
|
|
|
|
const uvg_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH;
|
|
const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
|
|
|
|
// Essentially what this does is that it uses 6-tap filtering to downsample
|
|
// the luma intra references down to match the resolution of the chroma channel.
|
|
// The luma reference is only needed when we are not on the edge of the picture.
|
|
// Because the reference pixels that are needed on the edge of the ctu this code
|
|
// is kinda messy but what can you do
|
|
const int ctu_size = tree_type == UVG_CHROMA_T ? LCU_WIDTH_C : LCU_WIDTH;
|
|
|
|
if (y0) {
|
|
if (y_scu == 0) available_above_right = MIN(MIN(width / 2, (64-x_scu - width * 2) / 2), (state->tile->frame->width - x0 - width* 2) / 2);
|
|
for (; available_above_right < width / 2; available_above_right++) {
|
|
int x_extension = x_scu + width * 2 + 4 * available_above_right;
|
|
x_extension >>= tree_type == UVG_CHROMA_T;
|
|
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, (y_scu >> (tree_type==UVG_CHROMA_T)) - 4);
|
|
if (x_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break;
|
|
}
|
|
if(y_scu == 0) {
|
|
if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4);
|
|
memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride2 / 2)], sizeof(uvg_pixel) * (width + available_above_right * 2));
|
|
}
|
|
else {
|
|
for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) {
|
|
bool left_padding = x0 || x;
|
|
int s = 4;
|
|
s += y_scu ? y_rec[x - LCU_WIDTH * 2] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 2) * stride] * 2;
|
|
s += y_scu ? y_rec[x - LCU_WIDTH * 2 + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 2) * stride];
|
|
s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH * 2 - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 2) * stride];
|
|
s += y_scu ? y_rec[x - LCU_WIDTH] * 2 : state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2;
|
|
s += y_scu ? y_rec[x - LCU_WIDTH + 1] : state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride];
|
|
s += y_scu && !(x0 && !x && !x_scu) ? y_rec[x - LCU_WIDTH - left_padding] : state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride];
|
|
sampled_luma_ref.top[x / 2] = s >> 3;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(x0) {
|
|
if (x_scu == 0) available_left_below = MIN(MIN(width / 2, (64 - y_scu - height * 2) / 2), (state->tile->frame->height - y0 - height * 2) / 2);
|
|
for (; available_left_below < height / 2; available_left_below++) {
|
|
int y_extension = y_scu + height * 2 + 4 * available_left_below;
|
|
y_extension >>= tree_type == UVG_CHROMA_T;
|
|
const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, (x_scu >> (tree_type == UVG_CHROMA_T)) - 4, y_extension);
|
|
if (y_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break;
|
|
if(x_scu == 32 && y_scu == 0 && pu->depth == 0) break;
|
|
}
|
|
for(int i = 0; i < height + available_left_below * 2; i++) {
|
|
sampled_luma_ref.left[i] = state->tile->frame->cclm_luma_rec[(y0/2 + i) * (stride2/2) + x0 / 2 - 1];
|
|
}
|
|
}
|
|
|
|
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x0 / 2 + (y0 * stride2) / 4], sampled_luma, width, height, stride2 / 2, width);
|
|
|
|
int16_t a, b, shift;
|
|
get_cclm_parameters(state, width, height, mode,x0, y0, available_above_right, available_left_below, &sampled_luma_ref, chroma_ref, &a, &b, &shift);
|
|
cclm_params->shift = shift;
|
|
cclm_params->a = a;
|
|
cclm_params->b = b;
|
|
|
|
if(dst)
|
|
linear_transform_cclm(cclm_params, sampled_luma, dst, width, height);
|
|
}
|
|
|
|
|
|
/**
 * \brief Select the context index for coding the MIP flag of a block.
 *
 * Exactly one of lcu and cu_a may be given; it is used to look up the left
 * and above neighbours.
 *
 * \param x       Horizontal position of the block.
 * \param y       Vertical position of the block.
 * \param width   Block width.
 * \param height  Block height.
 * \param lcu     LCU to look the neighbours up in, or NULL.
 * \param cu_a    CU array to look the neighbours up in, or NULL.
 * \return 3 when one side of the block is more than twice the other;
 *         otherwise the sum of the MIP flags of the intra coded left and
 *         above neighbours.
 */
int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* lcu, cu_array_t* const cu_a) {
  assert(!(lcu && cu_a));

  const bool elongated = (width > 2 * height) || (height > 2 * width);
  if (elongated) {
    return 3;
  }

  const cu_info_t* left = NULL;
  const cu_info_t* top = NULL;
  if (lcu) {
    const int x_local = SUB_SCU(x);
    const int y_local = SUB_SCU(y);
    if (x) {
      left = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local);
    }
    if (y) {
      top = LCU_GET_CU_AT_PX(lcu, x_local, y_local - 1);
    }
  } else {
    if (x > 0) {
      left = uvg_cu_array_at_const(cu_a, x - 1, y);
    }
    if (y > 0) {
      top = uvg_cu_array_at_const(cu_a, x, y - 1);
    }
  }

  int context = 0;
  if (left && left->type == CU_INTRA) {
    context += left->intra.mip_flag;
  }
  if (top && top->type == CU_INTRA) {
    context += top->intra.mip_flag;
  }
  return context;
}
|
|
|
|
|
|
/**
 * \brief Reduce a boundary to dst_len samples by averaging.
 *
 * When dst_len is smaller than src_len, each output is the rounded mean of
 * src_len / dst_len consecutive inputs; otherwise the first dst_len inputs
 * are copied unchanged.
 *
 * \param reduced_dst  Output, dst_len entries.
 * \param ref_src      Input boundary samples.
 * \param src_len      Number of input samples.
 * \param dst_len      Number of output samples.
 */
void uvg_mip_boundary_downsampling_1D(int* reduced_dst, const int* const ref_src, int src_len, int dst_len)
{
  if (!(dst_len < src_len)) {
    // Copy boundary if no downsampling is needed
    for (uint16_t i = 0; i < dst_len; ++i) {
      reduced_dst[i] = ref_src[i];
    }
    return;
  }

  // Create reduced boundary by downsampling
  const uint16_t down_smp_factor = src_len / dst_len;
  const int log2_factor = uvg_math_floor_log2(down_smp_factor);
  const int rounding_offset = (1 << (log2_factor - 1));

  uint16_t src_idx = 0;
  for (uint16_t dst_idx = 0; dst_idx < dst_len; dst_idx++) {
    int group_sum = 0;
    for (int k = 0; k < down_smp_factor; k++, src_idx++) {
      group_sum += ref_src[src_idx];
    }
    reduced_dst[dst_idx] = (group_sum + rounding_offset) >> log2_factor;
  }
}
|
|
|
|
|
|
void uvg_mip_reduced_pred(int* const output,
|
|
const int* const input,
|
|
const uint8_t* matrix,
|
|
const bool transpose,
|
|
const int red_bdry_size,
|
|
const int red_pred_size,
|
|
const int size_id,
|
|
const int in_offset,
|
|
const int in_offset_tr)
|
|
{
|
|
const int input_size = 2 * red_bdry_size;
|
|
|
|
// Use local buffer for transposed result
|
|
int out_buf_transposed[LCU_WIDTH * LCU_WIDTH];
|
|
int* const out_ptr = transpose ? out_buf_transposed : output;
|
|
|
|
int sum = 0;
|
|
for (int i = 0; i < input_size; i++) {
|
|
sum += input[i];
|
|
}
|
|
const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum;
|
|
assert((input_size == 4 * (input_size >> 2)) && "MIP input size must be divisible by four");
|
|
|
|
const uint8_t* weight = matrix;
|
|
const int input_offset = transpose ? in_offset_tr : in_offset;
|
|
|
|
const bool red_size = (size_id == 2);
|
|
int pos_res = 0;
|
|
for (int y = 0; y < red_pred_size; y++) {
|
|
for (int x = 0; x < red_pred_size; x++) {
|
|
if (red_size) {
|
|
weight -= 1;
|
|
}
|
|
int tmp0 = red_size ? 0 : (input[0] * weight[0]);
|
|
int tmp1 = input[1] * weight[1];
|
|
int tmp2 = input[2] * weight[2];
|
|
int tmp3 = input[3] * weight[3];
|
|
for (int i = 4; i < input_size; i += 4) {
|
|
tmp0 += input[i] * weight[i];
|
|
tmp1 += input[i + 1] * weight[i + 1];
|
|
tmp2 += input[i + 2] * weight[i + 2];
|
|
tmp3 += input[i + 3] * weight[i + 3];
|
|
}
|
|
out_ptr[pos_res] = CLIP_TO_PIXEL(((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) + input_offset);
|
|
pos_res++;
|
|
weight += input_size;
|
|
}
|
|
}
|
|
|
|
if (transpose) {
|
|
for (int y = 0; y < red_pred_size; y++) {
|
|
for (int x = 0; x < red_pred_size; x++) {
|
|
output[y * red_pred_size + x] = out_ptr[x * red_pred_size + y];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
 * \brief Upsample a reduced prediction along one dimension by linear interpolation.
 *
 * For every line, ups_factor output samples are generated per source sample,
 * interpolating between the previous sample (the boundary sample for the
 * first one) and the current source sample.
 *
 * \param dst                 Output buffer.
 * \param src                 Source samples.
 * \param boundary            Boundary samples; the one used for line n is
 *                            boundary[(n + 1) * boundary_step - 1].
 * \param src_size_ups_dim    Source sample count along the upsampled dimension.
 * \param src_size_orth_dim   Number of lines.
 * \param src_step            Distance between source samples within a line.
 * \param src_stride          Distance between source lines.
 * \param dst_step            Distance between output samples within a line.
 * \param dst_stride          Distance between output lines.
 * \param boundary_step       Distance between boundary samples of successive lines.
 * \param ups_factor          Upsampling factor, at least 2.
 */
void uvg_mip_pred_upsampling_1D(int* const dst, const int* const src, const int* const boundary,
                                const uint16_t src_size_ups_dim, const uint16_t src_size_orth_dim,
                                const uint16_t src_step, const uint16_t src_stride,
                                const uint16_t dst_step, const uint16_t dst_stride,
                                const uint16_t boundary_step,
                                const uint16_t ups_factor)
{
  const int log2_factor = uvg_math_floor_log2(ups_factor);
  assert(ups_factor >= 2 && "Upsampling factor must be at least 2.");
  const int rounding_offset = 1 << (log2_factor - 1);

  const int* src_line = src;
  int* dst_line = dst;
  const int* boundary_line = boundary + boundary_step - 1;

  for (uint16_t line = 0; line < src_size_orth_dim; line++) {
    const int* before = boundary_line;
    const int* behind = src_line;
    int* cur_dst = dst_line;

    for (uint16_t sample = 0; sample < src_size_ups_dim; sample++) {
      // The weight of "before" falls from ups_factor - 1 to 0 while the
      // weight of "behind" rises from 1 to ups_factor.
      int scaled_before = (*before) << log2_factor;
      int scaled_behind = 0;
      for (uint16_t pos = 1; pos <= ups_factor; pos++) {
        scaled_before -= *before;
        scaled_behind += *behind;
        *cur_dst = (scaled_before + scaled_behind + rounding_offset) >> log2_factor;
        cur_dst += dst_step;
      }

      before = behind;
      behind += src_step;
    }

    src_line += src_stride;
    dst_line += dst_stride;
    boundary_line += boundary_step;
  }
}
|
|
|
|
|
|
|
|
/** \brief Matrix weighted intra prediction.
|
|
*/
|
|
static void mip_predict(
|
|
const encoder_state_t* const state,
|
|
const uvg_intra_references* const refs,
|
|
const uint16_t pred_block_width,
|
|
const uint16_t pred_block_height,
|
|
uvg_pixel* dst,
|
|
const int mip_mode,
|
|
const bool mip_transp)
|
|
{
|
|
// MIP prediction uses int values instead of uvg_pixel as some temp values may be negative
|
|
|
|
uvg_pixel* out = dst;
|
|
int result[32*32] = {0};
|
|
const int mode_idx = mip_mode;
|
|
|
|
// *** INPUT PREP ***
|
|
|
|
// Initialize prediction parameters START
|
|
uint16_t width = pred_block_width;
|
|
uint16_t height = pred_block_height;
|
|
|
|
int size_id; // Prediction block type
|
|
if (width == 4 && height == 4) {
|
|
size_id = 0;
|
|
}
|
|
else if (width == 4 || height == 4 || (width == 8 && height == 8)) {
|
|
size_id = 1;
|
|
}
|
|
else {
|
|
size_id = 2;
|
|
}
|
|
|
|
// Reduced boundary and prediction sizes
|
|
int red_bdry_size = (size_id == 0) ? 2 : 4;
|
|
int red_pred_size = (size_id < 2) ? 4 : 8;
|
|
|
|
// Upsampling factors
|
|
uint16_t ups_hor_factor = width / red_pred_size;
|
|
uint16_t ups_ver_factor = height / red_pred_size;
|
|
|
|
// Upsampling factors must be powers of two
|
|
assert(!((ups_hor_factor < 1) || ((ups_hor_factor & (ups_hor_factor - 1))) != 0) && "Horizontal upsampling factor must be power of two.");
|
|
assert(!((ups_ver_factor < 1) || ((ups_ver_factor & (ups_ver_factor - 1))) != 0) && "Vertical upsampling factor must be power of two.");
|
|
|
|
// Initialize prediction parameters END
|
|
|
|
int ref_samples_top[INTRA_REF_LENGTH];
|
|
int ref_samples_left[INTRA_REF_LENGTH];
|
|
|
|
for (int i = 1; i < INTRA_REF_LENGTH; i++) {
|
|
ref_samples_top[i-1] = (int)refs->ref.top[i]; // NOTE: in VTM code these are indexed as x + 1 & y + 1 during init
|
|
ref_samples_left[i-1] = (int)refs->ref.left[i];
|
|
}
|
|
|
|
// Compute reduced boundary with Haar-downsampling
|
|
const int input_size = 2 * red_bdry_size;
|
|
|
|
int red_bdry[MIP_MAX_INPUT_SIZE];
|
|
int red_bdry_trans[MIP_MAX_INPUT_SIZE];
|
|
|
|
int* const top_reduced = &red_bdry[0];
|
|
int* const left_reduced = &red_bdry[red_bdry_size];
|
|
|
|
uvg_mip_boundary_downsampling_1D(top_reduced, ref_samples_top, width, red_bdry_size);
|
|
uvg_mip_boundary_downsampling_1D(left_reduced, ref_samples_left, height, red_bdry_size);
|
|
|
|
// Transposed reduced boundaries
|
|
int* const left_reduced_trans = &red_bdry_trans[0];
|
|
int* const top_reduced_trans = &red_bdry_trans[red_bdry_size];
|
|
|
|
for (int x = 0; x < red_bdry_size; x++) {
|
|
top_reduced_trans[x] = top_reduced[x];
|
|
}
|
|
for (int y = 0; y < red_bdry_size; y++) {
|
|
left_reduced_trans[y] = left_reduced[y];
|
|
}
|
|
|
|
int input_offset = red_bdry[0];
|
|
int input_offset_trans = red_bdry_trans[0];
|
|
|
|
const bool has_first_col = (size_id < 2);
|
|
// First column of matrix not needed for large blocks
|
|
red_bdry[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset) : 0;
|
|
red_bdry_trans[0] = has_first_col ? ((1 << (UVG_BIT_DEPTH - 1)) - input_offset_trans) : 0;
|
|
|
|
for (int i = 1; i < input_size; ++i) {
|
|
red_bdry[i] -= input_offset;
|
|
red_bdry_trans[i] -= input_offset_trans;
|
|
}
|
|
|
|
// *** INPUT PREP *** END
|
|
|
|
// *** BLOCK PREDICT ***
|
|
|
|
const bool need_upsampling = (ups_hor_factor > 1) || (ups_ver_factor > 1);
|
|
const bool transpose = mip_transp;
|
|
|
|
const uint8_t* matrix;
|
|
switch (size_id) {
|
|
case 0:
|
|
matrix = &uvg_mip_matrix_4x4[mode_idx][0][0];
|
|
break;
|
|
case 1:
|
|
matrix = &uvg_mip_matrix_8x8[mode_idx][0][0];
|
|
break;
|
|
case 2:
|
|
matrix = &uvg_mip_matrix_16x16[mode_idx][0][0];
|
|
break;
|
|
default:
|
|
assert(false && "Invalid MIP size id.");
|
|
}
|
|
|
|
// Max possible size is red_pred_size * red_pred_size, red_pred_size can be either 4 or 8
|
|
int red_pred_buffer[8*8];
|
|
int* const reduced_pred = need_upsampling ? red_pred_buffer : result;
|
|
|
|
const int* const reduced_bdry = transpose ? red_bdry_trans : red_bdry;
|
|
|
|
uvg_mip_reduced_pred(reduced_pred, reduced_bdry, matrix, transpose, red_bdry_size, red_pred_size, size_id, input_offset, input_offset_trans);
|
|
if (need_upsampling) {
|
|
const int* ver_src = reduced_pred;
|
|
uint16_t ver_src_step = width;
|
|
|
|
if (ups_hor_factor > 1) {
|
|
int* const hor_dst = result + (ups_ver_factor - 1) * width;
|
|
ver_src = hor_dst;
|
|
ver_src_step *= ups_ver_factor;
|
|
|
|
uvg_mip_pred_upsampling_1D(hor_dst, reduced_pred, ref_samples_left,
|
|
red_pred_size, red_pred_size,
|
|
1, red_pred_size, 1, ver_src_step,
|
|
ups_ver_factor, ups_hor_factor);
|
|
}
|
|
|
|
if (ups_ver_factor > 1) {
|
|
uvg_mip_pred_upsampling_1D(result, ver_src, ref_samples_top,
|
|
red_pred_size, width,
|
|
ver_src_step, 1, width, 1,
|
|
1, ups_ver_factor);
|
|
}
|
|
}
|
|
|
|
// Assign and cast values from temp array to output
|
|
for (int i = 0; i < 32 * 32; i++) {
|
|
out[i] = (uvg_pixel)result[i];
|
|
}
|
|
// *** BLOCK PREDICT *** END
|
|
}
|
|
|
|
|
|
static void intra_predict_regular(
|
|
const encoder_state_t* const state,
|
|
uvg_intra_references *refs,
|
|
const cu_loc_t* const cu_loc,
|
|
int_fast8_t mode,
|
|
color_t color,
|
|
uvg_pixel *dst,
|
|
const uint8_t multi_ref_idx,
|
|
const uint8_t isp_mode)
|
|
{
|
|
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
|
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
|
const int log2_width = uvg_g_convert_to_log2[width];
|
|
const int log2_height = uvg_g_convert_to_log2[height];
|
|
const uvg_config *cfg = &state->encoder_control->cfg;
|
|
|
|
// MRL only for luma
|
|
uint8_t multi_ref_index = color == COLOR_Y ? multi_ref_idx : 0;
|
|
uint8_t isp = color == COLOR_Y ? isp_mode : 0;
|
|
|
|
const uvg_intra_ref *used_ref = &refs->ref;
|
|
if (cfg->intra_smoothing_disabled || color != COLOR_Y || mode == 1 || (width == 4 && height == 4) || multi_ref_index || width != height /*ISP_TODO: replace this fake ISP check*/) {
|
|
// For chroma, DC and 4x4 blocks, always use unfiltered reference.
|
|
} else if (mode == 0) {
|
|
// Otherwise, use filtered for planar.
|
|
if (width * height > 32) {
|
|
used_ref = &refs->filtered_ref;
|
|
}
|
|
} else {
|
|
// Angular modes use smoothed reference pixels, unless the mode is close
|
|
// to being either vertical or horizontal.
|
|
static const int uvg_intra_hor_ver_dist_thres[8] = {24, 24, 24, 14, 2, 0, 0, 0 };
|
|
int filter_threshold = uvg_intra_hor_ver_dist_thres[(log2_width + log2_height) >> 1];
|
|
int dist_from_vert_or_hor = MIN(abs(mode - 50), abs(mode - 18));
|
|
if (dist_from_vert_or_hor > filter_threshold) {
|
|
|
|
static const int16_t modedisp2sampledisp[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
|
|
const int_fast8_t mode_disp = (mode >= 34) ? mode - 50 : 18 - mode;
|
|
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
|
|
if ((abs(sample_disp) & 0x1F) == 0) {
|
|
used_ref = &refs->filtered_ref;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (used_ref == &refs->filtered_ref && !refs->filtered_initialized) {
|
|
intra_filter_reference(log2_width, log2_height, refs);
|
|
}
|
|
|
|
if (mode == 0) {
|
|
uvg_intra_pred_planar(cu_loc, color, used_ref->top, used_ref->left, dst);
|
|
} else if (mode == 1) {
|
|
intra_pred_dc(cu_loc, color, used_ref->top, used_ref->left, dst, multi_ref_index);
|
|
} else {
|
|
uvg_angular_pred(cu_loc, mode, color, used_ref->top, used_ref->left, dst, multi_ref_index, isp);
|
|
}
|
|
|
|
// pdpc
|
|
// bool pdpcCondition = (mode == 0 || mode == 1 || mode == 18 || mode == 50);
|
|
bool pdpcCondition = (mode == 0 || mode == 1); // Planar and DC
|
|
pdpcCondition &= width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH;
|
|
if (pdpcCondition && multi_ref_index == 0) // Cannot be used with MRL.
|
|
{
|
|
uvg_pdpc_planar_dc(mode, cu_loc, color, used_ref, dst);
|
|
}
|
|
}
|
|
|
|
|
|
void uvg_intra_build_reference_any(
|
|
const cu_loc_t* const pu_loc,
|
|
const cu_loc_t* const cu_loc,
|
|
const color_t color,
|
|
const vector2d_t *const luma_px,
|
|
const vector2d_t *const pic_px,
|
|
const lcu_t *const lcu,
|
|
uvg_intra_references *const refs,
|
|
const uint8_t multi_ref_idx,
|
|
uvg_pixel *extra_ref_lines,
|
|
const uint8_t isp_mode)
|
|
{
|
|
const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
|
|
const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height;
|
|
const int log2_width = uvg_g_convert_to_log2[width];
|
|
const int log2_height = uvg_g_convert_to_log2[height];
|
|
|
|
// These are only used with ISP, so no need to check chroma
|
|
const int cu_width = cu_loc->width;
|
|
const int cu_height = cu_loc->height;
|
|
const int pu_x = pu_loc->x;
|
|
const int pu_y = pu_loc->y;
|
|
const int cu_x = cu_loc->x;
|
|
const int cu_y = cu_loc->y;
|
|
|
|
assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5));
|
|
|
|
refs->filtered_initialized = false;
|
|
uvg_pixel *out_left_ref = &refs->ref.left[0];
|
|
uvg_pixel *out_top_ref = &refs->ref.top[0];
|
|
|
|
const uvg_pixel dc_val = 1 << (UVG_BIT_DEPTH - 1); //TODO: add used bitdepth as a variable
|
|
const int is_chroma = color != COLOR_Y ? 1 : 0;
|
|
|
|
// Get multi ref index from CU under prediction or reconstrcution. Do not use MRL if not luma
|
|
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
|
|
assert(multi_ref_index < MAX_REF_LINE_IDX);
|
|
|
|
// Convert luma coordinates to chroma coordinates for chroma.
|
|
const vector2d_t lcu_px = {
|
|
luma_px->x % LCU_WIDTH,
|
|
luma_px->y % LCU_WIDTH
|
|
};
|
|
const vector2d_t px = {
|
|
lcu_px.x >> is_chroma,
|
|
lcu_px.y >> is_chroma,
|
|
};
|
|
|
|
// Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
|
|
const uvg_pixel *left_ref;
|
|
bool extra_ref = false;
|
|
// On the left LCU edge, if left neighboring LCU is available,
|
|
// left_ref needs to point to correct extra reference line if MRL is used.
|
|
if (luma_px->x > 0 && lcu_px.x == 0 && multi_ref_index != 0) {
|
|
left_ref = &extra_ref_lines[multi_ref_index * 128];
|
|
extra_ref = true;
|
|
}
|
|
else {
|
|
left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
|
|
}
|
|
|
|
const uvg_pixel *top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
|
|
const uvg_pixel *rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;
|
|
|
|
// Init top borders pointer to point to the correct place in the correct reference array.
|
|
const uvg_pixel *top_border;
|
|
if (px.y) {
|
|
top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
|
|
} else {
|
|
top_border = &top_ref[px.x]; // Top row, no need for multi_ref_index
|
|
}
|
|
|
|
// Init left borders pointer to point to the correct place in the correct reference array.
|
|
const uvg_pixel *left_border;
|
|
int left_stride; // Distance between reference samples.
|
|
if (px.x) {
|
|
left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
|
|
left_stride = LCU_WIDTH >> is_chroma;
|
|
} else {
|
|
if (extra_ref) {
|
|
left_border = &left_ref[MAX_REF_LINE_IDX];
|
|
}
|
|
else {
|
|
left_border = &left_ref[px.y];
|
|
}
|
|
left_stride = 1;
|
|
}
|
|
|
|
// Generate left reference.
|
|
if (luma_px->x > 0) {
|
|
// Get the number of reference pixels based on the PU coordinate within the LCU.
|
|
int px_available_left;
|
|
if (isp_mode && !is_chroma) {
|
|
if (isp_mode == ISP_MODE_VER) {
|
|
px_available_left = height;
|
|
}
|
|
else {
|
|
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4];
|
|
// This table does not have values for dimensions less than 4
|
|
if (lcu_px.y % 4 != 0) {
|
|
px_available_left -= 2;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
|
|
}
|
|
|
|
// Limit the number of available pixels based on block size and dimensions
|
|
// of the picture.
|
|
px_available_left = MIN(px_available_left, cu_height * 2 + multi_ref_index);
|
|
px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);
|
|
|
|
// Copy pixels from coded CUs.
|
|
for (int i = 0; i < px_available_left; ++i) {
|
|
// Reserve space for top left reference
|
|
out_left_ref[i + 1 + multi_ref_index] = left_border[i * left_stride];
|
|
}
|
|
// Extend the last pixel for the rest of the reference values.
|
|
uvg_pixel nearest_pixel = left_border[(px_available_left - 1) * left_stride];
|
|
for (int i = px_available_left; i < cu_height * 2 + multi_ref_index * 2; ++i) {
|
|
out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
|
}
|
|
} else {
|
|
// If we are on the left edge, extend the first pixel of the top row.
|
|
uvg_pixel nearest_pixel = luma_px->y > 0 ? top_border[0] : dc_val;
|
|
for (int i = 0; i < height * 2 + multi_ref_index; i++) {
|
|
// Reserve space for top left reference
|
|
out_left_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
|
}
|
|
}
|
|
|
|
// Generate top-left reference
|
|
if (multi_ref_index)
|
|
{
|
|
if (luma_px->x > 0 && luma_px->y > 0) {
|
|
// If the block is at an LCU border, the top-left must be copied from
|
|
// the border that points to the LCUs 1D reference buffer.
|
|
|
|
// Inner picture cases
|
|
if (px.x == 0 && px.y == 0) {
|
|
// LCU top left corner case. Multi ref will be 0.
|
|
out_left_ref[0] = out_left_ref[1];
|
|
out_top_ref[0] = out_left_ref[1];
|
|
}
|
|
else if (px.x == 0) {
|
|
// LCU left border case
|
|
uvg_pixel *top_left_corner = &extra_ref_lines[multi_ref_index * 128];
|
|
for (int i = 0; i <= multi_ref_index; ++i) {
|
|
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
|
out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
|
|
}
|
|
}
|
|
else if (px.y == 0) {
|
|
// LCU top border case. Multi ref will be 0.
|
|
out_left_ref[0] = top_border[-1];
|
|
out_top_ref[0] = top_border[-1];
|
|
}
|
|
else {
|
|
// Inner case
|
|
for (int i = 0; i <= multi_ref_index; ++i) {
|
|
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
|
out_top_ref[i] = top_border[i - 1 - multi_ref_index];
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
// Picture border cases
|
|
if (px.x == 0 && px.y == 0) {
|
|
// Top left picture corner case. Multi ref will be 0.
|
|
out_left_ref[0] = out_left_ref[1];
|
|
out_top_ref[0] = out_left_ref[1];
|
|
}
|
|
else if (px.x == 0) {
|
|
// Picture left border case. Reference pixel cannot be taken from outside LCU border
|
|
uvg_pixel nearest = out_left_ref[1 + multi_ref_index];
|
|
for (int i = 0; i <= multi_ref_index; ++i) {
|
|
out_left_ref[i] = nearest;
|
|
out_top_ref[i] = nearest;
|
|
}
|
|
}
|
|
else {
|
|
// Picture top border case. Multi ref will be 0.
|
|
out_left_ref[0] = top_border[-1];
|
|
out_top_ref[0] = top_border[-1];
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if (luma_px->x > 0 && luma_px->y > 0) {
|
|
// If the block is at an LCU border, the top-left must be copied from
|
|
// the border that points to the LCUs 1D reference buffer.
|
|
if (px.x == 0) {
|
|
out_left_ref[0] = left_border[-1 * left_stride];
|
|
out_top_ref[0] = left_border[-1 * left_stride];
|
|
}
|
|
else {
|
|
out_left_ref[0] = top_border[-1];
|
|
out_top_ref[0] = top_border[-1];
|
|
}
|
|
}
|
|
else {
|
|
// Copy reference clockwise.
|
|
out_left_ref[0] = out_left_ref[1];
|
|
out_top_ref[0] = out_left_ref[1];
|
|
}
|
|
}
|
|
|
|
// Generate top reference.
|
|
int px_available_top;
|
|
if (luma_px->y > 0) {
|
|
// Get the number of reference pixels based on the PU coordinate within the LCU.
|
|
if (isp_mode && !is_chroma) {
|
|
if (isp_mode == ISP_MODE_HOR) {
|
|
px_available_top = width;
|
|
}
|
|
else {
|
|
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4];
|
|
}
|
|
}
|
|
else {
|
|
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
|
|
}
|
|
|
|
// Limit the number of available pixels based on block size and dimensions
|
|
// of the picture.
|
|
px_available_top = MIN(px_available_top, cu_width * 2 + multi_ref_index);
|
|
px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);
|
|
|
|
// Copy all the pixels we can.
|
|
for (int i = 0; i < px_available_top; ++i) {
|
|
out_top_ref[i + 1 + multi_ref_index] = top_border[i];
|
|
}
|
|
// Extend the last pixel for the rest of the reference values.
|
|
uvg_pixel nearest_pixel = top_border[px_available_top - 1];
|
|
for (int i = px_available_top; i < width * 2 + multi_ref_index * 2; ++i) {
|
|
out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
|
}
|
|
} else {
|
|
// Extend nearest pixel.
|
|
uvg_pixel nearest_pixel = luma_px->x > 0 ? left_border[0] : dc_val;
|
|
for (int i = 0; i < cu_width * 2 + multi_ref_index; i++) {
|
|
out_top_ref[i + 1] = nearest_pixel;
|
|
}
|
|
}
|
|
}
|
|
|
|
void uvg_intra_build_reference_inner(
|
|
const cu_loc_t* const pu_loc,
|
|
const cu_loc_t* const cu_loc,
|
|
const color_t color,
|
|
const vector2d_t *const luma_px,
|
|
const vector2d_t *const pic_px,
|
|
const lcu_t *const lcu,
|
|
uvg_intra_references *const refs,
|
|
bool entropy_sync,
|
|
const uint8_t multi_ref_idx,
|
|
uvg_pixel* extra_ref_lines,
|
|
uint8_t isp_mode)
|
|
{
|
|
const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
|
|
const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height;
|
|
const int cu_width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
|
const int cu_height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
|
const int log2_width = uvg_g_convert_to_log2[width];
|
|
const int log2_height = uvg_g_convert_to_log2[height];
|
|
|
|
// These are only used with ISP, so no need to check chroma
|
|
const int pu_x = pu_loc->x;
|
|
const int pu_y = pu_loc->y;
|
|
const int cu_x = cu_loc->x;
|
|
const int cu_y = cu_loc->y;
|
|
|
|
// Log2_dim 1 is possible with ISP blocks
|
|
assert((log2_width >= 1 && log2_width <= 5) && (log2_height >= 1 && log2_height <= 5));
|
|
|
|
refs->filtered_initialized = false;
|
|
uvg_pixel * __restrict out_left_ref = &refs->ref.left[0];
|
|
uvg_pixel * __restrict out_top_ref = &refs->ref.top[0];
|
|
|
|
const int is_chroma = color != COLOR_Y ? 1 : 0;
|
|
|
|
// Get multiRefIdx from CU under prediction. Do not use MRL if not luma
|
|
const uint8_t multi_ref_index = !is_chroma ? multi_ref_idx : 0;
|
|
assert(multi_ref_index < MAX_REF_LINE_IDX);
|
|
|
|
// Convert luma coordinates to chroma coordinates for chroma.
|
|
const vector2d_t lcu_px = {
|
|
luma_px->x % LCU_WIDTH,
|
|
luma_px->y % LCU_WIDTH
|
|
};
|
|
const vector2d_t px = {
|
|
lcu_px.x >> is_chroma,
|
|
lcu_px.y >> is_chroma,
|
|
};
|
|
|
|
// Init pointers to LCUs reconstruction buffers, such that index 0 refers to block coordinate 0.
|
|
const uvg_pixel* left_ref;
|
|
bool extra_ref = false;
|
|
// On the left LCU edge, if left neighboring LCU is available,
|
|
// left_ref needs to point to correct extra reference line if MRL is used.
|
|
if (lcu_px.x == 0 && multi_ref_index != 0) {
|
|
left_ref = &extra_ref_lines[multi_ref_index * 128];
|
|
extra_ref = true;
|
|
}
|
|
else {
|
|
left_ref = !color ? &lcu->left_ref.y[1] : (color == 1) ? &lcu->left_ref.u[1] : &lcu->left_ref.v[1];
|
|
}
|
|
|
|
const uvg_pixel * __restrict top_ref = !color ? &lcu->top_ref.y[1] : (color == 1) ? &lcu->top_ref.u[1] : &lcu->top_ref.v[1];
|
|
const uvg_pixel * __restrict rec_ref = !color ? lcu->rec.y : (color == 1) ? lcu->rec.u : lcu->rec.v;
|
|
|
|
// Init top borders pointer to point to the correct place in the correct reference array.
|
|
const uvg_pixel * __restrict top_border;
|
|
if (px.y) {
|
|
top_border = &rec_ref[px.x + (px.y - 1 - multi_ref_index) * (LCU_WIDTH >> is_chroma)];
|
|
} else {
|
|
top_border = &top_ref[px.x]; // At the top line. No need for multi_ref_index
|
|
}
|
|
|
|
// Init left borders pointer to point to the correct place in the correct reference array.
|
|
const uvg_pixel * __restrict left_border;
|
|
int left_stride; // Distance between reference samples.
|
|
if (px.x) {
|
|
left_border = &rec_ref[px.x - 1 - multi_ref_index + px.y * (LCU_WIDTH >> is_chroma)];
|
|
left_stride = LCU_WIDTH >> is_chroma;
|
|
} else {
|
|
if (extra_ref) {
|
|
left_border = &left_ref[MAX_REF_LINE_IDX];
|
|
}
|
|
else {
|
|
left_border = &left_ref[px.y];
|
|
}
|
|
left_stride = 1;
|
|
}
|
|
|
|
// Generate top-left reference
|
|
if (multi_ref_index)
|
|
{
|
|
// Inner picture cases
|
|
if (px.x == 0 && px.y == 0) {
|
|
// LCU top left corner case. Multi ref will be 0.
|
|
out_left_ref[0] = out_left_ref[1];
|
|
out_top_ref[0] = out_left_ref[1];
|
|
}
|
|
else if (px.x == 0) {
|
|
// LCU left border case
|
|
uvg_pixel* top_left_corner = &extra_ref_lines[multi_ref_index * 128];
|
|
for (int i = 0; i <= multi_ref_index; ++i) {
|
|
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
|
out_top_ref[i] = top_left_corner[(128 * -i) + MAX_REF_LINE_IDX - 1 - multi_ref_index];
|
|
}
|
|
}
|
|
else if (px.y == 0) {
|
|
// LCU top border case. Multi ref will be 0.
|
|
out_left_ref[0] = top_border[-1];
|
|
out_top_ref[0] = top_border[-1];
|
|
}
|
|
else {
|
|
// Inner case
|
|
for (int i = 0; i <= multi_ref_index; ++i) {
|
|
out_left_ref[i] = left_border[(i - 1 - multi_ref_index) * left_stride];
|
|
out_top_ref[i] = top_border[i - 1 - multi_ref_index];
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
// If the block is at an LCU border, the top-left must be copied from
|
|
// the border that points to the LCUs 1D reference buffer.
|
|
if (px.x == 0) {
|
|
out_left_ref[0] = left_border[-1 * left_stride];
|
|
out_top_ref[0] = left_border[-1 * left_stride];
|
|
}
|
|
else {
|
|
out_left_ref[0] = top_border[-1];
|
|
out_top_ref[0] = top_border[-1];
|
|
}
|
|
}
|
|
// Generate left reference.
|
|
|
|
// Get the number of reference pixels based on the PU coordinate within the LCU.
|
|
int px_available_left;
|
|
if (isp_mode && !is_chroma) {
|
|
if (isp_mode == ISP_MODE_VER) {
|
|
px_available_left = height;
|
|
}
|
|
else {
|
|
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4];
|
|
// This table does not have values for dimensions less than 4
|
|
if (lcu_px.y % 4 != 0) {
|
|
px_available_left -= 2;
|
|
}
|
|
}
|
|
|
|
}
|
|
else {
|
|
px_available_left = num_ref_pixels_left[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
|
|
}
|
|
|
|
// Limit the number of available pixels based on block size and dimensions
|
|
// of the picture.
|
|
px_available_left = MIN(px_available_left, cu_height * 2);
|
|
px_available_left = MIN(px_available_left, (pic_px->y - luma_px->y) >> is_chroma);
|
|
|
|
// Copy pixels from coded CUs.
|
|
int i = multi_ref_index; // Offset by multi_ref_index
|
|
|
|
// Do different loop for heights smaller than 4 (possible for some ISP splits)
|
|
if (lcu_px.y % 4 != 0) {
|
|
do {
|
|
out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride];
|
|
out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride];
|
|
i += 2;
|
|
} while (i < px_available_left);
|
|
}
|
|
else {
|
|
do {
|
|
out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride];
|
|
out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride];
|
|
out_left_ref[i + 3] = left_border[(i + 2 - multi_ref_index) * left_stride];
|
|
out_left_ref[i + 4] = left_border[(i + 3 - multi_ref_index) * left_stride];
|
|
i += 4;
|
|
} while (i < px_available_left);
|
|
}
|
|
|
|
// Extend the last pixel for the rest of the reference values.
|
|
uvg_pixel nearest_pixel = out_left_ref[i];
|
|
for (; i < cu_height * 2; i += 4) {
|
|
out_left_ref[i + 1] = nearest_pixel;
|
|
out_left_ref[i + 2] = nearest_pixel;
|
|
out_left_ref[i + 3] = nearest_pixel;
|
|
out_left_ref[i + 4] = nearest_pixel;
|
|
}
|
|
|
|
// Extend for MRL
|
|
if (multi_ref_index) {
|
|
for (; i < width * 2 + multi_ref_index; ++i) {
|
|
out_left_ref[i + 1] = nearest_pixel;
|
|
}
|
|
}
|
|
|
|
// Generate top reference.
|
|
|
|
// Get the number of reference pixels based on the PU coordinate within the LCU.
|
|
int px_available_top;
|
|
if (isp_mode && !is_chroma) {
|
|
if (isp_mode == ISP_MODE_HOR) {
|
|
px_available_top = width;
|
|
}
|
|
else {
|
|
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4];
|
|
}
|
|
}
|
|
else {
|
|
px_available_top = num_ref_pixels_top[lcu_px.y / 4][lcu_px.x / 4] >> is_chroma;
|
|
}
|
|
|
|
// Limit the number of available pixels based on block size and dimensions
|
|
// of the picture.
|
|
px_available_top = MIN(px_available_top, cu_width * 2 + multi_ref_index);
|
|
px_available_top = MIN(px_available_top, (pic_px->x - luma_px->x) >> is_chroma);
|
|
|
|
if (entropy_sync && px.y == 0) px_available_top = MIN(px_available_top, ((LCU_WIDTH >> is_chroma) - px.x) -1);
|
|
|
|
// Copy all the pixels we can.
|
|
i = 0;
|
|
do {
|
|
memcpy(out_top_ref + i + 1 + multi_ref_index, top_border + i, 4 * sizeof(uvg_pixel));
|
|
i += 4;
|
|
} while (i < px_available_top);
|
|
|
|
// Extend the last pixel for the rest of the reference values.
|
|
nearest_pixel = out_top_ref[i + multi_ref_index];
|
|
for (; i < (cu_width + multi_ref_index) * 2; i += 4) {
|
|
out_top_ref[i + 1 + multi_ref_index] = nearest_pixel;
|
|
out_top_ref[i + 2 + multi_ref_index] = nearest_pixel;
|
|
out_top_ref[i + 3 + multi_ref_index] = nearest_pixel;
|
|
out_top_ref[i + 4 + multi_ref_index] = nearest_pixel;
|
|
}
|
|
}
|
|
|
|
|
|
void uvg_intra_build_reference(
|
|
const cu_loc_t* const pu_loc,
|
|
const cu_loc_t* const cu_loc,
|
|
const color_t color,
|
|
const vector2d_t *const luma_px,
|
|
const vector2d_t *const pic_px,
|
|
const lcu_t *const lcu,
|
|
uvg_intra_references *const refs,
|
|
bool entropy_sync,
|
|
uvg_pixel *extra_ref_lines,
|
|
uint8_t multi_ref_idx,
|
|
const uint8_t isp_mode)
|
|
{
|
|
assert(!(extra_ref_lines == NULL && multi_ref_idx != 0) && "Trying to use MRL with NULL extra references.");
|
|
|
|
bool first_split = color == COLOR_Y && isp_mode && pu_loc->x == cu_loc->x && pu_loc->y == cu_loc->y;
|
|
uint8_t isp = first_split ? 0 : isp_mode;
|
|
|
|
// Much logic can be discarded if not on the edge
|
|
if (luma_px->x > 0 && luma_px->y > 0) {
|
|
uvg_intra_build_reference_inner(pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, entropy_sync, multi_ref_idx, extra_ref_lines, isp);
|
|
} else {
|
|
uvg_intra_build_reference_any(pu_loc, cu_loc, color, luma_px, pic_px, lcu, refs, multi_ref_idx, extra_ref_lines, isp);
|
|
}
|
|
}
|
|
|
|
|
|
void uvg_intra_predict(
|
|
const encoder_state_t* const state,
|
|
uvg_intra_references* const refs,
|
|
const cu_loc_t* const cu_loc,
|
|
const color_t color,
|
|
uvg_pixel* dst,
|
|
const intra_search_data_t* data,
|
|
const lcu_t* lcu,
|
|
enum uvg_tree_type tree_type
|
|
)
|
|
{
|
|
const int stride = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA);
|
|
// TODO: what is this used for?
|
|
// const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm);
|
|
bool use_mip = false;
|
|
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
|
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
|
const int x = cu_loc->x;
|
|
const int y = cu_loc->y;
|
|
int8_t intra_mode = color == COLOR_Y ? data->pred_cu.intra.mode : data->pred_cu.intra.mode_chroma;
|
|
if (data->pred_cu.intra.mip_flag) {
|
|
if (color == COLOR_Y) {
|
|
use_mip = true;
|
|
}
|
|
else {
|
|
use_mip = state->encoder_control->chroma_format == UVG_CSP_444;
|
|
}
|
|
}
|
|
if (intra_mode < 68) {
|
|
if (use_mip) {
|
|
assert(intra_mode >= 0 && intra_mode < 16 && "MIP mode must be between [0, 15]");
|
|
mip_predict(state, refs, width, height, dst, intra_mode, data->pred_cu.intra.mip_is_transposed);
|
|
}
|
|
else {
|
|
intra_predict_regular(state, refs, cu_loc, intra_mode, color, dst, data->pred_cu.intra.multi_ref_idx, data->pred_cu.intra.isp_mode);
|
|
}
|
|
}
|
|
else {
|
|
uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width);
|
|
if (data->pred_cu.depth != data->pred_cu.tr_depth || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) {
|
|
predict_cclm(
|
|
state, color, width, width, x, y, stride, intra_mode, lcu, refs, dst,
|
|
(cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1],
|
|
tree_type);
|
|
}
|
|
else {
|
|
linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, width);
|
|
}
|
|
}
|
|
}
|
|
|
|
// This function works on luma coordinates
|
|
const cu_info_t* uvg_get_co_located_luma_cu(
|
|
int x,
|
|
int y,
|
|
int width,
|
|
int height,
|
|
const lcu_t* const lcu,
|
|
const cu_array_t* const cu_array,
|
|
enum uvg_tree_type tree_type)
|
|
{
|
|
assert((cu_array || lcu) && !(cu_array && lcu));
|
|
assert(tree_type != UVG_LUMA_T && "Luma only CU shouldn't need colocated luma CU");
|
|
if(tree_type == UVG_CHROMA_T) {
|
|
x += width >> 1;
|
|
y += height >> 1;
|
|
}
|
|
if(cu_array) {
|
|
return uvg_cu_array_at_const(cu_array, x, y);
|
|
}
|
|
else {
|
|
return LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* \brief Returns ISP split partition size based on block dimensions and split type.
|
|
*
|
|
* Returns ISP split partition size based on block dimensions and split type.
|
|
* Will fail if resulting partition size has less than 16 samples.
|
|
*
|
|
* \param width Block width.
|
|
* \param height Block height.
|
|
* \param split_type Horizontal or vertical split.
|
|
*/
|
|
int uvg_get_isp_split_dim(const int width, const int height, const int split_type, const bool is_transform_split)
|
|
{
|
|
assert(split_type != ISP_MODE_NO_ISP && "Cannot calculate split dimension if no split type is set. Make sure this function is not called in this case.");
|
|
|
|
bool divide_in_rows = split_type == SPLIT_TYPE_HOR;
|
|
int split_dim_size, non_split_dim_size, partition_size, div_shift = 2;
|
|
|
|
if (divide_in_rows) {
|
|
split_dim_size = height;
|
|
non_split_dim_size = width;
|
|
}
|
|
else {
|
|
split_dim_size = width;
|
|
non_split_dim_size = height;
|
|
}
|
|
|
|
const int min_num_samples = 16; // Minimum allowed number of samples for split block
|
|
const int factor_to_min_samples = non_split_dim_size < min_num_samples ? min_num_samples >> uvg_math_floor_log2(non_split_dim_size) : 1;
|
|
partition_size = (split_dim_size >> div_shift) < factor_to_min_samples ? factor_to_min_samples : (split_dim_size >> div_shift);
|
|
|
|
// Minimum width for ISP splits are 4. (JVET-T2001 chapter 8.4.5.1 equation 246: nPbW = Max(4, nW))
|
|
// Except this does not apply for transform blocks for some reason. VTM does seem to expect 4 transform blocks even if only two pred blocks were used
|
|
// Height can be 2.
|
|
if (!divide_in_rows && !is_transform_split) {
|
|
partition_size = MAX(4, partition_size);
|
|
}
|
|
|
|
assert((uvg_math_floor_log2(partition_size) + uvg_math_floor_log2(non_split_dim_size) >= uvg_math_floor_log2(min_num_samples)) &&
|
|
"Partition has less than allowed minimum number of samples.");
|
|
return partition_size;
|
|
}
|
|
|
|
|
|
int uvg_get_isp_split_num(const int width, const int height, const int split_type, const bool is_transform_split)
|
|
{
|
|
assert((split_type != ISP_MODE_NO_ISP) && "This function cannot be called if ISP mode is 0.");
|
|
int split_dim = uvg_get_isp_split_dim(width, height, split_type, is_transform_split);
|
|
int num = split_type == ISP_MODE_HOR ? height / split_dim : width / split_dim;
|
|
|
|
return num;
|
|
}
|
|
|
|
|
|
/**
 * \brief Computes the location and size of one ISP partition.
 *
 * Without ISP (ISP_MODE_NO_ISP) the result covers the whole block. For
 * prediction partitions of a vertical split in a block narrower than 16
 * the split index is halved before the offset is computed.
 *
 * \param loc                 Output location, initialized with uvg_cu_loc_ctor().
 * \param x                   Block x-coordinate.
 * \param y                   Block y-coordinate.
 * \param block_w             Block width.
 * \param block_h             Block height.
 * \param split_idx           Index of the partition, in [0, 3].
 * \param split_type          ISP mode of the block.
 * \param is_transform_split  True when the partition is a transform block.
 */
void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, int split_idx, const int split_type, const bool is_transform_split)
{
  // Check for illegal splits
  assert((!(block_w == 4 && block_h == 4) || split_idx == 0) && "Trying to get ISP split CU when split is not allowed.");
  assert((!((block_w * block_h) <= 16) || split_idx < 2) && "Split index for small blocks must be in [0, 1]");
  assert((split_idx >= 0 && split_idx <= 3) && "ISP split index must be in [0, 3].");
  assert((split_type != ISP_MODE_NO_ISP || split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");

  const int part_dim = (split_type != ISP_MODE_NO_ISP)
    ? uvg_get_isp_split_dim(block_w, block_h, split_type, is_transform_split)
    : block_w;

  const bool halve_index = split_type == ISP_MODE_VER && block_w < 16 && !is_transform_split;
  const int part_idx = halve_index ? split_idx / 2 : split_idx;
  const int offset = part_dim * part_idx;

  if (split_type == ISP_MODE_HOR) {
    // Partitions are stacked vertically and span the full block width.
    uvg_cu_loc_ctor(loc, x, y + offset, block_w, part_dim);
  } else {
    // Partitions are side by side and span the full block height.
    uvg_cu_loc_ctor(loc, x + offset, y, part_dim, block_h);
  }
}
|
|
|
|
|
|
static void intra_recon_tb_leaf(
|
|
encoder_state_t* const state,
|
|
const cu_loc_t* pu_loc,
|
|
const cu_loc_t* cu_loc,
|
|
lcu_t *lcu,
|
|
color_t color,
|
|
const intra_search_data_t* search_data,
|
|
enum uvg_tree_type tree_type)
|
|
{
|
|
const uvg_config *cfg = &state->encoder_control->cfg;
|
|
const int shift = color == COLOR_Y ? 0 : 1;
|
|
|
|
const int x = pu_loc->x;
|
|
const int y = pu_loc->y;
|
|
|
|
const int width = color == COLOR_Y ? pu_loc->width : pu_loc->chroma_width;
|
|
const int height = color == COLOR_Y ? pu_loc->height : pu_loc->chroma_height;
|
|
int log2_width = uvg_g_convert_to_log2[width];
|
|
int log2_height = uvg_g_convert_to_log2[height];
|
|
|
|
const int lcu_width = LCU_WIDTH >> shift;
|
|
|
|
const vector2d_t luma_px = { x, y };
|
|
const vector2d_t pic_px = {
|
|
state->tile->frame->width,
|
|
state->tile->frame->height,
|
|
};
|
|
int x_scu = SUB_SCU(x);
|
|
int y_scu = SUB_SCU(y);
|
|
const vector2d_t lcu_px = {x_scu >> shift, y_scu >> shift };
|
|
uint8_t multi_ref_index = color == COLOR_Y ? search_data->pred_cu.intra.multi_ref_idx: 0;
|
|
uint8_t isp_mode = color == COLOR_Y ? search_data->pred_cu.intra.isp_mode : 0;
|
|
|
|
uvg_intra_references refs;
|
|
// Extra reference lines for use with MRL. Extra lines needed only for left edge.
|
|
uvg_pixel extra_refs[128 * MAX_REF_LINE_IDX] = { 0 };
|
|
|
|
if (luma_px.x > 0 && lcu_px.x == 0 && lcu_px.y > 0 && multi_ref_index != 0) {
|
|
videoframe_t* const frame = state->tile->frame;
|
|
|
|
// Copy extra ref lines, including ref line 1 and top left corner.
|
|
for (int i = 0; i < MAX_REF_LINE_IDX; ++i) {
|
|
int ref_height = height * 2 + MAX_REF_LINE_IDX;
|
|
ref_height = MIN(ref_height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
|
|
ref_height = MIN(ref_height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
|
|
uvg_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)],
|
|
&extra_refs[i * 128],
|
|
1, ref_height,
|
|
frame->rec->stride, 1);
|
|
}
|
|
}
|
|
|
|
uvg_intra_build_reference(pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode);
|
|
|
|
uvg_pixel pred[32 * 32];
|
|
uvg_intra_predict(state, &refs, pu_loc, color, pred, search_data, lcu, tree_type);
|
|
|
|
const int index = lcu_px.x + lcu_px.y * lcu_width;
|
|
uvg_pixel *block = NULL;
|
|
uvg_pixel *block2 = NULL;
|
|
switch (color) {
|
|
case COLOR_Y:
|
|
block = &lcu->rec.y[index];
|
|
break;
|
|
case COLOR_U:
|
|
block = &lcu->rec.u[index];
|
|
block2 = &lcu->rec.joint_u[index];
|
|
break;
|
|
case COLOR_V:
|
|
block = &lcu->rec.v[index];
|
|
block2 = &lcu->rec.joint_v[index];
|
|
break;
|
|
default: break;
|
|
}
|
|
|
|
uvg_pixels_blit(pred, block , width, height, width, lcu_width);
|
|
if(color != COLOR_Y && cfg->jccr) {
|
|
uvg_pixels_blit(pred, block2, width, height, width, lcu_width);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* \brief Reconstruct an intra CU
|
|
*
|
|
* \param state encoder state
|
|
* \param x x-coordinate of the CU in luma pixels
|
|
* \param y y-coordinate of the CU in luma pixels
|
|
* \param depth depth in the CU tree
|
|
* \param mode_luma intra mode for luma, or -1 to skip luma recon
|
|
* \param mode_chroma intra mode for chroma, or -1 to skip chroma recon
|
|
* \param cur_cu pointer to the CU, or NULL to fetch CU from LCU
|
|
* \param cclm_params pointer for the cclm_parameters, can be NULL if the mode is not cclm mode
|
|
* \param mip_flag indicates whether the passed mode_luma is a MIP mode
|
|
* \param mip_transp indicates whether the used MIP mode is transposed
|
|
* \param lcu containing LCU
|
|
*/
|
|
void uvg_intra_recon_cu(
|
|
encoder_state_t* const state,
|
|
int x,
|
|
int y,
|
|
int depth,
|
|
intra_search_data_t* search_data,
|
|
cu_info_t *cur_cu,
|
|
lcu_t *lcu,
|
|
enum uvg_tree_type tree_type,
|
|
bool recon_luma,
|
|
bool recon_chroma)
|
|
{
|
|
const vector2d_t lcu_px = { SUB_SCU(x) >> (tree_type == UVG_CHROMA_T), SUB_SCU(y) >> (tree_type == UVG_CHROMA_T) };
|
|
const int8_t width = LCU_WIDTH >> depth;
|
|
const int8_t height = width; // TODO: height for non-square blocks.
|
|
if (cur_cu == NULL) {
|
|
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
|
|
}
|
|
|
|
if(!recon_luma && recon_chroma) {
|
|
x &= ~7;
|
|
y &= ~7;
|
|
}
|
|
|
|
// Reset CBFs because CBFs might have been set
|
|
// for depth earlier
|
|
if (recon_luma) {
|
|
cbf_clear(&cur_cu->cbf, depth, COLOR_Y);
|
|
}
|
|
if (recon_chroma) {
|
|
cbf_clear(&cur_cu->cbf, depth, COLOR_U);
|
|
cbf_clear(&cur_cu->cbf, depth, COLOR_V);
|
|
}
|
|
|
|
if (depth == 0 || cur_cu->tr_depth > depth) {
|
|
|
|
const int offset = width / 2;
|
|
const int32_t x2 = x + offset;
|
|
const int32_t y2 = y + offset;
|
|
|
|
uvg_intra_recon_cu(state, x, y, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
|
|
uvg_intra_recon_cu(state, x2, y, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
|
|
uvg_intra_recon_cu(state, x, y2, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
|
|
uvg_intra_recon_cu(state, x2, y2, depth + 1, search_data, NULL, lcu, tree_type, recon_luma, recon_chroma);
|
|
|
|
// Propagate coded block flags from child CUs to parent CU.
|
|
uint16_t child_cbfs[3] = {
|
|
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), lcu_px.y >> (tree_type == UVG_CHROMA_T))->cbf,
|
|
LCU_GET_CU_AT_PX(lcu, lcu_px.x >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf,
|
|
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf,
|
|
};
|
|
|
|
if (recon_luma && depth <= MAX_DEPTH) {
|
|
cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y);
|
|
}
|
|
if (recon_chroma && depth <= MAX_DEPTH) {
|
|
cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U);
|
|
cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V);
|
|
}
|
|
return;
|
|
}
|
|
if (search_data->pred_cu.intra.isp_mode != ISP_MODE_NO_ISP && recon_luma ) {
|
|
search_data->best_isp_cbfs = 0;
|
|
// ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions.
|
|
// Small blocks are split only twice.
|
|
int split_type = search_data->pred_cu.intra.isp_mode;
|
|
int split_limit = uvg_get_isp_split_num(width, height, split_type, true);
|
|
cu_loc_t origin_cu;
|
|
uvg_cu_loc_ctor(&origin_cu, x, y, width, height);
|
|
|
|
for (int i = 0; i < split_limit; ++i) {
|
|
cu_loc_t tu_loc;
|
|
uvg_get_isp_split_loc(&tu_loc, x, y, width, height, i, split_type, true);
|
|
cu_loc_t pu_loc;
|
|
uvg_get_isp_split_loc(&pu_loc, x, y, width, height, i, split_type, false);
|
|
|
|
if(tu_loc.x % 4 == 0) {
|
|
intra_recon_tb_leaf(state, &pu_loc, &origin_cu, lcu, COLOR_Y, search_data, tree_type);
|
|
}
|
|
uvg_quantize_lcu_residual(state, true, false, false,
|
|
&tu_loc, depth, cur_cu, lcu,
|
|
false, tree_type);
|
|
search_data->best_isp_cbfs |= cbf_is_set(cur_cu->cbf, depth, COLOR_Y) << i;
|
|
}
|
|
}
|
|
const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP;
|
|
const bool has_chroma = recon_chroma && (x % 8 == 0 && y % 8 == 0);
|
|
|
|
cu_loc_t loc;
|
|
uvg_cu_loc_ctor(&loc, x, y, width, height);
|
|
|
|
// Process a leaf TU.
|
|
if (has_luma) {
|
|
intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_Y, search_data, tree_type);
|
|
}
|
|
if (has_chroma) {
|
|
intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_U, search_data, tree_type);
|
|
intra_recon_tb_leaf(state, &loc, &loc, lcu, COLOR_V, search_data, tree_type);
|
|
}
|
|
|
|
// TODO: not necessary to call if only luma and ISP is on
|
|
uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
|
|
search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma,
|
|
&loc, depth, cur_cu, lcu,
|
|
false, tree_type);
|
|
}
|
|
|
|
|
|
/**
|
|
* \brief Check if ISP can be used for block size.
|
|
*
|
|
* \return True if isp can be used.
|
|
* \param width Block width.
|
|
* \param height Block height.
|
|
* \param max_tr_size Maximum supported transform block size (64).
|
|
*/
|
|
bool uvg_can_use_isp(const int width, const int height)
|
|
{
|
|
assert(!(width > LCU_WIDTH || height > LCU_WIDTH) && "Block size larger than max LCU size.");
|
|
assert(!(width < TR_MIN_WIDTH || height < TR_MIN_WIDTH) && "Block size smaller than min TR_WIDTH.");
|
|
|
|
const int log2_width = uvg_g_convert_to_log2[width];
|
|
const int log2_height = uvg_g_convert_to_log2[height];
|
|
|
|
// Each split block must have at least 16 samples.
|
|
bool not_enough_samples = (log2_width + log2_height <= 4);
|
|
bool cu_size_larger_than_max_tr_size = width > TR_MAX_WIDTH || height > TR_MAX_WIDTH;
|
|
if (not_enough_samples || cu_size_larger_than_max_tr_size) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* \brief Check if given ISP mode can be used with LFNST.
|
|
*
|
|
* \return True if isp can be used.
|
|
* \param width Block width.
|
|
* \param height Block height.
|
|
* \param isp_mode ISP mode.
|
|
* \param tree_type Tree type. Dual, luma or chroma tree.
|
|
*/
|
|
bool uvg_can_use_isp_with_lfnst(const int width, const int height, const int isp_split_type, const enum uvg_tree_type tree_type)
|
|
{
|
|
if (tree_type == UVG_CHROMA_T) {
|
|
return false;
|
|
}
|
|
if (isp_split_type == ISP_MODE_NO_ISP) {
|
|
return false;
|
|
}
|
|
|
|
const int tu_width = (isp_split_type == ISP_MODE_HOR) ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER, true);
|
|
const int tu_height = (isp_split_type == ISP_MODE_HOR) ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR, true) : height;
|
|
|
|
if (!(tu_width >= TR_MIN_WIDTH && tu_height >= TR_MIN_WIDTH))
|
|
{
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|