[intra] WIP: improve search

This commit is contained in:
Joose Sainio 2022-04-27 12:05:32 +03:00
parent c16f42e9df
commit 6d080b215c
5 changed files with 470 additions and 85 deletions

View file

@ -888,7 +888,12 @@ static void encode_chroma_intra_cu(cabac_data_t* const cabac, const cu_info_t* c
unsigned pred_mode = 0;
unsigned chroma_pred_modes[8] = {0, 50, 18, 1, 67, 81, 82, 83};
int8_t chroma_intra_dir = cur_cu->intra.mode_chroma;
int8_t luma_intra_dir = cur_cu->intra.mode;
int8_t luma_intra_dir = !cur_cu->intra.mip_flag ? cur_cu->intra.mode : 0;
for(int i = 0; i < 4; i++) {
if(chroma_pred_modes[i] == luma_intra_dir) {
chroma_pred_modes[i] = 66;
}
}
bool derived_mode = chroma_intra_dir == luma_intra_dir;
@ -1096,11 +1101,13 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
if (x > 0) {
assert(x >> 2 > 0);
const int x_scu = SUB_SCU(x) - 1;
const int y_scu = SUB_SCU(y + cu_width) - 1;
left_pu = lcu ?
LCU_GET_CU_AT_PX(
lcu,
SUB_SCU(x - 1),
SUB_SCU(y + cu_width - 1)) :
x_scu,
y_scu) :
uvg_cu_array_at_const(
frame->cu_array,
x - 1,
@ -1112,8 +1119,8 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
above_pu = lcu ?
LCU_GET_CU_AT_PX(
lcu,
SUB_SCU(x + cu_width - 1),
SUB_SCU(y -1)) :
SUB_SCU(x + cu_width) - 1,
SUB_SCU(y) - 1) :
uvg_cu_array_at_const(
frame->cu_array,
x + cu_width - 1,

View file

@ -583,7 +583,7 @@ int uvg_get_mip_flag_context(int x, int y, int width, int height, const lcu_t* l
left = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local);
}
if (y) {
top = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local);
top = LCU_GET_CU_AT_PX(lcu, x_local, y_local - 1);
}
}
else {
@ -1392,7 +1392,6 @@ void uvg_intra_predict(
}
else {
use_mip = state->encoder_control->chroma_format == UVG_CSP_444;
intra_mode = use_mip ? intra_mode : 0;
}
}
if (intra_mode < 68) {

View file

@ -435,8 +435,9 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state,
int cbf_mask = cbf_is_set(pred_cu->cbf, depth, COLOR_U) * 2 + cbf_is_set(pred_cu->cbf, depth, COLOR_V) - 1;
const cabac_ctx_t* ctx = NULL;
if (cbf_mask != -1) {
ctx = &(state->cabac.ctx.joint_cb_cr[cbf_mask]);
tr_tree_bits += CTX_ENTROPY_FBITS(ctx, 0);
cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac;
ctx = &(cabac->ctx.joint_cb_cr[cbf_mask]);
CABAC_FBITS_UPDATE(cabac, ctx, 0, tr_tree_bits, "cbf_cb_search");
}
}
@ -978,8 +979,8 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
// rd2. Possibly because the luma mode search already takes chroma
// into account, so there is less of a chanse of luma mode being
// really bad for chroma.
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; // skip luma
if (ctrl->cfg.rdo >= 3 && !cur_cu->intra.mip_flag) {
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode; // skip luma
if (ctrl->cfg.rdo >= 3) {
cur_cu->intra.mode_chroma = uvg_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search);
if (intra_search.pred_cu.joint_cb_cr == 0) intra_search.pred_cu.joint_cb_cr = 4;

View file

@ -48,11 +48,11 @@
// Modify weight of luma SSD.
#ifndef UVG_LUMA_MULT
#define UVG_LUMA_MULT 0.8
#define UVG_LUMA_MULT 1.0
#endif
// Modify weight of chroma SSD.
#ifndef UVG_CHROMA_MULT
#define UVG_CHROMA_MULT 1.5
#define UVG_CHROMA_MULT 1.0
#endif
/**

View file

@ -33,6 +33,8 @@
#include "search_intra.h"
#include <limits.h>
#include <math.h>
#include "cabac.h"
#include "encoder.h"
@ -331,7 +333,7 @@ static double search_intra_trdepth(
const int offset = width / 2;
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
const bool reconstruct_chroma = (depth != 4 || (depth == 4 && (x_px & 4 && y_px & 4))) && state->encoder_control->chroma_format != UVG_CSP_400;
const bool reconstruct_chroma = false;// (depth != 4 || (depth == 4 && (x_px & 4 && y_px & 4))) && state->encoder_control->chroma_format != UVG_CSP_400;
cu_info_t* pred_cu = &search_data->pred_cu;
cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
@ -358,7 +360,7 @@ static double search_intra_trdepth(
cbf_clear(&pred_cu->cbf, depth, COLOR_V);
}
const int8_t chroma_mode = reconstruct_chroma ? pred_cu->intra.mode : -1;
const int8_t chroma_mode = reconstruct_chroma ? (!pred_cu->intra.mip_flag ? pred_cu->intra.mode : 0) : -1;
double best_rd_cost = MAX_INT;
int best_tr_idx = 0;
int best_lfnst_idx = 0;
@ -824,8 +826,8 @@ static int16_t search_intra_rough(
const double not_mip = state->encoder_control->cfg.mip ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0;
const double mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 1);
const double not_mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 0);
const double planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 1);
const double not_planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 0);
const double planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 0);
const double not_planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 1);
for (int mode_i = 0; mode_i < modes_selected; ++mode_i) {
int i = 0;
int smaller_than_pred = 0;
@ -834,7 +836,7 @@ static int16_t search_intra_rough(
if (intra_preds[i] == modes[mode_i]) {
break;
}
if (modes[mode_i] > intra_preds[i]) {
if (modes[mode_i] < intra_preds[i]) {
smaller_than_pred += 1;
}
}
@ -842,7 +844,7 @@ static int16_t search_intra_rough(
bits = planar_mode_flag + mpm_mode_bit;
}
else if (i < INTRA_MPM_COUNT) {
bits = not_planar_mode_flag + mpm_mode_bit + MAX(i, 4);
bits = not_planar_mode_flag + mpm_mode_bit + MIN(i, 4);
}
else {
bits = not_mpm_mode_bit + 5 + (modes[mode_i] - smaller_than_pred > 3);
@ -860,6 +862,285 @@ static int16_t search_intra_rough(
}
static double count_bits(
encoder_state_t* const state,
int8_t* intra_preds,
const double not_mrl,
const double not_mip,
const double mpm_mode_bit,
const double not_mpm_mode_bit,
const double planar_mode_flag,
const double not_planar_mode_flag,
int8_t mode
)
{
int i = 0;
int smaller_than_pred = 0;
double bits;
for (; i < INTRA_MPM_COUNT; i++) {
if (intra_preds[i] == mode) {
break;
}
if (mode > intra_preds[i]) {
smaller_than_pred += 1;
}
}
if (i == 0) {
bits = planar_mode_flag + mpm_mode_bit;
}
else if (i < INTRA_MPM_COUNT) {
bits = not_planar_mode_flag + mpm_mode_bit + MIN(i, 4);
}
else {
bits = not_mpm_mode_bit + 5 + (mode - smaller_than_pred > 2);
}
bits += not_mrl + not_mip;
return bits;
}
static int16_t search_intra_rough(
encoder_state_t * const state,
kvz_pixel *orig,
int32_t origstride,
kvz_intra_references *refs,
int log2_width,
int8_t *intra_preds,
intra_search_data_t* modes_out,
cu_info_t* const pred_cu,
uint8_t mip_ctx)
{
#define PARALLEL_BLKS 2 // TODO: use 4 for AVX-512 in the future?
assert(log2_width >= 2 && log2_width <= 5);
int_fast8_t width = 1 << log2_width;
cost_pixel_nxn_func *satd_func = kvz_pixels_get_satd_func(width);
cost_pixel_nxn_func *sad_func = kvz_pixels_get_sad_func(width);
cost_pixel_nxn_multi_func *satd_dual_func = kvz_pixels_get_satd_dual_func(width);
cost_pixel_nxn_multi_func *sad_dual_func = kvz_pixels_get_sad_dual_func(width);
bool mode_checked[KVZ_NUM_INTRA_MODES] = {0};
double costs[KVZ_NUM_INTRA_MODES];
// const kvz_config *cfg = &state->encoder_control->cfg;
// const bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm);
// Temporary block arrays
kvz_pixel _preds[PARALLEL_BLKS * 32 * 32 + SIMD_ALIGNMENT];
pred_buffer preds = ALIGNED_POINTER(_preds, SIMD_ALIGNMENT);
kvz_pixel _orig_block[32 * 32 + SIMD_ALIGNMENT];
kvz_pixel *orig_block = ALIGNED_POINTER(_orig_block, SIMD_ALIGNMENT);
// Store original block for SAD computation
kvz_pixels_blit(orig, orig_block, width, width, origstride, width);
int8_t modes_selected = 0;
// Note: get_cost and get_cost_dual may return negative costs.
double min_cost;
double max_cost;
struct mode_cost {
int8_t mode;
double cost;
};
const double not_mrl = state->encoder_control->cfg.mrl ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.multi_ref_line[0]), 0) : 0;
const double not_mip = state->encoder_control->cfg.mip ? CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.mip_flag[mip_ctx]), 0) : 0;
const double mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 1);
const double not_mpm_mode_bit = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.intra_luma_mpm_flag_model), 0);
const double planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 0);
const double not_planar_mode_flag = CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.luma_planar_model[1]), 1);
struct mode_cost best_six_modes[6];
// Initial offset decides how many modes are tried before moving on to the
// recursive search.
// Calculate SAD for evenly spaced modes to select the starting point for
// the recursive search.
cu_loc_t loc = { 0, 0, width, width, width, width };
intra_search_data_t search_proxy;
FILL(search_proxy, 0);
search_proxy.pred_cu = *pred_cu;
int offset = 4;
search_proxy.pred_cu.intra.mode = 0;
kvz_intra_predict(state, refs, &loc, COLOR_Y, preds[0], &search_proxy, NULL);
search_proxy.pred_cu.intra.mode = 1;
kvz_intra_predict(state, refs, &loc, COLOR_Y, preds[1], &search_proxy, NULL);
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs);
mode_checked[0] = true;
mode_checked[1] = true;
costs[0] += count_bits(
state,
intra_preds,
not_mrl,
not_mip,
mpm_mode_bit,
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
0) * state->lambda_sqrt;
costs[1] += count_bits(
state,
intra_preds,
not_mrl,
not_mip,
mpm_mode_bit,
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
1) * state->lambda_sqrt;
if(costs[0] < costs[1]) {
min_cost = costs[0];
max_cost = costs[1];
best_six_modes[0].mode = 0;
best_six_modes[0].cost = costs[0];
best_six_modes[1].mode = 1;
best_six_modes[1].cost = costs[1];
}
else {
min_cost = costs[1];
max_cost = costs[0];
best_six_modes[1].mode = 0;
best_six_modes[1].cost = costs[0];
best_six_modes[0].mode = 1;
best_six_modes[0].cost = costs[1];
}
best_six_modes[2].cost = MAX_DOUBLE;
best_six_modes[3].cost = MAX_DOUBLE;
best_six_modes[4].cost = MAX_DOUBLE;
best_six_modes[5].cost = MAX_DOUBLE;
for (int mode = 4; mode <= 66; mode += PARALLEL_BLKS * offset) {
double costs_out[PARALLEL_BLKS] = { 0 };
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) {
search_proxy.pred_cu.intra.mode = mode + i*offset;
kvz_intra_predict(state, refs, &loc, COLOR_Y, preds[i], &search_proxy, NULL);
}
}
//TODO: add generic version of get cost multi
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
for (int i = 0; i < PARALLEL_BLKS; ++i) {
if (mode + i * offset <= 66) {
costs_out[i] += count_bits(
state,
intra_preds,
not_mrl,
not_mip,
mpm_mode_bit,
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
mode + i * offset) * state->lambda_sqrt;
}
}
for (int i = 0; i < PARALLEL_BLKS; ++i) {
int8_t mode_i = mode + i* offset;
if (mode_i <= 66) {
costs[mode_i] = costs_out[i];
mode_checked[mode_i] = true;
min_cost = MIN(min_cost, costs[mode_i]);
max_cost = MAX(max_cost, costs[mode_i]);
++modes_selected;
for (int j = 0; j < 6; j++) {
if (costs[mode_i] < best_six_modes[j].cost) {
for(int k = 5; k > j; k--) {
best_six_modes[k] = best_six_modes[k - 1];
}
best_six_modes[j].cost = costs[mode_i];
best_six_modes[j].mode = mode_i;
break;
}
}
}
}
}
offset >>= 1;
// Skip recursive search if all modes have the same cost.
if (min_cost != max_cost) {
// Do a recursive search to find the best mode, always centering on the
// current best mode.
for (; offset > 0; offset >>= 1) {
struct mode_cost temp_best_six_modes[6];
memcpy(temp_best_six_modes, best_six_modes, sizeof(temp_best_six_modes));
int8_t modes_to_check[12];
int num_modes_to_check = 0;
for(int i = 0; i < 6; i++) {
int8_t center_node = best_six_modes[i].mode;
int8_t test_modes[] = { center_node - offset, center_node + offset };
for(int j = 0; j < 2; j++) {
if((test_modes[j] >= 2 && test_modes[j] <= 66) && mode_checked[test_modes[j]] == false) {
modes_to_check[num_modes_to_check++] = test_modes[j];
mode_checked[test_modes[j]] = true;
}
}
}
while (num_modes_to_check & (PARALLEL_BLKS - 1)) {
modes_to_check[num_modes_to_check++] = 1;
}
for (int i = 0; i < num_modes_to_check; i += PARALLEL_BLKS) {
double costs_out[PARALLEL_BLKS] = { 0 };
for (int block = 0; block < PARALLEL_BLKS; ++block) {
search_proxy.pred_cu.intra.mode = modes_to_check[block + i];
kvz_intra_predict(state, refs, &loc, COLOR_Y, preds[block], &search_proxy, NULL);
}
//TODO: add generic version of get cost multi
get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, costs_out);
for (int block = 0; block < PARALLEL_BLKS; ++block) {
costs_out[block] += count_bits(
state,
intra_preds,
not_mrl,
not_mip,
mpm_mode_bit,
not_mpm_mode_bit,
planar_mode_flag,
not_planar_mode_flag,
modes_to_check[block + i]) * state->lambda_sqrt;
}
for (int block = 0; block < PARALLEL_BLKS; ++block) {
int8_t mode = modes_to_check[i + block];
if (mode == 1) continue;
costs[mode] = costs_out[block];
for (int j = 0; j < 6; j++) {
if (costs[mode] < best_six_modes[j].cost) {
for (int k = 5; k > j; k--) {
best_six_modes[k] = best_six_modes[k - 1];
}
best_six_modes[j].cost = costs[mode];
best_six_modes[j].mode = mode;
break;
}
}
}
}
}
}
// Add prediction mode coding cost as the last thing. We don't want this
// affecting the halving search.
for(int i=0; i < 6; i++) {
const int8_t mode = best_six_modes[i].mode;
modes_out[i].cost = costs[mode];
modes_out[i].pred_cu = *pred_cu;
modes_out[i].pred_cu.intra.mode = mode;
modes_out[i].pred_cu.intra.mode_chroma = mode;
}
#undef PARALLEL_BLKS
return 6;
}
static void get_rough_cost_for_2n_modes(
encoder_state_t* const state,
uvg_intra_references* refs,
@ -1010,7 +1291,7 @@ double uvg_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in
if (chroma_mode == luma_mode) {
mode_bits = CTX_ENTROPY_FBITS(ctx, 0);
} else {
if(chroma_mode > 67) {
if(chroma_mode < 67) {
mode_bits = 2.0 + CTX_ENTROPY_FBITS(ctx, 1);
}
else {
@ -1063,9 +1344,12 @@ int8_t uvg_search_intra_chroma_rdo(
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
cabac_data_t temp_cabac;
memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t));
for (int8_t i = 0; i < num_modes; ++i) {
const uint8_t mode = chroma_data[i].pred_cu.intra.mode_chroma;
state->search_cabac.update = 1;
uvg_intra_recon_cu(state,
x_px, y_px,
depth, &chroma_data[i],
@ -1080,6 +1364,8 @@ int8_t uvg_search_intra_chroma_rdo(
double mode_bits = uvg_chroma_mode_bits(state, mode, luma_mode);
chroma_data[i].cost += mode_bits * state->lambda;
memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t));
}
sort_modes(chroma_data, num_modes);
@ -1097,21 +1383,17 @@ int8_t uvg_search_cu_intra_chroma(encoder_state_t * const state,
const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) };
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
int8_t intra_mode = cur_pu->intra.mode;
int8_t intra_mode = !cur_pu->intra.mip_flag ? cur_pu->intra.mode : 0;
int8_t modes[8] = { 0, 50, 18, 1, -1, 81, 82, 83 };
int8_t modes[8] = { 0, 50, 18, 1, intra_mode, 81, 82, 83 };
uint8_t total_modes = (state->encoder_control->cfg.cclm ? 8 : 5);
if (intra_mode != 0 && intra_mode != 50 && intra_mode != 18 && intra_mode != 1) {
modes[4] = intra_mode;
for(int i = 0; i < 4; i++) {
if (modes[i] == intra_mode) {
modes[i] = 66;
break;
}
else {
total_modes -= 1;
modes[4] = modes[5];
modes[5] = modes[6];
modes[6] = modes[7];
}
// The number of modes to select for slower chroma search. Luma mode
// is always one of the modes, so 2 means the final decision is made
// between luma mode and one other mode that looks the best
@ -1166,6 +1448,86 @@ int8_t uvg_search_cu_intra_chroma(encoder_state_t * const state,
}
static int select_candidates_for_further_search(const encoder_state_t * const state,
intra_search_data_t *search_data,
uint8_t regular_modes,
uint8_t mip_modes,
int width,
int height
)
{
const double threshold_cost = 1.0 + 1.4 / sqrt(width * height);
const int max_cand_per_type = regular_modes >> 1;
const double minCost = MIN(search_data[0].cost, search_data[regular_modes].cost);
bool keepOneMip = search_data[regular_modes - 1].cost < search_data[regular_modes].cost;
const int maxNumConv = 3;
intra_search_data_t temp_mip_modes[3];
const int transp_offset = mip_modes / 2;
for(int i = 0; i <3; i++) {
const bool is_transp = search_data[regular_modes + i].cost > search_data[regular_modes + i + transp_offset].cost;
temp_mip_modes[i] = search_data[regular_modes + i + (is_transp ? transp_offset : 0)];
}
sort_modes(search_data, regular_modes + mip_modes);
intra_search_data_t temp_list_out[9];
int selected_modes = 0;
int numConv = 0;
int numMip = 0;
for (int idx = 0; idx < regular_modes + keepOneMip; idx++)
{
bool addMode = false;
if (!search_data[idx].pred_cu.intra.mip_flag)
{
addMode = (numConv < maxNumConv);
numConv += addMode ? 1 : 0;
}
else
{
addMode = (numMip < max_cand_per_type || (search_data[idx].cost < threshold_cost * minCost) || keepOneMip);
keepOneMip = false;
numMip += addMode ? 1 : 0;
}
if (addMode)
{
temp_list_out[selected_modes++] = search_data[idx];
}
}
if (width> 8 && height > 8)
{
// Sort MIP candidates by Hadamard cost
// Append MIP mode to RD mode list
for (int idx = 0; idx < 3; idx++)
{
bool alreadyIncluded = false;
for (int list_idx = 0; list_idx < selected_modes; list_idx++)
{
if (temp_list_out[list_idx].pred_cu.intra.mip_flag &&
temp_list_out[list_idx].pred_cu.intra.mip_is_transposed == temp_mip_modes[idx].pred_cu.intra.mip_is_transposed &&
temp_list_out[list_idx].pred_cu.intra.mode == idx
)
{
alreadyIncluded = true;
break;
}
}
if (!alreadyIncluded)
{
temp_list_out[selected_modes++] = temp_mip_modes[idx];
// if (fastMip) break;
}
}
}
memcpy(search_data, temp_list_out, selected_modes * sizeof(intra_search_data_t));
return selected_modes;
}
/**
* Update lcu to have best modes at this depth.
* \return Cost of best mode.
@ -1205,7 +1567,7 @@ void uvg_search_cu_intra(
if (y_px >= SCU_WIDTH && lcu_px.y > 0) {
above_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x+ cu_width-1, lcu_px.y - 1);
}
uvg_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
int8_t num_cand = uvg_intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
if (depth > 0) {
uvg_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, refs, state->encoder_control->cfg.wpp, NULL, 0);
@ -1227,17 +1589,24 @@ void uvg_search_cu_intra(
temp_pred_cu = *cur_cu;
temp_pred_cu.type = CU_INTRA;
FILL(temp_pred_cu.intra, 0);
// Find modes with multiple reference lines if in use. Do not use if CU in first row.
uint8_t lines = state->encoder_control->cfg.mrl && (y_px % LCU_WIDTH) != 0 ? MAX_REF_LINE_IDX : 1;
int16_t number_of_modes;
int16_t num_regular_modes;
bool skip_rough_search = (depth == 0 || state->encoder_control->cfg.rdo >= 4);
if (!skip_rough_search) {
number_of_modes = search_intra_rough(state,
num_regular_modes = number_of_modes = search_intra_rough(
state,
ref_pixels,
LCU_WIDTH,
refs,
log2_width, candidate_modes,
search_data, &temp_pred_cu,
log2_width,
candidate_modes,
search_data,
&temp_pred_cu,
mip_ctx);
// if(lines == 1) sort_modes(search_data, number_of_modes);
} else {
for (int8_t i = 0; i < UVG_NUM_INTRA_MODES; i++) {
@ -1249,38 +1618,7 @@ void uvg_search_cu_intra(
number_of_modes = UVG_NUM_INTRA_MODES;
}
int num_mip_modes = 0;
if (state->encoder_control->cfg.mip) {
// MIP is not allowed for 64 x 4 or 4 x 64 blocks
if (!((width == 64 && height == 4) || (width == 4 && height == 64))) {
num_mip_modes = NUM_MIP_MODES_FULL(width, height);
for (int transpose = 0; transpose < 2; transpose++) {
const int half_mip_modes = NUM_MIP_MODES_HALF(width, height);
for (int i = 0; i < half_mip_modes; ++i) {
const int index = i + number_of_modes + transpose * half_mip_modes;
search_data[index].pred_cu = temp_pred_cu;
search_data[index].pred_cu.intra.mip_flag = 1;
search_data[index].pred_cu.intra.mode = i;
search_data[index].pred_cu.intra.mip_is_transposed = transpose;
search_data[index].pred_cu.intra.mode_chroma = i;
search_data[index].cost = MAX_INT;
}
}
if(!skip_rough_search) {
get_rough_cost_for_2n_modes(state, refs, &cu_loc,
ref_pixels,
LCU_WIDTH, search_data + number_of_modes, num_mip_modes,
mip_ctx);
}
}
number_of_modes += num_mip_modes;
}
int num_mrl_modes = 0;
// Find modes with multiple reference lines if in use. Do not use if CU in first row.
uint8_t lines = state->encoder_control->cfg.mrl && (y_px % LCU_WIDTH) != 0 ? MAX_REF_LINE_IDX : 1;
for(int line = 1; line < lines; ++line) {
uvg_pixel extra_refs[128 * MAX_REF_LINE_IDX] = { 0 };
@ -1314,8 +1652,39 @@ void uvg_search_cu_intra(
ref_pixels,
LCU_WIDTH, search_data + number_of_modes, num_mrl_modes,
mip_ctx);
sort_modes(search_data, number_of_modes);
}
number_of_modes += num_mrl_modes;
num_regular_modes += num_mrl_modes;
int num_mip_modes = 0;
if (state->encoder_control->cfg.mip) {
// MIP is not allowed for 64 x 4 or 4 x 64 blocks
if (!((width == 64 && height == 4) || (width == 4 && height == 64))) {
num_mip_modes = NUM_MIP_MODES_FULL(width, height);
for (int transpose = 0; transpose < 2; transpose++) {
const int half_mip_modes = NUM_MIP_MODES_HALF(width, height);
for (int i = 0; i < half_mip_modes; ++i) {
const int index = i + number_of_modes + transpose * half_mip_modes;
search_data[index].pred_cu = temp_pred_cu;
search_data[index].pred_cu.intra.mip_flag = 1;
search_data[index].pred_cu.intra.mode = i;
search_data[index].pred_cu.intra.mip_is_transposed = transpose;
search_data[index].pred_cu.intra.mode_chroma = 0;
search_data[index].cost = MAX_INT;
}
}
if (!skip_rough_search) {
get_rough_cost_for_2n_modes(state, refs, &cu_loc,
ref_pixels,
LCU_WIDTH, search_data + number_of_modes, num_mip_modes,
mip_ctx);
}
}
number_of_modes += num_mip_modes;
}
// Set transform depth to current depth, meaning no transform splits.
uvg_lcu_fill_trdepth(lcu, x_px, y_px, depth, depth);
@ -1326,19 +1695,39 @@ void uvg_search_cu_intra(
if (rdo_level == 4) {
number_of_modes_to_search = number_of_modes;
} else if (rdo_level == 2 || rdo_level == 3) {
number_of_modes_to_search = (cu_width == 4) ? 3 : 2;
const uint8_t g_aucIntraModeNumFast_UseMPM_2D[7 - 2 + 1][7 - 2 + 1] =
{
{3, 3, 3, 3, 2, 2}, // 4x4, 4x8, 4x16, 4x32, 4x64, 4x128,
{3, 3, 3, 3, 3, 2}, // 8x4, 8x8, 8x16, 8x32, 8x64, 8x128,
{3, 3, 3, 3, 3, 2}, // 16x4, 16x8, 16x16, 16x32, 16x64, 16x128,
{3, 3, 3, 3, 3, 2}, // 32x4, 32x8, 32x16, 32x32, 32x64, 32x128,
{2, 3, 3, 3, 3, 2}, // 64x4, 64x8, 64x16, 64x32, 64x64, 64x128,
{2, 2, 2, 2, 2, 3}, // 128x4, 128x8, 128x16, 128x32, 128x64, 128x128,
};
number_of_modes_to_search = g_aucIntraModeNumFast_UseMPM_2D[7- depth - 3][7 - depth - 3];
} else {
// Check only the predicted modes.
number_of_modes_to_search = 0;
}
if(!skip_rough_search) {
sort_modes(search_data, (uint8_t)number_of_modes);
if(state->encoder_control->cfg.mip) {
number_of_modes_to_search = select_candidates_for_further_search(
state,
search_data,
num_regular_modes,
num_mip_modes,
width,
height
);
}
}
for(int pred_mode = 0; pred_mode < INTRA_MPM_COUNT; ++pred_mode) {
for(int pred_mode = 0; pred_mode < num_cand; ++pred_mode) {
bool mode_found = false;
for(int i = 0; i < number_of_modes_to_search; i++) {
if(search_data[i].pred_cu.intra.mode == candidate_modes[pred_mode]) {
if(search_data[i].pred_cu.intra.mip_flag == 0 &&
search_data[i].pred_cu.intra.multi_ref_idx == 0 &&
search_data[i].pred_cu.intra.mode == candidate_modes[pred_mode]) {
mode_found = true;
break;
}
@ -1364,16 +1753,5 @@ void uvg_search_cu_intra(
search_data[0].pred_cu.violates_mts_coeff_constraint = false;
search_data[0].pred_cu.mts_last_scan_pos = false;
}
else {
double best_cost = MAX_INT;
int best_mode = 0;
for (int mode = 0; mode < number_of_modes; mode++) {
if (search_data[mode].cost < best_cost) {
best_cost = search_data[mode].cost;
best_mode = mode;
}
}
search_data[0] = search_data[best_mode];
}
*mode_out = search_data[0];
}