[isp] Implement ISP search and partitioning. Add helper function for constructing cu_loc types. WIP stuff for transform.

This commit is contained in:
siivonek 2022-07-29 15:36:56 +03:00 committed by Marko Viitanen
parent 6236cc29be
commit 06532dce02
10 changed files with 251 additions and 81 deletions

View file

@ -251,3 +251,28 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
} }
} }
} }
/**
 * \brief Fill a cu_loc_t from a luma position and luma block size.
 *
 * The chroma dimensions are derived here: each is half of the matching
 * luma dimension, clamped so that it never drops below 4.
 *
 * \param loc     Destination cu_loc, written in full.
 * \param x       Block top left x coordinate.
 * \param y       Block top left y coordinate.
 * \param width   Luma block width, asserted to lie in [4, LCU_WIDTH].
 * \param height  Luma block height, asserted to lie in [4, LCU_WIDTH].
 */
void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
{
  // Sanity checks on the caller supplied geometry.
  assert(x >= 0 && y >= 0 && width >= 0 && height >= 0 && "Cannot give negative coordinates or block dimensions.");
  assert(width <= LCU_WIDTH && height <= LCU_WIDTH && "Luma CU dimension exceeds maximum (dim > LCU_WIDTH).");
  assert(width >= 4 && height >= 4 && "Luma CU dimension smaller than 4."); // TODO: change if luma size 2 is allowed

  const int half_width  = width >> 1;
  const int half_height = height >> 1;

  // Luma position and size are stored as given.
  loc->x      = x;
  loc->y      = y;
  loc->width  = width;
  loc->height = height;

  // Chroma is half of luma, but not smaller than 4.
  // TODO: when MTT is implemented, chroma dimensions can be minimum 2.
  loc->chroma_width  = MAX(half_width, 4);
  loc->chroma_height = MAX(half_height, 4);
}

View file

@ -185,6 +185,7 @@ typedef struct
uint8_t multi_ref_idx; uint8_t multi_ref_idx;
int8_t mip_flag; int8_t mip_flag;
int8_t mip_is_transposed; int8_t mip_is_transposed;
int8_t isp_mode;
} intra; } intra;
struct { struct {
mv_t mv[2][2]; // \brief Motion vectors for L0 and L1 mv_t mv[2][2]; // \brief Motion vectors for L0 and L1
@ -206,6 +207,8 @@ typedef struct {
int8_t chroma_height; int8_t chroma_height;
} cu_loc_t; } cu_loc_t;
void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height);
#define CU_GET_MV_CAND(cu_info_ptr, reflist) \ #define CU_GET_MV_CAND(cu_info_ptr, reflist) \
(((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1) (((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)

View file

@ -37,6 +37,8 @@
#include "image.h" #include "image.h"
#include "uvg_math.h" #include "uvg_math.h"
#include "mip_data.h" #include "mip_data.h"
#include "search.h"
#include "search_intra.h"
#include "strategies/strategies-intra.h" #include "strategies/strategies-intra.h"
#include "tables.h" #include "tables.h"
#include "transform.h" #include "transform.h"
@ -1471,9 +1473,7 @@ const cu_info_t* uvg_get_co_located_luma_cu(
static void intra_recon_tb_leaf( static void intra_recon_tb_leaf(
encoder_state_t* const state, encoder_state_t* const state,
int x, const cu_loc_t* cu_loc,
int y,
int depth,
lcu_t *lcu, lcu_t *lcu,
color_t color, color_t color,
const intra_search_data_t* search_data, const intra_search_data_t* search_data,
@ -1482,13 +1482,14 @@ static void intra_recon_tb_leaf(
const uvg_config *cfg = &state->encoder_control->cfg; const uvg_config *cfg = &state->encoder_control->cfg;
const int shift = color == COLOR_Y ? 0 : 1; const int shift = color == COLOR_Y ? 0 : 1;
int log2width = LOG2_LCU_WIDTH - depth; const int x = cu_loc->x;
if (color != COLOR_Y && depth < MAX_PU_DEPTH) { const int y = cu_loc->y;
// Chroma width is half of luma width, when not at maximum depth.
log2width -= 1; const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
} const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
const int width = 1 << log2width; int log2_width = uvg_g_convert_to_bit[width] + 2;
const int height = width; // TODO: proper height for non-square blocks int log2_height = uvg_g_convert_to_bit[height] + 2;
const int lcu_width = LCU_WIDTH >> shift; const int lcu_width = LCU_WIDTH >> shift;
const vector2d_t luma_px = { x, y }; const vector2d_t luma_px = { x, y };
@ -1510,25 +1511,20 @@ static void intra_recon_tb_leaf(
// Copy extra ref lines, including ref line 1 and top left corner. // Copy extra ref lines, including ref line 1 and top left corner.
for (int i = 0; i < MAX_REF_LINE_IDX; ++i) { for (int i = 0; i < MAX_REF_LINE_IDX; ++i) {
int height = (LCU_WIDTH >> depth) * 2 + MAX_REF_LINE_IDX; int ref_height = height * 2 + MAX_REF_LINE_IDX;
height = MIN(height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist. ref_height = MIN(ref_height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
height = MIN(height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX); ref_height = MIN(ref_height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
uvg_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)], uvg_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)],
&extra_refs[i * 128], &extra_refs[i * 128],
1, height, 1, ref_height,
frame->rec->stride, 1); frame->rec->stride, 1);
} }
} }
cu_loc_t loc = {
x, y,
width, height,
width, height,
};
uvg_intra_build_reference(&loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index); uvg_intra_build_reference(cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index);
uvg_pixel pred[32 * 32]; uvg_pixel pred[32 * 32];
uvg_intra_predict(state, &refs, &loc, color, pred, search_data, lcu, tree_type); uvg_intra_predict(state, &refs, cu_loc, color, pred, search_data, lcu, tree_type);
const int index = lcu_px.x + lcu_px.y * lcu_width; const int index = lcu_px.x + lcu_px.y * lcu_width;
uvg_pixel *block = NULL; uvg_pixel *block = NULL;
@ -1548,9 +1544,9 @@ static void intra_recon_tb_leaf(
default: break; default: break;
} }
uvg_pixels_blit(pred, block , width, width, width, lcu_width); uvg_pixels_blit(pred, block , width, height, width, lcu_width);
if(color != COLOR_Y && cfg->jccr) { if(color != COLOR_Y && cfg->jccr) {
uvg_pixels_blit(pred, block2, width, width, width, lcu_width); uvg_pixels_blit(pred, block2, width, height, width, lcu_width);
} }
} }
@ -1583,6 +1579,7 @@ void uvg_intra_recon_cu(
{ {
const vector2d_t lcu_px = { SUB_SCU(x) >> (tree_type == UVG_CHROMA_T), SUB_SCU(y) >> (tree_type == UVG_CHROMA_T) }; const vector2d_t lcu_px = { SUB_SCU(x) >> (tree_type == UVG_CHROMA_T), SUB_SCU(y) >> (tree_type == UVG_CHROMA_T) };
const int8_t width = LCU_WIDTH >> depth; const int8_t width = LCU_WIDTH >> depth;
const int8_t height = width; // TODO: height for non-square blocks.
if (cur_cu == NULL) { if (cur_cu == NULL) {
cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
} }
@ -1620,6 +1617,7 @@ void uvg_intra_recon_cu(
LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf, LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf,
}; };
// ISP_TODO: does not work with ISP yet, ask Joose when this is relevant.
if (recon_luma && depth <= MAX_DEPTH) { if (recon_luma && depth <= MAX_DEPTH) {
cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y); cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y);
} }
@ -1627,23 +1625,46 @@ void uvg_intra_recon_cu(
cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U); cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U);
cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V); cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V);
} }
} else { return;
const bool has_luma = recon_luma;
const bool has_chroma = recon_chroma && (x % 8 == 0 && y % 8 == 0);
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_Y, search_data, tree_type);
}
if (has_chroma) {
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_U, search_data, tree_type);
intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_V, search_data, tree_type);
}
uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma,
x, y, depth, cur_cu, lcu,
false,
tree_type);
} }
if (search_data->pred_cu.intra.isp_mode != ISP_MODE_NO_ISP && recon_luma) {
  // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions.
  // Small blocks are split only twice.
  const int split_type = search_data->pred_cu.intra.isp_mode;
  const int part_dim   = uvg_get_isp_split_dim(width, height, split_type);
  // The loop walks along the dimension that is being split.
  const int limit      = split_type == ISP_MODE_HOR ? height : width;

  // NOTE(review): uvg_cu_loc_ctor asserts that both dimensions are >= 4, while
  // uvg_get_isp_split_dim can return a smaller value for some block shapes.
  // Confirm which side should be relaxed before enabling ISP.
  for (int part = 0; part < limit; part += part_dim) {
    // Horizontal split: partitions are stacked in y, each one spans the full
    // block width and is part_dim rows high. Vertical split is the transpose.
    const int part_x = split_type == ISP_MODE_HOR ? x : x + part;
    const int part_y = split_type == ISP_MODE_HOR ? y + part : y;
    const int part_w = split_type == ISP_MODE_HOR ? width : part_dim;
    const int part_h = split_type == ISP_MODE_HOR ? part_dim : height;

    cu_loc_t loc;
    uvg_cu_loc_ctor(&loc, part_x, part_y, part_w, part_h);

    // Predict and then quantize the luma of this partition only.
    intra_recon_tb_leaf(state, &loc, lcu, COLOR_Y, search_data, tree_type);
    uvg_quantize_lcu_residual(state, true, false, false,
                              &loc, depth, cur_cu, lcu,
                              false, tree_type);
  }
}
const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP;
const bool has_chroma = recon_chroma && (x % 8 == 0 && y % 8 == 0);
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, x, y, width, height);
// Process a leaf TU.
if (has_luma) {
intra_recon_tb_leaf(state, &loc, lcu, COLOR_Y, search_data, tree_type);
}
if (has_chroma) {
intra_recon_tb_leaf(state, &loc, lcu, COLOR_U, search_data, tree_type);
intra_recon_tb_leaf(state, &loc, lcu, COLOR_V, search_data, tree_type);
}
uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma,
&loc, depth, cur_cu, lcu,
false, tree_type);
} }

View file

@ -170,6 +170,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
to->intra.multi_ref_idx = cu->intra.multi_ref_idx; to->intra.multi_ref_idx = cu->intra.multi_ref_idx;
to->intra.mip_flag = cu->intra.mip_flag; to->intra.mip_flag = cu->intra.mip_flag;
to->intra.mip_is_transposed = cu->intra.mip_is_transposed; to->intra.mip_is_transposed = cu->intra.mip_is_transposed;
to->intra.isp_mode = cu->intra.isp_mode;
} else { } else {
to->skipped = cu->skipped; to->skipped = cu->skipped;
to->merged = cu->merged; to->merged = cu->merged;
@ -1091,10 +1092,13 @@ static double search_cu(
inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, depth) + inter_bitcost * state->lambda; inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, depth) + inter_bitcost * state->lambda;
} }
// Describe the current CU for the residual quantization call that follows.
// The CU side length halves with every depth level, so shift right.
cu_loc_t loc;
const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
uvg_cu_loc_ctor(&loc, x, y, width, height);
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
true, has_chroma && !cur_cu->joint_cb_cr, true, has_chroma && !cur_cu->joint_cb_cr,
cur_cu->joint_cb_cr, x, y, cur_cu->joint_cb_cr, &loc,
depth, depth,
NULL, NULL,
lcu, lcu,

View file

@ -77,6 +77,14 @@ typedef struct unit_stats_map_t {
#define NUM_MIP_MODES_FULL(width, height) (((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 16 : 12)) #define NUM_MIP_MODES_FULL(width, height) (((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 16 : 12))
#define NUM_MIP_MODES_HALF(width, height) (NUM_MIP_MODES_FULL((width), (height)) >> 1) #define NUM_MIP_MODES_HALF(width, height) (NUM_MIP_MODES_FULL((width), (height)) >> 1)
// ISP related defines
#define NUM_ISP_MODES 3
#define ISP_MODE_NO_ISP 0
#define ISP_MODE_HOR 1
#define ISP_MODE_VER 2
#define SPLIT_TYPE_HOR 1
#define SPLIT_TYPE_VER 2
void uvg_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length); void uvg_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length);
void uvg_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict trafo, double *__restrict costs, uint8_t length); void uvg_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict trafo, double *__restrict costs, uint8_t length);

View file

@ -1679,6 +1679,7 @@ static void search_pu_inter(encoder_state_t * const state,
const uvg_config *cfg = &state->encoder_control->cfg; const uvg_config *cfg = &state->encoder_control->cfg;
const videoframe_t * const frame = state->tile->frame; const videoframe_t * const frame = state->tile->frame;
const int width_cu = LCU_WIDTH >> depth; const int width_cu = LCU_WIDTH >> depth;
const int height_cu = width_cu; // TODO: non-square blocks
const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu); const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu); const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
const int width = PU_GET_W(part_mode, width_cu, i_pu); const int width = PU_GET_W(part_mode, width_cu, i_pu);
@ -1826,7 +1827,11 @@ static void search_pu_inter(encoder_state_t * const state,
cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1]; cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1];
uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T); uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T);
uvg_inter_recon_cu(state, lcu, x, y, width, true, false); uvg_inter_recon_cu(state, lcu, x, y, width, true, false);
uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, x, y, width_cu, height_cu);
uvg_quantize_lcu_residual(state, true, false, false, &loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) { if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
continue; continue;
@ -1836,7 +1841,7 @@ static void search_pu_inter(encoder_state_t * const state,
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
false, has_chroma, false, has_chroma,
false, /*we are only checking for lack of coeffs so no need to check jccr*/ false, /*we are only checking for lack of coeffs so no need to check jccr*/
x, y, depth, cur_pu, lcu, &loc, depth, cur_pu, lcu,
true, true,
UVG_BOTH_T); UVG_BOTH_T);
if (!cbf_is_set_any(cur_pu->cbf, depth)) { if (!cbf_is_set_any(cur_pu->cbf, depth)) {
@ -2151,6 +2156,10 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
const int x_px = SUB_SCU(x); const int x_px = SUB_SCU(x);
const int y_px = SUB_SCU(y); const int y_px = SUB_SCU(y);
const int width = LCU_WIDTH >> depth; const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: non-square blocks
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, x, y, width, height);
cabac_data_t cabac_copy; cabac_data_t cabac_copy;
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
cabac_data_t* cabac = &state->search_cabac; cabac_data_t* cabac = &state->search_cabac;
@ -2198,7 +2207,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
true, true,
false, false,
false, x, y, false, &loc,
depth, depth,
cur_cu, cur_cu,
lcu, lcu,
@ -2263,7 +2272,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
else { else {
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
true, reconstruct_chroma, true, reconstruct_chroma,
reconstruct_chroma && state->encoder_control->cfg.jccr, x, y, reconstruct_chroma && state->encoder_control->cfg.jccr,
&loc,
depth, depth,
cur_cu, cur_cu,
lcu, lcu,

View file

@ -49,6 +49,7 @@
#include "strategies/strategies-picture.h" #include "strategies/strategies-picture.h"
#include "videoframe.h" #include "videoframe.h"
#include "strategies/strategies-quant.h" #include "strategies/strategies-quant.h"
#include "uvg_math.h"
// Normalize SAD for comparison against SATD to estimate transform skip // Normalize SAD for comparison against SATD to estimate transform skip
@ -247,6 +248,76 @@ static void derive_mts_constraints(cu_info_t *const pred_cu,
} }
// ISP_TODO: move this function if it is used elsewhere
/**
 * \brief Check whether a block of the given size may use ISP.
 *
 * \param width        Block width.
 * \param height       Block height.
 * \param max_tr_size  Largest allowed transform side length.
 * \return True when the block is large enough to be split and neither
 *         dimension exceeds max_tr_size.
 */
bool can_use_isp(const int width, const int height, const int max_tr_size)
{
  const int log2_sum = (uvg_g_convert_to_bit[width] + 2) + (uvg_g_convert_to_bit[height] + 2);

  // Each split block must have at least 16 samples.
  const bool enough_samples = log2_sum > 4;
  const bool fits_max_tr_size = width <= max_tr_size && height <= max_tr_size;

  return enough_samples && fits_max_tr_size;
}
/**
 * \brief Returns ISP split partition size based on block dimensions and split type.
 *
 * The split dimension (height for a horizontal split, width otherwise) is
 * divided by four, unless that would leave a partition with fewer than 16
 * samples, in which case the partition is enlarged to reach 16 samples.
 * Asserts that the resulting partition has at least 16 samples.
 *
 * \param width       Block width.
 * \param height      Block height.
 * \param split_type  SPLIT_TYPE_HOR for a horizontal split; any other value
 *                    is handled as a vertical split.
 * \return Size of one partition along the split dimension.
 */
int uvg_get_isp_split_dim(const int width, const int height, const int split_type)
{
  const bool divide_in_rows = split_type == SPLIT_TYPE_HOR;
  const int split_dim_size = divide_in_rows ? height : width;
  const int non_split_dim_size = divide_in_rows ? width : height;
  const int div_shift = 2; // Split into four by default.

  // ISP_TODO: make a define for this. Depends on minimum transform block log2 side length
  const int min_num_samples = 16; // Minimum allowed number of samples for split block

  // Smallest partition size that still gives min_num_samples samples.
  const int factor_to_min_samples = non_split_dim_size < min_num_samples
    ? min_num_samples >> uvg_math_floor_log2(non_split_dim_size)
    : 1;
  const int quarter_size = split_dim_size >> div_shift;
  const int partition_size = quarter_size < factor_to_min_samples ? factor_to_min_samples : quarter_size;

  // The condition must hold for a valid partition; the previous '<' made the
  // assertion fire for every partition that was large enough.
  assert((uvg_math_floor_log2(partition_size) + uvg_math_floor_log2(non_split_dim_size) >= uvg_math_floor_log2(min_num_samples)) &&
    "Partition has less than allowed minimum number of samples.");

  return partition_size;
}
// ISP_TODO: move this function if it is used elsewhere
/**
 * \brief Check whether LFNST can be combined with the given ISP mode.
 *
 * \param width     Block width.
 * \param height    Block height.
 * \param isp_mode  ISP mode of the block.
 * \return False when ISP is not in use or when an ISP partition would have
 *         a side shorter than 4, true otherwise.
 */
bool can_use_isp_with_lfnst(const int width, const int height, const int isp_mode)
{
  if (isp_mode == ISP_MODE_NO_ISP) {
    return false;
  }

  // Only the split dimension shrinks; the other one keeps the block size.
  int tu_width = width;
  int tu_height = height;
  if (isp_mode == ISP_MODE_HOR) {
    tu_height = uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR);
  }
  else {
    tu_width = uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER);
  }

  // ISP_TODO: make a define for this or use existing
  const int min_tb_size = 4;
  return tu_width >= min_tb_size && tu_height >= min_tb_size;
}
/** /**
* \brief Perform search for best intra transform split configuration. * \brief Perform search for best intra transform split configuration.
* *
@ -325,6 +396,8 @@ static double search_intra_trdepth(
{ {
trafo = 0; trafo = 0;
num_transforms = (mts_enabled ? MTS_TR_NUM : 1); num_transforms = (mts_enabled ? MTS_TR_NUM : 1);
// Do not do MTS search if ISP mode is used
num_transforms = pred_cu->intra.isp_mode == ISP_MODE_NO_ISP ? num_transforms : 1;
} }
const int mts_start = trafo; const int mts_start = trafo;
//TODO: height //TODO: height
@ -360,6 +433,11 @@ static double search_intra_trdepth(
pred_cu->violates_lfnst_constrained_chroma = false; pred_cu->violates_lfnst_constrained_chroma = false;
pred_cu->lfnst_last_scan_pos = false; pred_cu->lfnst_last_scan_pos = false;
if (pred_cu->lfnst_idx != 0) {
// Cannot use ISP with LFNST for small blocks
pred_cu->intra.isp_mode = can_use_isp_with_lfnst(width, height, pred_cu->intra.isp_mode) ? pred_cu->intra.isp_mode : ISP_MODE_NO_ISP;
}
for (trafo = mts_start; trafo < num_transforms; trafo++) { for (trafo = mts_start; trafo < num_transforms; trafo++) {
pred_cu->tr_idx = trafo; pred_cu->tr_idx = trafo;
pred_cu->tr_skip = trafo == MTS_SKIP; pred_cu->tr_skip = trafo == MTS_SKIP;
@ -1371,18 +1449,27 @@ static int8_t search_intra_rdo(
enum uvg_tree_type tree_type) enum uvg_tree_type tree_type)
{ {
const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra); const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra);
// CU side length halves with every depth level, so shift right.
const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: height for non-square blocks
for (int mode = 0; mode < modes_to_check; mode++) { for (int mode = 0; mode < modes_to_check; mode++) {
double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, x_px, y_px, depth, lcu); bool can_do_isp_search = search_data[mode].pred_cu.intra.mip_flag ? false: true; // Cannot use ISP with MIP
search_data[mode].pred_cu.tr_idx = MTS_TR_NUM; can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL
search_data[mode].bits = rdo_bitcost; int max_isp_modes = can_do_isp_search && can_use_isp(width, height, 64 /*MAX_TR_SIZE*/) && state->encoder_control->cfg.isp ? NUM_ISP_MODES : 1;
search_data[mode].cost = rdo_bitcost * state->lambda;
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type); for (int isp_mode = 0; isp_mode < max_isp_modes; ++isp_mode) {
search_data[mode].cost += mode_cost; search_data[mode].pred_cu.intra.isp_mode = isp_mode;
if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(search_data[mode].pred_cu.cbf, depth)) { double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, x_px, y_px, depth, lcu);
modes_to_check = mode + 1; search_data[mode].pred_cu.tr_idx = MTS_TR_NUM;
break; search_data[mode].bits = rdo_bitcost;
search_data[mode].cost = rdo_bitcost * state->lambda;
double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type);
search_data[mode].cost += mode_cost;
if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(search_data[mode].pred_cu.cbf, depth)) {
modes_to_check = mode + 1;
break;
}
} }
} }

View file

@ -66,4 +66,6 @@ void uvg_search_cu_intra(
lcu_t *lcu, lcu_t *lcu,
enum uvg_tree_type tree_type); enum uvg_tree_type tree_type);
int uvg_get_isp_split_dim(const int width, const int height, const int split_type);
#endif // SEARCH_INTRA_H_ #endif // SEARCH_INTRA_H_

View file

@ -1105,14 +1105,15 @@ int uvg_quantize_residual_trskip(
static void quantize_tr_residual( static void quantize_tr_residual(
encoder_state_t * const state, encoder_state_t * const state,
const color_t color, const color_t color,
const int32_t x, const cu_loc_t *cu_loc,
const int32_t y,
const uint8_t depth, const uint8_t depth,
cu_info_t *cur_pu, cu_info_t *cur_pu,
lcu_t* lcu, lcu_t* lcu,
bool early_skip, bool early_skip,
enum uvg_tree_type tree_type) enum uvg_tree_type tree_type)
{ {
const int x = cu_loc->x;
const int y = cu_loc->y;
const uvg_config *cfg = &state->encoder_control->cfg; const uvg_config *cfg = &state->encoder_control->cfg;
const int32_t shift = color == COLOR_Y ? 0 : 1; const int32_t shift = color == COLOR_Y ? 0 : 1;
const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift}; const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift};
@ -1130,13 +1131,9 @@ static void quantize_tr_residual(
// This should ensure that the CBF data doesn't get corrupted if this function // This should ensure that the CBF data doesn't get corrupted if this function
// is called more than once. // is called more than once.
int32_t tr_width; int32_t tr_width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
if (color == COLOR_Y) { int32_t tr_height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
tr_width = LCU_WIDTH >> depth;
} else {
const int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth);
tr_width = LCU_WIDTH_C >> chroma_depth;
}
const int32_t lcu_width = LCU_WIDTH >> shift; const int32_t lcu_width = LCU_WIDTH >> shift;
const int8_t mode = const int8_t mode =
(color == COLOR_Y) ? cur_pu->intra.mode : cur_pu->intra.mode_chroma; (color == COLOR_Y) ? cur_pu->intra.mode : cur_pu->intra.mode_chroma;
@ -1287,15 +1284,18 @@ void uvg_quantize_lcu_residual(
const bool luma, const bool luma,
const bool chroma, const bool chroma,
const bool jccr, const bool jccr,
const int32_t x, const cu_loc_t * cu_loc,
const int32_t y,
const uint8_t depth, const uint8_t depth,
cu_info_t *cur_pu, cu_info_t *cur_pu,
lcu_t* lcu, lcu_t* lcu,
bool early_skip, bool early_skip,
enum uvg_tree_type tree_type) enum uvg_tree_type tree_type)
{ {
const int32_t width = LCU_WIDTH >> depth; const int x = cu_loc->x;
const int y = cu_loc->y;
const int width = cu_loc->width;
const int height = cu_loc->height;
const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
if (cur_pu == NULL) { if (cur_pu == NULL) {
@ -1324,14 +1324,22 @@ void uvg_quantize_lcu_residual(
// Split transform and increase depth // Split transform and increase depth
const int offset = width / 2; const int offset = width / 2;
const int32_t x2 = x + offset; for (int j = 0; j < 2; ++j) {
const int32_t y2 = y + offset; for (int i = 0; i < 2; ++i) {
// Each of the four children covers one quadrant: half the parent's width and
// height. loc must be writable because uvg_cu_loc_ctor fills it in.
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, (x + i * offset), (y + j * offset), width / 2, height / 2);
// jccr is currently not supported if transform is split
uvg_quantize_lcu_residual(state, luma, chroma, 0, &loc, depth + 1, NULL, lcu, early_skip, tree_type);
}
}
// jccr is currently not supported if transform is split //const int32_t x2 = x + offset;
uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y, depth + 1, NULL, lcu, early_skip, tree_type); //const int32_t y2 = y + offset;
uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y, depth + 1, NULL, lcu, early_skip, tree_type);
uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y2, depth + 1, NULL, lcu, early_skip, tree_type); //uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y, depth + 1, NULL, lcu, early_skip, tree_type);
uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y2, depth + 1, NULL, lcu, early_skip, tree_type); //uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y, depth + 1, NULL, lcu, early_skip, tree_type);
//uvg_quantize_lcu_residual(state, luma, chroma, 0, x, y2, depth + 1, NULL, lcu, early_skip, tree_type);
//uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y2, depth + 1, NULL, lcu, early_skip, tree_type);
// Propagate coded block flags from child CUs to parent CU. // Propagate coded block flags from child CUs to parent CU.
uint16_t child_cbfs[3] = { uint16_t child_cbfs[3] = {
@ -1348,15 +1356,18 @@ void uvg_quantize_lcu_residual(
} else { } else {
// Process a leaf TU. // Process a leaf TU.
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, x, y, width, height);
if (luma) { if (luma) {
quantize_tr_residual(state, COLOR_Y, x, y, depth, cur_pu, lcu, early_skip, tree_type); quantize_tr_residual(state, COLOR_Y, &loc, depth, cur_pu, lcu, early_skip, tree_type);
} }
if (chroma) { if (chroma) {
quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu, early_skip, tree_type); quantize_tr_residual(state, COLOR_U, &loc, depth, cur_pu, lcu, early_skip, tree_type);
quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip, tree_type); quantize_tr_residual(state, COLOR_V, &loc, depth, cur_pu, lcu, early_skip, tree_type);
} }
if (jccr && cur_pu->tr_depth == cur_pu->depth) { if (jccr && cur_pu->tr_depth == cur_pu->depth) {
quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip, tree_type); quantize_tr_residual(state, COLOR_UV, &loc, depth, cur_pu, lcu, early_skip, tree_type);
} }
if(chroma && jccr && cur_pu->tr_depth == cur_pu->depth) { if(chroma && jccr && cur_pu->tr_depth == cur_pu->depth) {
assert( 0 && "Trying to quantize both jccr and regular at the same time.\n"); assert( 0 && "Trying to quantize both jccr and regular at the same time.\n");

View file

@ -89,8 +89,7 @@ void uvg_quantize_lcu_residual(
bool luma, bool luma,
bool chroma, bool chroma,
const bool jccr, const bool jccr,
int32_t x, const cu_loc_t* cu_loc,
int32_t y,
uint8_t depth, uint8_t depth,
cu_info_t *cur_cu, cu_info_t *cur_cu,
lcu_t* lcu, lcu_t* lcu,