[mtt] change most if not all of search hierarchy to use cu_loc_t

This commit is contained in:
Joose Sainio 2022-09-06 13:45:08 +03:00 committed by Marko Viitanen
parent 0ec16967a1
commit 26dcadc149
17 changed files with 579 additions and 717 deletions

View file

@ -77,55 +77,6 @@ typedef enum {
MTS_TR_NUM = 6, MTS_TR_NUM = 6,
} mts_idx; } mts_idx;
// Number of prediction units (PUs) in a CU, indexed by partition mode.
extern const uint8_t uvg_part_mode_num_parts[];
// PU offsets indexed as [part_mode][pu_index][0 = x, 1 = y].
// PU_GET_X() / PU_GET_Y() multiply an entry by cu_width and divide by 4.
extern const uint8_t uvg_part_mode_offsets[][4][2];
// PU sizes indexed as [part_mode][pu_index][0 = width, 1 = height].
// PU_GET_W() / PU_GET_H() multiply an entry by cu_width and divide by 4.
extern const uint8_t uvg_part_mode_sizes[][4][2];
/**
 * \brief Left edge of one PU inside its CU.
 *
 * The table entry is multiplied by cu_width first and the product is then
 * divided by 4 with integer division.
 *
 * \param part_mode  partition mode of the containing CU
 * \param cu_width   width of the containing CU
 * \param cu_x       x coordinate of the containing CU
 * \param i          number of the PU
 * \return x coordinate of the left edge of the PU
 */
#define PU_GET_X(part_mode, cu_width, cu_x, i) \
  ((cu_x) + ((uvg_part_mode_offsets[(part_mode)][(i)][0] * (cu_width)) / 4))
/**
 * \brief Top edge of one PU inside its CU.
 *
 * The table entry is multiplied by cu_width first and the product is then
 * divided by 4 with integer division.
 *
 * \param part_mode  partition mode of the containing CU
 * \param cu_width   width of the containing CU
 * \param cu_y       y coordinate of the containing CU
 * \param i          number of the PU
 * \return y coordinate of the top edge of the PU
 */
#define PU_GET_Y(part_mode, cu_width, cu_y, i) \
  ((cu_y) + ((uvg_part_mode_offsets[(part_mode)][(i)][1] * (cu_width)) / 4))
/**
 * \brief Width of one PU inside its CU.
 *
 * The table entry is multiplied by cu_width first and the product is then
 * divided by 4 with integer division.
 *
 * \param part_mode  partition mode of the containing CU
 * \param cu_width   width of the containing CU
 * \param i          number of the PU
 * \return width of the PU
 */
#define PU_GET_W(part_mode, cu_width, i) \
  ((uvg_part_mode_sizes[(part_mode)][(i)][0] * (cu_width)) / 4)
/**
 * \brief Height of one PU inside its CU.
 *
 * The table entry is multiplied by cu_width first and the product is then
 * divided by 4 with integer division.
 *
 * \param part_mode  partition mode of the containing CU
 * \param cu_width   width of the containing CU
 * \param i          number of the PU
 * \return height of the PU
 */
#define PU_GET_H(part_mode, cu_width, i) \
  ((uvg_part_mode_sizes[(part_mode)][(i)][1] * (cu_width)) / 4)
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// TYPES // TYPES
@ -142,6 +93,25 @@ enum uvg_tree_type {
UVG_CHROMA_T = 2 UVG_CHROMA_T = 2
}; };
// Kinds of split that can be recorded for a block.
// Every value fits in three bits. The values from BT_HOR_SPLIT upward set one
// of the two upper bits, which the comment on CAN_QT_SPLIT describes as
// marking an MTT split; NO_SPLIT and QT_SPLIT leave both upper bits clear.
enum split_type {
NO_SPLIT = 0,
QT_SPLIT = 1,
BT_HOR_SPLIT = 2,
BT_VER_SPLIT = 3,
TT_HOR_SPLIT = 4,
TT_VER_SPLIT = 5,
};
// Split history of a block paired with a depth counter.
typedef struct {
uint32_t split_tree; // NOTE(review): presumably one three-bit split_type per depth, as the CAN_QT_SPLIT comment describes — confirm
uint8_t current_depth; // NOTE(review): presumably the depth this entry currently refers to — confirm against the users of this struct
} split_tree_t;
// Split for each depth takes three bits like xxy where if either x bit is set
// it is a MTT split, and if there are any MTT split QT split is not allowed.
//
// The mask 0x6DB6DB6 is the bit pattern 110 repeated nine times, i.e. both
// x bits of the nine lowest depths. The 0x prefix is required: without it the
// token is not a valid integer constant and every use of the macro fails to
// compile.
//
// x: packed split tree value (three bits per depth).
// Evaluates to non-zero when no MTT split is recorded in those nine depths.
#define CAN_QT_SPLIT(x) (((x) & 0x6DB6DB6) == 0)
/** /**
* \brief Struct for CU info * \brief Struct for CU info
*/ */
@ -149,7 +119,6 @@ typedef struct
{ {
uint8_t type : 3; //!< \brief block type, one of cu_type_t values uint8_t type : 3; //!< \brief block type, one of cu_type_t values
uint8_t depth : 3; //!< \brief depth / size of this block uint8_t depth : 3; //!< \brief depth / size of this block
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
uint8_t tr_depth : 3; //!< \brief transform depth uint8_t tr_depth : 3; //!< \brief transform depth
uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped uint8_t skipped : 1; //!< \brief flag to indicate this block is skipped
uint8_t merged : 1; //!< \brief flag to indicate this block is merged uint8_t merged : 1; //!< \brief flag to indicate this block is merged

View file

@ -825,11 +825,14 @@ static void encode_transform_coeff(
* \param depth Depth from LCU. * \param depth Depth from LCU.
* \return if non-zero mvd is coded * \return if non-zero mvd is coded
*/ */
int uvg_encode_inter_prediction_unit(encoder_state_t * const state, int uvg_encode_inter_prediction_unit(
cabac_data_t * const cabac, encoder_state_t * const state,
const cu_info_t * const cur_cu, cabac_data_t * const cabac,
int x, int y, int width, int height, const cu_info_t * const cur_cu,
int depth, lcu_t* lcu, double* bits_out) int depth,
lcu_t* lcu,
double* bits_out,
const cu_loc_t* const cu_loc)
{ {
// Mergeflag // Mergeflag
int16_t num_cand = 0; int16_t num_cand = 0;
@ -864,8 +867,8 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
// Code Inter Dir // Code Inter Dir
uint8_t inter_dir = cur_cu->inter.mv_dir; uint8_t inter_dir = cur_cu->inter.mv_dir;
if (cur_cu->part_size == SIZE_2Nx2N || (LCU_WIDTH >> depth) != 4) { // ToDo: limit on 4x8/8x4 if ((LCU_WIDTH >> depth) != 4) { // ToDo: limit on 4x8/8x4
uint32_t inter_dir_ctx = (7 - ((uvg_math_floor_log2(width) + uvg_math_floor_log2(height) + 1) >> 1)); uint32_t inter_dir_ctx = (7 - ((uvg_math_floor_log2(cu_loc->width) + uvg_math_floor_log2(cu_loc->height) + 1) >> 1));
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.inter_dir[inter_dir_ctx]), (inter_dir == 3), bits, "inter_pred_idc"); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.inter_dir[inter_dir_ctx]), (inter_dir == 3), bits, "inter_pred_idc");
} }
@ -916,16 +919,14 @@ int uvg_encode_inter_prediction_unit(encoder_state_t * const state,
if (lcu) { if (lcu) {
uvg_inter_get_mv_cand( uvg_inter_get_mv_cand(
state, state,
x, y, width, height, mv_cand, cur_cu, lcu, ref_list_idx,
mv_cand, cur_cu, cu_loc);
lcu, ref_list_idx);
} }
else { else {
uvg_inter_get_mv_cand_cua( uvg_inter_get_mv_cand_cua(
state, state,
x, y, width, height, mv_cand, cur_cu, ref_list_idx, cu_loc
mv_cand, cur_cu, ref_list_idx );
);
} }
uint8_t cu_mv_cand = CU_GET_MV_CAND(cur_cu, ref_list_idx); uint8_t cu_mv_cand = CU_GET_MV_CAND(cur_cu, ref_list_idx);
@ -1346,11 +1347,11 @@ bool uvg_write_split_flag(
if (no_split && allow_split) { if (no_split && allow_split) {
// Get left and top block split_flags and if they are present and true, increase model number // Get left and top block split_flags and if they are present and true, increase model number
// ToDo: should use height and width to increase model, PU_GET_W() ? // ToDo: should use height and width to increase model, PU_GET_W() ?
if (left_cu && PU_GET_H(left_cu->part_size, LCU_WIDTH >> left_cu->depth, 0) < LCU_WIDTH >> depth) { if (left_cu && LCU_WIDTH >> left_cu->depth < LCU_WIDTH >> depth) {
split_model++; split_model++;
} }
if (above_cu && PU_GET_W(above_cu->part_size, LCU_WIDTH >> above_cu->depth, 0) < LCU_WIDTH >> depth) { if (above_cu && LCU_WIDTH >> above_cu->depth < LCU_WIDTH >> depth) {
split_model++; split_model++;
} }
@ -1625,22 +1626,15 @@ void uvg_encode_coding_tree(
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
uint8_t imv_mode = UVG_IMV_OFF; uint8_t imv_mode = UVG_IMV_OFF;
const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size];
bool non_zero_mvd = false; bool non_zero_mvd = false;
// TODO: height for non-square blocks
const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, cu_loc.x, cu_loc.y);
for (int i = 0; i < num_pu; ++i) { non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, depth, NULL, NULL, &cu_loc);
// TODO: height for non-square blocks DBG_PRINT_MV(state, pu_x, pu_y, pu_w, pu_h, cur_pu);
const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, x, i); uvg_hmvp_add_mv(state, x, y, width, height, cur_pu);
const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, y, i);
const int pu_w = PU_GET_W(cur_cu->part_size, cu_width, i);
const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, i);
const cu_info_t *cur_pu = uvg_cu_array_at_const(used_array, pu_x, pu_y);
non_zero_mvd |= uvg_encode_inter_prediction_unit(state, cabac, cur_pu, pu_x, pu_y, pu_w, pu_h, depth, NULL, NULL);
DBG_PRINT_MV(state, pu_x, pu_y, pu_w, pu_h, cur_pu);
uvg_hmvp_add_mv(state, x, y, pu_w, pu_h, cur_pu);
}
// imv mode, select between fullpel, half-pel and quarter-pel resolutions // imv mode, select between fullpel, half-pel and quarter-pel resolutions
// 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel // 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel
@ -1661,7 +1655,7 @@ void uvg_encode_coding_tree(
int cbf = cbf_is_set_any(cur_cu->cbf, depth); int cbf = cbf_is_set_any(cur_cu->cbf, depth);
// Only need to signal coded block flag if not skipped or merged // Only need to signal coded block flag if not skipped or merged
// skip = no coded residual, merge = coded residual // skip = no coded residual, merge = coded residual
if (cur_cu->part_size != SIZE_2Nx2N || !cur_cu->merged) { if (!cur_cu->merged) {
cabac->cur_ctx = &(cabac->ctx.cu_qt_root_cbf_model); cabac->cur_ctx = &(cabac->ctx.cu_qt_root_cbf_model);
CABAC_BIN(cabac, cbf, "rqt_root_cbf"); CABAC_BIN(cabac, cbf, "rqt_root_cbf");
} }
@ -1747,15 +1741,18 @@ end:
double uvg_mock_encode_coding_unit( double uvg_mock_encode_coding_unit(
encoder_state_t* const state, encoder_state_t* const state,
cabac_data_t* cabac, cabac_data_t* cabac,
int x, const cu_loc_t* const cu_loc,
int y,
int depth,
lcu_t* lcu, lcu_t* lcu,
cu_info_t* cur_cu, cu_info_t* cur_cu,
enum uvg_tree_type tree_type) { enum uvg_tree_type tree_type) {
double bits = 0; double bits = 0;
const encoder_control_t* const ctrl = state->encoder_control; const encoder_control_t* const ctrl = state->encoder_control;
const int x = cu_loc->x;
const int y = cu_loc->y;
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T); int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T);
int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T); int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T);
@ -1846,7 +1843,7 @@ double uvg_mock_encode_coding_unit(
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
const uint8_t imv_mode = UVG_IMV_OFF; const uint8_t imv_mode = UVG_IMV_OFF;
const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, x, y, cu_width, cu_width, depth, lcu, &bits); const int non_zero_mvd = uvg_encode_inter_prediction_unit(state, cabac, cur_cu, depth, lcu, &bits, cu_loc);
if (ctrl->cfg.amvr && non_zero_mvd) { if (ctrl->cfg.amvr && non_zero_mvd) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.imv_flag[0]), imv_mode, bits, "imv_flag"); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.imv_flag[0]), imv_mode, bits, "imv_flag");
if (imv_mode > UVG_IMV_OFF) { if (imv_mode > UVG_IMV_OFF) {

View file

@ -78,20 +78,19 @@ void uvg_encode_mvd(encoder_state_t * const state,
double uvg_mock_encode_coding_unit( double uvg_mock_encode_coding_unit(
encoder_state_t* const state, encoder_state_t* const state,
cabac_data_t* cabac, cabac_data_t* cabac,
int x, const cu_loc_t* const cu_loc,
int y,
int depth,
lcu_t* lcu, lcu_t* lcu,
cu_info_t* cur_cu, cu_info_t* cur_cu,
enum uvg_tree_type tree_type); enum uvg_tree_type tree_type);
int uvg_encode_inter_prediction_unit(encoder_state_t* const state, int uvg_encode_inter_prediction_unit(
cabac_data_t* const cabac, encoder_state_t* const state,
const cu_info_t* const cur_cu, cabac_data_t* const cabac,
int x, int y, int width, int height, const cu_info_t* const cur_cu,
int depth, int depth,
lcu_t* lcu, lcu_t* lcu,
double* bits_out); double* bits_out,
const cu_loc_t* const cu_loc);
void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state, void uvg_encode_intra_luma_coding_unit(const encoder_state_t* const state,
cabac_data_t* const cabac, cabac_data_t* const cabac,

View file

@ -855,13 +855,11 @@ static void filter_deblock_edge_luma(encoder_state_t * const state,
uint8_t max_filter_length_P = 0; uint8_t max_filter_length_P = 0;
uint8_t max_filter_length_Q = 0; uint8_t max_filter_length_Q = 0;
const int cu_size = LCU_WIDTH >> cu_q->depth; const int cu_size = LCU_WIDTH >> cu_q->depth;
const int pu_part_idx = (y + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ? // TODO: NON square
1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0) const int pu_size = dir == EDGE_HOR ? cu_size
+ (x + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0); : cu_size;
const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx) const int pu_pos = dir == EDGE_HOR ? y_coord
: PU_GET_W(cu_q->part_size, cu_size, pu_part_idx); : x_coord;
const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx)
: x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx);
get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord, get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
dir, tu_boundary, dir, tu_boundary,
LCU_WIDTH >> cu_p->tr_depth, LCU_WIDTH >> cu_p->tr_depth,
@ -1088,13 +1086,10 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
} }
const int cu_size = LCU_WIDTH >> (cu_q->depth + (tree_type == UVG_CHROMA_T)); const int cu_size = LCU_WIDTH >> (cu_q->depth + (tree_type == UVG_CHROMA_T));
const int pu_part_idx = ((y << (tree_type != UVG_CHROMA_T)) + PU_GET_H(cu_q->part_size, cu_size, 0) <= y_coord ? // TODO: non-square
1 + (uvg_part_mode_num_parts[cu_q->part_size] >> 2) : 0) const int pu_size = dir == EDGE_HOR ? cu_size : cu_size;
+ ((x << (tree_type != UVG_CHROMA_T)) + PU_GET_W(cu_q->part_size, cu_size, 0) <= x_coord ? 1 : 0); const int pu_pos = dir == EDGE_HOR ? y_coord
const int pu_size = dir == EDGE_HOR ? PU_GET_H(cu_q->part_size, cu_size, pu_part_idx) : x_coord;
: PU_GET_W(cu_q->part_size, cu_size, pu_part_idx);
const int pu_pos = dir == EDGE_HOR ? y_coord - PU_GET_Y(cu_q->part_size, cu_size, 0, pu_part_idx)
: x_coord - PU_GET_X(cu_q->part_size, cu_size, 0, pu_part_idx);
uint8_t max_filter_length_P = 0; uint8_t max_filter_length_P = 0;
uint8_t max_filter_length_Q = 0; uint8_t max_filter_length_Q = 0;

View file

@ -375,23 +375,26 @@ static void inter_cp_with_ext_border(const uvg_pixel *ref_buf, int ref_stride,
* \param predict_luma Enable or disable luma prediction for this call. * \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call. * \param predict_chroma Enable or disable chroma prediction for this call.
*/ */
static unsigned inter_recon_unipred(const encoder_state_t * const state, static unsigned inter_recon_unipred(
const uvg_picture * const ref, const encoder_state_t * const state,
int32_t pu_x, const uvg_picture * const ref,
int32_t pu_y, int32_t out_stride_luma,
int32_t pu_w, const mv_t mv_param[2],
int32_t pu_h, yuv_t *yuv_px,
int32_t out_stride_luma, yuv_im_t *yuv_im,
const mv_t mv_param[2], bool predict_luma,
yuv_t *yuv_px, bool predict_chroma,
yuv_im_t *yuv_im, const cu_loc_t* const cu_loc)
bool predict_luma,
bool predict_chroma)
{ {
vector2d_t int_mv = { mv_param[0], mv_param[1] }; vector2d_t int_mv = { mv_param[0], mv_param[1] };
uvg_change_precision_vector2d(INTERNAL_MV_PREC, 0, &int_mv); uvg_change_precision_vector2d(INTERNAL_MV_PREC, 0, &int_mv);
const int pu_x = cu_loc->x;
const int pu_y = cu_loc->y;
const int pu_w = cu_loc->width;
const int pu_h = cu_loc->height;
const vector2d_t int_mv_in_frame = { const vector2d_t int_mv_in_frame = {
int_mv.x + pu_x + state->tile->offset_x, int_mv.x + pu_x + state->tile->offset_x,
int_mv.y + pu_y + state->tile->offset_y int_mv.y + pu_y + state->tile->offset_y
@ -507,17 +510,15 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
* \param predict_luma Enable or disable luma prediction for this call. * \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call. * \param predict_chroma Enable or disable chroma prediction for this call.
*/ */
void uvg_inter_recon_bipred(const encoder_state_t *const state, void uvg_inter_recon_bipred(
const encoder_state_t *const state,
const uvg_picture *ref1, const uvg_picture *ref1,
const uvg_picture *ref2, const uvg_picture *ref2,
int32_t pu_x,
int32_t pu_y,
int32_t pu_w,
int32_t pu_h,
mv_t mv_param[2][2], mv_t mv_param[2][2],
lcu_t *lcu, lcu_t *lcu,
bool predict_luma, bool predict_luma,
bool predict_chroma) bool predict_chroma,
const cu_loc_t* const cu_loc)
{ {
// Allocate maximum size arrays for interpolated and copied samples // Allocate maximum size arrays for interpolated and copied samples
ALIGNED(64) uvg_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE]; ALIGNED(64) uvg_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
@ -525,6 +526,11 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
ALIGNED(64) uvg_pixel_im im_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE]; ALIGNED(64) uvg_pixel_im im_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) uvg_pixel_im im_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE]; ALIGNED(64) uvg_pixel_im im_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
const int pu_x = cu_loc->x;
const int pu_y = cu_loc->y;
const int pu_w = cu_loc->width;
const int pu_h = cu_loc->height;
yuv_t px_L0; yuv_t px_L0;
px_L0.size = pu_w * pu_h; px_L0.size = pu_w * pu_h;
px_L0.y = &px_buf_L0[0]; px_L0.y = &px_buf_L0[0];
@ -551,10 +557,10 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
// Sample blocks from both reference picture lists. // Sample blocks from both reference picture lists.
// Flags state if the outputs were written to high-precision / interpolated sample buffers. // Flags state if the outputs were written to high-precision / interpolated sample buffers.
unsigned im_flags_L0 = inter_recon_unipred(state, ref1, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[0], unsigned im_flags_L0 = inter_recon_unipred(state, ref1, pu_w, mv_param[0], &px_L0, &im_L0, predict_luma, predict_chroma,
&px_L0, &im_L0, predict_luma, predict_chroma); cu_loc);
unsigned im_flags_L1 = inter_recon_unipred(state, ref2, pu_x, pu_y, pu_w, pu_h, pu_w, mv_param[1], unsigned im_flags_L1 = inter_recon_unipred(state, ref2, pu_w, mv_param[1], &px_L1, &im_L1, predict_luma, predict_chroma,
&px_L1, &im_L1, predict_luma, predict_chroma); cu_loc);
// After reconstruction, merge the predictors by taking an average of each pixel // After reconstruction, merge the predictors by taking an average of each pixel
uvg_bipred_average(lcu, &px_L0, &px_L1, &im_L0, &im_L1, uvg_bipred_average(lcu, &px_L0, &px_L1, &im_L0, &im_L1,
@ -578,19 +584,14 @@ void uvg_inter_recon_bipred(const encoder_state_t *const state,
* \param predict_luma Enable or disable luma prediction for this call. * \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call. * \param predict_chroma Enable or disable chroma prediction for this call.
*/ */
void uvg_inter_recon_cu(const encoder_state_t * const state, void uvg_inter_recon_cu(
lcu_t *lcu, const encoder_state_t * const state,
int32_t x, lcu_t *lcu,
int32_t y, bool predict_luma,
int32_t width, bool predict_chroma,
bool predict_luma, const cu_loc_t* const cu_loc)
bool predict_chroma)
{ {
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y)); uvg_inter_pred_pu(state, lcu, predict_luma, predict_chroma, cu_loc);
const int num_pu = uvg_part_mode_num_parts[cu->part_size];
for (int i = 0; i < num_pu; ++i) {
uvg_inter_pred_pu(state, lcu, x, y, width, predict_luma, predict_chroma, i);
}
} }
static void ibc_recon_cu(const encoder_state_t * const state, static void ibc_recon_cu(const encoder_state_t * const state,
@ -599,8 +600,7 @@ static void ibc_recon_cu(const encoder_state_t * const state,
int32_t y, int32_t y,
int32_t width, int32_t width,
bool predict_luma, bool predict_luma,
bool predict_chroma, bool predict_chroma)
int i_pu)
{ {
const int x_scu = SUB_SCU(x); const int x_scu = SUB_SCU(x);
const int y_scu = SUB_SCU(y); const int y_scu = SUB_SCU(y);
@ -668,79 +668,63 @@ static void ibc_recon_cu(const encoder_state_t * const state,
* \param predict_chroma Enable or disable chroma prediction for this call. * \param predict_chroma Enable or disable chroma prediction for this call.
* \param i_pu Index of the PU. Always zero for 2Nx2N. Used for SMP+AMP. * \param i_pu Index of the PU. Always zero for 2Nx2N. Used for SMP+AMP.
*/ */
void uvg_inter_pred_pu(const encoder_state_t * const state, void uvg_inter_pred_pu(
lcu_t *lcu, const encoder_state_t * const state,
int32_t x, lcu_t *lcu,
int32_t y, bool predict_luma,
int32_t width, bool predict_chroma,
bool predict_luma, const cu_loc_t* const cu_loc)
bool predict_chroma,
int i_pu)
{ {
const int x_scu = SUB_SCU(x); const int x_scu = SUB_SCU(cu_loc->x);
const int y_scu = SUB_SCU(y); const int y_scu = SUB_SCU(cu_loc->y);
cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu); cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_scu, y_scu);
const int pu_x = PU_GET_X(cu->part_size, width, x, i_pu);
const int pu_y = PU_GET_Y(cu->part_size, width, y, i_pu);
const int pu_w = PU_GET_W(cu->part_size, width, i_pu);
const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
if (cu->type == CU_IBC) { if (pu->inter.mv_dir == 3) {
ibc_recon_cu(state, lcu, x, y, width, predict_luma, predict_chroma, i_pu); const uvg_picture *const refs[2] = {
} else { state->frame->ref->images[
state->frame->ref_LX[0][
pu->inter.mv_ref[0]]],
state->frame->ref->images[
state->frame->ref_LX[1][
pu->inter.mv_ref[1]]],
};
uvg_inter_recon_bipred(state,
refs[0], refs[1],
pu->inter.mv, lcu,
predict_luma, predict_chroma,
cu_loc);
}
else if (pu->type == CU_IBC) {
ibc_recon_cu(state, lcu, cu_loc->x, cu_loc->y, cu_loc->width, predict_luma, predict_chroma);
} else{
const int mv_idx = pu->inter.mv_dir - 1;
const uvg_picture *const ref =
state->frame->ref->images[
state->frame->ref_LX[mv_idx][
pu->inter.mv_ref[mv_idx]]];
if (pu->inter.mv_dir == 3) { const unsigned offset_luma = SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x);
const uvg_picture * const refs[2] = { const unsigned offset_chroma = SUB_SCU(cu_loc->y) / 2 * LCU_WIDTH_C + SUB_SCU(cu_loc->x) / 2;
state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]], yuv_t lcu_adapter;
state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]], lcu_adapter.size = cu_loc->width * cu_loc->height;
}; lcu_adapter.y = lcu->rec.y + offset_luma,
uvg_inter_recon_bipred( lcu_adapter.u = lcu->rec.u + offset_chroma,
state, lcu_adapter.v = lcu->rec.v + offset_chroma,
refs[0],
refs[1],
pu_x,
pu_y,
pu_w,
pu_h,
pu->inter.mv,
lcu,
predict_luma,
predict_chroma);
} else {
const int mv_idx = pu->inter.mv_dir - 1;
const uvg_picture * const ref =
state->frame->ref->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]];
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x); inter_recon_unipred(state,
const unsigned offset_chroma = ref,
SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2; LCU_WIDTH, pu->inter.mv[mv_idx],
yuv_t lcu_adapter; &lcu_adapter,
lcu_adapter.size = pu_w * pu_h; NULL,
lcu_adapter.y = lcu->rec.y + offset_luma, predict_luma,
lcu_adapter.u = lcu->rec.u + offset_chroma, predict_chroma,
lcu_adapter.v = lcu->rec.v + offset_chroma, cu_loc);
inter_recon_unipred(
state,
ref,
pu_x,
pu_y,
pu_w,
pu_h,
LCU_WIDTH,
pu->inter.mv[mv_idx],
&lcu_adapter,
NULL,
predict_luma,
predict_chroma);
}
} }
if (predict_chroma && state->encoder_control->cfg.jccr) { if (predict_chroma && state->encoder_control->cfg.jccr) {
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); uvg_pixels_blit(lcu->rec.v + offset, lcu->rec.joint_v + offset, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C);
} }
} }
@ -915,14 +899,12 @@ static bool is_b0_cand_coded(int x, int y, int width, int height)
* \param ref_idx index in the reference list * \param ref_idx index in the reference list
* \param cand_out will be filled with C0 and C1 candidates * \param cand_out will be filled with C0 and C1 candidates
*/ */
static void get_temporal_merge_candidates(const encoder_state_t * const state, static void get_temporal_merge_candidates(
int32_t x, const encoder_state_t * const state,
int32_t y, const cu_loc_t* const cu_loc,
int32_t width, uint8_t ref_list,
int32_t height, uint8_t ref_idx,
uint8_t ref_list, merge_candidates_t *cand_out)
uint8_t ref_idx,
merge_candidates_t *cand_out)
{ {
/* /*
Predictor block locations Predictor block locations
@ -951,8 +933,8 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
cu_array_t *ref_cu_array = state->frame->ref->cu_arrays[colocated_ref]; cu_array_t *ref_cu_array = state->frame->ref->cu_arrays[colocated_ref];
int cu_per_width = ref_cu_array->width / SCU_WIDTH; int cu_per_width = ref_cu_array->width / SCU_WIDTH;
int32_t xColBr = x + width; int32_t xColBr = cu_loc->x + cu_loc->width;
int32_t yColBr = y + height; int32_t yColBr = cu_loc->y + cu_loc->height;
// C0 must be available // C0 must be available
if (xColBr < state->encoder_control->in.width && if (xColBr < state->encoder_control->in.width &&
@ -972,8 +954,8 @@ static void get_temporal_merge_candidates(const encoder_state_t * const state,
} }
} }
} }
int32_t xColCtr = x + (width / 2); int32_t xColCtr = cu_loc->x + (cu_loc->width / 2);
int32_t yColCtr = y + (height / 2); int32_t yColCtr = cu_loc->y + (cu_loc->height / 2);
// C1 must be inside the LCU, in the center position of current CU // C1 must be inside the LCU, in the center position of current CU
if (xColCtr < state->encoder_control->in.width && yColCtr < state->encoder_control->in.height) { if (xColCtr < state->encoder_control->in.width && yColCtr < state->encoder_control->in.height) {
@ -1254,10 +1236,7 @@ static void get_ibc_merge_candidates(const encoder_state_t * const state,
* \param lcu current LCU * \param lcu current LCU
* \param cand_out will be filled with A and B candidates * \param cand_out will be filled with A and B candidates
*/ */
static void get_spatial_merge_candidates(int32_t x, static void get_spatial_merge_candidates(const cu_loc_t* const cu_loc,
int32_t y,
int32_t width,
int32_t height,
int32_t picture_width, int32_t picture_width,
int32_t picture_height, int32_t picture_height,
lcu_t *lcu, lcu_t *lcu,
@ -1276,8 +1255,13 @@ static void get_spatial_merge_candidates(int32_t x,
|A1|_________| |A1|_________|
|A0| |A0|
*/ */
int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU const int32_t x_local = SUB_SCU(cu_loc->x); //!< coordinates from top-left of this LCU
int32_t y_local = SUB_SCU(y); const int32_t y_local = SUB_SCU(cu_loc->y);
const int x = cu_loc->x;
const int y = cu_loc->y;
const int width = cu_loc->width;
const int height = cu_loc->height;
// A0 and A1 availability testing // A0 and A1 availability testing
if (x != 0) { if (x != 0) {
cu_info_t *a1 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1); cu_info_t *a1 = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local + height - 1);
@ -1350,15 +1334,13 @@ static void get_spatial_merge_candidates(int32_t x,
* \param picture_height tile height in pixels * \param picture_height tile height in pixels
* \param cand_out will be filled with A and B candidates * \param cand_out will be filled with A and B candidates
*/ */
static void get_spatial_merge_candidates_cua(const cu_array_t *cua, static void get_spatial_merge_candidates_cua(
int32_t x, const cu_array_t *cua,
int32_t y, int32_t picture_width,
int32_t width, int32_t picture_height,
int32_t height, merge_candidates_t *cand_out,
int32_t picture_width, bool wpp,
int32_t picture_height, const cu_loc_t* const cu_loc)
merge_candidates_t *cand_out,
bool wpp)
{ {
/* /*
Predictor block locations Predictor block locations
@ -1370,8 +1352,12 @@ static void get_spatial_merge_candidates_cua(const cu_array_t *cua,
|A1|_________| |A1|_________|
|A0| |A0|
*/ */
int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU const int x = cu_loc->x;
int32_t y_local = SUB_SCU(y); const int y = cu_loc->y;
const int width = cu_loc->width;
const int height = cu_loc->height;
const int32_t x_local = SUB_SCU(x); //!< coordinates from top-left of this LCU
const int32_t y_local = SUB_SCU(y);
// A0 and A1 availability testing // A0 and A1 availability testing
if (x != 0) { if (x != 0) {
const cu_info_t *a1 = uvg_cu_array_at_const(cua, x - 1, y + height - 1); const cu_info_t *a1 = uvg_cu_array_at_const(cua, x - 1, y + height - 1);
@ -1484,15 +1470,13 @@ static bool add_temporal_candidate(const encoder_state_t *state,
/** /**
* \brief Pick two mv candidates from the spatial and temporal candidates. * \brief Pick two mv candidates from the spatial and temporal candidates.
*/ */
static void get_mv_cand_from_candidates(const encoder_state_t * const state, static void get_mv_cand_from_candidates(
int32_t x, const encoder_state_t * const state,
int32_t y, const merge_candidates_t *merge_cand,
int32_t width, const cu_info_t * const cur_cu,
int32_t height, int8_t reflist,
const merge_candidates_t *merge_cand, mv_t mv_cand[2][2],
const cu_info_t * const cur_cu, int ctu_row)
int8_t reflist,
mv_t mv_cand[2][2])
{ {
const cu_info_t *const *a = merge_cand->a; const cu_info_t *const *a = merge_cand->a;
const cu_info_t *const *b = merge_cand->b; const cu_info_t *const *b = merge_cand->b;
@ -1552,7 +1536,6 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
if (candidates < AMVP_MAX_NUM_CANDS) if (candidates < AMVP_MAX_NUM_CANDS)
{ {
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS; const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
int32_t num_cand = state->tile->frame->hmvp_size[ctu_row]; int32_t num_cand = state->tile->frame->hmvp_size[ctu_row];
for (int i = 0; i < MIN(/*MAX_NUM_HMVP_AVMPCANDS*/4,num_cand); i++) { for (int i = 0; i < MIN(/*MAX_NUM_HMVP_AVMPCANDS*/4,num_cand); i++) {
@ -1595,32 +1578,30 @@ static void get_mv_cand_from_candidates(const encoder_state_t * const state,
* \param lcu current LCU * \param lcu current LCU
* \param reflist reflist index (either 0 or 1) * \param reflist reflist index (either 0 or 1)
*/ */
void uvg_inter_get_mv_cand(const encoder_state_t * const state, void uvg_inter_get_mv_cand(
int32_t x, const encoder_state_t * const state,
int32_t y, mv_t mv_cand[2][2],
int32_t width, const cu_info_t * const cur_cu,
int32_t height, lcu_t *lcu,
mv_t mv_cand[2][2], int8_t reflist,
const cu_info_t * const cur_cu, const cu_loc_t* const cu_loc)
lcu_t *lcu,
int8_t reflist)
{ {
merge_candidates_t merge_cand = { 0 }; merge_candidates_t merge_cand = { 0 };
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level; const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
if (cur_cu->type == CU_IBC) { if (cur_cu->type == CU_IBC) {
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2]; mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand); get_ibc_merge_candidates(state, cur_cu,lcu,NULL, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2); memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2); memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
} else { } else {
get_spatial_merge_candidates(x, y, width, height, get_spatial_merge_candidates(cu_loc, state->tile->frame->width, state->tile->frame->height, lcu,
state->tile->frame->width, &merge_cand,
state->tile->frame->height, parallel_merge_level,
lcu, state->encoder_control->cfg.wpp);
&merge_cand, parallel_merge_level,state->encoder_control->cfg.wpp); get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); get_mv_cand_from_candidates(state, &merge_cand, cur_cu, reflist, mv_cand, cu_loc->y >> LOG2_LCU_WIDTH);
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
} }
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]); uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]); uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
} }
@ -1637,31 +1618,29 @@ void uvg_inter_get_mv_cand(const encoder_state_t * const state,
* \param cur_cu current CU * \param cur_cu current CU
* \param reflist reflist index (either 0 or 1) * \param reflist reflist index (either 0 or 1)
*/ */
void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state, void uvg_inter_get_mv_cand_cua(
int32_t x, const encoder_state_t * const state,
int32_t y, mv_t mv_cand[2][2],
int32_t width, const cu_info_t* cur_cu,
int32_t height, int8_t reflist,
mv_t mv_cand[2][2], const cu_loc_t* const cu_loc)
const cu_info_t* cur_cu,
int8_t reflist)
{ {
merge_candidates_t merge_cand = { 0 }; merge_candidates_t merge_cand = { 0 };
const cu_array_t *cua = state->tile->frame->cu_array; const cu_array_t *cua = state->tile->frame->cu_array;
if (cur_cu->type == CU_IBC) { if (cur_cu->type == CU_IBC) {
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2]; mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
get_ibc_merge_candidates(state, cur_cu, NULL,cua,x, y, width, height,ibc_mv_cand); get_ibc_merge_candidates(state, cur_cu, NULL,cua,cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2); memcpy(mv_cand[0], ibc_mv_cand[0], sizeof(mv_t) * 2);
memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2); memcpy(mv_cand[1], ibc_mv_cand[1], sizeof(mv_t) * 2);
} else { } else {
get_spatial_merge_candidates_cua(cua, get_spatial_merge_candidates_cua(cua,
x, y, width, height, state->tile->frame->width, state->tile->frame->height, &merge_cand, state->encoder_control->cfg.wpp,
state->tile->frame->width, state->tile->frame->height, cu_loc);
&merge_cand, state->encoder_control->cfg.wpp); get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); get_mv_cand_from_candidates(state, &merge_cand, cur_cu, reflist, mv_cand, cu_loc->y >> LOG2_LCU_WIDTH);
get_mv_cand_from_candidates(state, x, y, width, height, &merge_cand, cur_cu, reflist, mv_cand);
} }
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]); uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[0][0], &mv_cand[0][1]);
uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]); uvg_round_precision(INTERNAL_MV_PREC, 2, &mv_cand[1][0], &mv_cand[1][1]);
} }
@ -1885,23 +1864,23 @@ void uvg_round_precision_vector2d(int src, int dst, vector2d_t* mv) {
* \param lcu lcu containing the block * \param lcu lcu containing the block
* \return number of merge candidates * \return number of merge candidates
*/ */
uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state, uint8_t uvg_inter_get_merge_cand(
int32_t x, int32_t y, const encoder_state_t * const state,
int32_t width, int32_t height, const cu_loc_t* const cu_loc,
bool use_a1, bool use_b1, inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS], lcu_t *lcu)
lcu_t *lcu)
{ {
uint8_t candidates = 0; uint8_t candidates = 0;
int8_t zero_idx = 0; int8_t zero_idx = 0;
const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level; const uint8_t parallel_merge_level = state->encoder_control->cfg.log2_parallel_merge_level;
merge_candidates_t merge_cand = { 0 }; merge_candidates_t merge_cand = { 0 };
const uint8_t max_num_cands = state->encoder_control->cfg.max_merge; const uint8_t max_num_cands = state->encoder_control->cfg.max_merge;
// Current CU
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(cu_loc->x), SUB_SCU(cu_loc->y));
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
if(cur_cu->type == CU_IBC) { if(cur_cu->type == CU_IBC) {
mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2]; mv_t ibc_mv_cand[IBC_MRG_MAX_NUM_CANDS][2];
get_ibc_merge_candidates(state, cur_cu,lcu,NULL, x, y, width, height,ibc_mv_cand); get_ibc_merge_candidates(state, cur_cu,lcu,NULL, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height,ibc_mv_cand);
for (int i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) { for (int i = 0; i < IBC_MRG_MAX_NUM_CANDS; i++) {
mv_cand[i].dir = 1; mv_cand[i].dir = 1;
mv_cand[i].mv[0][0] = ibc_mv_cand[i][0]; mv_cand[i].mv[0][0] = ibc_mv_cand[i][0];
@ -1909,18 +1888,16 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
} }
return IBC_MRG_MAX_NUM_CANDS; return IBC_MRG_MAX_NUM_CANDS;
} }
get_spatial_merge_candidates(cu_loc, state->tile->frame->width, state->tile->frame->height, lcu,
get_spatial_merge_candidates(x, y, width, height, &merge_cand,
state->tile->frame->width, parallel_merge_level,
state->tile->frame->height, state->encoder_control->cfg.wpp);
lcu,
&merge_cand, parallel_merge_level, state->encoder_control->cfg.wpp);
const cu_info_t **a = merge_cand.a; const cu_info_t **a = merge_cand.a;
const cu_info_t **b = merge_cand.b; const cu_info_t **b = merge_cand.b;
if (!use_a1) a[1] = NULL; const int x = cu_loc->x;
if (!use_b1) b[1] = NULL; const int y = cu_loc->y;
if (different_mer(x, y, x, y - 1, parallel_merge_level) && add_merge_candidate(b[1], NULL, NULL, &mv_cand[candidates])) candidates++; if (different_mer(x, y, x, y - 1, parallel_merge_level) && add_merge_candidate(b[1], NULL, NULL, &mv_cand[candidates])) candidates++;
if (different_mer(x, y, x - 1, y, parallel_merge_level) && add_merge_candidate(a[1], b[1], NULL, &mv_cand[candidates])) candidates++; if (different_mer(x, y, x - 1, y, parallel_merge_level) && add_merge_candidate(a[1], b[1], NULL, &mv_cand[candidates])) candidates++;
@ -1941,7 +1918,7 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
for (int reflist = 0; reflist <= max_reflist; reflist++) { for (int reflist = 0; reflist <= max_reflist; reflist++) {
// Fetch temporal candidates for the current CU // Fetch temporal candidates for the current CU
// ToDo: change collocated_from_l0_flag to allow L1 ref // ToDo: change collocated_from_l0_flag to allow L1 ref
get_temporal_merge_candidates(state, x, y, width, height, 1, 0, &merge_cand); get_temporal_merge_candidates(state, cu_loc, 1, 0, &merge_cand);
// TODO: enable L1 TMVP candidate // TODO: enable L1 TMVP candidate
// get_temporal_merge_candidates(state, x, y, width, height, 2, 0, &merge_cand); // get_temporal_merge_candidates(state, x, y, width, height, 2, 0, &merge_cand);
@ -1973,7 +1950,7 @@ uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state,
if (candidates == max_num_cands) return candidates; if (candidates == max_num_cands) return candidates;
if (candidates != max_num_cands - 1) { if (candidates != max_num_cands - 1) {
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH); const uint32_t ctu_row = (cu_loc->y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS; const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
int32_t num_cand = state->tile->frame->hmvp_size[ctu_row]; int32_t num_cand = state->tile->frame->hmvp_size[ctu_row];

View file

@ -58,61 +58,51 @@ void uvg_change_precision_vector2d(int src, int dst, vector2d_t* mv);
void uvg_round_precision(int src, int dst, mv_t* hor, mv_t* ver); void uvg_round_precision(int src, int dst, mv_t* hor, mv_t* ver);
void uvg_round_precision_vector2d(int src, int dst, vector2d_t* mv); void uvg_round_precision_vector2d(int src, int dst, vector2d_t* mv);
void uvg_inter_recon_cu(const encoder_state_t * const state, void uvg_inter_recon_cu(
lcu_t *lcu, const encoder_state_t * const state,
int32_t x,
int32_t y,
int32_t width,
bool predict_luma,
bool predict_chroma);
void uvg_inter_pred_pu(const encoder_state_t * const state,
lcu_t *lcu, lcu_t *lcu,
int32_t x,
int32_t y,
int32_t width,
bool predict_luma, bool predict_luma,
bool predict_chroma, bool predict_chroma,
int i_pu); const cu_loc_t* const cu_loc);
void uvg_inter_pred_pu(
const encoder_state_t * const state,
lcu_t *lcu,
bool predict_luma,
bool predict_chroma,
const cu_loc_t* const cu_loc);
void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu); void uvg_hmvp_add_mv(const encoder_state_t* const state, uint32_t pic_x, uint32_t pic_y, uint32_t block_width, uint32_t block_height, const cu_info_t* cu);
void uvg_inter_recon_bipred(const encoder_state_t * const state, void uvg_inter_recon_bipred(
const uvg_picture * ref1, const encoder_state_t * const state,
const uvg_picture * ref2, const uvg_picture * ref1,
int32_t xpos, const uvg_picture * ref2,
int32_t ypos, mv_t mv_param[2][2],
int32_t width, lcu_t* lcu,
int32_t height, bool predict_luma,
mv_t mv_param[2][2], bool predict_chroma,
lcu_t* lcu, const cu_loc_t* const cu_loc);
bool predict_luma,
bool predict_chroma);
void uvg_inter_get_mv_cand(const encoder_state_t * const state, void uvg_inter_get_mv_cand(
int32_t x, const encoder_state_t * const state,
int32_t y, mv_t mv_cand[2][2],
int32_t width, const cu_info_t* cur_cu,
int32_t height, lcu_t *lcu,
mv_t mv_cand[2][2], int8_t reflist,
const cu_info_t* cur_cu, const cu_loc_t* const cu_loc);
lcu_t *lcu,
int8_t reflist);
void uvg_inter_get_mv_cand_cua(const encoder_state_t * const state, void uvg_inter_get_mv_cand_cua(
int32_t x, const encoder_state_t * const state,
int32_t y, mv_t mv_cand[2][2],
int32_t width, const cu_info_t* cur_cu,
int32_t height, int8_t reflist,
mv_t mv_cand[2][2], const cu_loc_t* const cu_loc);
const cu_info_t* cur_cu,
int8_t reflist);
uint8_t uvg_inter_get_merge_cand(const encoder_state_t * const state, uint8_t uvg_inter_get_merge_cand(
int32_t x, int32_t y, const encoder_state_t * const state,
int32_t width, int32_t height, const cu_loc_t* const cu_loc,
bool use_a1, bool use_b1, inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS],
inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS], lcu_t *lcu);
lcu_t *lcu);
#endif #endif

View file

@ -166,7 +166,6 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
cu_info_t *to = LCU_GET_CU_AT_PX(lcu, x, y); cu_info_t *to = LCU_GET_CU_AT_PX(lcu, x, y);
to->type = cu->type; to->type = cu->type;
to->depth = cu->depth; to->depth = cu->depth;
to->part_size = cu->part_size;
to->qp = cu->qp; to->qp = cu->qp;
//to->tr_idx = cu->tr_idx; //to->tr_idx = cu->tr_idx;
to->lfnst_idx = cu->lfnst_idx; to->lfnst_idx = cu->lfnst_idx;
@ -191,22 +190,6 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
} }
} }
static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width, uint8_t type)
{
const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size;
const int num_pu = uvg_part_mode_num_parts[part_mode];
for (int i = 0; i < num_pu; ++i) {
const int x_pu = PU_GET_X(part_mode, cu_width, x_local, i);
const int y_pu = PU_GET_Y(part_mode, cu_width, y_local, i);
const int width_pu = PU_GET_W(part_mode, cu_width, i);
const int height_pu = PU_GET_H(part_mode, cu_width, i);
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
pu->type = type;
lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu);
}
}
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, const cu_info_t *cur_cu) static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, const cu_info_t *cur_cu)
{ {
@ -559,7 +542,7 @@ static double cu_rd_cost_tr_split_accurate(
int cbf = cbf_is_set_any(pred_cu->cbf, depth); int cbf = cbf_is_set_any(pred_cu->cbf, depth);
// Only need to signal coded block flag if not skipped or merged // Only need to signal coded block flag if not skipped or merged
// skip = no coded residual, merge = coded residual // skip = no coded residual, merge = coded residual
if (pred_cu->type != CU_INTRA && (pred_cu->part_size != SIZE_2Nx2N || !pred_cu->merged)) { if (pred_cu->type != CU_INTRA && (!pred_cu->merged)) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf"); CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.cu_qt_root_cbf_model), cbf, tr_tree_bits, "rqt_root_cbf");
} }
@ -876,18 +859,20 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map)
*/ */
static double search_cu( static double search_cu(
encoder_state_t* const state, encoder_state_t* const state,
int x, const cu_loc_t* const cu_loc,
int y,
int depth,
lcu_t* work_tree, lcu_t* work_tree,
enum uvg_tree_type enum uvg_tree_type
tree_type) tree_type,
const split_tree_t split_tree)
{ {
const int depth = split_tree.current_depth;
const encoder_control_t* ctrl = state->encoder_control; const encoder_control_t* ctrl = state->encoder_control;
const videoframe_t * const frame = state->tile->frame; const videoframe_t * const frame = state->tile->frame;
const int cu_width = tree_type != UVG_CHROMA_T ? LCU_WIDTH >> depth : LCU_WIDTH_C >> depth; const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width;
const int cu_height = cu_width; // TODO: height const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height;
const int luma_width = LCU_WIDTH >> depth; const int x = cu_loc->x;
const int y = cu_loc->y;
const int luma_width = cu_loc->width;
assert(cu_width >= 4); assert(cu_width >= 4);
double cost = MAX_DOUBLE; double cost = MAX_DOUBLE;
double inter_zero_coeff_cost = MAX_DOUBLE; double inter_zero_coeff_cost = MAX_DOUBLE;
@ -896,7 +881,7 @@ static double search_cu(
cabac_data_t pre_search_cabac; cabac_data_t pre_search_cabac;
memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac)); memcpy(&pre_search_cabac, &state->search_cabac, sizeof(pre_search_cabac));
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH); const uint32_t ctu_row = (cu_loc->y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS; const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
cu_info_t hmvp_lut[MAX_NUM_HMVP_CANDS]; cu_info_t hmvp_lut[MAX_NUM_HMVP_CANDS];
@ -913,7 +898,7 @@ static double search_cu(
int32_t max; int32_t max;
} pu_depth_inter, pu_depth_intra; } pu_depth_inter, pu_depth_intra;
lcu_t *const lcu = &work_tree[depth]; lcu_t *const lcu = &work_tree[split_tree.current_depth];
int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T); int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T);
int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T); int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T);
@ -947,10 +932,9 @@ static double search_cu(
cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); cur_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
// Assign correct depth // Assign correct depth
cur_cu->depth = (depth > MAX_DEPTH) ? MAX_DEPTH : depth; cur_cu->depth = (split_tree.current_depth > MAX_DEPTH) ? MAX_DEPTH : split_tree.current_depth;
cur_cu->tr_depth = (depth > 0) ? depth : 1; cur_cu->tr_depth = cu_width > TR_MAX_WIDTH || cu_height > TR_MAX_WIDTH ? 1 : split_tree.current_depth;
cur_cu->type = CU_NOTSET; cur_cu->type = CU_NOTSET;
cur_cu->part_size = SIZE_2Nx2N;
cur_cu->qp = state->qp; cur_cu->qp = state->qp;
cur_cu->bdpcmMode = 0; cur_cu->bdpcmMode = 0;
cur_cu->tr_idx = 0; cur_cu->tr_idx = 0;
@ -969,9 +953,9 @@ static double search_cu(
int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max; int cu_width_inter_min = LCU_WIDTH >> pu_depth_inter.max;
bool can_use_inter = bool can_use_inter =
state->frame->slicetype != UVG_SLICE_I && state->frame->slicetype != UVG_SLICE_I &&
depth <= MAX_DEPTH && split_tree.current_depth <= MAX_DEPTH &&
( (
WITHIN(depth, pu_depth_inter.min, pu_depth_inter.max) || WITHIN(split_tree.current_depth, pu_depth_inter.min, pu_depth_inter.max) ||
// When the split was forced because the CTU is partially outside the // When the split was forced because the CTU is partially outside the
// frame, we permit inter coding even if pu_depth_inter would // frame, we permit inter coding even if pu_depth_inter would
// otherwise forbid it. // otherwise forbid it.
@ -983,10 +967,9 @@ static double search_cu(
double mode_cost; double mode_cost;
double mode_bitcost; double mode_bitcost;
uvg_search_cu_inter(state, uvg_search_cu_inter(state,
x, y, cu_loc, lcu,
depth, &mode_cost,
lcu, &mode_bitcost);
&mode_cost, &mode_bitcost);
if (mode_cost < cost) { if (mode_cost < cost) {
cost = mode_cost; cost = mode_cost;
inter_bitcost = mode_bitcost; inter_bitcost = mode_bitcost;
@ -1004,7 +987,7 @@ static double search_cu(
int32_t cu_width_intra_min = LCU_WIDTH >> pu_depth_intra.max; int32_t cu_width_intra_min = LCU_WIDTH >> pu_depth_intra.max;
bool can_use_intra = bool can_use_intra =
(WITHIN(depth, pu_depth_intra.min, pu_depth_intra.max) || (WITHIN(split_tree.current_depth, pu_depth_intra.min, pu_depth_intra.max) ||
// When the split was forced because the CTU is partially outside // When the split was forced because the CTU is partially outside
// the frame, we permit intra coding even if pu_depth_intra would // the frame, we permit intra coding even if pu_depth_intra would
// otherwise forbid it. // otherwise forbid it.
@ -1048,7 +1031,7 @@ static double search_cu(
int8_t intra_mode = intra_search.pred_cu.intra.mode; int8_t intra_mode = intra_search.pred_cu.intra.mode;
// TODO: This heavily relies to square CUs // TODO: This heavily relies to square CUs
if ((depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { if ((split_tree.current_depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
intra_search.pred_cu.joint_cb_cr = 0; intra_search.pred_cu.joint_cb_cr = 0;
// There is almost no benefit to doing the chroma mode search for // There is almost no benefit to doing the chroma mode search for
@ -1097,7 +1080,7 @@ static double search_cu(
} }
intra_search.pred_cu.intra.mode = intra_mode; intra_search.pred_cu.intra.mode = intra_mode;
if(tree_type == UVG_CHROMA_T) { if(tree_type == UVG_CHROMA_T) {
uvg_lcu_fill_trdepth(lcu, x_local, y_local, depth, depth, tree_type); uvg_lcu_fill_trdepth(lcu, x_local, y_local, split_tree.current_depth, split_tree.current_depth, tree_type);
} }
} }
if (intra_cost < cost) { if (intra_cost < cost) {
@ -1120,8 +1103,7 @@ static double search_cu(
double mode_cost; double mode_cost;
double mode_bitcost; double mode_bitcost;
uvg_search_cu_ibc(state, uvg_search_cu_ibc(state,
x, y, cu_loc,
depth,
lcu, lcu,
&mode_cost, &mode_bitcost); &mode_cost, &mode_bitcost);
if (mode_cost < cost) { if (mode_cost < cost) {
@ -1138,11 +1120,10 @@ static double search_cu(
// Reconstruct best mode because we need the reconstructed pixels for // Reconstruct best mode because we need the reconstructed pixels for
// mode search of adjacent CUs. // mode search of adjacent CUs.
if (cur_cu->type == CU_INTRA) { if (cur_cu->type == CU_INTRA) {
assert(cur_cu->part_size == SIZE_2Nx2N || cur_cu->part_size == SIZE_NxN);
bool recon_chroma = true; bool recon_chroma = true;
bool recon_luma = tree_type != UVG_CHROMA_T; bool recon_luma = tree_type != UVG_CHROMA_T;
if ((depth == 4) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { if ((split_tree.current_depth == 4) || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) {
recon_chroma = false; recon_chroma = false;
} }
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
@ -1153,7 +1134,7 @@ static double search_cu(
lcu, tree_type,recon_luma,recon_chroma); lcu, tree_type,recon_luma,recon_chroma);
if(depth == 4 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) { if(split_tree.current_depth == 4 && x % 8 && y % 8 && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) {
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma;
uvg_intra_recon_cu(state, uvg_intra_recon_cu(state,
x, y, x, y,
@ -1168,8 +1149,8 @@ static double search_cu(
const int split_type = intra_search.pred_cu.intra.isp_mode; const int split_type = intra_search.pred_cu.intra.isp_mode;
const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true); const int split_num = split_type == ISP_MODE_NO_ISP ? 0 : uvg_get_isp_split_num(cu_width, cu_height, split_type, true);
const int cbf_cb = cbf_is_set(cur_cu->cbf, depth, COLOR_U); const int cbf_cb = cbf_is_set(cur_cu->cbf, split_tree.current_depth, COLOR_U);
const int cbf_cr = cbf_is_set(cur_cu->cbf, depth, COLOR_V); const int cbf_cr = cbf_is_set(cur_cu->cbf, split_tree.current_depth, COLOR_V);
const int jccr = cur_cu->joint_cb_cr; const int jccr = cur_cu->joint_cb_cr;
for (int i = 0; i < split_num; ++i) { for (int i = 0; i < split_num; ++i) {
cu_loc_t isp_loc; cu_loc_t isp_loc;
@ -1181,15 +1162,14 @@ static double search_cu(
uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y); uvg_get_isp_cu_arr_coords(&tmp_x, &tmp_y);
cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH); cu_info_t* split_cu = LCU_GET_CU_AT_PX(lcu, tmp_x % LCU_WIDTH, tmp_y % LCU_WIDTH);
bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1; bool cur_cbf = (intra_search.best_isp_cbfs >> i) & 1;
// ISP_TODO: here, cbfs are also set for chroma for all ISP splits, is this behavior wanted? cbf_clear(&split_cu->cbf, split_tree.current_depth, COLOR_Y);
cbf_clear(&split_cu->cbf, depth, COLOR_Y); cbf_clear(&split_cu->cbf, split_tree.current_depth, COLOR_U);
cbf_clear(&split_cu->cbf, depth, COLOR_U); cbf_clear(&split_cu->cbf, split_tree.current_depth, COLOR_V);
cbf_clear(&split_cu->cbf, depth, COLOR_V);
if (cur_cbf) { if (cur_cbf) {
cbf_set(&split_cu->cbf, depth, COLOR_Y); cbf_set(&split_cu->cbf, split_tree.current_depth, COLOR_Y);
} }
if(cbf_cb) cbf_set(&split_cu->cbf, depth, COLOR_U); if(cbf_cb) cbf_set(&split_cu->cbf, split_tree.current_depth, COLOR_U);
if(cbf_cr) cbf_set(&split_cu->cbf, depth, COLOR_V); if(cbf_cr) cbf_set(&split_cu->cbf, split_tree.current_depth, COLOR_V);
split_cu->joint_cb_cr = jccr; split_cu->joint_cb_cr = jccr;
} }
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
@ -1205,24 +1185,20 @@ static double search_cu(
} }
// Reset transform depth because intra messes with them. // Reset transform depth because intra messes with them.
// This will no longer be necessary if the transform depths are not shared. // This will no longer be necessary if the transform depths are not shared.
int tr_depth = MAX(1, depth); int tr_depth = MAX(1, split_tree.current_depth);
if (cur_cu->part_size != SIZE_2Nx2N) {
tr_depth = depth + 1;
}
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, tree_type); uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, tree_type);
const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400; const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
uvg_inter_recon_cu(state, lcu, x, y, cu_width, true, has_chroma); uvg_inter_recon_cu(state, lcu, true, has_chroma, cu_loc);
if (ctrl->cfg.zero_coeff_rdo && !ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) { if (ctrl->cfg.zero_coeff_rdo && !ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) {
//Calculate cost for zero coeffs //Calculate cost for zero coeffs
inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, depth) + inter_bitcost * state->lambda; inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, split_tree.current_depth) + inter_bitcost * state->lambda;
} }
cu_loc_t loc; cu_loc_t loc;
const int width = LCU_WIDTH >> depth; uvg_cu_loc_ctor(&loc, x, y, cu_width, cu_height);
const int height = width; // TODO: height for non-square blocks
uvg_cu_loc_ctor(&loc, x, y, width, height);
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
true, has_chroma && !cur_cu->joint_cb_cr, true, has_chroma && !cur_cu->joint_cb_cr,
cur_cu->joint_cb_cr, &loc, cur_cu->joint_cb_cr, &loc,
@ -1232,9 +1208,9 @@ static double search_cu(
false, false,
tree_type); tree_type);
int cbf = cbf_is_set_any(cur_cu->cbf, depth); int cbf = cbf_is_set_any(cur_cu->cbf, split_tree.current_depth);
if (cur_cu->merged && !cbf && cur_cu->part_size == SIZE_2Nx2N) { if (cur_cu->merged && !cbf) {
cur_cu->merged = 0; cur_cu->merged = 0;
cur_cu->skipped = 1; cur_cu->skipped = 1;
// Selecting skip reduces bits needed to code the CU // Selecting skip reduces bits needed to code the CU
@ -1244,7 +1220,7 @@ static double search_cu(
inter_bitcost += cur_cu->merge_idx; inter_bitcost += cur_cu->merge_idx;
} }
} }
lcu_fill_inter(lcu, x_local, y_local, cu_width, cur_cu->type); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu);
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
} }
} }
@ -1253,19 +1229,13 @@ static double search_cu(
double bits = 0; double bits = 0;
cabac_data_t* cabac = &state->search_cabac; cabac_data_t* cabac = &state->search_cabac;
cabac->update = 1; cabac->update = 1;
bits += uvg_mock_encode_coding_unit(
state,
cabac,
cu_loc, lcu, cur_cu,
tree_type);
if(cur_cu->type != CU_INTRA || cur_cu->part_size == SIZE_2Nx2N) {
bits += uvg_mock_encode_coding_unit(
state,
cabac,
x, y, depth,
lcu,
cur_cu,
tree_type);
}
else {
assert(0);
}
cost = bits * state->lambda; cost = bits * state->lambda;
@ -1275,15 +1245,15 @@ static double search_cu(
cost = inter_zero_coeff_cost; cost = inter_zero_coeff_cost;
// Restore saved pixels from lower level of the working tree. // Restore saved pixels from lower level of the working tree.
copy_cu_pixels(x_local, y_local, cu_width, &work_tree[depth + 1], lcu, tree_type); copy_cu_pixels(x_local, y_local, cu_width, &work_tree[split_tree.current_depth + 1], lcu, tree_type);
if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { if (cur_cu->merged) {
cur_cu->merged = 0; cur_cu->merged = 0;
cur_cu->skipped = 1; cur_cu->skipped = 1;
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
} }
if (cur_cu->tr_depth != depth) { if (cur_cu->tr_depth != 0) {
// Reset transform depth since there are no coefficients. This // Reset transform depth since there are no coefficients. This
// ensures that CBF is cleared for the whole area of the CU. // ensures that CBF is cleared for the whole area of the CU.
uvg_lcu_fill_trdepth(lcu, x, y, depth, depth, tree_type); uvg_lcu_fill_trdepth(lcu, x, y, depth, depth, tree_type);
@ -1299,12 +1269,12 @@ static double search_cu(
// If the CU is partially outside the frame, we need to split it even // If the CU is partially outside the frame, we need to split it even
// if pu_depth_intra and pu_depth_inter would not permit it. // if pu_depth_intra and pu_depth_inter would not permit it.
cur_cu->type == CU_NOTSET || cur_cu->type == CU_NOTSET ||
(depth < pu_depth_intra.max && !(state->encoder_control->cfg.force_inter&& state->frame->slicetype != UVG_SLICE_I)) || (split_tree.current_depth < pu_depth_intra.max && !(state->encoder_control->cfg.force_inter&& state->frame->slicetype != UVG_SLICE_I)) ||
(state->frame->slicetype != UVG_SLICE_I && (state->frame->slicetype != UVG_SLICE_I &&
depth < pu_depth_inter.max); split_tree.current_depth < pu_depth_inter.max);
if(state->encoder_control->cabac_debug_file) { if(state->encoder_control->cabac_debug_file) {
fprintf(state->encoder_control->cabac_debug_file, "S %4d %4d %d %d", x, y, depth, tree_type); fprintf(state->encoder_control->cabac_debug_file, "S %4d %4d %d %d", x, y, split_tree.current_depth, tree_type);
fwrite(&state->search_cabac.ctx, 1, sizeof(state->search_cabac.ctx), state->encoder_control->cabac_debug_file); fwrite(&state->search_cabac.ctx, 1, sizeof(state->search_cabac.ctx), state->encoder_control->cabac_debug_file);
} }
@ -1312,7 +1282,7 @@ static double search_cu(
if (can_split_cu) { if (can_split_cu) {
int half_cu = cu_width >> (tree_type != UVG_CHROMA_T); int half_cu = cu_width >> (tree_type != UVG_CHROMA_T);
double split_cost = 0.0; double split_cost = 0.0;
int cbf = cbf_is_set_any(cur_cu->cbf, depth); int cbf = cbf_is_set_any(cur_cu->cbf, split_tree.current_depth);
cabac_data_t post_seach_cabac; cabac_data_t post_seach_cabac;
memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac));
memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac)); memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac));
@ -1320,7 +1290,7 @@ static double search_cu(
double split_bits = 0; double split_bits = 0;
if (depth < MAX_DEPTH) { if (split_tree.current_depth < MAX_DEPTH) {
state->search_cabac.update = 1; state->search_cabac.update = 1;
// Add cost of cu_split_flag. // Add cost of cu_split_flag.
@ -1364,10 +1334,24 @@ static double search_cu(
// It is ok to interrupt the search as soon as it is known that // It is ok to interrupt the search as soon as it is known that
// the split costs at least as much as not splitting. // the split costs at least as much as not splitting.
if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) {
if (split_cost < cost) split_cost += search_cu(state, x, y, depth + 1, work_tree, tree_type); const split_tree_t new_split = { split_tree.split_tree | QT_SPLIT << split_tree.current_depth, split_tree.current_depth + 1};
if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y, depth + 1, work_tree, tree_type); cu_loc_t new_cu_loc;
if (split_cost < cost) split_cost += search_cu(state, x, y + half_cu, depth + 1, work_tree, tree_type); if (split_cost < cost) {
if (split_cost < cost) split_cost += search_cu(state, x + half_cu, y + half_cu, depth + 1, work_tree, tree_type); uvg_cu_loc_ctor(&new_cu_loc, x, y, half_cu, half_cu);
split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
}
if (split_cost < cost) {
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y, half_cu, half_cu);
split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
}
if (split_cost < cost) {
uvg_cu_loc_ctor(&new_cu_loc, x, y + half_cu, half_cu, half_cu);
split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
}
if (split_cost < cost) {
uvg_cu_loc_ctor(&new_cu_loc, x + half_cu, y + half_cu, half_cu, half_cu);
split_cost += search_cu(state, &new_cu_loc, work_tree, tree_type, new_split);
}
} else { } else {
split_cost = INT_MAX; split_cost = INT_MAX;
} }
@ -1401,7 +1385,6 @@ static double search_cu(
cur_cu->intra = cu_d1->intra; cur_cu->intra = cu_d1->intra;
cur_cu->type = CU_INTRA; cur_cu->type = CU_INTRA;
cur_cu->part_size = SIZE_2Nx2N;
// Disable MRL in this case // Disable MRL in this case
cur_cu->intra.multi_ref_idx = 0; cur_cu->intra.multi_ref_idx = 0;
@ -1687,14 +1670,17 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
int tree_type = state->frame->slicetype == UVG_SLICE_I int tree_type = state->frame->slicetype == UVG_SLICE_I
&& state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T; && state->encoder_control->cfg.dual_tree ? UVG_LUMA_T : UVG_BOTH_T;
cu_loc_t start;
uvg_cu_loc_ctor(&start, x, y, LCU_WIDTH, LCU_WIDTH);
split_tree_t split_tree = { 0, 0 };
// Start search from depth 0. // Start search from depth 0.
double cost = search_cu( double cost = search_cu(
state, state,
x, &start,
y,
0,
work_tree, work_tree,
tree_type); tree_type,
split_tree);
// Save squared cost for rate control. // Save squared cost for rate control.
if(state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) { if(state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) {
@ -1710,12 +1696,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) { if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) {
cost = search_cu( cost = search_cu(
state, state, &start,
x,
y,
0,
work_tree, work_tree,
UVG_CHROMA_T); UVG_CHROMA_T, split_tree);
if (state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) { if (state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) {
uvg_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight += cost * cost; uvg_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight += cost * cost;

View file

@ -109,8 +109,10 @@ static INLINE bool fracmv_within_ibc_range(const ibc_search_info_t *info, int x,
} }
static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y) static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu, const cu_loc_t* loc, int32_t mv_x, int32_t mv_y)
{ {
const uint32_t x = loc->x;
const uint32_t y = loc->y;
const int x_scu = SUB_SCU(x); const int x_scu = SUB_SCU(x);
const int y_scu = SUB_SCU(y); const int y_scu = SUB_SCU(y);
@ -132,9 +134,11 @@ static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu
cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);; cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);;
cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);; cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);;
uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400); uvg_inter_recon_cu(state, lcu, true, state->encoder_control->chroma_format != UVG_CSP_400, loc);
*cur_cu = cu_backup; *cur_cu = cu_backup;
uint32_t width = loc->width;
uint32_t height = loc->height;
cost = uvg_satd_any_size(width, cost = uvg_satd_any_size(width,
width, width,
@ -162,8 +166,10 @@ static uint32_t calculate_ibc_cost_satd(const encoder_state_t *state, lcu_t* lcu
} }
static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_sad_func_ptr_t optimized_sad, lcu_t* lcu, int32_t x, int32_t y, int32_t width, int32_t mv_x, int32_t mv_y) static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_sad_func_ptr_t optimized_sad, lcu_t* lcu, const cu_loc_t* loc, int32_t mv_x, int32_t mv_y)
{ {
const uint32_t x = loc->x;
const uint32_t y = loc->y;
cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y)); cu_info_t *cur_cu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y));
cu_info_t cu_backup = *cur_cu; cu_info_t cu_backup = *cur_cu;
@ -173,6 +179,8 @@ static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_s
const int y_scu = SUB_SCU(y); const int y_scu = SUB_SCU(y);
const uint32_t offset = x_scu + y_scu * LCU_WIDTH; const uint32_t offset = x_scu + y_scu * LCU_WIDTH;
const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
const uint32_t width = loc->width;
const uint32_t height = loc->height;
cur_cu->type = CU_IBC; cur_cu->type = CU_IBC;
cur_cu->inter.mv_dir = 1; cur_cu->inter.mv_dir = 1;
@ -183,7 +191,7 @@ static uint32_t calculate_ibc_cost_sad(const encoder_state_t *state, optimized_s
cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);; cur_cu->inter.mv[0][0] = mv_x * (1 << INTERNAL_MV_PREC);;
cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);; cur_cu->inter.mv[0][1] = mv_y * (1 << INTERNAL_MV_PREC);;
uvg_inter_recon_cu(state, lcu, x, y, width, true, state->encoder_control->chroma_format != UVG_CSP_400); uvg_inter_recon_cu(state, lcu, true, state->encoder_control->chroma_format != UVG_CSP_400, loc);
*cur_cu = cu_backup; *cur_cu = cu_backup;
@ -235,8 +243,11 @@ static bool check_mv_cost(ibc_search_info_t *info,
double bitcost = 0; double bitcost = 0;
double cost = MAX_DOUBLE; double cost = MAX_DOUBLE;
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, info->origin.x, info->origin.y, info->width, info->height);
cost = calculate_ibc_cost_sad(info->state, info->optimized_sad, info->lcu, info->origin.x, info->origin.y, info->width, x, y);
cost = calculate_ibc_cost_sad(info->state, info->optimized_sad, info->lcu, &loc, x, y);
if (cost >= *best_cost) return false; if (cost >= *best_cost) return false;
@ -246,7 +257,7 @@ static bool check_mv_cost(ibc_search_info_t *info,
info->mv_cand, info->mv_cand,
NULL, NULL,
0, 0,
NULL, 0,
&bitcost &bitcost
); );
@ -782,63 +793,46 @@ static bool merge_candidate_in_list(inter_merge_cand_t *all_cands,
* \param amvp Return searched AMVP PUs sorted by costs * \param amvp Return searched AMVP PUs sorted by costs
* \param merge Return searched Merge PUs sorted by costs * \param merge Return searched Merge PUs sorted by costs
*/ */
static void search_pu_ibc(encoder_state_t * const state, static void search_pu_ibc(
int x_cu, int y_cu, encoder_state_t * const state,
int depth, const cu_loc_t * const cu_loc,
part_mode_t part_mode, unit_stats_map_t *amvp,
int i_pu, unit_stats_map_t *merge,
unit_stats_map_t *amvp, ibc_search_info_t *info)
unit_stats_map_t *merge,
ibc_search_info_t *info)
{ {
const uvg_config *cfg = &state->encoder_control->cfg; const uvg_config *cfg = &state->encoder_control->cfg;
const videoframe_t * const frame = state->tile->frame; const videoframe_t * const frame = state->tile->frame;
const int width_cu = LCU_WIDTH >> depth; const int width_cu = cu_loc->width;
const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu); const int height_cu= cu_loc->height;
const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
const int width = PU_GET_W(part_mode, width_cu, i_pu);
const int height = PU_GET_H(part_mode, width_cu, i_pu);
// Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and
// nRx2N partitions.
const bool merge_a1 = i_pu == 0 || width >= height;
// Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and
// 2NxnD partitions.
const bool merge_b1 = i_pu == 0 || width <= height;
lcu_t *lcu = info->lcu; lcu_t *lcu = info->lcu;
const int x_local = SUB_SCU(x); const int x_local = SUB_SCU(cu_loc->x);
const int y_local = SUB_SCU(y); const int y_local = SUB_SCU(cu_loc->y);
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
cur_pu->type = CU_IBC; cur_pu->type = CU_IBC;
cur_pu->part_size = part_mode; cur_pu->qp = state->qp;
cur_pu->depth = depth; cur_pu->inter.mv_dir = 1;
cur_pu->tr_depth = depth;
cur_pu->qp = state->qp;
cur_pu->inter.mv_dir = 1;
// Default to candidate 0 // Default to candidate 0
CU_SET_MV_CAND(cur_pu, 0, 0); CU_SET_MV_CAND(cur_pu, 0, 0);
FILL(*info, 0); FILL(*info, 0);
info->state = state; info->state = state;
info->pic = frame->source; info->pic = frame->source;
info->origin.x = x; info->origin.x = cu_loc->x;
info->origin.y = y; info->origin.y = cu_loc->y;
info->width = width; info->width = width_cu;
info->height = height; info->height = height_cu;
info->mvd_cost_func = cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost; info->mvd_cost_func =
info->optimized_sad = uvg_get_optimized_sad(width); cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
info->lcu = lcu; info->optimized_sad = uvg_get_optimized_sad(width_cu);
info->lcu = lcu;
// Search for merge mode candidates // Search for merge mode candidates
info->num_merge_cand = uvg_inter_get_merge_cand( info->num_merge_cand = uvg_inter_get_merge_cand(
state, state,
x, y, cu_loc,
width, height,
merge_a1, merge_b1,
info->merge_cand, info->merge_cand,
lcu); lcu);
@ -853,7 +847,7 @@ static void search_pu_ibc(encoder_state_t * const state,
#ifdef COMPLETE_PRED_MODE_BITS #ifdef COMPLETE_PRED_MODE_BITS
// Technically counting these bits would be correct, however counting // Technically counting these bits would be correct, however counting
// them universally degrades quality so this block is disabled by default // them universally degrades quality so this block is disabled by default
const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(x, y, lcu, NULL)], 0); const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL)], 0);
#else #else
const double no_skip_flag = 0; const double no_skip_flag = 0;
#endif #endif
@ -875,7 +869,7 @@ static void search_pu_ibc(encoder_state_t * const state,
{ {
continue; continue;
} }
uvg_inter_pred_pu(state, info->lcu, x_cu, y_cu, width_cu, true, false, i_pu); uvg_inter_pred_pu(state, info->lcu, true, false, cu_loc);
merge->unit[merge->size] = *cur_pu; merge->unit[merge->size] = *cur_pu;
merge->unit[merge->size].type = CU_IBC; merge->unit[merge->size].type = CU_IBC;
merge->unit[merge->size].merge_idx = merge_idx; merge->unit[merge->size].merge_idx = merge_idx;
@ -883,11 +877,11 @@ static void search_pu_ibc(encoder_state_t * const state,
merge->unit[merge->size].skipped = false; merge->unit[merge->size].skipped = false;
double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0); double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0);
if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { if(state->encoder_control->cfg.rdo >= 2) {
uvg_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits); uvg_cu_cost_inter_rd2(state, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits, cu_loc);
} }
else { else {
merge->cost[merge->size] = uvg_satd_any_size(width, height, merge->cost[merge->size] = uvg_satd_any_size(width_cu, height_cu,
lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH, lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH); lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
bits += no_skip_flag; bits += no_skip_flag;
@ -909,7 +903,7 @@ static void search_pu_ibc(encoder_state_t * const state,
// Early Skip Mode Decision // Early Skip Mode Decision
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400; bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) { if (cfg->early_skip) {
for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) { for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) {
if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) { if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) {
merge->size = 1; merge->size = 1;
@ -919,6 +913,7 @@ static void search_pu_ibc(encoder_state_t * const state,
merge->keys[0] = 0; merge->keys[0] = 0;
} }
else if(cfg->rdo < 2) { else if(cfg->rdo < 2) {
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
// Reconstruct blocks with merge candidate. // Reconstruct blocks with merge candidate.
// Check luma CBF. Then, check chroma CBFs if luma CBF is not set // Check luma CBF. Then, check chroma CBFs if luma CBF is not set
// and chroma exists. // and chroma exists.
@ -927,18 +922,18 @@ static void search_pu_ibc(encoder_state_t * const state,
cur_pu->inter.mv_dir = info->merge_cand[merge_idx].dir; cur_pu->inter.mv_dir = info->merge_cand[merge_idx].dir;
cur_pu->inter.mv[0][0] = info->merge_cand[merge_idx].mv[0][0]; cur_pu->inter.mv[0][0] = info->merge_cand[merge_idx].mv[0][0];
cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1]; cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1];
uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T); uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, MAX(1, depth), UVG_BOTH_T);
uvg_inter_recon_cu(state, lcu, x, y, width, true, false); uvg_inter_recon_cu(state, lcu, true, false, cu_loc);
uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T); uvg_quantize_lcu_residual(state, true, false, false, cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) { if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
continue; continue;
} }
else if (has_chroma) { else if (has_chroma) {
uvg_inter_recon_cu(state, lcu, x, y, width, false, has_chroma); uvg_inter_recon_cu(state, lcu, false, has_chroma, cu_loc);
uvg_quantize_lcu_residual(state, false, has_chroma, uvg_quantize_lcu_residual(state, false, has_chroma,
false, /*we are only checking for lack of coeffs so no need to check jccr*/ false, /*we are only checking for lack of coeffs so no need to check jccr*/
x, y, depth, cur_pu, lcu, true, UVG_BOTH_T); cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
if (!cbf_is_set_any(cur_pu->cbf, depth)) { if (!cbf_is_set_any(cur_pu->cbf, depth)) {
cur_pu->type = CU_IBC; cur_pu->type = CU_IBC;
cur_pu->merge_idx = merge_idx; cur_pu->merge_idx = merge_idx;
@ -964,15 +959,12 @@ static void search_pu_ibc(encoder_state_t * const state,
// Do the motion search // Do the motion search
uvg_inter_get_mv_cand(info->state, uvg_inter_get_mv_cand(info->state,
info->origin.x,
info->origin.y,
info->width,
info->height,
info->mv_cand, info->mv_cand,
cur_pu, cur_pu,
lcu, lcu,
NULL); 0,
cu_loc);
vector2d_t best_mv = { 0, 0 }; vector2d_t best_mv = { 0, 0 };
@ -1003,9 +995,7 @@ static void search_pu_ibc(encoder_state_t * const state,
best_cost = calculate_ibc_cost_satd( best_cost = calculate_ibc_cost_satd(
info->state, info->state,
lcu, lcu,
info->origin.x, cu_loc,
info->origin.y,
info->width,
(best_mv.x >> INTERNAL_MV_PREC), (best_mv.x >> INTERNAL_MV_PREC),
(best_mv.y >> INTERNAL_MV_PREC)); (best_mv.y >> INTERNAL_MV_PREC));
best_cost += best_bits * info->state->lambda; best_cost += best_bits * info->state->lambda;
@ -1052,16 +1042,16 @@ static void search_pu_ibc(encoder_state_t * const state,
}; };
if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { if (state->encoder_control->cfg.rdo >= 2) {
if (amvp[0].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]); if (amvp[0].size) uvg_cu_cost_inter_rd2(state, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]], cu_loc);
} }
if(cfg->rdo < 2) { if(cfg->rdo < 2) {
int predmode_ctx; int predmode_ctx;
const int ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1) * 3; const float ibc_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.ibc_flag[0], 1);
const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx); const int skip_contest = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, &predmode_ctx);
const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0); const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0);
const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0); const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0);
@ -1077,33 +1067,29 @@ static void search_pu_ibc(encoder_state_t * const state,
#include "threads.h" #include "threads.h"
static int uvg_search_hash_cu_ibc(encoder_state_t* const state, static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
int x, int y, int depth, const cu_loc_t* cu_loc,
lcu_t* lcu, lcu_t* lcu,
double* inter_cost, double* inter_cost,
double* inter_bitcost) double* inter_bitcost)
{ {
const int x_cu = x; const int x_cu = cu_loc->x;
const int y_cu = y; const int y_cu = cu_loc->y;
const int part_mode = SIZE_2Nx2N; const int part_mode = SIZE_2Nx2N;
const uvg_config *cfg = &state->encoder_control->cfg; const uvg_config *cfg = &state->encoder_control->cfg;
const videoframe_t * const frame = state->tile->frame; const videoframe_t * const frame = state->tile->frame;
const int width_cu = LCU_WIDTH >> depth; const int width_cu = cu_loc->width;
const int width = PU_GET_W(part_mode, width_cu, 0); const int height_cu = cu_loc->height;
const int height = PU_GET_H(part_mode, width_cu, 0);
const bool merge_a1 = true; const bool merge_a1 = true;
const bool merge_b1 = true; const bool merge_b1 = true;
ibc_search_info_t info; ibc_search_info_t info;
const int x_local = SUB_SCU(x); const int x_local = SUB_SCU(x_cu);
const int y_local = SUB_SCU(y); const int y_local = SUB_SCU(y_cu);
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
cur_pu->type = CU_IBC; cur_pu->type = CU_IBC;
cur_pu->part_size = part_mode;
cur_pu->depth = depth;
cur_pu->tr_depth = depth;
cur_pu->qp = state->qp; cur_pu->qp = state->qp;
// Default to candidate 0 // Default to candidate 0
@ -1113,22 +1099,19 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
info.state = state; info.state = state;
info.pic = frame->source; info.pic = frame->source;
info.origin.x = x; info.origin.x = cu_loc->x;
info.origin.y = y; info.origin.y = cu_loc->y;
info.width = width; info.width = width_cu;
info.height = height; info.height = height_cu;
info.mvd_cost_func = info.mvd_cost_func =
cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost; cfg->mv_rdo ? uvg_calc_ibc_mvd_cost_cabac : calc_ibc_mvd_cost;
info.optimized_sad = uvg_get_optimized_sad(width); info.optimized_sad = uvg_get_optimized_sad(width_cu);
info.lcu = lcu; info.lcu = lcu;
// Search for merge mode candidates // Search for merge mode candidates
info.num_merge_cand = uvg_inter_get_merge_cand( info.num_merge_cand = uvg_inter_get_merge_cand(
state, state,
x, cu_loc,
y,
width,
height,
merge_a1, merge_a1,
merge_b1, merge_b1,
info.merge_cand, info.merge_cand,
@ -1154,8 +1137,8 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
UVG_CLOCK_T hashmap_end_real_time; UVG_CLOCK_T hashmap_end_real_time;
UVG_GET_TIME(&hashmap_start_real_time); UVG_GET_TIME(&hashmap_start_real_time);
int xx = x; int xx = x_cu;
int yy = y; int yy = y_cu;
int best_mv_x = INT_MAX>>2; int best_mv_x = INT_MAX>>2;
int best_mv_y = INT_MAX>>2; int best_mv_y = INT_MAX>>2;
@ -1185,12 +1168,12 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
int pos_y = result->value & 0xffff; int pos_y = result->value & 0xffff;
int mv_x = pos_x - xx; int mv_x = pos_x - xx;
int mv_y = pos_y - yy; int mv_y = pos_y - yy;
if (pos_x <= xx - width && pos_y <= yy - height) { if (pos_x <= xx - width_cu && pos_y <= yy - height_cu) {
valid_mv = intmv_within_ibc_range(&info, mv_x, mv_y); valid_mv = intmv_within_ibc_range(&info, mv_x, mv_y);
if (valid_mv) { if (valid_mv) {
bool full_block = true; // Is the full block covered by the IBC? bool full_block = true; // Is the full block covered by the IBC?
for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width; offset_x+=UVG_HASHMAP_BLOCKSIZE) { for (int offset_x = UVG_HASHMAP_BLOCKSIZE; offset_x < width_cu; offset_x+=UVG_HASHMAP_BLOCKSIZE) {
for (int offset_y = 0; offset_y < height; offset_y += UVG_HASHMAP_BLOCKSIZE) { for (int offset_y = 0; offset_y < height_cu; offset_y += UVG_HASHMAP_BLOCKSIZE) {
uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[ uint32_t crc_other_blocks = state->tile->frame->ibc_hashmap_pos_to_hash[
((yy+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (xx+offset_x) / UVG_HASHMAP_BLOCKSIZE]; ((yy+offset_y) / UVG_HASHMAP_BLOCKSIZE)*state->tile->frame->ibc_hashmap_pos_to_hash_stride + (xx+offset_x) / UVG_HASHMAP_BLOCKSIZE];
@ -1220,7 +1203,7 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
best_mv_y = mv_y; best_mv_y = mv_y;
ibc_cost = cost; ibc_cost = cost;
ibc_bitcost = bits; ibc_bitcost = bits;
fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x,y, width,width, mv_x, mv_y); fprintf(stderr, "Found best IBC!! %dx%d %dx%d: %d,%d\r\n", x_cu,y_cu, width_cu,height_cu, mv_x, mv_y);
found_block = true; found_block = true;
//break; //break;
} }
@ -1274,11 +1257,9 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
uvg_inter_recon_cu( uvg_inter_recon_cu(
state, state,
lcu, lcu,
x,
y,
CU_WIDTH_FROM_DEPTH(depth),
true, true,
state->encoder_control->chroma_format != UVG_CSP_400); state->encoder_control->chroma_format != UVG_CSP_400,
cu_loc);
if (*inter_cost < MAX_DOUBLE) { if (*inter_cost < MAX_DOUBLE) {
assert(fracmv_within_ibc_range( assert(fracmv_within_ibc_range(
@ -1305,17 +1286,18 @@ static int uvg_search_hash_cu_ibc(encoder_state_t* const state,
* \param inter_bitcost Return inter bitcost * \param inter_bitcost Return inter bitcost
*/ */
void uvg_search_cu_ibc(encoder_state_t * const state, void uvg_search_cu_ibc(encoder_state_t * const state,
int x, int y, int depth, const cu_loc_t * const cu_loc,
lcu_t *lcu, lcu_t *lcu,
double *inter_cost, double *inter_cost,
double* inter_bitcost) double* inter_bitcost)
{ {
*inter_cost = MAX_DOUBLE; *inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_INT; *inter_bitcost = MAX_INT;
// Quick hashmap search // Quick hashmap search
/* uvg_search_hash_cu_ibc( /* uvg_search_hash_cu_ibc(
state, state,
x, y, depth, cu_loc,
lcu, lcu,
inter_cost, inter_cost,
inter_bitcost); inter_bitcost);
@ -1330,7 +1312,7 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
info.lcu = lcu; info.lcu = lcu;
search_pu_ibc(state, search_pu_ibc(state,
x, y, depth, cu_loc,
SIZE_2Nx2N, 0, SIZE_2Nx2N, 0,
amvp, amvp,
&merge, &merge,
@ -1374,14 +1356,14 @@ void uvg_search_cu_ibc(encoder_state_t * const state,
return; return;
} }
const int x_local = SUB_SCU(x); const int x_local = SUB_SCU(cu_loc->x);
const int y_local = SUB_SCU(y); const int y_local = SUB_SCU(cu_loc->y);
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
*cur_pu = *best_inter_pu; *cur_pu = *best_inter_pu;
cur_pu->type = CU_IBC; cur_pu->type = CU_IBC;
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), uvg_inter_recon_cu(state, lcu,
true, state->encoder_control->chroma_format != UVG_CSP_400); true, state->encoder_control->chroma_format != UVG_CSP_400, cu_loc);
if (*inter_cost < MAX_DOUBLE) { if (*inter_cost < MAX_DOUBLE) {
assert(fracmv_within_ibc_range(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1])); assert(fracmv_within_ibc_range(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1]));

View file

@ -46,7 +46,7 @@
void uvg_search_cu_ibc(encoder_state_t * const state, void uvg_search_cu_ibc(encoder_state_t * const state,
int x, int y, int depth, const cu_loc_t * const cu_loc,
lcu_t *lcu, lcu_t *lcu,
double *inter_cost, double *inter_cost,
double* inter_bitcost); double* inter_bitcost);

View file

@ -1293,8 +1293,8 @@ static void apply_mv_scaling(int32_t current_poc,
/** /**
* \brief Perform inter search for a single reference frame. * \brief Perform inter search for a single reference frame.
*/ */
static void search_pu_inter_ref(inter_search_info_t *info, static void search_pu_inter_ref(
int depth, inter_search_info_t *info,
lcu_t *lcu, lcu_t *lcu,
cu_info_t *cur_cu, cu_info_t *cur_cu,
unit_stats_map_t *amvp) unit_stats_map_t *amvp)
@ -1327,15 +1327,15 @@ static void search_pu_inter_ref(inter_search_info_t *info,
// Get MV candidates // Get MV candidates
cur_cu->inter.mv_ref[ref_list] = ref_list_idx[ref_list]; cur_cu->inter.mv_ref[ref_list] = ref_list_idx[ref_list];
cu_loc_t cu_loc;
uvg_cu_loc_ctor(&cu_loc, info->origin.x, info->origin.y, info->width, info->height);
uvg_inter_get_mv_cand(info->state, uvg_inter_get_mv_cand(info->state,
info->origin.x, info->mv_cand,
info->origin.y, cur_cu,
info->width, lcu,
info->height, ref_list,
info->mv_cand, &cu_loc);
cur_cu,
lcu,
ref_list);
vector2d_t best_mv = { 0, 0 }; vector2d_t best_mv = { 0, 0 };
@ -1498,11 +1498,13 @@ static void search_pu_inter_ref(inter_search_info_t *info,
/** /**
* \brief Search bipred modes for a PU. * \brief Search bipred modes for a PU.
*/ */
static void search_pu_inter_bipred(inter_search_info_t *info, static void search_pu_inter_bipred(
int depth, inter_search_info_t *info,
lcu_t *lcu, lcu_t *lcu,
unit_stats_map_t *amvp_bipred) unit_stats_map_t *amvp_bipred)
{ {
cu_loc_t cu_loc;
uvg_cu_loc_ctor(&cu_loc, info->origin.x, info->origin.y, info->width, info->height);
const image_list_t *const ref = info->state->frame->ref; const image_list_t *const ref = info->state->frame->ref;
uint8_t (*ref_LX)[16] = info->state->frame->ref_LX; uint8_t (*ref_LX)[16] = info->state->frame->ref_LX;
const videoframe_t * const frame = info->state->tile->frame; const videoframe_t * const frame = info->state->tile->frame;
@ -1551,7 +1553,7 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
bipred_pu->skipped = false; bipred_pu->skipped = false;
for (int reflist = 0; reflist < 2; reflist++) { for (int reflist = 0; reflist < 2; reflist++) {
uvg_inter_get_mv_cand(info->state, x, y, width, height, info->mv_cand, bipred_pu, lcu, reflist); uvg_inter_get_mv_cand(info->state, info->mv_cand, bipred_pu, lcu, reflist, &cu_loc);
} }
// Don't try merge candidates that don't satisfy mv constraints. // Don't try merge candidates that don't satisfy mv constraints.
@ -1564,13 +1566,11 @@ static void search_pu_inter_bipred(inter_search_info_t *info,
uvg_inter_recon_bipred(info->state, uvg_inter_recon_bipred(info->state,
ref->images[ref_LX[0][merge_cand[i].ref[0]]], ref->images[ref_LX[0][merge_cand[i].ref[0]]],
ref->images[ref_LX[1][merge_cand[j].ref[1]]], ref->images[ref_LX[1][merge_cand[j].ref[1]]],
x, y,
width,
height,
mv, mv,
lcu, lcu,
true, true,
false); false,
&cu_loc);
const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)]; const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)];
const uvg_pixel *src = &frame->source->y[x + y * frame->source->stride]; const uvg_pixel *src = &frame->source->y[x + y * frame->source->stride];
@ -1666,11 +1666,9 @@ static bool merge_candidate_in_list(inter_merge_cand_t *all_cands,
* \param amvp Return searched AMVP PUs sorted by costs * \param amvp Return searched AMVP PUs sorted by costs
* \param merge Return searched Merge PUs sorted by costs * \param merge Return searched Merge PUs sorted by costs
*/ */
static void search_pu_inter(encoder_state_t * const state, static void search_pu_inter(
int x_cu, int y_cu, encoder_state_t * const state,
int depth, const cu_loc_t* const cu_loc,
part_mode_t part_mode,
int i_pu,
lcu_t *lcu, lcu_t *lcu,
unit_stats_map_t *amvp, unit_stats_map_t *amvp,
unit_stats_map_t *merge, unit_stats_map_t *merge,
@ -1678,26 +1676,14 @@ static void search_pu_inter(encoder_state_t * const state,
{ {
const uvg_config *cfg = &state->encoder_control->cfg; const uvg_config *cfg = &state->encoder_control->cfg;
const videoframe_t * const frame = state->tile->frame; const videoframe_t * const frame = state->tile->frame;
const int width_cu = LCU_WIDTH >> depth; const int width_cu = cu_loc->width;
const int height_cu = width_cu; // TODO: non-square blocks const int height_cu = cu_loc->height;
const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
const int width = PU_GET_W(part_mode, width_cu, i_pu);
const int height = PU_GET_H(part_mode, width_cu, i_pu);
// Merge candidate A1 may not be used for the second PU of Nx2N, nLx2N and
// nRx2N partitions.
const bool merge_a1 = i_pu == 0 || width >= height;
// Merge candidate B1 may not be used for the second PU of 2NxN, 2NxnU and
// 2NxnD partitions.
const bool merge_b1 = i_pu == 0 || width <= height;
const int x_local = SUB_SCU(x); const int x_local = SUB_SCU(cu_loc->x);
const int y_local = SUB_SCU(y); const int y_local = SUB_SCU(cu_loc->y);
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
cur_pu->type = CU_NOTSET; cur_pu->type = CU_NOTSET;
cur_pu->part_size = part_mode;
cur_pu->depth = depth;
cur_pu->qp = state->qp; cur_pu->qp = state->qp;
// Default to candidate 0 // Default to candidate 0
@ -1708,19 +1694,17 @@ static void search_pu_inter(encoder_state_t * const state,
info->state = state; info->state = state;
info->pic = frame->source; info->pic = frame->source;
info->origin.x = x; info->origin.x = cu_loc->x;
info->origin.y = y; info->origin.y = cu_loc->y;
info->width = width; info->width = width_cu;
info->height = height; info->height = height_cu;
info->mvd_cost_func = cfg->mv_rdo ? uvg_calc_mvd_cost_cabac : calc_mvd_cost; info->mvd_cost_func = cfg->mv_rdo ? uvg_calc_mvd_cost_cabac : calc_mvd_cost;
info->optimized_sad = uvg_get_optimized_sad(width); info->optimized_sad = uvg_get_optimized_sad(width_cu);
// Search for merge mode candidates // Search for merge mode candidates
info->num_merge_cand = uvg_inter_get_merge_cand( info->num_merge_cand = uvg_inter_get_merge_cand(
state, state,
x, y, cu_loc,
width, height,
merge_a1, merge_b1,
info->merge_cand, info->merge_cand,
lcu lcu
); );
@ -1755,7 +1739,7 @@ static void search_pu_inter(encoder_state_t * const state,
// If bipred is not enabled, do not try candidates with mv_dir == 3. // If bipred is not enabled, do not try candidates with mv_dir == 3.
// Bipred is also forbidden for 4x8 and 8x4 blocks by the standard. // Bipred is also forbidden for 4x8 and 8x4 blocks by the standard.
if (cur_pu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue; if (cur_pu->inter.mv_dir == 3 && !state->encoder_control->cfg.bipred) continue;
if (cur_pu->inter.mv_dir == 3 && !(width + height > 12)) continue; if (cur_pu->inter.mv_dir == 3 && !(cu_loc->width + cu_loc->height > 12)) continue;
bool is_duplicate = merge_candidate_in_list(info->merge_cand, cur_cand, merge); bool is_duplicate = merge_candidate_in_list(info->merge_cand, cur_cand, merge);
@ -1769,7 +1753,7 @@ static void search_pu_inter(encoder_state_t * const state,
{ {
continue; continue;
} }
uvg_inter_pred_pu(state, lcu, x_cu, y_cu, width_cu, true, false, i_pu); uvg_inter_pred_pu(state, lcu, true, false, cu_loc);
merge->unit[merge->size] = *cur_pu; merge->unit[merge->size] = *cur_pu;
merge->unit[merge->size].type = CU_INTER; merge->unit[merge->size].type = CU_INTER;
merge->unit[merge->size].merge_idx = merge_idx; merge->unit[merge->size].merge_idx = merge_idx;
@ -1777,11 +1761,11 @@ static void search_pu_inter(encoder_state_t * const state,
merge->unit[merge->size].skipped = false; merge->unit[merge->size].skipped = false;
double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0); double bits = merge_flag_cost + merge_idx + CTX_ENTROPY_FBITS(&(state->search_cabac.ctx.cu_merge_idx_ext_model), merge_idx != 0);
if(state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { if(state->encoder_control->cfg.rdo >= 2) {
uvg_cu_cost_inter_rd2(state, x, y, depth, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits); uvg_cu_cost_inter_rd2(state, &merge->unit[merge->size], lcu, &merge->cost[merge->size], &bits, cu_loc);
} }
else { else {
merge->cost[merge->size] = uvg_satd_any_size(width, height, merge->cost[merge->size] = uvg_satd_any_size(cu_loc->width, cu_loc->height,
lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH, lcu->rec.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH,
lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH); lcu->ref.y + y_local * LCU_WIDTH + x_local, LCU_WIDTH);
bits += no_skip_flag; bits += no_skip_flag;
@ -1803,7 +1787,7 @@ static void search_pu_inter(encoder_state_t * const state,
// Early Skip Mode Decision // Early Skip Mode Decision
bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400; bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
if (cfg->early_skip && cur_pu->part_size == SIZE_2Nx2N) { if (cfg->early_skip) {
for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) { for (int merge_key = 0; merge_key < num_rdo_cands; ++merge_key) {
if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) { if(cfg->rdo >= 2 && merge->unit[merge->keys[merge_key]].skipped) {
merge->size = 1; merge->size = 1;
@ -1813,6 +1797,8 @@ static void search_pu_inter(encoder_state_t * const state,
merge->keys[0] = 0; merge->keys[0] = 0;
} }
else if(cfg->rdo < 2) { else if(cfg->rdo < 2) {
const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
// Reconstruct blocks with merge candidate. // Reconstruct blocks with merge candidate.
// Check luma CBF. Then, check chroma CBFs if luma CBF is not set // Check luma CBF. Then, check chroma CBFs if luma CBF is not set
// and chroma exists. // and chroma exists.
@ -1825,23 +1811,20 @@ static void search_pu_inter(encoder_state_t * const state,
cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1]; cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1];
cur_pu->inter.mv[1][0] = info->merge_cand[merge_idx].mv[1][0]; cur_pu->inter.mv[1][0] = info->merge_cand[merge_idx].mv[1][0];
cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1]; cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1];
uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T); uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, MAX(1, depth), UVG_BOTH_T);
uvg_inter_recon_cu(state, lcu, x, y, width, true, false); uvg_inter_recon_cu(state, lcu, true, false, cu_loc);
cu_loc_t loc; uvg_quantize_lcu_residual(state, true, false, false, cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
uvg_cu_loc_ctor(&loc, x, y, width_cu, height_cu);
uvg_quantize_lcu_residual(state, true, false, false, &loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) { if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
continue; continue;
} }
else if (has_chroma) { else if (has_chroma) {
uvg_inter_recon_cu(state, lcu, x, y, width, false, has_chroma); uvg_inter_recon_cu(state, lcu, false, has_chroma, cu_loc);
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
false, has_chroma, false, has_chroma,
false, /*we are only checking for lack of coeffs so no need to check jccr*/ false, /*we are only checking for lack of coeffs so no need to check jccr*/
&loc, depth, cur_pu, lcu, cu_loc, depth, cur_pu, lcu,
true, true,
UVG_BOTH_T); UVG_BOTH_T);
if (!cbf_is_set_any(cur_pu->cbf, depth)) { if (!cbf_is_set_any(cur_pu->cbf, depth)) {
@ -1876,7 +1859,7 @@ static void search_pu_inter(encoder_state_t * const state,
info->ref_idx = ref_idx; info->ref_idx = ref_idx;
info->ref = state->frame->ref->images[ref_idx]; info->ref = state->frame->ref->images[ref_idx];
search_pu_inter_ref(info, depth, lcu, cur_pu, amvp); search_pu_inter_ref(info, lcu, cur_pu, amvp);
} }
assert(amvp[0].size <= MAX_UNIT_STATS_MAP_SIZE); assert(amvp[0].size <= MAX_UNIT_STATS_MAP_SIZE);
@ -1941,14 +1924,11 @@ static void search_pu_inter(encoder_state_t * const state,
info->ref = ref->images[info->ref_idx]; info->ref = ref->images[info->ref_idx];
uvg_inter_get_mv_cand(info->state, uvg_inter_get_mv_cand(info->state,
info->origin.x, info->mv_cand,
info->origin.y, unipred_pu,
info->width, lcu,
info->height, list,
info->mv_cand, cu_loc);
unipred_pu,
lcu,
list);
double frac_cost = MAX_DOUBLE; double frac_cost = MAX_DOUBLE;
double frac_bits = MAX_INT; double frac_bits = MAX_INT;
@ -1969,8 +1949,8 @@ static void search_pu_inter(encoder_state_t * const state,
unipred_pu->inter.mv[list][1] = frac_mv.y; unipred_pu->inter.mv[list][1] = frac_mv.y;
CU_SET_MV_CAND(unipred_pu, list, cu_mv_cand); CU_SET_MV_CAND(unipred_pu, list, cu_mv_cand);
if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { if (state->encoder_control->cfg.rdo >= 2) {
uvg_cu_cost_inter_rd2(state, x, y, depth, unipred_pu, lcu, &frac_cost, &frac_bits); uvg_cu_cost_inter_rd2(state, unipred_pu, lcu, &frac_cost, &frac_bits, cu_loc);
} }
amvp[list].cost[key] = frac_cost; amvp[list].cost[key] = frac_cost;
@ -1992,15 +1972,15 @@ static void search_pu_inter(encoder_state_t * const state,
amvp[list].size = n_best; amvp[list].size = n_best;
} }
if (state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N && cfg->fme_level == 0) { if (state->encoder_control->cfg.rdo >= 2 && cfg->fme_level == 0) {
if (amvp[0].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]]); if (amvp[0].size) uvg_cu_cost_inter_rd2(state, &amvp[0].unit[best_keys[0]], lcu, &amvp[0].cost[best_keys[0]], &amvp[0].bits[best_keys[0]], cu_loc);
if (amvp[1].size) uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]]); if (amvp[1].size) uvg_cu_cost_inter_rd2(state, &amvp[1].unit[best_keys[1]], lcu, &amvp[1].cost[best_keys[1]], &amvp[1].bits[best_keys[1]], cu_loc);
} }
// Search bi-pred positions // Search bi-pred positions
bool can_use_bipred = state->frame->slicetype == UVG_SLICE_B bool can_use_bipred = state->frame->slicetype == UVG_SLICE_B
&& cfg->bipred && cfg->bipred
&& width + height >= 16; // 4x8 and 8x4 PBs are restricted to unipred && cu_loc->width + cu_loc->height >= 16; // 4x8 and 8x4 PBs are restricted to unipred
if (can_use_bipred) { if (can_use_bipred) {
@ -2031,25 +2011,23 @@ static void search_pu_inter(encoder_state_t * const state,
bipred_pu->skipped = false; bipred_pu->skipped = false;
for (int reflist = 0; reflist < 2; reflist++) { for (int reflist = 0; reflist < 2; reflist++) {
uvg_inter_get_mv_cand(info->state, x, y, width, height, info->mv_cand, bipred_pu, lcu, reflist); uvg_inter_get_mv_cand(info->state, info->mv_cand, bipred_pu, lcu, reflist, cu_loc);
} }
uvg_inter_recon_bipred(info->state, uvg_inter_recon_bipred(info->state,
ref->images[ref_LX[0][bipred_pu->inter.mv_ref[0]]], ref->images[ref_LX[0][bipred_pu->inter.mv_ref[0]]],
ref->images[ref_LX[1][bipred_pu->inter.mv_ref[1]]], ref->images[ref_LX[1][bipred_pu->inter.mv_ref[1]]],
x, y, mv, lcu,
width, true,
height, false,
mv, cu_loc
lcu, );
true,
false);
const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)]; const uvg_pixel *rec = &lcu->rec.y[SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x)];
const uvg_pixel *src = &lcu->ref.y[SUB_SCU(y) * LCU_WIDTH + SUB_SCU(x)]; const uvg_pixel *src = &lcu->ref.y[SUB_SCU(cu_loc->y) * LCU_WIDTH + SUB_SCU(cu_loc->x)];
best_bipred_cost = best_bipred_cost =
uvg_satd_any_size(width, height, rec, LCU_WIDTH, src, LCU_WIDTH); uvg_satd_any_size(cu_loc->width, cu_loc->height, rec, LCU_WIDTH, src, LCU_WIDTH);
double bitcost[2] = { 0, 0 }; double bitcost[2] = { 0, 0 };
@ -2096,17 +2074,17 @@ static void search_pu_inter(encoder_state_t * const state,
} }
// TODO: this probably should have a separate command line option // TODO: this probably should have a separate command line option
if (cfg->rdo >= 3) search_pu_inter_bipred(info, depth, lcu, &amvp[2]); if (cfg->rdo >= 3) search_pu_inter_bipred(info, lcu, &amvp[2]);
assert(amvp[2].size <= MAX_UNIT_STATS_MAP_SIZE); assert(amvp[2].size <= MAX_UNIT_STATS_MAP_SIZE);
uvg_sort_keys_by_cost(&amvp[2]); uvg_sort_keys_by_cost(&amvp[2]);
if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 2 && cur_pu->part_size == SIZE_2Nx2N) { if (amvp[2].size > 0 && state->encoder_control->cfg.rdo >= 2) {
uvg_cu_cost_inter_rd2(state, x, y, depth, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]]); uvg_cu_cost_inter_rd2(state, &amvp[2].unit[amvp[2].keys[0]], lcu, &amvp[2].cost[amvp[2].keys[0]], &amvp[2].bits[amvp[2].keys[0]], cu_loc);
} }
} }
if(cfg->rdo < 2) { if(cfg->rdo < 2) {
int predmode_ctx; int predmode_ctx;
const int skip_contest = uvg_get_skip_context(x, y, lcu, NULL, &predmode_ctx); const int skip_contest = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, &predmode_ctx);
const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0); const double no_skip_flag = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_skip_flag_model[skip_contest], 0);
const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0); const double pred_mode_bits = CTX_ENTROPY_FBITS(&state->search_cabac.ctx.cu_pred_mode_model[predmode_ctx], 0);
@ -2140,25 +2118,23 @@ static void search_pu_inter(encoder_state_t * const state,
* \param inter_cost Return inter cost * \param inter_cost Return inter cost
* \param inter_bitcost Return inter bitcost * \param inter_bitcost Return inter bitcost
*/ */
void uvg_cu_cost_inter_rd2(encoder_state_t * const state, void uvg_cu_cost_inter_rd2(
int x, int y, int depth, encoder_state_t * const state,
cu_info_t* cur_cu, cu_info_t* cur_cu,
lcu_t *lcu, lcu_t *lcu,
double *inter_cost, double *inter_cost,
double* inter_bitcost){ double* inter_bitcost,
const cu_loc_t* const cu_loc){
int tr_depth = MAX(1, depth);
if (cur_cu->part_size != SIZE_2Nx2N) {
tr_depth = depth + 1;
}
uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, UVG_BOTH_T);
const int x_px = SUB_SCU(x); const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width];
const int y_px = SUB_SCU(y); int tr_depth = MAX(1, depth);
uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, tr_depth, UVG_BOTH_T);
const int x_px = SUB_SCU(cu_loc->x);
const int y_px = SUB_SCU(cu_loc->y);
const int width = LCU_WIDTH >> depth; const int width = LCU_WIDTH >> depth;
const int height = width; // TODO: non-square blocks const int height = width; // TODO: non-square blocks
cu_loc_t loc;
uvg_cu_loc_ctor(&loc, x, y, width, height);
cabac_data_t cabac_copy; cabac_data_t cabac_copy;
memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
@ -2169,7 +2145,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
*cur_pu = *cur_cu; *cur_pu = *cur_cu;
const bool reconstruct_chroma = state->encoder_control->chroma_format != UVG_CSP_400; const bool reconstruct_chroma = state->encoder_control->chroma_format != UVG_CSP_400;
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, reconstruct_chroma); uvg_inter_recon_cu(state, lcu, true, reconstruct_chroma, cu_loc);
int index = y_px * LCU_WIDTH + x_px; int index = y_px * LCU_WIDTH + x_px;
double ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index], double ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index],
@ -2187,13 +2163,13 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
} }
double no_cbf_bits; double no_cbf_bits;
double bits = 0; double bits = 0;
const int skip_context = uvg_get_skip_context(x, y, lcu, NULL, NULL); const int skip_context = uvg_get_skip_context(cu_loc->x, cu_loc->y, lcu, NULL, NULL);
if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { if (cur_cu->merged) {
no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost; no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost;
bits += uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu, UVG_BOTH_T); bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T);
} }
else { else {
no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, x, y, depth, lcu, cur_cu, UVG_BOTH_T); no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T);
bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1); bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1);
} }
double no_cbf_cost = ssd + no_cbf_bits * state->lambda; double no_cbf_cost = ssd + no_cbf_bits * state->lambda;
@ -2207,7 +2183,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
true, true,
false, false,
false, &loc, false,
cu_loc,
depth, depth,
cur_cu, cur_cu,
lcu, lcu,
@ -2243,7 +2220,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
depth, depth,
lcu, lcu,
&cabac_copy, &cabac_copy,
&loc, cu_loc,
index, index,
0, 0,
cur_cu, cur_cu,
@ -2274,7 +2251,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
uvg_quantize_lcu_residual(state, uvg_quantize_lcu_residual(state,
true, reconstruct_chroma, true, reconstruct_chroma,
reconstruct_chroma && state->encoder_control->cfg.jccr, reconstruct_chroma && state->encoder_control->cfg.jccr,
&loc, cu_loc,
depth, depth,
cur_cu, cur_cu,
lcu, lcu,
@ -2308,7 +2285,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
if(no_cbf_cost < *inter_cost) { if(no_cbf_cost < *inter_cost) {
cur_cu->cbf = 0; cur_cu->cbf = 0;
if (cur_cu->merged && cur_cu->part_size == SIZE_2Nx2N) { if (cur_cu->merged) {
cur_cu->skipped = 1; cur_cu->skipped = 1;
} }
*inter_cost = no_cbf_cost; *inter_cost = no_cbf_cost;
@ -2332,11 +2309,12 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
* \param inter_cost Return inter cost * \param inter_cost Return inter cost
* \param inter_bitcost Return inter bitcost * \param inter_bitcost Return inter bitcost
*/ */
void uvg_search_cu_inter(encoder_state_t * const state, void uvg_search_cu_inter(
int x, int y, int depth, encoder_state_t * const state,
lcu_t *lcu, const cu_loc_t* const cu_loc,
double *inter_cost, lcu_t *lcu,
double* inter_bitcost) double *inter_cost,
double* inter_bitcost)
{ {
*inter_cost = MAX_DOUBLE; *inter_cost = MAX_DOUBLE;
*inter_bitcost = MAX_INT; *inter_bitcost = MAX_INT;
@ -2349,12 +2327,8 @@ void uvg_search_cu_inter(encoder_state_t * const state,
inter_search_info_t info; inter_search_info_t info;
search_pu_inter(state, search_pu_inter(state,
x, y, depth, cu_loc, lcu, amvp,
SIZE_2Nx2N, 0, &merge, &info);
lcu,
amvp,
&merge,
&info);
// Early Skip CU decision // Early Skip CU decision
if (merge.size == 1 && merge.unit[0].skipped) { if (merge.size == 1 && merge.unit[0].skipped) {
@ -2396,13 +2370,14 @@ void uvg_search_cu_inter(encoder_state_t * const state,
return; return;
} }
const int x_local = SUB_SCU(x); const int x_local = SUB_SCU(cu_loc->x);
const int y_local = SUB_SCU(y); const int y_local = SUB_SCU(cu_loc->y);
cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local); cu_info_t *cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
*cur_pu = *best_inter_pu; *cur_pu = *best_inter_pu;
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), uvg_inter_recon_cu(state, lcu,
true, state->encoder_control->chroma_format != UVG_CSP_400); true, state->encoder_control->chroma_format != UVG_CSP_400,
cu_loc);
if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) { if (*inter_cost < MAX_DOUBLE && cur_pu->inter.mv_dir & 1) {
assert(fracmv_within_tile(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1])); assert(fracmv_within_tile(&info, cur_pu->inter.mv[0][0], cur_pu->inter.mv[0][1]));

View file

@ -73,11 +73,12 @@ typedef double uvg_mvd_cost_func(const encoder_state_t *state,
int32_t ref_idx, int32_t ref_idx,
double *bitcost); double *bitcost);
void uvg_search_cu_inter(encoder_state_t * const state, void uvg_search_cu_inter(
int x, int y, int depth, encoder_state_t * const state,
lcu_t *lcu, const cu_loc_t* const cu_loc,
double *inter_cost, lcu_t *lcu,
double* inter_bitcost); double *inter_cost,
double* inter_bitcost);
@ -85,12 +86,13 @@ unsigned uvg_inter_satd_cost(const encoder_state_t* state,
const lcu_t *lcu, const lcu_t *lcu,
int x, int x,
int y); int y);
void uvg_cu_cost_inter_rd2(encoder_state_t* const state, void uvg_cu_cost_inter_rd2(
int x, int y, int depth, encoder_state_t* const state,
cu_info_t* cur_cu, cu_info_t* cur_cu,
lcu_t* lcu, lcu_t* lcu,
double* inter_cost, double* inter_cost,
double* inter_bitcost); double* inter_bitcost,
const cu_loc_t* const cu_loc);
int uvg_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a, int* predmode_ctx); int uvg_get_skip_context(int x, int y, lcu_t* const lcu, cu_array_t* const cu_a, int* predmode_ctx);

View file

@ -294,13 +294,6 @@ static void uvg_angular_pred_avx2(
f[yy][2] = 16 + offset; f[yy][2] = 16 + offset;
f[yy][3] = offset; f[yy][3] = offset;
} }
// Cubic must be used if ref line != 0 or if isp mode != 0
if (multi_ref_index || isp) {
use_cubic = true;
}
const int16_t filter_coeff[4] = { 16 - (delta_fract[yy] >> 1), 32 - (delta_fract[yy] >> 1), 16 + (delta_fract[yy] >> 1), delta_fract[yy] >> 1 };
const int16_t *temp_f = use_cubic ? cubic_filter[delta_fract[yy]] : filter_coeff;
memcpy(f[yy], temp_f, 4 * sizeof(*temp_f));
} }
// Do 4-tap intra interpolation filtering // Do 4-tap intra interpolation filtering

View file

@ -708,7 +708,6 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state,
(width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip) (width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip)
{ {
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
uvg_rdoq(state, coeff, coeff_out, width, height, color, uvg_rdoq(state, coeff, coeff_out, width, height, color,
scan_order, cur_cu->type, tr_depth, cur_cu->cbf, lfnst_index); scan_order, cur_cu->type, tr_depth, cur_cu->cbf, lfnst_index);
} }

View file

@ -316,7 +316,6 @@ int uvg_quant_cbcr_residual_generic(
(width > 4 || !state->encoder_control->cfg.rdoq_skip)) (width > 4 || !state->encoder_control->cfg.rdoq_skip))
{ {
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U,
scan_order, cur_cu->type, tr_depth, cur_cu->cbf, scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
cur_cu->cr_lfnst_idx); cur_cu->cr_lfnst_idx);
@ -499,7 +498,6 @@ int uvg_quantize_residual_generic(encoder_state_t *const state,
(width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip) (width > 4 || !state->encoder_control->cfg.rdoq_skip) && !use_trskip)
{ {
int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth;
tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0);
uvg_rdoq(state, coeff, coeff_out, width, height, color, uvg_rdoq(state, coeff, coeff_out, width, height, color,
scan_order, cur_cu->type, tr_depth, cur_cu->cbf, scan_order, cur_cu->type, tr_depth, cur_cu->cbf,
lfnst_index); lfnst_index);

View file

@ -490,7 +490,7 @@ void uvg_chroma_transform_search(
int depth, int depth,
lcu_t* const lcu, lcu_t* const lcu,
cabac_data_t* temp_cabac, cabac_data_t* temp_cabac,
cu_loc_t *cu_loc, const cu_loc_t* const cu_loc,
const int offset, const int offset,
const uint8_t mode, const uint8_t mode,
cu_info_t* pred_cu, cu_info_t* pred_cu,

View file

@ -108,7 +108,7 @@ void uvg_chroma_transform_search(
int depth, int depth,
lcu_t* const lcu, lcu_t* const lcu,
cabac_data_t* temp_cabac, cabac_data_t* temp_cabac,
cu_loc_t *cu_loc, const cu_loc_t* const cu_loc,
const int offset, const int offset,
const uint8_t mode, const uint8_t mode,
cu_info_t* pred_cu, cu_info_t* pred_cu,

View file

@ -46,8 +46,11 @@ TEST test_get_spatial_merge_cand(void)
merge_candidates_t cand = { 0 }; merge_candidates_t cand = { 0 };
get_spatial_merge_candidates(64 + 32, 64, // x, y cu_loc_t cu_loc;
32, 24, // width, height uvg_cu_loc_ctor(&cu_loc, 64 + 32, 64, // x, y
32, 24); // width, height)
get_spatial_merge_candidates(&cu_loc,
1920, 1080, // picture size 1920, 1080, // picture size
&lcu, &lcu,
&cand, &cand,