From 06532dce0219158d7afac72592b37eb61b51bb29 Mon Sep 17 00:00:00 2001
From: siivonek
Date: Fri, 29 Jul 2022 15:36:56 +0300
Subject: [PATCH] [isp] Implement ISP search and partitioning. Add helper
 function for constructing cu_loc types. WIP stuff for transform.

---
Review notes (this area is ignored when the patch is applied):
 - intra.c: the ISP partition loop now advances (part += part_dim) and the
   partition width/height follow the split direction (a horizontal split
   keeps the full width and stacks part_dim-high rows).
 - search.c, search_intra.c: block width is LCU_WIDTH >> depth, not <<.
 - search_intra.c: the sample-count assert in uvg_get_isp_split_dim() now
   requires at least min_num_samples instead of fewer.
 - transform.c: the child cu_loc_t in the transform split is no longer const
   (it is written by uvg_cu_loc_ctor) and is built with halved dimensions.
 - Line counts per hunk are unchanged, so hunk headers and diffstat hold.

 src/cu.c           |  25 +++++++++++
 src/cu.h           |   3 ++
 src/intra.c        | 103 ++++++++++++++++++++++++++------------------
 src/search.c       |   8 +++-
 src/search.h       |   8 ++++
 src/search_inter.c |  18 ++++++--
 src/search_intra.c | 105 +++++++++++++++++++++++++++++++++++++++++----
 src/search_intra.h |   2 +
 src/transform.c    |  57 ++++++++++++++----------
 src/transform.h    |   3 +-
 10 files changed, 251 insertions(+), 81 deletions(-)

diff --git a/src/cu.c b/src/cu.c
index 40fce65e..f47f5cf3 100644
--- a/src/cu.c
+++ b/src/cu.c
@@ -251,3 +251,28 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
     }
   }
 }
+
+/**
+ * \brief Constructs cu_loc_t based on given parameters. Calculates chroma dimensions automatically.
+ *
+ * \param loc     Destination cu_loc.
+ * \param x       Block top left x coordinate.
+ * \param y       Block top left y coordinate.
+ * \param width   Block width.
+ * \param height  Block height.
+*/
+void uvg_cu_loc_ctor(cu_loc_t* loc, int x, int y, int width, int height)
+{
+  assert(x >= 0 && y >= 0 && width >= 0 && height >= 0 && "Cannot give negative coordinates or block dimensions.");
+  assert(!(width > LCU_WIDTH || height > LCU_WIDTH) && "Luma CU dimension exceeds maximum (dim > LCU_WIDTH).");
+  assert(!(width < 4 || height < 4) && "Luma CU dimension smaller than 4."); // TODO: change if luma size 2 is allowed
+
+  loc->x = x;
+  loc->y = y;
+  loc->width = width;
+  loc->height = height;
+  // TODO: when MTT is implemented, chroma dimensions can be minimum 2.
+  // Chroma width is half of luma width, when not at maximum depth.
+  loc->chroma_width = MAX(width >> 1, 4);
+  loc->chroma_height = MAX(height >> 1, 4);
+}
diff --git a/src/cu.h b/src/cu.h
index ddddaf55..6fe960e7 100644
--- a/src/cu.h
+++ b/src/cu.h
@@ -185,6 +185,7 @@ typedef struct
       uint8_t multi_ref_idx;
       int8_t mip_flag;
       int8_t mip_is_transposed;
+      int8_t isp_mode;
     } intra;
     struct {
       mv_t mv[2][2]; // \brief Motion vectors for L0 and L1
@@ -206,6 +207,8 @@ typedef struct {
   int8_t chroma_height;
 } cu_loc_t;
 
+void uvg_cu_loc_ctor(cu_loc_t *loc, int x, int y, int width, int height);
+
 #define CU_GET_MV_CAND(cu_info_ptr, reflist) \
   (((reflist) == 0) ? (cu_info_ptr)->inter.mv_cand0 : (cu_info_ptr)->inter.mv_cand1)
 
diff --git a/src/intra.c b/src/intra.c
index df7a971b..cab91005 100644
--- a/src/intra.c
+++ b/src/intra.c
@@ -37,6 +37,8 @@
 #include "image.h"
 #include "uvg_math.h"
 #include "mip_data.h"
+#include "search.h"
+#include "search_intra.h"
 #include "strategies/strategies-intra.h"
 #include "tables.h"
 #include "transform.h"
@@ -1471,9 +1473,7 @@ const cu_info_t* uvg_get_co_located_luma_cu(
 
 static void intra_recon_tb_leaf(
   encoder_state_t* const state,
-  int x,
-  int y,
-  int depth,
+  const cu_loc_t* cu_loc,
   lcu_t *lcu,
   color_t color,
   const intra_search_data_t* search_data,
@@ -1482,13 +1482,14 @@ static void intra_recon_tb_leaf(
   const uvg_config *cfg = &state->encoder_control->cfg;
   const int shift = color == COLOR_Y ? 0 : 1;
 
-  int log2width = LOG2_LCU_WIDTH - depth;
-  if (color != COLOR_Y && depth < MAX_PU_DEPTH) {
-    // Chroma width is half of luma width, when not at maximum depth.
-    log2width -= 1;
-  }
-  const int width = 1 << log2width;
-  const int height = width; // TODO: proper height for non-square blocks
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
+
+  const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
+  const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
+  int log2_width = uvg_g_convert_to_bit[width] + 2;
+  int log2_height = uvg_g_convert_to_bit[height] + 2;
+
   const int lcu_width = LCU_WIDTH >> shift;
 
   const vector2d_t luma_px = { x, y };
@@ -1510,25 +1511,20 @@ static void intra_recon_tb_leaf(
 
     // Copy extra ref lines, including ref line 1 and top left corner.
     for (int i = 0; i < MAX_REF_LINE_IDX; ++i) {
-      int height = (LCU_WIDTH >> depth) * 2 + MAX_REF_LINE_IDX;
-      height = MIN(height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
-      height = MIN(height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
+      int ref_height = height * 2 + MAX_REF_LINE_IDX;
+      ref_height = MIN(ref_height, (LCU_WIDTH - lcu_px.y + MAX_REF_LINE_IDX)); // Cut short if on bottom LCU edge. Cannot take references from below since they don't exist.
+      ref_height = MIN(ref_height, pic_px.y - luma_px.y + MAX_REF_LINE_IDX);
       uvg_pixels_blit(&frame->rec->y[(luma_px.y - MAX_REF_LINE_IDX) * frame->rec->stride + luma_px.x - (1 + i)],
         &extra_refs[i * 128],
-        1, height,
+        1, ref_height,
         frame->rec->stride, 1);
     }
   }
-  cu_loc_t loc = {
-    x, y,
-    width, height,
-    width, height,
-  };
 
-  uvg_intra_build_reference(&loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index);
+  uvg_intra_build_reference(cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index);
 
   uvg_pixel pred[32 * 32];
-  uvg_intra_predict(state, &refs, &loc, color, pred, search_data, lcu, tree_type);
+  uvg_intra_predict(state, &refs, cu_loc, color, pred, search_data, lcu, tree_type);
 
   const int index = lcu_px.x + lcu_px.y * lcu_width;
   uvg_pixel *block = NULL;
@@ -1548,9 +1544,9 @@ static void intra_recon_tb_leaf(
     default: break;
   }
 
-  uvg_pixels_blit(pred, block , width, width, width, lcu_width);
+  uvg_pixels_blit(pred, block , width, height, width, lcu_width);
   if(color != COLOR_Y && cfg->jccr) {
-    uvg_pixels_blit(pred, block2, width, width, width, lcu_width);
+    uvg_pixels_blit(pred, block2, width, height, width, lcu_width);
   }
 }
 
@@ -1583,6 +1579,7 @@ void uvg_intra_recon_cu(
 {
   const vector2d_t lcu_px = { SUB_SCU(x) >> (tree_type == UVG_CHROMA_T), SUB_SCU(y) >> (tree_type == UVG_CHROMA_T) };
   const int8_t width = LCU_WIDTH >> depth;
+  const int8_t height = width; // TODO: height for non-square blocks.
   if (cur_cu == NULL) {
     cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
   }
@@ -1620,6 +1617,7 @@ void uvg_intra_recon_cu(
       LCU_GET_CU_AT_PX(lcu, (lcu_px.x + offset) >> (tree_type == UVG_CHROMA_T), (lcu_px.y + offset) >> (tree_type == UVG_CHROMA_T))->cbf,
     };
 
+    // ISP_TODO: does not work with ISP yet, ask Joose when this is relevant.
     if (recon_luma && depth <= MAX_DEPTH) {
       cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y);
     }
@@ -1627,23 +1625,46 @@ void uvg_intra_recon_cu(
       cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U);
       cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V);
     }
-  } else {
-    const bool has_luma = recon_luma;
-    const bool has_chroma = recon_chroma && (x % 8 == 0 && y % 8 == 0);
-
-    // Process a leaf TU.
-    if (has_luma) {
-      intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_Y, search_data, tree_type);
-    }
-    if (has_chroma) {
-      intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_U, search_data, tree_type);
-      intra_recon_tb_leaf(state, x, y, depth, lcu, COLOR_V, search_data, tree_type);
-    }
-
-    uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
-                              search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma,
-                              x, y, depth, cur_cu, lcu,
-                              false,
-                              tree_type);
+    return;
   }
+  if (search_data->pred_cu.intra.isp_mode != ISP_MODE_NO_ISP && recon_luma ) {
+    // ISP split is done horizontally or vertically depending on ISP mode, 2 or 4 times depending on block dimensions.
+    // Small blocks are split only twice.
+    int split_type = search_data->pred_cu.intra.isp_mode;
+    int part_dim = uvg_get_isp_split_dim(width, height, split_type);
+    int limit = split_type == ISP_MODE_HOR ? height : width;
+    for (int part = 0; part < limit; part += part_dim) {
+      const int part_x = split_type == ISP_MODE_HOR ? x : x + part;
+      const int part_y = split_type == ISP_MODE_HOR ? y + part: y;
+      const int part_w = split_type == ISP_MODE_HOR ? width : part_dim;
+      const int part_h = split_type == ISP_MODE_HOR ? part_dim : height;
+
+      cu_loc_t loc;
+      uvg_cu_loc_ctor(&loc, part_x, part_y, part_w, part_h);
+
+      intra_recon_tb_leaf(state, &loc, lcu, COLOR_Y, search_data, tree_type);
+      uvg_quantize_lcu_residual(state, true, false, false,
+                                &loc, depth, cur_cu, lcu,
+                                false, tree_type);
+    }
+  }
+  const bool has_luma = recon_luma && search_data->pred_cu.intra.isp_mode == ISP_MODE_NO_ISP;
+  const bool has_chroma = recon_chroma && (x % 8 == 0 && y % 8 == 0);
+
+  cu_loc_t loc;
+  uvg_cu_loc_ctor(&loc, x, y, width, height);
+
+  // Process a leaf TU.
+  if (has_luma) {
+    intra_recon_tb_leaf(state, &loc, lcu, COLOR_Y, search_data, tree_type);
+  }
+  if (has_chroma) {
+    intra_recon_tb_leaf(state, &loc, lcu, COLOR_U, search_data, tree_type);
+    intra_recon_tb_leaf(state, &loc, lcu, COLOR_V, search_data, tree_type);
+  }
+
+  uvg_quantize_lcu_residual(state, has_luma, has_chroma && !(search_data->pred_cu.joint_cb_cr & 3),
+                            search_data->pred_cu.joint_cb_cr & 3 && state->encoder_control->cfg.jccr && has_chroma,
+                            &loc, depth, cur_cu, lcu,
+                            false, tree_type);
 }
diff --git a/src/search.c b/src/search.c
index cb9fc1d1..4fbf33f3 100644
--- a/src/search.c
+++ b/src/search.c
@@ -170,6 +170,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
         to->intra.multi_ref_idx = cu->intra.multi_ref_idx;
         to->intra.mip_flag = cu->intra.mip_flag;
         to->intra.mip_is_transposed = cu->intra.mip_is_transposed;
+        to->intra.isp_mode = cu->intra.isp_mode;
       } else {
         to->skipped = cu->skipped;
         to->merged = cu->merged;
@@ -1091,10 +1092,13 @@ static double search_cu(
         inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, depth) + inter_bitcost * state->lambda;
 
       }
-
+      cu_loc_t loc;
+      const int width = LCU_WIDTH >> depth;
+      const int height = width; // TODO: height for non-square blocks
+      uvg_cu_loc_ctor(&loc, x, y, width, height);
       uvg_quantize_lcu_residual(state,
                                 true, has_chroma && !cur_cu->joint_cb_cr,
-                                cur_cu->joint_cb_cr, x, y,
+                                cur_cu->joint_cb_cr, &loc,
                                 depth,
                                 NULL,
                                 lcu,
diff --git a/src/search.h b/src/search.h
index 7566fb96..2a5a6867 100644
--- a/src/search.h
+++ b/src/search.h
@@ -77,6 +77,14 @@ typedef struct unit_stats_map_t {
 #define NUM_MIP_MODES_FULL(width, height) (((width) == 4 && (height) == 4) ? 32 : ((width) == 4 || (height) == 4 || ((width) == 8 && (height) == 8) ? 16 : 12))
 #define NUM_MIP_MODES_HALF(width, height) (NUM_MIP_MODES_FULL((width), (height)) >> 1)
 
+// ISP related defines
+#define NUM_ISP_MODES 3
+#define ISP_MODE_NO_ISP 0
+#define ISP_MODE_HOR 1
+#define ISP_MODE_VER 2
+#define SPLIT_TYPE_HOR 1
+#define SPLIT_TYPE_VER 2
+
 void uvg_sort_modes(int8_t *__restrict modes, double *__restrict costs, uint8_t length);
 void uvg_sort_modes_intra_luma(int8_t *__restrict modes, int8_t *__restrict trafo, double *__restrict costs, uint8_t length);
 
diff --git a/src/search_inter.c b/src/search_inter.c
index 6508995f..7922f34b 100644
--- a/src/search_inter.c
+++ b/src/search_inter.c
@@ -1679,6 +1679,7 @@ static void search_pu_inter(encoder_state_t * const state,
   const uvg_config *cfg = &state->encoder_control->cfg;
   const videoframe_t * const frame = state->tile->frame;
   const int width_cu = LCU_WIDTH >> depth;
+  const int height_cu = width_cu; // TODO: non-square blocks
   const int x = PU_GET_X(part_mode, width_cu, x_cu, i_pu);
   const int y = PU_GET_Y(part_mode, width_cu, y_cu, i_pu);
   const int width = PU_GET_W(part_mode, width_cu, i_pu);
@@ -1826,7 +1827,11 @@ static void search_pu_inter(encoder_state_t * const state,
       cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1];
       uvg_lcu_fill_trdepth(lcu, x, y, depth, MAX(1, depth), UVG_BOTH_T);
       uvg_inter_recon_cu(state, lcu, x, y, width, true, false);
-      uvg_quantize_lcu_residual(state, true, false, false, x, y, depth, cur_pu, lcu, true, UVG_BOTH_T);
+
+      cu_loc_t loc;
+      uvg_cu_loc_ctor(&loc, x, y, width_cu, height_cu);
+
+      uvg_quantize_lcu_residual(state, true, false, false, &loc, depth, cur_pu, lcu, true, UVG_BOTH_T);
 
       if (cbf_is_set(cur_pu->cbf, depth, COLOR_Y)) {
         continue;
@@ -1836,7 +1841,7 @@ static void search_pu_inter(encoder_state_t * const state,
         uvg_quantize_lcu_residual(state,
                                   false, has_chroma,
                                   false, /*we are only checking for lack of coeffs so no need to check jccr*/
-                                  x, y, depth, cur_pu, lcu,
+                                  &loc, depth, cur_pu, lcu,
                                   true,
                                   UVG_BOTH_T);
         if (!cbf_is_set_any(cur_pu->cbf, depth)) {
@@ -2151,6 +2156,10 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
   const int x_px = SUB_SCU(x);
   const int y_px = SUB_SCU(y);
   const int width = LCU_WIDTH >> depth;
+  const int height = width; // TODO: non-square blocks
+  cu_loc_t loc;
+  uvg_cu_loc_ctor(&loc, x, y, width, height);
+
   cabac_data_t cabac_copy;
   memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy));
   cabac_data_t* cabac = &state->search_cabac;
@@ -2198,7 +2207,7 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
     uvg_quantize_lcu_residual(state,
                               true,
                               false,
-                              false, x, y,
+                              false, &loc,
                               depth,
                               cur_cu,
                               lcu,
@@ -2263,7 +2272,8 @@ void uvg_cu_cost_inter_rd2(encoder_state_t * const state,
   else {
     uvg_quantize_lcu_residual(state,
                               true, reconstruct_chroma,
-                              reconstruct_chroma && state->encoder_control->cfg.jccr, x, y,
+                              reconstruct_chroma && state->encoder_control->cfg.jccr,
+                              &loc,
                               depth,
                               cur_cu,
                               lcu,
diff --git a/src/search_intra.c b/src/search_intra.c
index 1ce4c8a5..f3c8c838 100644
--- a/src/search_intra.c
+++ b/src/search_intra.c
@@ -49,6 +49,7 @@
 #include "strategies/strategies-picture.h"
 #include "videoframe.h"
 #include "strategies/strategies-quant.h"
+#include "uvg_math.h"
 
 
 // Normalize SAD for comparison against SATD to estimate transform skip
@@ -247,6 +248,76 @@ static void derive_mts_constraints(cu_info_t *const pred_cu,
 }
 
 
+// ISP_TODO: move this function if it is used elsewhere
+bool can_use_isp(const int width, const int height, const int max_tr_size)
+{
+  const int log2_width = uvg_g_convert_to_bit[width] + 2;
+  const int log2_height = uvg_g_convert_to_bit[height] + 2;
+
+  // Each split block must have at least 16 samples.
+  bool not_enough_samples = (log2_width + log2_height <= 4);
+  bool cu_size_larger_than_max_tr_size = width > max_tr_size || height > max_tr_size;
+  if (not_enough_samples || cu_size_larger_than_max_tr_size) {
+    return false;
+  }
+  return true;
+}
+
+
+/**
+* \brief Returns ISP split partition size based on block dimensions and split type.
+*
+* Returns ISP split partition size based on block dimensions and split type.
+* Will fail if resulting partition size has less than 16 samples.
+*
+* \param width       Block width.
+* \param height      Block height.
+* \param split_type  Horizontal or vertical split.
+*/
+int uvg_get_isp_split_dim(const int width, const int height, const int split_type)
+{
+  bool divide_in_rows = split_type == SPLIT_TYPE_HOR;
+  int split_dim_size, non_split_dim_size, partition_size, div_shift = 2;
+
+  if (divide_in_rows) {
+    split_dim_size = height;
+    non_split_dim_size = width;
+  }
+  else {
+    split_dim_size = width;
+    non_split_dim_size = height;
+  }
+
+  // ISP_TODO: make a define for this. Depends on minimum transform block log2 side length
+  const int min_num_samples = 16; // Minimum allowed number of samples for split block
+  const int factor_to_min_samples = non_split_dim_size < min_num_samples ? min_num_samples >> uvg_math_floor_log2(non_split_dim_size) : 1;
+  partition_size = (split_dim_size >> div_shift) < factor_to_min_samples ? factor_to_min_samples : (split_dim_size >> div_shift);
+
+  assert((uvg_math_floor_log2(partition_size) + uvg_math_floor_log2(non_split_dim_size) >= uvg_math_floor_log2(min_num_samples)) &&
+         "Partition has less than allowed minimum number of samples.");
+  return partition_size;
+}
+
+
+// ISP_TODO: move this function if it is used elsewhere
+bool can_use_isp_with_lfnst(const int width, const int height, const int isp_mode)
+{
+  if (isp_mode == ISP_MODE_NO_ISP) {
+    return false;
+  }
+  const int tu_width = isp_mode == ISP_MODE_HOR ? width : uvg_get_isp_split_dim(width, height, SPLIT_TYPE_VER);
+  const int tu_height = isp_mode == ISP_MODE_HOR ? uvg_get_isp_split_dim(width, height, SPLIT_TYPE_HOR) : height;
+
+  // ISP_TODO: make a define for this or use existing
+  const int min_tb_size = 4;
+
+  if (!(tu_width >= min_tb_size && tu_height >= min_tb_size)) {
+    return false;
+  }
+  return true;
+}
+
+
 /**
  * \brief Perform search for best intra transform split configuration.
  *
@@ -325,6 +396,8 @@ static double search_intra_trdepth(
     {
       trafo = 0;
       num_transforms = (mts_enabled ? MTS_TR_NUM : 1);
+      // Do not do MTS search if ISP mode is used
+      num_transforms = pred_cu->intra.isp_mode == ISP_MODE_NO_ISP ? num_transforms : 1;
     }
     const int mts_start = trafo;
     //TODO: height
@@ -360,6 +433,11 @@ static double search_intra_trdepth(
       pred_cu->violates_lfnst_constrained_chroma = false;
       pred_cu->lfnst_last_scan_pos = false;
 
+      if (pred_cu->lfnst_idx != 0) {
+        // Cannot use ISP with LFNST for small blocks
+        pred_cu->intra.isp_mode = can_use_isp_with_lfnst(width, height, pred_cu->intra.isp_mode) ? pred_cu->intra.isp_mode : ISP_MODE_NO_ISP;
+      }
+
       for (trafo = mts_start; trafo < num_transforms; trafo++) {
         pred_cu->tr_idx = trafo;
         pred_cu->tr_skip = trafo == MTS_SKIP;
@@ -1371,18 +1449,27 @@ static int8_t search_intra_rdo(
   enum uvg_tree_type tree_type)
 {
   const int tr_depth = CLIP(1, MAX_PU_DEPTH, depth + state->encoder_control->cfg.tr_depth_intra);
+  const int width = LCU_WIDTH >> depth;
+  const int height = width; // TODO: height for non-square blocks
 
   for (int mode = 0; mode < modes_to_check; mode++) {
-    double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, x_px, y_px, depth, lcu);
-    search_data[mode].pred_cu.tr_idx = MTS_TR_NUM;
-    search_data[mode].bits = rdo_bitcost;
-    search_data[mode].cost = rdo_bitcost * state->lambda;
+    bool can_do_isp_search = search_data[mode].pred_cu.intra.mip_flag ? false: true; // Cannot use ISP with MIP
+    can_do_isp_search = search_data[mode].pred_cu.intra.multi_ref_idx == 0 ? can_do_isp_search : false; // Cannot use ISP with MRL
+    int max_isp_modes = can_do_isp_search && can_use_isp(width, height, 64 /*MAX_TR_SIZE*/) && state->encoder_control->cfg.isp ? NUM_ISP_MODES : 1;
 
-    double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type);
-    search_data[mode].cost += mode_cost;
-    if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(search_data[mode].pred_cu.cbf, depth)) {
-      modes_to_check = mode + 1;
-      break;
+    for (int isp_mode = 0; isp_mode < max_isp_modes; ++isp_mode) {
+      search_data[mode].pred_cu.intra.isp_mode = isp_mode;
+      double rdo_bitcost = uvg_luma_mode_bits(state, &search_data[mode].pred_cu, x_px, y_px, depth, lcu);
+      search_data[mode].pred_cu.tr_idx = MTS_TR_NUM;
+      search_data[mode].bits = rdo_bitcost;
+      search_data[mode].cost = rdo_bitcost * state->lambda;
+
+      double mode_cost = search_intra_trdepth(state, x_px, y_px, depth, tr_depth, MAX_INT, &search_data[mode], lcu, tree_type);
+      search_data[mode].cost += mode_cost;
+      if (state->encoder_control->cfg.intra_rdo_et && !cbf_is_set_any(search_data[mode].pred_cu.cbf, depth)) {
+        modes_to_check = mode + 1;
+        break;
+      }
     }
   }
 
diff --git a/src/search_intra.h b/src/search_intra.h
index 36470e63..307b5ad9 100644
--- a/src/search_intra.h
+++ b/src/search_intra.h
@@ -66,4 +66,6 @@ void uvg_search_cu_intra(
   lcu_t *lcu,
   enum uvg_tree_type tree_type);
 
+int uvg_get_isp_split_dim(const int width, const int height, const int split_type);
+
 #endif // SEARCH_INTRA_H_
diff --git a/src/transform.c b/src/transform.c
index c0adc121..abf793c2 100644
--- a/src/transform.c
+++ b/src/transform.c
@@ -1105,14 +1105,15 @@ int uvg_quantize_residual_trskip(
 static void quantize_tr_residual(
   encoder_state_t * const state,
   const color_t color,
-  const int32_t x,
-  const int32_t y,
+  const cu_loc_t *cu_loc,
   const uint8_t depth,
   cu_info_t *cur_pu,
   lcu_t* lcu,
   bool early_skip,
   enum uvg_tree_type tree_type)
 {
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
   const uvg_config *cfg    = &state->encoder_control->cfg;
   const int32_t shift      = color == COLOR_Y ? 0 : 1;
   const vector2d_t lcu_px  = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift};
@@ -1130,13 +1131,9 @@ static void quantize_tr_residual(
   // This should ensure that the CBF data doesn't get corrupted if this function
   // is called more than once.
 
-  int32_t tr_width;
-  if (color == COLOR_Y) {
-    tr_width = LCU_WIDTH >> depth;
-  } else {
-    const int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth);
-    tr_width = LCU_WIDTH_C >> chroma_depth;
-  }
+  int32_t tr_width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
+  int32_t tr_height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
+
   const int32_t lcu_width = LCU_WIDTH >> shift;
   const int8_t mode =
     (color == COLOR_Y) ? cur_pu->intra.mode : cur_pu->intra.mode_chroma;
@@ -1287,15 +1284,18 @@ void uvg_quantize_lcu_residual(
   const bool luma,
   const bool chroma,
   const bool jccr,
-  const int32_t x,
-  const int32_t y,
+  const cu_loc_t * cu_loc,
   const uint8_t depth,
   cu_info_t *cur_pu,
   lcu_t* lcu,
   bool early_skip,
   enum uvg_tree_type tree_type)
 {
-  const int32_t width = LCU_WIDTH >> depth;
+  const int x = cu_loc->x;
+  const int y = cu_loc->y;
+  const int width = cu_loc->width;
+  const int height = cu_loc->height;
+
   const vector2d_t lcu_px  = { SUB_SCU(x), SUB_SCU(y) };
 
   if (cur_pu == NULL) {
@@ -1324,14 +1324,22 @@ void uvg_quantize_lcu_residual(
 
     // Split transform and increase depth
     const int offset = width / 2;
-    const int32_t x2 = x + offset;
-    const int32_t y2 = y + offset;
+    for (int j = 0; j < 2; ++j) {
+      for (int i = 0; i < 2; ++i) {
+        cu_loc_t loc;
+        uvg_cu_loc_ctor(&loc, (x + i * offset), (y + j * offset), width >> 1, height >> 1);
+        // jccr is currently not supported if transform is split
+        uvg_quantize_lcu_residual(state, luma, chroma, 0, &loc, depth + 1, NULL, lcu, early_skip, tree_type);
+      }
+    }
 
-    // jccr is currently not supported if transform is split
-    uvg_quantize_lcu_residual(state, luma, chroma, 0, x,  y,  depth + 1, NULL, lcu, early_skip, tree_type);
-    uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y,  depth + 1, NULL, lcu, early_skip, tree_type);
-    uvg_quantize_lcu_residual(state, luma, chroma, 0, x,  y2, depth + 1, NULL, lcu, early_skip, tree_type);
-    uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y2, depth + 1, NULL, lcu, early_skip, tree_type);
+    //const int32_t x2 = x + offset;
+    //const int32_t y2 = y + offset;
+
+    //uvg_quantize_lcu_residual(state, luma, chroma, 0, x,  y,  depth + 1, NULL, lcu, early_skip, tree_type);
+    //uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y,  depth + 1, NULL, lcu, early_skip, tree_type);
+    //uvg_quantize_lcu_residual(state, luma, chroma, 0, x,  y2, depth + 1, NULL, lcu, early_skip, tree_type);
+    //uvg_quantize_lcu_residual(state, luma, chroma, 0, x2, y2, depth + 1, NULL, lcu, early_skip, tree_type);
 
     // Propagate coded block flags from child CUs to parent CU.
     uint16_t child_cbfs[3] = {
@@ -1348,15 +1356,18 @@ void uvg_quantize_lcu_residual(
 
   } else {
     // Process a leaf TU.
+    cu_loc_t loc;
+    uvg_cu_loc_ctor(&loc, x, y, width, height);
+
     if (luma) {
-      quantize_tr_residual(state, COLOR_Y, x, y, depth, cur_pu, lcu, early_skip, tree_type);
+      quantize_tr_residual(state, COLOR_Y, &loc, depth, cur_pu, lcu, early_skip, tree_type);
     }
     if (chroma) {
-      quantize_tr_residual(state, COLOR_U, x, y, depth, cur_pu, lcu, early_skip, tree_type);
-      quantize_tr_residual(state, COLOR_V, x, y, depth, cur_pu, lcu, early_skip, tree_type);
+      quantize_tr_residual(state, COLOR_U, &loc, depth, cur_pu, lcu, early_skip, tree_type);
+      quantize_tr_residual(state, COLOR_V, &loc, depth, cur_pu, lcu, early_skip, tree_type);
     }
     if (jccr && cur_pu->tr_depth == cur_pu->depth) {
-      quantize_tr_residual(state, COLOR_UV, x, y, depth, cur_pu, lcu, early_skip, tree_type);
+      quantize_tr_residual(state, COLOR_UV, &loc, depth, cur_pu, lcu, early_skip, tree_type);
     }
     if(chroma && jccr && cur_pu->tr_depth == cur_pu->depth) {
       assert( 0 && "Trying to quantize both jccr and regular at the same time.\n");
diff --git a/src/transform.h b/src/transform.h
index d3f44edf..61c50c04 100644
--- a/src/transform.h
+++ b/src/transform.h
@@ -89,8 +89,7 @@ void uvg_quantize_lcu_residual(
   bool luma,
   bool chroma,
   const bool jccr,
-  int32_t x,
-  int32_t y,
+  const cu_loc_t* cu_loc,
   uint8_t depth,
   cu_info_t *cur_cu,
   lcu_t* lcu,