From 6a0864839c513649787857b4715f823263b98d8a Mon Sep 17 00:00:00 2001 From: Joose Sainio Date: Thu, 8 Sep 2022 15:10:54 +0300 Subject: [PATCH] [mtt] Actually remove the last width dependency to depth --- src/encode_coding_tree.c | 83 ++-- src/encode_coding_tree.h | 7 +- src/encoderstate.c | 45 ++- src/encoderstate.h | 9 +- src/filter.c | 3 +- src/global.h | 1 - src/intra.c | 2 +- src/rdo.c | 5 +- src/rdo.h | 3 +- src/search.c | 353 ++++++++++-------- src/search.h | 28 +- src/search_inter.c | 18 +- src/search_intra.c | 95 ++--- src/search_intra.h | 4 +- src/strategies/avx2/quant-avx2.c | 2 +- .../generic/encode_coding_tree-generic.c | 6 +- .../generic/encode_coding_tree-generic.h | 2 +- src/strategies/generic/quant-generic.c | 6 +- src/strategies/strategies-encode.h | 2 +- src/transform.c | 27 +- src/transform.h | 2 - tests/test_cabac_state.sh | 4 +- 22 files changed, 360 insertions(+), 347 deletions(-) diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index fcb6d308..8c19df4b 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -47,12 +47,13 @@ #include "tables.h" #include "videoframe.h" -bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pred_cu) +bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pred_cu, const cu_loc_t* + const cu_loc) { uint32_t ts_max_size = 1 << state->encoder_control->cfg.trskip_max_size; const uint32_t max_size = 32; // CU::isIntra(cu) ? MTS_INTRA_MAX_CU_SIZE : MTS_INTER_MAX_CU_SIZE; - const uint32_t cu_width = LCU_WIDTH >> pred_cu->depth; - const uint32_t cu_height = LCU_WIDTH >> pred_cu->depth; + const uint32_t cu_width = cu_loc->width; + const uint32_t cu_height = cu_loc->height; //bool mts_allowed = cu.chType == CHANNEL_TYPE_LUMA && compID == COMPONENT_Y; uint8_t mts_type = state->encoder_control->cfg.mts; @@ -66,14 +67,16 @@ bool uvg_is_mts_allowed(const encoder_state_t * const state, cu_info_t *const pr return mts_allowed; } -static void encode_mts_idx(encoder_state_t * const state, +static void encode_mts_idx( + encoder_state_t * const state, cabac_data_t * const cabac, - const cu_info_t *const pred_cu) + const cu_info_t *const pred_cu, + const cu_loc_t* const cu_loc) { //TransformUnit &tu = *cu.firstTU; int mts_idx = pred_cu->tr_idx; - if (uvg_is_mts_allowed(state, (cu_info_t* const )pred_cu) && mts_idx != MTS_SKIP + if (uvg_is_mts_allowed(state, (cu_info_t* const )pred_cu, cu_loc) && mts_idx != MTS_SKIP && !pred_cu->violates_mts_coeff_constraint && pred_cu->mts_last_scan_pos ) @@ -498,7 +501,7 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac, static void encode_chroma_tu( encoder_state_t* const state, - const cu_loc_t *cu_loc, + const cu_loc_t * const cu_loc, int depth, cu_info_t* cur_pu, int8_t* scan_idx, @@ -541,8 +544,7 @@ static void encode_chroma_tu( } } else { - // const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; - const coeff_t coeff_uv[TR_MAX_WIDTH * TR_MAX_WIDTH]; + coeff_t coeff_uv[TR_MAX_WIDTH * TR_MAX_WIDTH]; uvg_get_sub_coeff(coeff_uv, coeff->joint_uv, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C); if (state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)) { cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma; @@ -700,7 +702,7 @@ static void encode_transform_coeff( } */ - int8_t split = (LCU_WIDTH >> depth > TR_MAX_WIDTH); + int8_t split = (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH); const int cb_flag_y = tree_type != UVG_CHROMA_T ? cbf_is_set(cur_pu->cbf, depth, COLOR_Y) : 0; const int cb_flag_u = tree_type != UVG_LUMA_T ?( cur_pu->joint_cb_cr ? (cur_pu->joint_cb_cr >> 1) & 1 : cbf_is_set(cur_cu->cbf, depth, COLOR_U)) : 0; @@ -1290,15 +1292,13 @@ bool uvg_write_split_flag( const cu_info_t * left_cu, const cu_info_t * above_cu, uint8_t split_flag, + const cu_loc_t* const cu_loc, int depth, - int cu_width, - int x, - int y, enum uvg_tree_type tree_type, double* bits_out) { - uint16_t abs_x = x + (state->tile->offset_x >> (tree_type == UVG_CHROMA_T)); - uint16_t abs_y = y + (state->tile->offset_y >> (tree_type == UVG_CHROMA_T)); + uint16_t abs_x = (cu_loc->x + state->tile->offset_x) >> (tree_type == UVG_CHROMA_T); + uint16_t abs_y = (cu_loc->y + state->tile->offset_y) >> (tree_type == UVG_CHROMA_T); double bits = 0; const encoder_control_t* const ctrl = state->encoder_control; // Implisit split flag when on border @@ -1311,10 +1311,12 @@ bool uvg_write_split_flag( // ToDo: update this when btt is actually used bool allow_btt = false;// when mt_depth < MAX_BT_DEPTH + const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; + const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; uint8_t implicit_split_mode = UVG_NO_SPLIT; //bool implicit_split = border; - bool bottom_left_available = ((abs_y + cu_width - 1) < (ctrl->in.height >> (tree_type == UVG_CHROMA_T))); + bool bottom_left_available = ((abs_y + cu_height - 1) < (ctrl->in.height >> (tree_type == UVG_CHROMA_T))); bool top_right_available = ((abs_x + cu_width - 1) < (ctrl->in.width >> (tree_type == UVG_CHROMA_T))); if (!bottom_left_available && !top_right_available && allow_qt) { @@ -1349,11 +1351,11 @@ bool uvg_write_split_flag( if (no_split && allow_split) { // Get left and top block split_flags and if they are present and true, increase model number // ToDo: should use height and width to increase model, PU_GET_W() ? - if (left_cu && LCU_WIDTH >> left_cu->depth < LCU_WIDTH >> depth) { + if (left_cu && left_cu->depth > depth) { split_model++; } - if (above_cu && LCU_WIDTH >> above_cu->depth < LCU_WIDTH >> depth) { + if (above_cu && above_cu->depth > depth) { split_model++; } @@ -1457,7 +1459,16 @@ void uvg_encode_coding_tree( // When not in MAX_DEPTH, insert split flag and split the blocks if needed if (depth != MAX_DEPTH && !(tree_type == UVG_CHROMA_T && depth == MAX_DEPTH -1)) { - const int split_flag = uvg_write_split_flag(state, cabac, left_cu, above_cu, (cur_cu->split_tree >> (split_tree.current_depth * 3)) & 7, depth, cu_width, x, y, tree_type,NULL); + const int split_flag = uvg_write_split_flag( + state, + cabac, + left_cu, + above_cu, + (cur_cu->split_tree >> (split_tree.current_depth * 3)) & 7, + cu_loc, + depth, + tree_type, + NULL); if (split_flag || border) { const int half_luma = cu_loc->width / 2; @@ -1597,8 +1608,8 @@ void uvg_encode_coding_tree( uvg_pixel *rec_base_v = &frame->rec->v[x / 2 + y / 2 * ctrl->in.width / 2]; // Luma - for (unsigned y_px = 0; y_px < LCU_WIDTH >> depth; y_px++) { - for (unsigned x_px = 0; x_px < LCU_WIDTH >> depth; x_px++) { + for (unsigned y_px = 0; y_px < cu_height; y_px++) { + for (unsigned x_px = 0; x_px < cu_width; x_px++) { uvg_bitstream_put(cabac->stream, base_y[x_px + y_px * ctrl->in.width], 8); rec_base_y[x_px + y_px * ctrl->in.width] = base_y[x_px + y_px * ctrl->in.width]; } @@ -1606,14 +1617,14 @@ void uvg_encode_coding_tree( // Chroma if (ctrl->chroma_format != UVG_CSP_400) { - for (unsigned y_px = 0; y_px < LCU_WIDTH >> (depth + 1); y_px++) { - for (unsigned x_px = 0; x_px < LCU_WIDTH >> (depth + 1); x_px++) { + for (unsigned y_px = 0; y_px < cu_loc->chroma_height; y_px++) { + for (unsigned x_px = 0; x_px < cu_loc->chroma_width; x_px++) { uvg_bitstream_put(cabac->stream, base_u[x_px + y_px * (ctrl->in.width >> 1)], 8); rec_base_u[x_px + y_px * (ctrl->in.width >> 1)] = base_u[x_px + y_px * (ctrl->in.width >> 1)]; } } - for (unsigned y_px = 0; y_px < LCU_WIDTH >> (depth + 1); y_px++) { - for (unsigned x_px = 0; x_px < LCU_WIDTH >> (depth + 1); x_px++) { + for (unsigned y_px = 0; y_px < cu_loc->chroma_height; y_px++) { + for (unsigned x_px = 0; x_px < cu_loc->chroma_width; x_px++) { uvg_bitstream_put(cabac->stream, base_v[x_px + y_px * (ctrl->in.width >> 1)], 8); rec_base_v[x_px + y_px * (ctrl->in.width >> 1)] = base_v[x_px + y_px * (ctrl->in.width >> 1)]; } @@ -1664,7 +1675,7 @@ void uvg_encode_coding_tree( encode_transform_coeff(state, &cu_loc, depth, 0, 0, 0, 0, coeff, tree_type, true, false, &luma_cbf_ctx, cu_loc); } - encode_mts_idx(state, cabac, cur_cu); + encode_mts_idx(state, cabac, cur_cu, cu_loc); } } else if (cur_cu->type == CU_INTRA) { @@ -1701,7 +1712,7 @@ void uvg_encode_coding_tree( if (tree_type != UVG_CHROMA_T) { bool lfnst_written = encode_lfnst_idx(state, cabac, cur_cu, x, y, depth, cu_width, cu_height, tree_type, COLOR_Y); } - encode_mts_idx(state, cabac, cur_cu); + encode_mts_idx(state, cabac, cur_cu, cu_loc); // For 4x4 the chroma PU/TU is coded after the last if (state->encoder_control->chroma_format != UVG_CSP_400 && @@ -1731,7 +1742,7 @@ void uvg_encode_coding_tree( end: - if (is_last_cu_in_qg(state, x, y, depth)) { + if (is_last_cu_in_qg(state, cu_loc)) { state->last_qp = cur_cu->qp; } @@ -1752,11 +1763,9 @@ double uvg_mock_encode_coding_unit( const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; - int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T); - int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T); - - const int cu_width = LCU_WIDTH >> depth; - + int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); + int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); + const cu_info_t* left_cu = NULL, *above_cu = NULL; if (x) { if(x_local || tree_type != UVG_CHROMA_T) { @@ -1787,16 +1796,14 @@ double uvg_mock_encode_coding_unit( left_cu, above_cu, 0, + cu_loc, depth, - cu_width >> (tree_type == UVG_CHROMA_T), - x >> (tree_type == UVG_CHROMA_T), - y >> (tree_type == UVG_CHROMA_T), tree_type, &bits); } // Encode skip flag - if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) { + if (state->frame->slicetype != UVG_SLICE_I && (cu_loc->width != 4 || cu_loc->height != 4)) { int8_t ctx_skip = 0; if (left_cu && left_cu->skipped) { @@ -1829,7 +1836,7 @@ double uvg_mock_encode_coding_unit( } } // Prediction mode - if (state->frame->slicetype != UVG_SLICE_I && cu_width != 4) { + if (state->frame->slicetype != UVG_SLICE_I && (cu_loc->width != 4 || cu_loc->height != 4)) { int8_t ctx_predmode = 0; diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index 6c0c2cd1..e75ad46a 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -40,7 +40,8 @@ #include "encoderstate.h" #include "global.h" -bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu); +bool uvg_is_mts_allowed(const encoder_state_t* const state, cu_info_t* const pred_cu, const cu_loc_t* + const cu_loc); bool uvg_is_lfnst_allowed( const encoder_state_t* const state, const cu_info_t* const pred_cu, @@ -105,10 +106,8 @@ bool uvg_write_split_flag( const cu_info_t* left_cu, const cu_info_t* above_cu, uint8_t split_flag, + const cu_loc_t* const cu_loc, int depth, - int cu_width, - int x, - int y, enum uvg_tree_type tree_type, double* bits_out); diff --git a/src/encoderstate.c b/src/encoderstate.c index dc3416e3..593ffd16 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -627,36 +627,45 @@ static void encode_sao(encoder_state_t * const state, * \param prev_qp -1 if QP delta has not been coded in current QG, * otherwise the QP of the current QG */ -static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *last_qp, int *prev_qp) +static void set_cu_qps(encoder_state_t *state, const cu_loc_t* const cu_loc, int *last_qp, int *prev_qp, const + int depth) { // Stop recursion if the CU is completely outside the frame. - if (x >= state->tile->frame->width || y >= state->tile->frame->height) return; + if (cu_loc->x >= state->tile->frame->width || cu_loc->y >= state->tile->frame->height) return; - cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, x, y); - const int cu_width = LCU_WIDTH >> depth; + cu_info_t *cu = uvg_cu_array_at(state->tile->frame->cu_array, cu_loc->x, cu_loc->y); + const int width = LCU_WIDTH >> cu->depth; if (depth <= state->frame->max_qp_delta_depth) { *prev_qp = -1; } - if (cu->depth > depth) { + if (cu_loc->width > width) { // Recursively process sub-CUs. - const int d = cu_width >> 1; - set_cu_qps(state, x, y, depth + 1, last_qp, prev_qp); - set_cu_qps(state, x + d, y, depth + 1, last_qp, prev_qp); - set_cu_qps(state, x, y + d, depth + 1, last_qp, prev_qp); - set_cu_qps(state, x + d, y + d, depth + 1, last_qp, prev_qp); + const int half_width = cu_loc->width >> 1; + const int half_height = cu_loc->height >> 1; + cu_loc_t split_cu_loc; + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); + set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); + set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height); + set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); + set_cu_qps(state, &split_cu_loc, last_qp, prev_qp, depth + 1); } else { bool cbf_found = *prev_qp >= 0; + int y_limit = cu_loc->y + cu_loc->height; + int x_limit = cu_loc->x + cu_loc->width; if (cu->tr_depth > depth) { // The CU is split into smaller transform units. Check whether coded // block flag is set for any of the TUs. const int tu_width = LCU_WIDTH >> cu->tr_depth; - for (int y_scu = y; !cbf_found && y_scu < y + cu_width; y_scu += tu_width) { - for (int x_scu = x; !cbf_found && x_scu < x + cu_width; x_scu += tu_width) { + for (int y_scu = cu_loc->y; !cbf_found && y_scu < y_limit; y_scu += tu_width) { + for (int x_scu = cu_loc->x; !cbf_found && x_scu < x_limit; x_scu += tu_width) { cu_info_t *tu = uvg_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu); if (cbf_is_set_any(tu->cbf, cu->depth)) { cbf_found = true; @@ -671,18 +680,18 @@ static void set_cu_qps(encoder_state_t *state, int x, int y, int depth, int *las if (cbf_found) { *prev_qp = qp = cu->qp; } else { - qp = uvg_get_cu_ref_qp(state, x, y, *last_qp); + qp = uvg_get_cu_ref_qp(state, cu_loc->x, cu_loc->y, *last_qp); } // Set the correct QP for all state->tile->frame->cu_array elements in // the area covered by the CU. - for (int y_scu = y; y_scu < y + cu_width; y_scu += SCU_WIDTH) { - for (int x_scu = x; x_scu < x + cu_width; x_scu += SCU_WIDTH) { + for (int y_scu = cu_loc->y; y_scu < y_limit; y_scu += SCU_WIDTH) { + for (int x_scu = cu_loc->x; x_scu < x_limit; x_scu += SCU_WIDTH) { uvg_cu_array_at(state->tile->frame->cu_array, x_scu, y_scu)->qp = qp; } } - if (is_last_cu_in_qg(state, x, y, depth)) { + if (is_last_cu_in_qg(state, cu_loc)) { *last_qp = cu->qp; } } @@ -812,7 +821,9 @@ static void encoder_state_worker_encode_lcu_search(void * opaque) if (state->frame->max_qp_delta_depth >= 0) { int last_qp = state->last_qp; int prev_qp = -1; - set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp); + cu_loc_t cu_loc; + uvg_cu_loc_ctor(&cu_loc, lcu->position_px.x, lcu->position_px.y, LCU_WIDTH, LCU_WIDTH); + set_cu_qps(state, &cu_loc, &last_qp, &prev_qp, 0); } if (state->tile->frame->lmcs_aps->m_sliceReshapeInfo.sliceReshaperEnableFlag) { diff --git a/src/encoderstate.h b/src/encoderstate.h index 55d265e3..6cad3e36 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -401,14 +401,13 @@ static INLINE bool encoder_state_must_write_vps(const encoder_state_t *state) * \param depth depth in the CU tree * \return true, if it's the last CU in its QG, otherwise false */ -static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, int x, int y, int depth) +static INLINE bool is_last_cu_in_qg(const encoder_state_t *state, const cu_loc_t* const cu_loc) { if (state->frame->max_qp_delta_depth < 0) return false; - - const int cu_width = LCU_WIDTH >> depth; + const int qg_width = LCU_WIDTH >> state->frame->max_qp_delta_depth; - const int right = x + cu_width; - const int bottom = y + cu_width; + const int right = cu_loc->x + cu_loc->width; + const int bottom = cu_loc->y + cu_loc->height; return (right % qg_width == 0 || right >= state->tile->frame->width) && (bottom % qg_width == 0 || bottom >= state->tile->frame->height); } diff --git a/src/filter.c b/src/filter.c index 26a57100..b366dd4e 100644 --- a/src/filter.c +++ b/src/filter.c @@ -856,8 +856,7 @@ static void filter_deblock_edge_luma(encoder_state_t * const state, uint8_t max_filter_length_Q = 0; const int cu_size = LCU_WIDTH >> cu_q->depth; // TODO: NON square - const int pu_size = dir == EDGE_HOR ? cu_size - : cu_size; + const int pu_size = dir == EDGE_HOR ? cu_size : cu_size; const int pu_pos = dir == EDGE_HOR ? y_coord : x_coord; get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord, diff --git a/src/global.h b/src/global.h index 65ca2fa9..e4a11b20 100644 --- a/src/global.h +++ b/src/global.h @@ -273,7 +273,6 @@ typedef int32_t mv_t; #define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value)) #define CLIP_TO_QP(value) CLIP(0, 51, (value)) #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; } -#define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) #define CEILDIV(x,y) (((x) + (y) - 1) / (y)) diff --git a/src/intra.c b/src/intra.c index 764ac072..8f87104f 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1555,7 +1555,7 @@ void uvg_intra_predict( uvg_pixels_blit(&state->tile->frame->cclm_luma_rec[x / 2 + (y * stride) / 4], dst, width, width, stride / 2, width); if (data->pred_cu.depth != data->pred_cu.tr_depth || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) { predict_cclm( - state, color, width, width, x, y, stride, intra_mode, lcu, refs, dst, + state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst, (cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1], tree_type); } diff --git a/src/rdo.c b/src/rdo.c index f7eb2a9e..262b4f83 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -297,7 +297,7 @@ out: static INLINE double get_coeff_cabac_cost( const encoder_state_t * const state, const coeff_t *coeff, - cu_loc_t *cu_loc, + const cu_loc_t* const cu_loc, color_t color, int8_t scan_mode, int8_t tr_skip, @@ -415,7 +415,7 @@ double uvg_get_coeff_cost( const encoder_state_t * const state, const coeff_t *coeff, cu_info_t* cur_tu, - cu_loc_t *cu_loc, + const cu_loc_t* const cu_loc, color_t color, int8_t scan_mode, int8_t tr_skip, @@ -1409,7 +1409,6 @@ void uvg_rdoq( int8_t color, int8_t scan_mode, int8_t block_type, - int8_t tr_depth, uint16_t cbf, uint8_t lfnst_idx) { diff --git a/src/rdo.h b/src/rdo.h index eb9714f6..2b557651 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -60,7 +60,6 @@ void uvg_rdoq( int8_t type, int8_t scan_mode, int8_t block_type, - int8_t tr_depth, uint16_t cbf, uint8_t lfnst_idx); @@ -73,7 +72,7 @@ double uvg_get_coeff_cost( const encoder_state_t * const state, const coeff_t *coeff, cu_info_t* cur_tu, - cu_loc_t *cu_loc, + const cu_loc_t* const cu_loc, color_t color, int8_t scan_mode, int8_t tr_skip, diff --git a/src/search.c b/src/search.c index 1d992077..11d934b9 100644 --- a/src/search.c +++ b/src/search.c @@ -63,30 +63,39 @@ static const int INTRA_THRESHOLD = 8; -static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) +static INLINE void copy_cu_info(lcu_t *from, lcu_t *to, const cu_loc_t* const cu_loc, enum uvg_tree_type + tree_type) { - for (int y = y_local; y < y_local + width; y += SCU_WIDTH) { - for (int x = x_local; x < x_local + width; x += SCU_WIDTH) { + const int y_limit = (cu_loc->local_y + cu_loc->height) >> (tree_type == UVG_CHROMA_T); + const int x_limit = (cu_loc->local_x + cu_loc->width) >> (tree_type == UVG_CHROMA_T); + for (int y = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); y < y_limit; y += SCU_WIDTH) { + for (int x = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); x < x_limit; x += SCU_WIDTH) { *LCU_GET_CU_AT_PX(to, x, y) = *LCU_GET_CU_AT_PX(from, x, y); } } } -static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *from, lcu_t *to, enum uvg_tree_type - tree_type) +static INLINE void copy_cu_pixels( + lcu_t *from, + lcu_t *to, + const cu_loc_t* const cu_loc, + enum uvg_tree_type + tree_type) { + const int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); + const int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); const int luma_index = x_local + y_local * LCU_WIDTH; const int chroma_index = tree_type == UVG_CHROMA_T ? x_local + y_local * LCU_WIDTH_C : (x_local / 2) + (y_local / 2) * LCU_WIDTH_C; if(tree_type != UVG_CHROMA_T) { uvg_pixels_blit(&from->rec.y[luma_index], &to->rec.y[luma_index], - width, width, LCU_WIDTH, LCU_WIDTH); + cu_loc->width, cu_loc->height, LCU_WIDTH, LCU_WIDTH); } if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { uvg_pixels_blit(&from->rec.u[chroma_index], &to->rec.u[chroma_index], - width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); + cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C); uvg_pixels_blit(&from->rec.v[chroma_index], &to->rec.v[chroma_index], - width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); + cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C, LCU_WIDTH_C); } } @@ -103,8 +112,8 @@ static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { //const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T)); - const int chroma_x = cu_loc->x >> (tree_type != UVG_CHROMA_T); - const int chroma_y = cu_loc->y >> (tree_type != UVG_CHROMA_T); + const int chroma_x = (cu_loc->x >> 1) & ~3; + const int chroma_y = (cu_loc->y >> 1) & ~3; const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C); copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C); @@ -118,15 +127,17 @@ static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to /** * Copy all non-reference CU data from next level to current level. */ -static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree, bool joint, enum - uvg_tree_type tree_type) +static void work_tree_copy_up( + lcu_t *work_tree, + bool joint, + enum + uvg_tree_type tree_type, + const cu_loc_t* const cu_loc, + const int depth) { - const int width = LCU_WIDTH >> depth; - cu_loc_t loc; - uvg_cu_loc_ctor(&loc, x_local, y_local, width, width); - copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); - copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth], tree_type); - copy_cu_coeffs(&loc, &work_tree[depth + 1], &work_tree[depth], joint, tree_type); + copy_cu_info (&work_tree[depth + 1], &work_tree[depth], cu_loc, tree_type); + copy_cu_pixels(&work_tree[depth + 1], &work_tree[depth], cu_loc, tree_type); + copy_cu_coeffs(cu_loc, &work_tree[depth + 1], &work_tree[depth], joint, tree_type); } @@ -134,24 +145,32 @@ static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_t /** * Copy all non-reference CU data from current level to all lower levels. */ -static void work_tree_copy_down(int x_local, int y_local, int depth, lcu_t *work_tree, enum uvg_tree_type - tree_type) +static void work_tree_copy_down( + int depth, + lcu_t *work_tree, + enum uvg_tree_type + tree_type, + const cu_loc_t* const cu_loc) { - const int width = tree_type != UVG_CHROMA_T ? LCU_WIDTH >> depth : LCU_WIDTH_C >> 1; for (int i = depth + 1; i <= MAX_PU_DEPTH; i++) { - copy_cu_info (x_local, y_local, width, &work_tree[depth], &work_tree[i]); - copy_cu_pixels(x_local, y_local, LCU_WIDTH >> depth, &work_tree[depth], &work_tree[i], tree_type); + copy_cu_info (&work_tree[depth], &work_tree[i], cu_loc, tree_type); + copy_cu_pixels(&work_tree[depth], &work_tree[i], cu_loc, tree_type); } } -void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type - tree_type) +void uvg_lcu_fill_trdepth( + lcu_t *lcu, + const cu_loc_t* const cu_loc, + uint8_t tr_depth, + enum uvg_tree_type + tree_type) { - const int x_local = SUB_SCU(x_px); - const int y_local = SUB_SCU(y_px); - const unsigned width = (tree_type != UVG_CHROMA_T ? LCU_WIDTH : LCU_WIDTH_C) >> depth; + const int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); + const int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); + const unsigned width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; + const unsigned height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; - for (unsigned y = 0; y < width; y += SCU_WIDTH) { + for (unsigned y = 0; y < height; y += SCU_WIDTH) { for (unsigned x = 0; x < width; x += SCU_WIDTH) { LCU_GET_CU_AT_PX(lcu, x_local + x, y_local + y)->tr_depth = tr_depth; } @@ -167,6 +186,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in to->type = cu->type; to->depth = cu->depth; to->qp = cu->qp; + to->split_tree = cu->split_tree; //to->tr_idx = cu->tr_idx; to->lfnst_idx = cu->lfnst_idx; to->lfnst_last_scan_pos = cu->lfnst_last_scan_pos; @@ -214,34 +234,37 @@ static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned wid //Calculates cost for all zero coeffs -static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree, const int x, const int y, +static double cu_zero_coeff_cost( + const encoder_state_t *state, + lcu_t *work_tree, + const cu_loc_t* const cu_loc, const int depth) { - int x_local = SUB_SCU(x); - int y_local = SUB_SCU(y); - int cu_width = LCU_WIDTH >> depth; lcu_t *const lcu = &work_tree[depth]; + const int y_local = cu_loc->local_y; + const int x_local = cu_loc->local_x; + const int luma_index = y_local * LCU_WIDTH + x_local; const int chroma_index = (y_local / 2) * LCU_WIDTH_C + (x_local / 2); double ssd = 0.0; ssd += UVG_LUMA_MULT * uvg_pixels_calc_ssd( &lcu->ref.y[luma_index], &lcu->rec.y[luma_index], - LCU_WIDTH, LCU_WIDTH, cu_width + LCU_WIDTH, LCU_WIDTH, cu_loc->width ); - if (x % 8 == 0 && y % 8 == 0 && state->encoder_control->chroma_format != UVG_CSP_400) { + if (y_local % 8 == 0 && x_local % 8 == 0 && state->encoder_control->chroma_format != UVG_CSP_400) { ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd( &lcu->ref.u[chroma_index], &lcu->rec.u[chroma_index], - LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2 + LCU_WIDTH_C, LCU_WIDTH_C, cu_loc->chroma_width ); ssd += UVG_CHROMA_MULT * uvg_pixels_calc_ssd( &lcu->ref.v[chroma_index], &lcu->rec.v[chroma_index], - LCU_WIDTH_C, LCU_WIDTH_C, cu_width / 2 + LCU_WIDTH_C, LCU_WIDTH_C, cu_loc->chroma_width ); } // Save the pixels at a lower level of the working tree. - copy_cu_pixels(x_local, y_local, cu_width, lcu, &work_tree[depth + 1], UVG_BOTH_T); + copy_cu_pixels(lcu, &work_tree[depth + 1], cu_loc, UVG_BOTH_T); return ssd; } @@ -295,46 +318,45 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, * Takes into account SSD of reconstruction and the cost of encoding whatever * prediction unit data needs to be coded. */ -double uvg_cu_rd_cost_luma(const encoder_state_t *const state, - const int x_px, const int y_px, const int depth, - const cu_info_t *const pred_cu, - lcu_t *const lcu, - uint8_t isp_cbf) +double uvg_cu_rd_cost_luma( + const encoder_state_t *const state, + const cu_loc_t* const cu_loc, + const cu_info_t *const pred_cu, + lcu_t *const lcu, + uint8_t isp_cbf) { - const int width = LCU_WIDTH >> depth; - const int height = width; // TODO: height for non-square blocks const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); cabac_data_t* cabac = (cabac_data_t *)&state->search_cabac; - - cu_loc_t loc; - uvg_cu_loc_ctor(&loc, x_px, y_px, width, height); - + // cur_cu is used for TU parameters. - cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); + cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y); double coeff_bits = 0; double tr_tree_bits = 0; - // Check that lcu is not in - assert(x_px >= 0 && x_px < LCU_WIDTH); - assert(y_px >= 0 && y_px < LCU_WIDTH); + // Check that lcu is not in - const uint8_t tr_depth = tr_cu->tr_depth - depth; - - if (tr_depth > 0) { - int offset = width / 2; + if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) { double sum = 0; + const int half_width = cu_loc->width >> 1; + const int half_height = cu_loc->height >> 1; + cu_loc_t split_cu_loc; - sum += uvg_cu_rd_cost_luma(state, x_px, y_px, depth + 1, pred_cu, lcu, isp_cbf); - sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu, isp_cbf); - sum += uvg_cu_rd_cost_luma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu, isp_cbf); - sum += uvg_cu_rd_cost_luma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu, isp_cbf); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); + sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); + sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y+ half_height, half_width, half_height); + sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); + sum += uvg_cu_rd_cost_luma(state, &split_cu_loc, pred_cu, lcu, isp_cbf); return sum + tr_tree_bits * state->lambda; } // Add transform_tree cbf_luma bit cost. if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { + const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; const int is_tr_split = tr_cu->tr_depth - tr_cu->depth; int is_set = cbf_is_set(pred_cu->cbf, depth, COLOR_Y); if (pred_cu->type == CU_INTRA || @@ -347,7 +369,9 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, CABAC_FBITS_UPDATE(cabac, ctx, is_set, tr_tree_bits, "cbf_y_search"); } - if (is_set && state->encoder_control->cfg.trskip_enable && width <= (1 << state->encoder_control->cfg.trskip_max_size)) { + if (is_set && state->encoder_control->cfg.trskip_enable + && cu_loc->width <= (1 << state->encoder_control->cfg.trskip_max_size) + && cu_loc->height <= (1 << state->encoder_control->cfg.trskip_max_size)) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, pred_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag"); } } @@ -367,28 +391,28 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, // SSD between reconstruction and original int ssd = 0; if (!state->encoder_control->cfg.lossless) { - int index = y_px * LCU_WIDTH + x_px; + int index = cu_loc->local_y * LCU_WIDTH + cu_loc->local_x; ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index], LCU_WIDTH, LCU_WIDTH, - width); + cu_loc->width); } if (!skip_residual_coding) { - int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); + int8_t luma_scan_mode = SCAN_DIAG; if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { //const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; const coeff_t* coeffs = lcu->coeff.y; - coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, coeffs, NULL, cu_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); } else { int split_type = pred_cu->intra.isp_mode; - int split_limit = uvg_get_isp_split_num(width, height, split_type, true); + int split_limit = uvg_get_isp_split_num(cu_loc->width, cu_loc->height, split_type, true); for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true); + uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, cu_loc->width, cu_loc->height, i, split_type, true); const int part_x = split_loc.x; const int part_y = split_loc.y; @@ -406,34 +430,32 @@ double uvg_cu_rd_cost_luma(const encoder_state_t *const state, } -double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, - const int x_px, const int y_px, const int depth, - cu_info_t *const pred_cu, - lcu_t *const lcu) +double uvg_cu_rd_cost_chroma( + const encoder_state_t *const state, + cu_info_t *const pred_cu, + lcu_t *const lcu, + const cu_loc_t * const cu_loc) { - const vector2d_t lcu_px = { (x_px & ~7) / 2, (y_px & ~7) / 2 }; - const int width = (depth < MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth; - const int height = width; // TODO: height for non-square blocks - cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); + const vector2d_t lcu_px = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; + const int width = cu_loc->chroma_width; + const int height = cu_loc->chroma_height; + cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); - - cu_loc_t loc; - uvg_cu_loc_ctor(&loc, x_px, y_px, width, height); - + double tr_tree_bits = 0; double coeff_bits = 0; - - assert(x_px >= 0 && x_px < LCU_WIDTH); - assert(y_px >= 0 && y_px < LCU_WIDTH); - - if (depth == 4 && (x_px % 8 == 0 || y_px % 8 == 0)) { + + if (cu_loc->width == 4 && cu_loc->height == 4 && (cu_loc->x % 8 == 0 || cu_loc->y % 8 == 0)) { // For MAX_PU_DEPTH calculate chroma for previous depth for the first // block and return 0 cost for all others. return 0; } + + const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; int u_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 2) >> 1 : cbf_is_set(pred_cu->cbf, depth, COLOR_U); int v_is_set = pred_cu->joint_cb_cr ? (pred_cu->joint_cb_cr & 1) : cbf_is_set(pred_cu->cbf, depth, COLOR_V); + // See luma for why the second condition if (!skip_residual_coding) { const int tr_depth = depth - pred_cu->depth; @@ -450,14 +472,21 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, } - if (tr_cu->tr_depth > depth) { - int offset = LCU_WIDTH >> (depth + 1); + if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) { double sum = 0; + // Recursively process sub-CUs. + const int half_width = cu_loc->width >> 1; + const int half_height = cu_loc->height >> 1; + cu_loc_t split_cu_loc; - sum += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu); - sum += uvg_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu); - sum += uvg_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu); - sum += uvg_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); + sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); + sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height); + sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); + sum += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, &split_cu_loc); return sum + tr_tree_bits * state->lambda; } @@ -487,14 +516,17 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, if (!skip_residual_coding) { int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); - //const int index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); + + // We need the rounded & shifted coordinates for the chroma coeff calculation + cu_loc_t chroma_loc; + uvg_cu_loc_ctor(&chroma_loc, lcu_px.x, lcu_px.y, cu_loc->width, cu_loc->height); if((pred_cu->joint_cb_cr & 3) == 0){ - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU); - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, NULL, cu_loc, 2, scan_order, 0, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, NULL, cu_loc, 2, scan_order, 0, COEFF_ORDER_CU); } else { - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, NULL, &loc, 2, scan_order, 0, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, NULL, cu_loc, 2, scan_order, 0, COEFF_ORDER_CU); } } @@ -507,39 +539,30 @@ double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, static double cu_rd_cost_tr_split_accurate( const encoder_state_t* const state, - const int x_px, - const int y_px, - const int depth, const cu_info_t* const pred_cu, lcu_t* const lcu, enum uvg_tree_type tree_type, - uint8_t isp_cbf) { - const int width = LCU_WIDTH >> depth; - const int height = width; // TODO: height for non-square blocks - - cu_loc_t loc; - uvg_cu_loc_ctor(&loc, x_px, y_px, width, height); - + uint8_t isp_cbf, + const cu_loc_t* const cu_loc) { + const int width = cu_loc->width; + const int height = cu_loc->height; // TODO: height for non-square blocks + const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); // cur_cu is used for TU parameters. - cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px); + cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x >> (tree_type == UVG_CHROMA_T), cu_loc->local_y >> (tree_type == UVG_CHROMA_T)); double coeff_bits = 0; double tr_tree_bits = 0; - // Check that lcu is not in - assert(x_px >= 0 && x_px < LCU_WIDTH); - assert(y_px >= 0 && y_px < LCU_WIDTH); - - const uint8_t tr_depth = tr_cu->tr_depth - depth; - + const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; + const int cb_flag_u = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr >> 1 : cbf_is_set(tr_cu->cbf, depth, COLOR_U); const int cb_flag_v = tr_cu->joint_cb_cr ? tr_cu->joint_cb_cr & 1 : cbf_is_set(tr_cu->cbf, depth, COLOR_V); cabac_data_t* cabac = (cabac_data_t*)&state->search_cabac; { - int cbf = cbf_is_set_any(pred_cu->cbf, depth); + int cbf = cbf_is_set_any(tr_cu->cbf, depth); // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual if (pred_cu->type != CU_INTRA && (!pred_cu->merged)) { @@ -548,24 +571,30 @@ static double cu_rd_cost_tr_split_accurate( } - bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (x_px % 8 && y_px % 8)) && tree_type != UVG_LUMA_T; + bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400 && (depth != 4 || (cu_loc->x % 8 && cu_loc->y % 8)) && tree_type != UVG_LUMA_T; if( !skip_residual_coding && has_chroma) { - if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) { + if(tr_cu->tr_depth == depth) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cb[0]), cb_flag_u, tr_tree_bits, "cbf_cb"); } - if(tr_cu->depth == depth || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) { + if(tr_cu->tr_depth == depth) { CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.qt_cbf_model_cr[cb_flag_u]), cb_flag_v, tr_tree_bits, "cbf_cr"); } } - if (tr_depth > 0) { - int offset = LCU_WIDTH >> (depth + 1); + if (cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH) { double sum = 0; - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px, depth + 1, pred_cu, lcu, tree_type, isp_cbf); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px, depth + 1, pred_cu, lcu, tree_type, isp_cbf); - sum += cu_rd_cost_tr_split_accurate(state, x_px, y_px + offset, depth + 1, pred_cu, lcu, tree_type, isp_cbf); - sum += cu_rd_cost_tr_split_accurate(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu, tree_type, isp_cbf); + const int half_width = cu_loc->width >> 1; + const int half_height = cu_loc->height >> 1; + cu_loc_t split_cu_loc; + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y, half_width, half_height); + sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y, half_width, half_height); + sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x, cu_loc->y + half_height, half_width, half_height); + sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); + uvg_cu_loc_ctor(&split_cu_loc, cu_loc->x + half_width, cu_loc->y + half_height, half_width, half_height); + sum += cu_rd_cost_tr_split_accurate(state, pred_cu, lcu, tree_type, isp_cbf, &split_cu_loc); return sum + tr_tree_bits * state->lambda; } const int cb_flag_y = cbf_is_set(tr_cu->cbf, depth, COLOR_Y) && tree_type != UVG_CHROMA_T; @@ -573,7 +602,7 @@ static double cu_rd_cost_tr_split_accurate( const bool is_isp = !(pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP); // Add transform_tree cbf_luma bit cost. if (!is_isp) { - const int is_tr_split = depth - tr_cu->depth; + const int is_tr_split = cu_loc->width > TR_MAX_WIDTH || cu_loc->height > TR_MAX_WIDTH; if ((pred_cu->type == CU_INTRA || is_tr_split || cb_flag_u || @@ -610,7 +639,7 @@ static double cu_rd_cost_tr_split_accurate( // SSD between reconstruction and original unsigned luma_ssd = 0; if (!state->encoder_control->cfg.lossless && tree_type != UVG_CHROMA_T) { - int index = y_px * LCU_WIDTH + x_px; + int index = cu_loc->local_x + LCU_WIDTH * cu_loc->local_y; luma_ssd = uvg_pixels_calc_ssd(&lcu->ref.y[index], &lcu->rec.y[index], LCU_WIDTH, LCU_WIDTH, width); @@ -623,12 +652,12 @@ static double cu_rd_cost_tr_split_accurate( if (can_use_tr_skip) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_luma, tr_cu->tr_idx == MTS_SKIP, tr_tree_bits, "transform_skip_flag"); } - int8_t luma_scan_mode = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth); + int8_t luma_scan_mode = SCAN_DIAG; if (pred_cu->type == CU_INTER || pred_cu->intra.isp_mode == ISP_MODE_NO_ISP) { //const coeff_t* coeffs = &lcu->coeff.y[xy_to_zorder(LCU_WIDTH, x_px, y_px)]; const coeff_t* coeffs = lcu->coeff.y; - coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, &loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, coeffs, tr_cu, cu_loc, 0, luma_scan_mode, pred_cu->tr_idx == MTS_SKIP, COEFF_ORDER_CU); } else { int split_type = pred_cu->intra.isp_mode; @@ -636,7 +665,7 @@ static double cu_rd_cost_tr_split_accurate( for (int i = 0; i < split_limit; ++i) { cu_loc_t split_loc; - uvg_get_isp_split_loc(&split_loc, x_px, y_px, width, height, i, split_type, true); + uvg_get_isp_split_loc(&split_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, true); const int part_x = split_loc.x; const int part_y = split_loc.y; @@ -649,8 +678,8 @@ static double cu_rd_cost_tr_split_accurate( } } - if(depth == 4 || tree_type == UVG_LUMA_T) { - if (uvg_is_lfnst_allowed(state, tr_cu, width, width, x_px, y_px, tree_type, COLOR_Y, lcu)) { + if(cu_loc->width == 4 || tree_type == UVG_LUMA_T) { + if (uvg_is_lfnst_allowed(state, tr_cu, width, height, cu_loc->local_x, cu_loc->local_y, tree_type, COLOR_Y, lcu)) { const int lfnst_idx = tr_cu->lfnst_idx; CABAC_FBITS_UPDATE( cabac, @@ -672,14 +701,17 @@ static double cu_rd_cost_tr_split_accurate( unsigned chroma_ssd = 0; if(has_chroma) { - const vector2d_t lcu_px = { (x_px >> (tree_type != UVG_CHROMA_T)) & ~3, (y_px >> (tree_type != UVG_CHROMA_T)) &~3 }; - uvg_cu_loc_ctor(&loc, lcu_px.x, lcu_px.y, width, height); - const int chroma_width = MAX(4, LCU_WIDTH >> (depth + 1)); - const int chroma_height = chroma_width; // TODO: height for non-square blocks - int8_t scan_order = uvg_get_scan_order(pred_cu->type, pred_cu->intra.mode_chroma, depth); + cu_loc_t chroma_loc; + const vector2d_t lcu_px = { (cu_loc->local_x >> 1) & ~3, (cu_loc->local_y >> 1) &~3 }; + uvg_cu_loc_ctor(&chroma_loc, lcu_px.x, lcu_px.y, width, height); + const int chroma_width = cu_loc->chroma_width; + const int chroma_height = cu_loc->chroma_height; // TODO: height for non-square blocks + int8_t scan_order = SCAN_DIAG; //const unsigned index = xy_to_zorder(LCU_WIDTH_C, lcu_px.x, lcu_px.y); - const bool chroma_can_use_tr_skip = state->encoder_control->cfg.trskip_enable && chroma_width <= (1 << state->encoder_control->cfg.trskip_max_size); + const bool chroma_can_use_tr_skip = state->encoder_control->cfg.trskip_enable + && chroma_width <= (1 << state->encoder_control->cfg.trskip_max_size) + && chroma_height <= (1 << state->encoder_control->cfg.trskip_max_size); if(pred_cu->joint_cb_cr == 0) { if (!state->encoder_control->cfg.lossless) { int index = lcu_px.y * LCU_WIDTH_C + lcu_px.x; @@ -697,8 +729,8 @@ static double cu_rd_cost_tr_split_accurate( if(chroma_can_use_tr_skip && cb_flag_v) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 4, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU); - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.u, tr_cu, &chroma_loc, COLOR_U, scan_order, tr_cu->tr_skip & 2, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.v, tr_cu, &chroma_loc, COLOR_V, scan_order, tr_cu->tr_skip & 4, COEFF_ORDER_CU); } else { @@ -715,12 +747,12 @@ static double cu_rd_cost_tr_split_accurate( if (chroma_can_use_tr_skip) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); } } - if (uvg_is_lfnst_allowed(state, tr_cu, width, height, x_px, y_px, tree_type, depth == 4 || tree_type == UVG_CHROMA_T ? COLOR_UV : COLOR_Y, lcu)) { - const int lfnst_idx = (depth != 4 && tree_type != UVG_CHROMA_T) ? tr_cu->lfnst_idx : tr_cu->cr_lfnst_idx; + if (uvg_is_lfnst_allowed(state, tr_cu, width, height, cu_loc->local_x, cu_loc->local_y, tree_type, cu_loc->width == 4 || tree_type == UVG_CHROMA_T ? COLOR_UV : COLOR_Y, lcu)) { + const int lfnst_idx = (cu_loc->width != 4 && tree_type != UVG_CHROMA_T) ? tr_cu->lfnst_idx : tr_cu->cr_lfnst_idx; CABAC_FBITS_UPDATE( cabac, &cabac->ctx.lfnst_idx_model[tr_cu->depth == 4 || tree_type != UVG_BOTH_T], @@ -739,7 +771,7 @@ static double cu_rd_cost_tr_split_accurate( tr_cu->lfnst_last_scan_pos = false; tr_cu->violates_lfnst_constrained_luma = false; tr_cu->violates_lfnst_constrained_chroma = false; - if (uvg_is_mts_allowed(state, tr_cu) && tree_type != UVG_CHROMA_T) { + if (uvg_is_mts_allowed(state, tr_cu, cu_loc) && tree_type != UVG_CHROMA_T) { bool symbol = tr_cu->tr_idx != 0; int ctx_idx = 0; @@ -1035,10 +1067,6 @@ static double search_cu( if ((split_tree.current_depth != 4 || (x % 8 && y % 8)) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { intra_search.pred_cu.joint_cb_cr = 0; - // There is almost no benefit to doing the chroma mode search for - // rd2. Possibly because the luma mode search already takes chroma - // into account, so there is less of a chanse of luma mode being - // really bad for chroma. if(tree_type == UVG_CHROMA_T) { intra_search.pred_cu.intra = uvg_get_co_located_luma_cu(x, y, luma_width, luma_width, NULL, state->tile->frame->cu_array, UVG_CHROMA_T)->intra; intra_mode = intra_search.pred_cu.intra.mode; @@ -1046,7 +1074,7 @@ static double search_cu( } intra_search.pred_cu.intra.mode_chroma = intra_search.pred_cu.intra.mode; if (ctrl->cfg.rdo >= 2 || ctrl->cfg.jccr || ctrl->cfg.lfnst) { - uvg_search_cu_intra_chroma(state, x, y, depth, lcu, &intra_search, tree_type); + uvg_search_cu_intra_chroma(state, cu_loc, lcu, &intra_search, tree_type); if (intra_search.pred_cu.joint_cb_cr == 0) { intra_search.pred_cu.joint_cb_cr = 4; @@ -1066,7 +1094,7 @@ static double search_cu( false, true); if(tree_type != UVG_CHROMA_T) { - intra_cost += uvg_cu_rd_cost_chroma(state, x_local, y_local, depth, &intra_search.pred_cu, lcu); + intra_cost += uvg_cu_rd_cost_chroma(state, &intra_search.pred_cu, lcu, cu_loc); } else { intra_cost = intra_search.cost; @@ -1080,7 +1108,7 @@ static double search_cu( } intra_search.pred_cu.intra.mode = intra_mode; if(tree_type == UVG_CHROMA_T) { - uvg_lcu_fill_trdepth(lcu, x_local, y_local, split_tree.current_depth, split_tree.current_depth, tree_type); + uvg_lcu_fill_trdepth(lcu, cu_loc, split_tree.current_depth, tree_type); } } if (intra_cost < cost) { @@ -1187,14 +1215,14 @@ static double search_cu( // This will no longer be necessary if the transform depths are not shared. int tr_depth = MAX(1, split_tree.current_depth); - uvg_lcu_fill_trdepth(lcu, x, y, depth, tr_depth, tree_type); + uvg_lcu_fill_trdepth(lcu, cu_loc, tr_depth, tree_type); const bool has_chroma = state->encoder_control->chroma_format != UVG_CSP_400; uvg_inter_recon_cu(state, lcu, true, has_chroma, cu_loc); if (ctrl->cfg.zero_coeff_rdo && !ctrl->cfg.lossless && !ctrl->cfg.rdoq_enable) { //Calculate cost for zero coeffs - inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, x, y, split_tree.current_depth) + inter_bitcost * state->lambda; + inter_zero_coeff_cost = cu_zero_coeff_cost(state, work_tree, cu_loc, split_tree.current_depth) + inter_bitcost * state->lambda; } cu_loc_t loc; @@ -1239,13 +1267,13 @@ static double search_cu( cost = bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0); + cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc); if (ctrl->cfg.zero_coeff_rdo && inter_zero_coeff_cost <= cost) { cost = inter_zero_coeff_cost; // Restore saved pixels from lower level of the working tree. - copy_cu_pixels(x_local, y_local, cu_width, &work_tree[split_tree.current_depth + 1], lcu, tree_type); + copy_cu_pixels(&work_tree[split_tree.current_depth + 1], lcu, cu_loc, tree_type); if (cur_cu->merged) { cur_cu->merged = 0; @@ -1256,7 +1284,7 @@ static double search_cu( if (cur_cu->tr_depth != 0) { // Reset transform depth since there are no coefficients. This // ensures that CBF is cleared for the whole area of the CU. - uvg_lcu_fill_trdepth(lcu, x, y, depth, depth, tree_type); + uvg_lcu_fill_trdepth(lcu, cu_loc, depth, tree_type); } cur_cu->cbf = 0; @@ -1317,10 +1345,8 @@ static double search_cu( left_cu, above_cu, 1, + cu_loc, depth, - cu_width, - x >> (tree_type == UVG_CHROMA_T), - y >> (tree_type == UVG_CHROMA_T), tree_type, &split_bits); } @@ -1380,8 +1406,7 @@ static double search_cu( uvg_write_split_flag(state, &state->search_cabac, x > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x) - 1, SUB_SCU(y)) : NULL, y > 0 ? LCU_GET_CU_AT_PX(lcu, SUB_SCU(x), SUB_SCU(y) - 1) : NULL, - 0, depth, cu_width, x, y, tree_type, - &bits); + 0, cu_loc, depth, tree_type, &bits); cur_cu->intra = cu_d1->intra; cur_cu->type = CU_INTRA; @@ -1391,7 +1416,7 @@ static double search_cu( cur_cu->lfnst_idx = 0; cur_cu->cr_lfnst_idx = 0; - uvg_lcu_fill_trdepth(lcu, x, y, depth, cur_cu->tr_depth, tree_type); + uvg_lcu_fill_trdepth(lcu, cu_loc, cur_cu->tr_depth, tree_type); lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); intra_search_data_t proxy; @@ -1404,12 +1429,12 @@ static double search_cu( lcu, tree_type, true, - state->encoder_control->chroma_format == UVG_CSP_400); + state->encoder_control->chroma_format != UVG_CSP_400); double mode_bits = calc_mode_bits(state, lcu, cur_cu, cu_loc) + bits; cost += mode_bits * state->lambda; - cost += cu_rd_cost_tr_split_accurate(state, x_local, y_local, depth, cur_cu, lcu, tree_type, 0); + cost += cu_rd_cost_tr_split_accurate(state, cur_cu, lcu, tree_type, 0, cu_loc); memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); memcpy(&state->search_cabac, &temp_cabac, sizeof(temp_cabac)); @@ -1419,7 +1444,7 @@ static double search_cu( if (split_cost < cost) { // Copy split modes to this depth. cost = split_cost; - work_tree_copy_up(x_local, y_local, depth, work_tree, state->encoder_control->cfg.jccr, tree_type); + work_tree_copy_up(work_tree, state->encoder_control->cfg.jccr, tree_type, cu_loc, depth); #if UVG_DEBUG //debug_split = 1; #endif @@ -1427,7 +1452,7 @@ static double search_cu( // Copy this CU's mode all the way down for use in adjacent CUs mode // search. memcpy(&state->search_cabac, &post_seach_cabac, sizeof(post_seach_cabac)); - work_tree_copy_down(x_local, y_local, depth, work_tree, tree_type); + work_tree_copy_down(depth, work_tree, tree_type, cu_loc); downsample_cclm_rec( state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64] ); @@ -1454,7 +1479,7 @@ static double search_cu( } else if (depth >= 0 && depth < MAX_PU_DEPTH) { // Need to copy modes down since the lower level of the work tree is used // when searching SMP and AMP blocks. - work_tree_copy_down(x_local, y_local, depth, work_tree, tree_type); + work_tree_copy_down(depth, work_tree, tree_type, cu_loc); if(tree_type != UVG_CHROMA_T) { downsample_cclm_rec( state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64] diff --git a/src/search.h b/src/search.h index 1a013670..73c7efec 100644 --- a/src/search.h +++ b/src/search.h @@ -84,18 +84,24 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map); void uvg_search_lcu(encoder_state_t *state, int x, int y, const yuv_t *hor_buf, const yuv_t *ver_buf, lcu_coeff_t *coeff); -double uvg_cu_rd_cost_luma(const encoder_state_t *const state, - const int x_px, const int y_px, const int depth, - const cu_info_t *const pred_cu, - lcu_t *const lcu, - uint8_t isp_cbf); -double uvg_cu_rd_cost_chroma(const encoder_state_t *const state, - const int x_px, const int y_px, const int depth, - cu_info_t *const pred_cu, - lcu_t *const lcu); +double uvg_cu_rd_cost_luma( + const encoder_state_t *const state, + const cu_loc_t* const cu_loc, + const cu_info_t *const pred_cu, + lcu_t *const lcu, + uint8_t isp_cbf); +double uvg_cu_rd_cost_chroma( + const encoder_state_t *const state, + cu_info_t *const pred_cu, + lcu_t *const lcu, + const cu_loc_t * const); -void uvg_lcu_fill_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, uint8_t tr_depth, enum uvg_tree_type - tree_type); +void uvg_lcu_fill_trdepth( + lcu_t *lcu, + const cu_loc_t* const cu_loc, + uint8_t tr_depth, + enum uvg_tree_type + tree_type); void uvg_intra_recon_lcu_luma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); void uvg_intra_recon_lcu_chroma(encoder_state_t * const state, int x, int y, int depth, int8_t intra_mode, cu_info_t *cur_cu, lcu_t *lcu); diff --git a/src/search_inter.c b/src/search_inter.c index 53587b84..46b04349 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -1811,7 +1811,7 @@ static void search_pu_inter( cur_pu->inter.mv[0][1] = info->merge_cand[merge_idx].mv[0][1]; cur_pu->inter.mv[1][0] = info->merge_cand[merge_idx].mv[1][0]; cur_pu->inter.mv[1][1] = info->merge_cand[merge_idx].mv[1][1]; - uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, MAX(1, depth), UVG_BOTH_T); + uvg_lcu_fill_trdepth(lcu, cu_loc, MAX(1, depth), UVG_BOTH_T); uvg_inter_recon_cu(state, lcu, true, false, cu_loc); uvg_quantize_lcu_residual(state, true, false, false, cu_loc, depth, cur_pu, lcu, true, UVG_BOTH_T); @@ -2129,12 +2129,12 @@ void uvg_cu_cost_inter_rd2( const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; int tr_depth = MAX(1, depth); - uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, tr_depth, UVG_BOTH_T); + uvg_lcu_fill_trdepth(lcu, cu_loc, tr_depth, UVG_BOTH_T); const int x_px = SUB_SCU(cu_loc->x); const int y_px = SUB_SCU(cu_loc->y); - const int width = LCU_WIDTH >> depth; - const int height = width; // TODO: non-square blocks + const int width = cu_loc->width; + const int height = cu_loc->height; cabac_data_t cabac_copy; memcpy(&cabac_copy, &state->search_cabac, sizeof(cabac_copy)); @@ -2155,10 +2155,10 @@ void uvg_cu_cost_inter_rd2( int index = y_px / 2 * LCU_WIDTH_C + x_px / 2; double ssd_u = uvg_pixels_calc_ssd(&lcu->ref.u[index], &lcu->rec.u[index], LCU_WIDTH_C, LCU_WIDTH_C, - width / 2); + cu_loc->chroma_width); double ssd_v = uvg_pixels_calc_ssd(&lcu->ref.v[index], &lcu->rec.v[index], LCU_WIDTH_C, LCU_WIDTH_C, - width / 2); + cu_loc->chroma_width); ssd += (ssd_u + ssd_v) * UVG_CHROMA_MULT; } double no_cbf_bits; @@ -2217,12 +2217,10 @@ void uvg_cu_cost_inter_rd2( uvg_chorma_ts_out_t chorma_ts_out; uvg_chroma_transform_search( state, - depth, lcu, &cabac_copy, cu_loc, index, - 0, cur_cu, u_pred, v_pred, @@ -2262,10 +2260,10 @@ void uvg_cu_cost_inter_rd2( int cbf = cbf_is_set_any(cur_cu->cbf, depth); if(cbf) { - *inter_cost = uvg_cu_rd_cost_luma(state, x_px, y_px, depth, cur_cu, lcu, 0); + *inter_cost = uvg_cu_rd_cost_luma(state, cu_loc, cur_cu, lcu, 0); if (reconstruct_chroma) { if (cur_cu->depth != cur_cu->tr_depth || !state->encoder_control->cfg.jccr) { - *inter_cost += uvg_cu_rd_cost_chroma(state, x_px, y_px, depth, cur_cu, lcu); + *inter_cost += uvg_cu_rd_cost_chroma(state, cur_cu, lcu, cu_loc); } else { *inter_cost += chroma_cost; diff --git a/src/search_intra.c b/src/search_intra.c index 10c6657d..6710b6fc 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -431,9 +431,7 @@ static double search_intra_trdepth( } double rd_cost = uvg_cu_rd_cost_luma( state, - lcu_px.x, - lcu_px.y, - depth, + cu_loc, pred_cu, lcu, search_data->best_isp_cbfs); @@ -502,11 +500,9 @@ static double search_intra_trdepth( ); best_rd_cost += uvg_cu_rd_cost_chroma( state, - lcu_px.x, - lcu_px.y, - depth, pred_cu, - lcu); + lcu, + cu_loc); pred_cu->intra.mode = luma_mode; // Check lfnst constraints for chroma @@ -552,7 +548,7 @@ static double search_intra_trdepth( UVG_BOTH_T, false, true); - best_rd_cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + best_rd_cost += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, cu_loc); pred_cu->intra.mode = luma_mode; } pred_cu->tr_skip = best_tr_idx == MTS_SKIP; @@ -655,7 +651,7 @@ static double search_intra_trdepth( if (depth == 0 || split_cost < nosplit_cost) { return split_cost; } else { - uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type); + uvg_lcu_fill_trdepth(lcu, cu_loc, depth, tree_type); pred_cu->cbf = nosplit_cbf; @@ -690,19 +686,15 @@ static void sort_modes(intra_search_data_t* __restrict modes, uint8_t length) static int search_intra_chroma_rough( encoder_state_t * const state, - int x_px, - int y_px, - int depth, - const vector2d_t* const lcu_px, intra_search_data_t* chroma_data, lcu_t* lcu, int8_t luma_mode, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + const cu_loc_t* const cu_loc) { - assert(depth != 4 || (x_px & 4 && y_px & 4)); - const int_fast8_t log2_width_c = MAX(LOG2_LCU_WIDTH - depth - 1, 2); + const int_fast8_t log2_width_c = uvg_g_convert_to_log2[cu_loc->chroma_width]; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; - const vector2d_t luma_px = { x_px & ~7, y_px & ~7 }; + const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 }; const int width = 1 << log2_width_c; const int height = width; // TODO: height for non-square blocks @@ -714,7 +706,7 @@ static int search_intra_chroma_rough( uvg_intra_references refs_v; uvg_intra_build_reference(&loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0); - vector2d_t lcu_cpx = { (lcu_px->x & ~7) / 2, (lcu_px->y & ~7) / 2 }; + vector2d_t lcu_cpx = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; uvg_pixel* orig_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; uvg_pixel* orig_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; @@ -1494,29 +1486,19 @@ double uvg_chroma_mode_bits(const encoder_state_t *state, int8_t chroma_mode, in int8_t uvg_search_intra_chroma_rdo( encoder_state_t * const state, - int x_px, - int y_px, - int depth, int8_t num_modes, lcu_t *const lcu, intra_search_data_t* chroma_data, int8_t luma_mode, - enum uvg_tree_type tree_type) + enum uvg_tree_type tree_type, + const cu_loc_t* const cu_loc) { - const bool reconstruct_chroma = (depth != 4) || (x_px & 4 && y_px & 4); - - const int luma_width = LCU_WIDTH >> depth; - const int luma_height = LCU_WIDTH >> depth; // TODO: height - - int log2_width = MAX(LOG2_LCU_WIDTH - depth - 1, 2); + const bool reconstruct_chroma = true; - cu_loc_t loc; - uvg_cu_loc_ctor(&loc, x_px & ~7, y_px & ~7, luma_width, luma_height); - - const int chroma_width = loc.chroma_width; - const int chroma_height = loc.chroma_height; + const int chroma_width = cu_loc->chroma_width; + const int chroma_height = cu_loc->chroma_height; uvg_intra_references refs[2]; - const vector2d_t luma_px = { x_px & ~7, y_px & ~7 }; + const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 }; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height, @@ -1524,17 +1506,17 @@ int8_t uvg_search_intra_chroma_rdo( if (reconstruct_chroma) { - uvg_intra_build_reference(&loc, &loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0); - uvg_intra_build_reference(&loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0); + uvg_intra_build_reference(cu_loc, cu_loc, COLOR_U, &luma_px, &pic_px, lcu, &refs[0], state->encoder_control->cfg.wpp, NULL, 0, 0); + uvg_intra_build_reference(cu_loc, cu_loc, COLOR_V, &luma_px, &pic_px, lcu, &refs[1], state->encoder_control->cfg.wpp, NULL, 0, 0); - const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; + const vector2d_t lcu_px = { cu_loc->local_x, cu_loc->local_y }; cabac_data_t temp_cabac; memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t)); - const int offset = ((lcu_px.x & ~7) >> 1) + ((lcu_px.y & ~7) >> 1)* LCU_WIDTH_C; + const int offset = ((cu_loc->local_x & ~7) >> 1) + ((cu_loc->local_y & ~7) >> 1)* LCU_WIDTH_C; int lfnst_modes_to_check[3]; - if((depth == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) { + if((cu_loc->width == 4 || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) { for (int i = 0; i < 3; ++i) { lfnst_modes_to_check[i] = i; } @@ -1572,7 +1554,7 @@ int8_t uvg_search_intra_chroma_rdo( uvg_intra_predict( state, &refs[COLOR_U - 1], - &loc, + cu_loc, COLOR_U, u_pred, &chroma_data[mode_i], @@ -1581,7 +1563,7 @@ int8_t uvg_search_intra_chroma_rdo( uvg_intra_predict( state, &refs[COLOR_V - 1], - &loc, + cu_loc, COLOR_V, v_pred, &chroma_data[mode_i], @@ -1606,12 +1588,10 @@ int8_t uvg_search_intra_chroma_rdo( uvg_chorma_ts_out_t chorma_ts_out; uvg_chroma_transform_search( state, - depth, lcu, &temp_cabac, - &loc, + cu_loc, offset, - mode, pred_cu, u_pred, v_pred, @@ -1653,12 +1633,12 @@ int8_t uvg_search_intra_chroma_rdo( state->search_cabac.update = 1; chroma_data[mode_i].cost = mode_bits * state->lambda; uvg_intra_recon_cu(state, - &chroma_data[mode_i], &loc, + &chroma_data[mode_i], cu_loc, pred_cu, lcu, tree_type, false, true); - chroma_data[mode_i].cost += uvg_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + chroma_data[mode_i].cost += uvg_cu_rd_cost_chroma(state, pred_cu, lcu, cu_loc); memcpy(&state->search_cabac, &temp_cabac, sizeof(cabac_data_t)); } } @@ -1677,14 +1657,11 @@ int8_t uvg_search_intra_chroma_rdo( int8_t uvg_search_cu_intra_chroma( encoder_state_t * const state, - const int x_px, - const int y_px, - const int depth, + const cu_loc_t* const cu_loc, lcu_t *lcu, intra_search_data_t *search_data, enum uvg_tree_type tree_type) { - const vector2d_t lcu_px = { SUB_SCU(x_px), SUB_SCU(y_px) }; const cu_info_t *cur_pu = &search_data->pred_cu; int8_t intra_mode = !cur_pu->intra.mip_flag ? cur_pu->intra.mode : 0; @@ -1698,6 +1675,9 @@ int8_t uvg_search_cu_intra_chroma( } } + cu_loc_t chroma_loc; + uvg_cu_loc_ctor(&chroma_loc, cu_loc->x & ~7, cu_loc->y & ~7, cu_loc->width, cu_loc->height); + // The number of modes to select for slower chroma search. Luma mode // is always one of the modes, so 2 means the final decision is made // between luma mode and one other mode that looks the best @@ -1715,7 +1695,7 @@ int8_t uvg_search_cu_intra_chroma( chroma_data[i].pred_cu = *cur_pu; chroma_data[i].pred_cu.intra.mode_chroma = num_modes == 1 ? intra_mode : modes[i]; chroma_data[i].cost = 0; - if(depth != 4 && tree_type == UVG_BOTH_T) { + if(cu_loc->width != 4 && tree_type == UVG_BOTH_T) { memcpy(chroma_data[i].lfnst_costs, search_data->lfnst_costs, sizeof(double) * 3); } } @@ -1726,16 +1706,13 @@ int8_t uvg_search_cu_intra_chroma( if(state->encoder_control->cfg.cclm && 0){ - num_modes = search_intra_chroma_rough(state, x_px, y_px, depth, - &lcu_px, - chroma_data, - lcu, - intra_mode, - tree_type); + num_modes = search_intra_chroma_rough(state, chroma_data, lcu, intra_mode, + tree_type, + &chroma_loc); } if (num_modes > 1 || state->encoder_control->cfg.jccr) { - uvg_search_intra_chroma_rdo(state, x_px, y_px, depth, num_modes, lcu, chroma_data, intra_mode, tree_type); + uvg_search_intra_chroma_rdo(state, num_modes, lcu, chroma_data, intra_mode, tree_type, &chroma_loc); } else if(cur_pu->lfnst_idx) { chroma_data[0].pred_cu.cr_lfnst_idx = cur_pu->lfnst_idx; @@ -1983,7 +1960,7 @@ void uvg_search_cu_intra( // Set transform depth to current depth, meaning no transform splits. { const int8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; - uvg_lcu_fill_trdepth(lcu, cu_loc->x, cu_loc->y, depth, depth, tree_type); + uvg_lcu_fill_trdepth(lcu, cu_loc, depth, tree_type); } // Refine results with slower search or get some results if rough search was skipped. const int32_t rdo_level = state->encoder_control->cfg.rdo; diff --git a/src/search_intra.h b/src/search_intra.h index e9264275..faa26ff1 100644 --- a/src/search_intra.h +++ b/src/search_intra.h @@ -52,9 +52,7 @@ double uvg_chroma_mode_bits(const encoder_state_t *state, int8_t uvg_search_cu_intra_chroma( encoder_state_t * const state, - const int x_px, - const int y_px, - const int depth, + const cu_loc_t* const cu_loc, lcu_t *lcu, intra_search_data_t* best_cclm, enum uvg_tree_type tree_type); diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index bc70daab..2fc27872 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -709,7 +709,7 @@ int uvg_quantize_residual_avx2(encoder_state_t *const state, { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; uvg_rdoq(state, coeff, coeff_out, width, height, color, - scan_order, cur_cu->type, tr_depth, cur_cu->cbf, lfnst_index); + scan_order, cur_cu->type, cur_cu->cbf, lfnst_index); } else if (state->encoder_control->cfg.rdoq_enable && use_trskip) { uvg_ts_rdoq(state, coeff, coeff_out, width, height, color, diff --git a/src/strategies/generic/encode_coding_tree-generic.c b/src/strategies/generic/encode_coding_tree-generic.c index 8d9ca61d..48a5cc3d 100644 --- a/src/strategies/generic/encode_coding_tree-generic.c +++ b/src/strategies/generic/encode_coding_tree-generic.c @@ -54,7 +54,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, cabac_data_t * const cabac, const coeff_t *coeff, - const cu_loc_t *cu_loc, + const cu_loc_t * const cu_loc, uint8_t color, int8_t scan_mode, cu_info_t* cur_cu, @@ -80,8 +80,8 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, // CONSTANTS - const uint32_t log2_block_width = uvg_g_convert_to_log2[width]; - const uint32_t log2_block_height = uvg_g_convert_to_log2[height]; + const uint8_t log2_block_width = uvg_g_convert_to_log2[width]; + const uint8_t log2_block_height = uvg_g_convert_to_log2[height]; const uint32_t log2_cg_size = uvg_g_log2_sbb_size[log2_block_width][log2_block_height][0] + uvg_g_log2_sbb_size[log2_block_width][log2_block_height][1]; const uint32_t* const scan = uvg_get_scan_order_table(SCAN_GROUP_4X4, scan_mode, log2_block_width, log2_block_height); diff --git a/src/strategies/generic/encode_coding_tree-generic.h b/src/strategies/generic/encode_coding_tree-generic.h index 26682a61..0de02e3c 100644 --- a/src/strategies/generic/encode_coding_tree-generic.h +++ b/src/strategies/generic/encode_coding_tree-generic.h @@ -44,7 +44,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state, cabac_data_t * const cabac, const coeff_t *coeff, - const cu_loc_t *loc, + const cu_loc_t * const loc, uint8_t color, int8_t scan_mode, cu_info_t* cur_cu, diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index be396a8b..bfb92700 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -317,8 +317,7 @@ int uvg_quant_cbcr_residual_generic( { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; uvg_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 1 ? COLOR_V : COLOR_U, - scan_order, cur_cu->type, tr_depth, cur_cu->cbf, - cur_cu->cr_lfnst_idx); + scan_order, cur_cu->type, cur_cu->cbf, cur_cu->cr_lfnst_idx); } else if (state->encoder_control->cfg.rdoq_enable && false) { uvg_ts_rdoq(state, coeff, coeff_out, width, width, cur_cu->joint_cb_cr == 2 ? COLOR_V : COLOR_U, @@ -499,8 +498,7 @@ int uvg_quantize_residual_generic(encoder_state_t *const state, { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; uvg_rdoq(state, coeff, coeff_out, width, height, color, - scan_order, cur_cu->type, tr_depth, cur_cu->cbf, - lfnst_index); + scan_order, cur_cu->type, cur_cu->cbf, lfnst_index); } else if(state->encoder_control->cfg.rdoq_enable && use_trskip) { uvg_ts_rdoq(state, coeff, coeff_out, width, height, color, scan_order); diff --git a/src/strategies/strategies-encode.h b/src/strategies/strategies-encode.h index 625f4005..969dfb57 100644 --- a/src/strategies/strategies-encode.h +++ b/src/strategies/strategies-encode.h @@ -49,7 +49,7 @@ typedef unsigned (encode_coeff_nxn_func)(encoder_state_t * const state, cabac_data_t * const cabac, const coeff_t *coeff, - const cu_loc_t *loc, + const cu_loc_t * const loc, uint8_t color, int8_t scan_mode, cu_info_t* cur_cu, diff --git a/src/transform.c b/src/transform.c index 84eb3558..86ff515b 100644 --- a/src/transform.c +++ b/src/transform.c @@ -434,8 +434,7 @@ static void quantize_chroma( (transforms[i] != CHROMA_TS || !state->encoder_control->cfg.rdoq_skip)) { uvg_rdoq(state, &u_coeff[i * trans_offset], u_quant_coeff, width, height, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, - scan_order, CU_INTRA, depth, 0, - lfnst_idx); + scan_order, CU_INTRA, 0, lfnst_idx); int j; for (j = 0; j < width * height; ++j) { @@ -449,8 +448,7 @@ static void quantize_chroma( uint16_t temp_cbf = 0; if (*u_has_coeffs)cbf_set(&temp_cbf, depth, COLOR_U); uvg_rdoq(state, &v_coeff[i * trans_offset], v_quant_coeff, width, height, COLOR_V, - scan_order, CU_INTRA, depth, temp_cbf, - lfnst_idx); + scan_order, CU_INTRA, temp_cbf, lfnst_idx); } } @@ -486,12 +484,10 @@ static void quantize_chroma( void uvg_chroma_transform_search( encoder_state_t* const state, - int depth, lcu_t* const lcu, cabac_data_t* temp_cabac, const cu_loc_t* const cu_loc, const int offset, - const uint8_t mode, cu_info_t* pred_cu, uvg_pixel u_pred[1024], uvg_pixel v_pred[1024], @@ -507,6 +503,8 @@ void uvg_chroma_transform_search( const int width = cu_loc->chroma_width; const int height = cu_loc->chroma_height; + const int depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; + uvg_transform2d( state->encoder_control, u_resi, u_coeff, width, height, COLOR_U, pred_cu ); @@ -553,8 +551,6 @@ void uvg_chroma_transform_search( coeff_t v_quant_coeff[LCU_WIDTH_C * LCU_WIDTH_C]; int16_t u_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C]; int16_t v_recon_resi[LCU_WIDTH_C * LCU_WIDTH_C]; - const coeff_scan_order_t scan_order = - uvg_get_scan_order(pred_cu->type, mode, depth); bool u_has_coeffs = false; bool v_has_coeffs = false; if(pred_cu->cr_lfnst_idx) { @@ -575,13 +571,13 @@ void uvg_chroma_transform_search( i, u_quant_coeff, v_quant_coeff, - scan_order, + SCAN_DIAG, &u_has_coeffs, &v_has_coeffs, pred_cu->cr_lfnst_idx); if(pred_cu->cr_lfnst_idx !=0 && !u_has_coeffs && !v_has_coeffs) continue; - if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (depth == 4 || tree_type == UVG_CHROMA_T)) { + if(pred_cu->type == CU_INTRA && transforms[i] != CHROMA_TS && (cu_loc->width == 4 || tree_type == UVG_CHROMA_T)) { bool constraints[2] = { false, false }; uvg_derive_lfnst_constraints(pred_cu, constraints, u_quant_coeff, width, height, NULL, COLOR_U); if(!IS_JCCR_MODE(transforms[i])) { @@ -593,9 +589,9 @@ void uvg_chroma_transform_search( if (IS_JCCR_MODE(transforms[i]) && !u_has_coeffs) continue; if (u_has_coeffs) { - uvg_dequant(state, u_quant_coeff, &u_coeff[i * trans_offset], width, width, transforms[i] != JCCR_1 ? COLOR_U : COLOR_V, pred_cu->type, transforms[i] == CHROMA_TS); + if (transforms[i] != CHROMA_TS) { if (pred_cu->cr_lfnst_idx) { uvg_inv_lfnst(pred_cu, width, height, COLOR_U, pred_cu->cr_lfnst_idx, &u_coeff[i * trans_offset], tree_type); @@ -606,6 +602,7 @@ void uvg_chroma_transform_search( else { uvg_itransformskip(state->encoder_control, u_recon_resi, &u_coeff[i * trans_offset], width, height); } + if (transforms[i] != JCCR_1) { for (int j = 0; j < width * height; j++) { u_recon[trans_offset * i + j] = CLIP_TO_PIXEL((uvg_pixel)(u_pred[j] + u_recon_resi[j])); @@ -620,9 +617,12 @@ void uvg_chroma_transform_search( else { uvg_pixels_blit(u_pred, &u_recon[trans_offset * i], width, height, width, width); } + + if (v_has_coeffs && !(IS_JCCR_MODE(transforms[i]))) { uvg_dequant(state, v_quant_coeff, &v_coeff[i * trans_offset], width, width, COLOR_V, pred_cu->type, transforms[i] == CHROMA_TS); + if (transforms[i] != CHROMA_TS) { if (pred_cu->cr_lfnst_idx) { uvg_inv_lfnst(pred_cu, width, height, COLOR_V, pred_cu->cr_lfnst_idx, &v_coeff[i * trans_offset], tree_type); @@ -633,6 +633,7 @@ void uvg_chroma_transform_search( else { uvg_itransformskip(state->encoder_control, v_recon_resi, &v_coeff[i * trans_offset], width, height); } + for (int j = 0; j < width * height; j++) { v_recon[trans_offset * i + j] = CLIP_TO_PIXEL(v_pred[j] + v_recon_resi[j]); } @@ -700,7 +701,7 @@ void uvg_chroma_transform_search( pred_cu, cu_loc, COLOR_U, - scan_order, + SCAN_DIAG, transforms[i] == CHROMA_TS, COEFF_ORDER_LINEAR); u_bits += coeff_cost; @@ -717,7 +718,7 @@ void uvg_chroma_transform_search( pred_cu, cu_loc, COLOR_V, - scan_order, + SCAN_DIAG, transforms[i] == CHROMA_TS, COEFF_ORDER_LINEAR); } diff --git a/src/transform.h b/src/transform.h index 4d5e8ba8..50a3f7de 100644 --- a/src/transform.h +++ b/src/transform.h @@ -104,12 +104,10 @@ void uvg_quantize_lcu_residual( void uvg_chroma_transform_search( encoder_state_t* const state, - int depth, lcu_t* const lcu, cabac_data_t* temp_cabac, const cu_loc_t* const cu_loc, const int offset, - const uint8_t mode, cu_info_t* pred_cu, uvg_pixel u_pred[1024], uvg_pixel v_pred[1024], diff --git a/tests/test_cabac_state.sh b/tests/test_cabac_state.sh index 519f9c40..e60806dc 100755 --- a/tests/test_cabac_state.sh +++ b/tests/test_cabac_state.sh @@ -6,10 +6,10 @@ set -eu cabacfile="$(mktemp)" -valgrind_test 256x128 10 yuv420p --preset veryslow --rd 3 --mip --jccr --mrl --lfnst -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}" +valgrind_test 256x128 10 yuv420p --preset veryslow --pu-depth-intra 0-4 --cclm --rd 3 --mip --jccr --mrl --lfnst -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}" python3 check_cabac_state_consistency.py "${cabacfile}" -valgrind_test 256x128 10 yuv420p --preset veryslow --rd 3 --mip --jccr --mrl --lfnst --dual-tree -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}" +valgrind_test 256x128 10 yuv420p --preset veryslow --pu-depth-intra 0-4 --cclm --rd 3 --mip --jccr --mrl --lfnst --dual-tree -p 1 --owf 0 --no-wpp --cabac-debug-file="${cabacfile}" python3 check_cabac_state_consistency.py "${cabacfile}" rm -rf "${cabacfile}"