diff --git a/src/cu.c b/src/cu.c index 301ca100..56408b33 100644 --- a/src/cu.c +++ b/src/cu.c @@ -276,10 +276,10 @@ cu_array_t * uvg_cu_array_copy_ref(cu_array_t* cua) * \param dst_y y-coordinate of the top edge of the copied area in dst * \param src source lcu */ -void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type tree_type) +void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src) { const int dst_stride = dst->stride >> 2; - const int width = tree_type != UVG_CHROMA_T ? LCU_WIDTH : LCU_WIDTH_C; + const int width = LCU_WIDTH; for (int y = 0; y < width; y += SCU_WIDTH) { for (int x = 0; x < width; x += SCU_WIDTH) { const cu_info_t *from_cu = LCU_GET_CU_AT_PX(src, x, y); @@ -373,11 +373,10 @@ int uvg_get_split_locs( int uvg_get_implicit_split( const encoder_state_t* const state, const cu_loc_t* const cu_loc, - uint8_t max_mtt_depth, - bool uses_chroma_coordinates) + uint8_t max_mtt_depth) { - bool right_ok = (state->tile->frame->width >> uses_chroma_coordinates) >= cu_loc->x + cu_loc->width; - bool bottom_ok = (state->tile->frame->height >> uses_chroma_coordinates) >= cu_loc->y + cu_loc->height; + bool right_ok = (state->tile->frame->width) >= cu_loc->x + cu_loc->width; + bool bottom_ok = (state->tile->frame->height) >= cu_loc->y + cu_loc->height; if (right_ok && bottom_ok) return NO_SPLIT; if (right_ok && max_mtt_depth != 0) return BT_HOR_SPLIT; @@ -387,22 +386,21 @@ int uvg_get_implicit_split( int uvg_get_possible_splits(const encoder_state_t * const state, - const cu_loc_t * const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6], bool - use_chroma_coordinates) + const cu_loc_t * const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6]) { - const unsigned width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; - const unsigned height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; + const unsigned width = cu_loc->width; + const unsigned height = cu_loc->height; const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 2 : 0) : 1; const unsigned max_btd = state->encoder_control->cfg.max_btt_depth[slice_type] + split_tree.implicit_mtt_depth; - const unsigned max_bt_size = state->encoder_control->cfg.max_bt_size[slice_type] >> (tree_type == UVG_CHROMA_T); - const unsigned min_bt_size = 1 << MIN_SIZE >> (tree_type == UVG_CHROMA_T); - const unsigned max_tt_size = state->encoder_control->cfg.max_tt_size[slice_type] >> (tree_type == UVG_CHROMA_T); - const unsigned min_tt_size = 1 << MIN_SIZE >> (tree_type == UVG_CHROMA_T); + const unsigned max_bt_size = state->encoder_control->cfg.max_bt_size[slice_type]; + const unsigned min_bt_size = 1 << MIN_SIZE; + const unsigned max_tt_size = state->encoder_control->cfg.max_tt_size[slice_type]; + const unsigned min_tt_size = 1 << MIN_SIZE; const unsigned min_qt_size = state->encoder_control->cfg.min_qt_size[slice_type]; - const enum split_type implicitSplit = uvg_get_implicit_split(state, cu_loc, max_btd, use_chroma_coordinates); + const enum split_type implicitSplit = uvg_get_implicit_split(state, cu_loc, max_btd); splits[NO_SPLIT] = splits[QT_SPLIT] = splits[BT_HOR_SPLIT] = splits[TT_HOR_SPLIT] = splits[BT_VER_SPLIT] = splits[TT_VER_SPLIT] = true; bool can_btt = split_tree.mtt_depth < max_btd; @@ -414,7 +412,7 @@ int uvg_get_possible_splits(const encoder_state_t * const state, if (split_tree.current_depth != 0 && last_split != QT_SPLIT /* && !(width > 64 || height > 64)*/) splits[QT_SPLIT] = false; if (width <= min_qt_size) splits[QT_SPLIT] = false; - if (tree_type == UVG_CHROMA_T && width <= 4) splits[QT_SPLIT] = false; + if (tree_type == UVG_CHROMA_T && width <= 8) splits[QT_SPLIT] = false; if (implicitSplit != NO_SPLIT) { @@ -422,7 +420,7 @@ int uvg_get_possible_splits(const encoder_state_t * const state, splits[BT_HOR_SPLIT] = implicitSplit == BT_HOR_SPLIT && height <= max_bt_size; splits[BT_VER_SPLIT] = implicitSplit == BT_VER_SPLIT && width <= max_bt_size; - if (tree_type == UVG_CHROMA_T && width == 4) splits[BT_VER_SPLIT] = false; + if (tree_type == UVG_CHROMA_T && width <= 8) splits[BT_VER_SPLIT] = false; if (!splits[BT_HOR_SPLIT] && !splits[BT_VER_SPLIT] && !splits[QT_SPLIT]) splits[QT_SPLIT] = true; return 1; } @@ -459,23 +457,23 @@ int uvg_get_possible_splits(const encoder_state_t * const state, // specific check for BT splits if (height <= min_bt_size) splits[BT_HOR_SPLIT] = false; if (width > 64 && height <= 64) splits[BT_HOR_SPLIT] = false; - if (tree_type == UVG_CHROMA_T && width * height <= 16) splits[BT_HOR_SPLIT] = false; + if (tree_type == UVG_CHROMA_T && width * height <= 64) splits[BT_HOR_SPLIT] = false; if (width <= min_bt_size) splits[BT_VER_SPLIT] = false; if (width <= 64 && height > 64) splits[BT_VER_SPLIT] = false; - if (tree_type == UVG_CHROMA_T && (width * height <= 16 || width == 4)) splits[BT_VER_SPLIT] = false; + if (tree_type == UVG_CHROMA_T && (width * height <= 64 || width <= 8)) splits[BT_VER_SPLIT] = false; //if (modeType == MODE_TYPE_INTER && width * height == 32) splits[BT_VER_SPLIT] = splits[BT_HOR_SPLIT] = false; if (height <= 2 * min_tt_size || height > max_tt_size || width > max_tt_size) splits[TT_HOR_SPLIT] = false; if (width > 64 || height > 64) splits[TT_HOR_SPLIT] = false; - if (tree_type == UVG_CHROMA_T && width * height <= 16 * 2) splits[TT_HOR_SPLIT] = false; + if (tree_type == UVG_CHROMA_T && width * height <= 64 * 2) splits[TT_HOR_SPLIT] = false; if (width <= 2 * min_tt_size || width > max_tt_size || height > max_tt_size) splits[TT_VER_SPLIT] = false; if (width > 64 || height > 64) splits[TT_VER_SPLIT] = false; - if (tree_type == UVG_CHROMA_T && (width * height <= 16 * 2 || width == 8)) splits[TT_VER_SPLIT] = false; + if (tree_type == UVG_CHROMA_T && (width * height <= 64 * 2 || width <= 16)) splits[TT_VER_SPLIT] = false; //if (modeType == MODE_TYPE_INTER && width * height == 64) splits[TT_VER_SPLIT] = splits[TT_HOR_SPLIT] = false; return 0; diff --git a/src/cu.h b/src/cu.h index 36cfb239..87265ee9 100644 --- a/src/cu.h +++ b/src/cu.h @@ -203,8 +203,7 @@ int uvg_get_split_locs( cu_loc_t out[4], uint8_t* separate_chroma); int uvg_get_possible_splits(const encoder_state_t* const state, - const cu_loc_t* const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6], bool - use_chroma_coordinates); + const cu_loc_t* const cu_loc, split_tree_t split_tree, enum uvg_tree_type tree_type, bool splits[6]); #define CU_GET_MV_CAND(cu_info_ptr, reflist) \ @@ -383,8 +382,7 @@ typedef struct { cu_info_t cu[LCU_T_CU_WIDTH * LCU_T_CU_WIDTH + 1]; } lcu_t; -void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src, enum uvg_tree_type - tree_type); +void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src); int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* const lcu, bool left); int uvg_count_chroma_tree_available_edge_cus(int x, int y, int width, int height, const lcu_t* const lcu, bool left); diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index a23de174..1d121d18 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -475,14 +475,12 @@ static void encode_chroma_tu( cu_info_t* cur_pu, int8_t* scan_idx, lcu_coeff_t* coeff, - uint8_t joint_chroma, - enum - uvg_tree_type tree_type) + uint8_t joint_chroma) { int width_c = cu_loc->chroma_width; int height_c = cu_loc->chroma_height; - int x_local = (cu_loc->x >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C; - int y_local = (cu_loc->y >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C; + int x_local = (cu_loc->x >> 1) % LCU_WIDTH_C; + int y_local = (cu_loc->y >> 1) % LCU_WIDTH_C; cabac_data_t* const cabac = &state->cabac; *scan_idx = SCAN_DIAG; if(!joint_chroma){ @@ -615,7 +613,7 @@ static void encode_transform_unit( if ((chroma_cbf_set || joint_chroma) && last_split && chroma_loc) { //Need to drop const to get lfnst constraints // Use original dimensions instead of ISP split dimensions - encode_chroma_tu(state, chroma_loc, (cu_info_t*)cur_pu, &scan_idx, coeff, joint_chroma, tree_type); + encode_chroma_tu(state, chroma_loc, (cu_info_t*)cur_pu, &scan_idx, coeff, joint_chroma); } } @@ -657,7 +655,7 @@ static void encode_transform_coeff( cur_tu = uvg_cu_array_at_const(used_array, x, y); } - const int tr_limit = (TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)); + const int tr_limit = TR_MAX_WIDTH; const bool ver_split = cu_loc->height > tr_limit; const bool hor_split = cu_loc->width > tr_limit; @@ -681,10 +679,6 @@ static void encode_transform_coeff( cu_loc_t split_cu_loc[4]; const int split_count = uvg_get_split_locs(cu_loc, split, split_cu_loc,NULL); for (int i = 0; i < split_count; ++i) { - if(tree_type == UVG_CHROMA_T) { - split_cu_loc[i].chroma_width = split_cu_loc[i].width; - split_cu_loc[i].chroma_height = split_cu_loc[i].height; - } encode_transform_coeff(state, &split_cu_loc[i], only_chroma, coeff, NULL, tree_type, true, false, luma_cbf_ctx, &split_cu_loc[i], chroma_loc ? &split_cu_loc[i] : NULL); } @@ -1246,12 +1240,12 @@ uint8_t uvg_write_split_flag( // Implisit split flag when on border // Exception made in VVC with flag not being implicit if the BT can be used for // horizontal or vertical split, then this flag tells if QT or BT is used - const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; - const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; + const int cu_width = cu_loc->width; + const int cu_height = cu_loc->height; bool can_split[6]; - const bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split, tree_type == UVG_CHROMA_T); + const bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split); bool allow_split = can_split[1] || can_split[2] || can_split[3] || can_split[4] || can_split[5]; @@ -1354,11 +1348,11 @@ void uvg_encode_coding_tree( const videoframe_t * const frame = state->tile->frame; const cu_array_t* used_array = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; - const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; - const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; + const int cu_width = cu_loc->width; + const int cu_height = cu_loc->height; - const int x = tree_type != UVG_CHROMA_T ? cu_loc->x : chroma_loc->x; - const int y = tree_type != UVG_CHROMA_T ? cu_loc->y : chroma_loc->y; + const int x = cu_loc->x; + const int y = cu_loc->y; const cu_info_t* cur_cu = uvg_cu_array_at_const(used_array, x, y); @@ -1375,11 +1369,11 @@ void uvg_encode_coding_tree( // Absolute coordinates - uint16_t abs_x = x + (state->tile->offset_x >> (tree_type == UVG_CHROMA_T)); - uint16_t abs_y = y + (state->tile->offset_y >> (tree_type == UVG_CHROMA_T)); + uint16_t abs_x = x + state->tile->offset_x; + uint16_t abs_y = y + state->tile->offset_y ; - int32_t frame_width = tree_type != UVG_CHROMA_T ? ctrl->in.width : ctrl->in.width / 2; - int32_t frame_height = tree_type != UVG_CHROMA_T ? ctrl->in.height : ctrl->in.height / 2; + int32_t frame_width = ctrl->in.width; + int32_t frame_height = ctrl->in.height; // Stop if we are outside of the frame if (abs_x >= frame_width || abs_y >= frame_height) return; @@ -1412,25 +1406,14 @@ void uvg_encode_coding_tree( 0}; cu_loc_t new_cu_loc[4]; - cu_loc_t chroma_tree_loc; uint8_t separate_chroma = 0; const int splits = uvg_get_split_locs(cu_loc, split_flag, new_cu_loc, &separate_chroma); separate_chroma |= !has_chroma; for (int split = 0; split >= 1; - chroma_tree_loc.y >>= 1; - chroma_tree_loc.local_x = chroma_tree_loc.x & LCU_WIDTH_C; - chroma_tree_loc.local_y = chroma_tree_loc.y & LCU_WIDTH_C; - chroma_tree_loc.width >>= 1; - chroma_tree_loc.height >>= 1; - assert(!separate_chroma); - } uvg_encode_coding_tree(state, coeff, tree_type, &new_cu_loc[split], - separate_chroma ? chroma_loc :(tree_type == UVG_CHROMA_T ? &chroma_tree_loc : &new_cu_loc[split]), + separate_chroma ? chroma_loc : &new_cu_loc[split], new_split_tree, !separate_chroma || (split == splits - 1 && has_chroma)); } return; @@ -1714,8 +1697,8 @@ double uvg_mock_encode_coding_unit( const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; - int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); - int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); + int x_local = cu_loc->local_x; + int y_local = cu_loc->local_y; const bool is_separate_tree = chroma_loc == NULL || cu_loc->height != chroma_loc->height || cu_loc->width != chroma_loc->width; const cu_info_t* left_cu = NULL, *above_cu = NULL; diff --git a/src/encoderstate.c b/src/encoderstate.c index 32ecfeac..e8a43548 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -890,12 +890,6 @@ static void encoder_state_worker_encode_lcu_bitstream(void * opaque) if(tree_type == UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400) { uvg_cu_loc_ctor(&start, lcu->position.x * LCU_WIDTH, lcu->position.y * LCU_WIDTH, LCU_WIDTH, LCU_WIDTH); cu_loc_t chroma_tree_loc = start; - chroma_tree_loc.x >>= 1; - chroma_tree_loc.y >>= 1; - chroma_tree_loc.local_x = chroma_tree_loc.x & LCU_WIDTH_C; - chroma_tree_loc.local_y = chroma_tree_loc.y & LCU_WIDTH_C; - chroma_tree_loc.width >>= 1; - chroma_tree_loc.height >>= 1; uvg_encode_coding_tree(state, lcu->coeff, UVG_CHROMA_T, &start, &chroma_tree_loc, split_tree, true); } @@ -1175,6 +1169,12 @@ static void encoder_state_encode_leaf(encoder_state_t * const state) uvg_threadqueue_submit(state->encoder_control->threadqueue, job[0]); uvg_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[lcu->id]); +#ifdef UVG_DEBUG_PRINT_CABAC + // Ensures that the ctus are encoded in raster scan order + if(i >= state->tile->frame->width_in_lcu) { + uvg_threadqueue_job_dep_add(state->tile->wf_jobs[lcu->id], state->tile->wf_recon_jobs[(lcu->id / state->tile->frame->width_in_lcu - 1) * state->tile->frame->width_in_lcu]); + } +#endif } uvg_threadqueue_submit(state->encoder_control->threadqueue, state->tile->wf_jobs[lcu->id]); @@ -1307,10 +1307,10 @@ static void encoder_state_encode(encoder_state_t * const main_state) { if(main_state->encoder_control->cfg.dual_tree){ sub_state->tile->frame->chroma_cu_array = uvg_cu_subarray( main_state->tile->frame->chroma_cu_array, - offset_x / 2, - offset_y / 2, - sub_state->tile->frame->width_in_lcu * LCU_WIDTH_C, - sub_state->tile->frame->height_in_lcu * LCU_WIDTH_C + offset_x, + offset_y, + sub_state->tile->frame->width_in_lcu * LCU_WIDTH, + sub_state->tile->frame->height_in_lcu * LCU_WIDTH ); } } @@ -1949,10 +1949,9 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, uvg_pict if (cfg->dual_tree && state->encoder_control->chroma_format != UVG_CSP_400 && state->frame->is_irap) { assert(state->tile->frame->chroma_cu_array == NULL); - state->tile->frame->chroma_cu_array = uvg_cu_array_chroma_alloc( - state->tile->frame->width / 2, - state->tile->frame->height / 2, - state->encoder_control->chroma_format + state->tile->frame->chroma_cu_array = uvg_cu_array_alloc( + state->tile->frame->width, + state->tile->frame->height ); } // Set pictype. diff --git a/src/filter.c b/src/filter.c index cabc75e3..1fff4b55 100644 --- a/src/filter.c +++ b/src/filter.c @@ -273,8 +273,6 @@ static bool is_tu_boundary( color_t color, enum uvg_tree_type tree_type) { - x >>= tree_type == UVG_CHROMA_T; - y >>= tree_type == UVG_CHROMA_T; // if (x & 3 || y & 3) return false; const cu_info_t *const scu = uvg_cu_array_at_const(tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, x, y); @@ -1081,8 +1079,8 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, // CUs on both sides of the edge cu_info_t *cu_p; cu_info_t *cu_q; - int32_t x_coord = x << (tree_type != UVG_CHROMA_T); - int32_t y_coord = y << (tree_type != UVG_CHROMA_T); + int32_t x_coord = x << 1; + int32_t y_coord = y << 1; cu_array_t* cua = tree_type != UVG_CHROMA_T ? frame->cu_array : frame->chroma_cu_array; if (dir == EDGE_VER) { y_coord = (y + min_chroma_length * blk_idx) << (tree_type != UVG_CHROMA_T); diff --git a/src/intra.c b/src/intra.c index 3d2c1f81..cad654df 100644 --- a/src/intra.c +++ b/src/intra.c @@ -532,9 +532,8 @@ static void predict_cclm( const lcu_t* const lcu, uvg_intra_references* chroma_ref, uvg_pixel* dst, - cclm_parameters_t* cclm_params, - enum uvg_tree_type tree_type - ) + cclm_parameters_t* cclm_params +) { assert(mode == LM_CHROMA_IDX || mode == LM_CHROMA_L_IDX || mode == LM_CHROMA_T_IDX); assert(state->encoder_control->cfg.cclm); @@ -552,17 +551,14 @@ static void predict_cclm( const uvg_pixel *y_rec = lcu->rec.y + x_scu + y_scu * LCU_WIDTH; const int stride2 = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA); - - tree_type = state->encoder_control->cfg.dual_tree && state->frame->slicetype == UVG_SLICE_I ? tree_type : UVG_BOTH_T; - - const int ctu_size = tree_type == UVG_CHROMA_T ? LCU_WIDTH_C : LCU_WIDTH; + + const int ctu_size = LCU_WIDTH; if (y0) { if (y_scu == 0) available_above_right = MIN(MIN(width / 2, (64-x_scu - width * 2) / 4), (state->tile->frame->width - x0 - width* 2) / 4); for (; available_above_right < width / 2; available_above_right++) { int x_extension = x_scu + width * 2 + 4 * available_above_right; - x_extension >>= tree_type == UVG_CHROMA_T; - const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, (y_scu >> (tree_type==UVG_CHROMA_T)) - 4); + const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, (y_scu) - 4); if (x_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break; } if(y_scu == 0) { @@ -588,8 +584,7 @@ static void predict_cclm( if (x_scu == 0) available_left_below = MIN(MIN(height / 2, (64 - y_scu - height * 2) / 4), (state->tile->frame->height - y0 - height * 2) / 4); for (; available_left_below < height / 2; available_left_below++) { int y_extension = y_scu + height * 2 + 4 * available_left_below; - y_extension >>= tree_type == UVG_CHROMA_T; - const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, (x_scu >> (tree_type == UVG_CHROMA_T)) - 4, y_extension); + const cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, (x_scu) - 4, y_extension); if (y_extension >= ctu_size || pu->type == CU_NOTSET || (pu->type == CU_INTRA && pu->intra.mode_chroma == -1)) break; if(x_scu == 32 && y_scu == 0 && pu->log2_height == 6 && pu->log2_width == 6 ) break; } @@ -1617,9 +1612,8 @@ void uvg_intra_predict( const color_t color, uvg_pixel* dst, const intra_search_data_t* data, - const lcu_t* lcu, - enum uvg_tree_type tree_type - ) + const lcu_t* lcu +) { const int stride = (((state->tile->frame->width + 7) & ~7) + FRAME_PADDING_LUMA); // TODO: what is this used for? @@ -1652,8 +1646,7 @@ void uvg_intra_predict( if (width != 1 << data->pred_cu.log2_chroma_width || height != 1 << data->pred_cu.log2_chroma_height || data->cclm_parameters[color == COLOR_U ? 0 : 1].b <= 0) { predict_cclm( state, color, width, height, x, y, stride, intra_mode, lcu, refs, dst, - (cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1], - tree_type); + (cclm_parameters_t*)&data->cclm_parameters[color == COLOR_U ? 0 : 1]); } else { linear_transform_cclm(&data->cclm_parameters[color == COLOR_U ? 0 : 1], dst, dst, width, height); @@ -1781,8 +1774,7 @@ static void intra_recon_tb_leaf( const cu_loc_t* cu_loc, lcu_t *lcu, color_t color, - const intra_search_data_t* search_data, - enum uvg_tree_type tree_type) + const intra_search_data_t* search_data) { const uvg_config *cfg = &state->encoder_control->cfg; const int shift = color == COLOR_Y ? 0 : 1; @@ -1829,7 +1821,7 @@ static void intra_recon_tb_leaf( uvg_intra_build_reference(state, pu_loc, cu_loc, color, &luma_px, &pic_px, lcu, &refs, cfg->wpp, extra_refs, multi_ref_index, isp_mode); uvg_pixel pred[32 * 32]; - uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu, tree_type); + uvg_intra_predict(state, &refs, cu_loc, pu_loc, color, pred, search_data, lcu); const int index = lcu_px.x + lcu_px.y * lcu_width; uvg_pixel *block = NULL; @@ -1883,12 +1875,8 @@ void uvg_intra_recon_cu( { const uint8_t depth = 6 - uvg_g_convert_to_log2[cu_loc->width]; const vector2d_t lcu_px = { - cu_loc->local_x >> - (tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree && - state->frame->slicetype == UVG_SLICE_I), - cu_loc->local_y >> - (tree_type == UVG_CHROMA_T && state->encoder_control->cfg.dual_tree && - state->frame->slicetype == UVG_SLICE_I), + cu_loc->local_x, + cu_loc->local_y, }; const int8_t width = cu_loc->width; const int8_t height = cu_loc->height; @@ -1945,7 +1933,7 @@ void uvg_intra_recon_cu( uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false); cur_cu->intra.isp_index = 0; if(tu_loc.x % 4 == 0) { - intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type); + intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data); } uvg_quantize_lcu_residual(state, true, false, false, &tu_loc, cur_cu, lcu, @@ -1959,11 +1947,11 @@ void uvg_intra_recon_cu( // Process a leaf TU. if (has_luma) { - intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_Y, search_data, tree_type); + intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_Y, search_data); } if (has_chroma) { - intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_U, search_data, tree_type); - intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_V, search_data, tree_type); + intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_U, search_data); + intra_recon_tb_leaf(state, cu_loc, cu_loc, lcu, COLOR_V, search_data); } // TODO: not necessary to call if only luma and ISP is on @@ -2056,7 +2044,7 @@ double uvg_recon_and_estimate_cost_isp(encoder_state_t* const state, uvg_get_isp_split_loc(&pu_loc, cu_loc->x, cu_loc->y, width, height, i, split_type, false); search_data->pred_cu.intra.isp_index = 0; if (tu_loc.x % 4 == 0) { - intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data, UVG_LUMA_T); + intra_recon_tb_leaf(state, &pu_loc, cu_loc, lcu, COLOR_Y, search_data); } uvg_quantize_lcu_residual(state, true, false, false, &tu_loc, &search_data->pred_cu, lcu, diff --git a/src/intra.h b/src/intra.h index 71de9a6a..676588ec 100644 --- a/src/intra.h +++ b/src/intra.h @@ -138,9 +138,8 @@ void uvg_intra_predict( const color_t color, uvg_pixel* dst, const intra_search_data_t* data, - const lcu_t* lcu, - enum uvg_tree_type tree_type - ); + const lcu_t* lcu +); void uvg_intra_recon_cu( encoder_state_t* const state, diff --git a/src/search.c b/src/search.c index 8d7e343d..952fa1b8 100644 --- a/src/search.c +++ b/src/search.c @@ -67,10 +67,10 @@ static const int INTRA_THRESHOLD = 8; static INLINE void copy_cu_info(lcu_t *from, lcu_t *to, const cu_loc_t* const cu_loc, enum uvg_tree_type tree_type) { - const int y_limit = (cu_loc->local_y + cu_loc->height) >> (tree_type == UVG_CHROMA_T); - const int x_limit = (cu_loc->local_x + cu_loc->width) >> (tree_type == UVG_CHROMA_T); - for (int y = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); y < y_limit; y += SCU_WIDTH) { - for (int x = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); x < x_limit; x += SCU_WIDTH) { + const int y_limit = (cu_loc->local_y + cu_loc->height); + const int x_limit = (cu_loc->local_x + cu_loc->width); + for (int y = cu_loc->local_y ; y < y_limit; y += SCU_WIDTH) { + for (int x = cu_loc->local_x ; x < x_limit; x += SCU_WIDTH) { *LCU_GET_CU_AT_PX(to, x, y) = *LCU_GET_CU_AT_PX(from, x, y); } } @@ -86,8 +86,8 @@ static INLINE void initialize_partial_work_tree( chroma_loc, const enum uvg_tree_type tree_type) { - const int y_limit = MIN(LCU_WIDTH, state->tile->frame->height - cu_loc->y / 64 * 64) >> (tree_type == UVG_CHROMA_T); - const int x_limit = MIN(LCU_WIDTH, state->tile->frame->width - cu_loc->x / 64 * 64) >> (tree_type == UVG_CHROMA_T); + const int y_limit = MIN(LCU_WIDTH, state->tile->frame->height - cu_loc->y / 64 * 64); + const int x_limit = MIN(LCU_WIDTH, state->tile->frame->width - cu_loc->x / 64 * 64); if (cu_loc->local_x == 0) { to->left_ref = from->left_ref; @@ -150,8 +150,8 @@ static INLINE void initialize_partial_work_tree( } - const int y_start = (cu_loc->local_y >> (tree_type == UVG_CHROMA_T)) - 4; - const int x_start = (cu_loc->local_x >> (tree_type == UVG_CHROMA_T)) - 4; + const int y_start = (cu_loc->local_y) - 4; + const int x_start = (cu_loc->local_x) - 4; for (int y = y_start; y < y_limit; y += SCU_WIDTH) { *LCU_GET_CU_AT_PX(to, x_start, y) = *LCU_GET_CU_AT_PX(from, x_start, y); } @@ -159,15 +159,15 @@ static INLINE void initialize_partial_work_tree( *LCU_GET_CU_AT_PX(to, x, y_start) = *LCU_GET_CU_AT_PX(from, x, y_start); } - for (int y = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); y < y_limit; y += SCU_WIDTH) { - for (int x = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); x < x_limit; x += SCU_WIDTH) { + for (int y = cu_loc->local_y; y < y_limit; y += SCU_WIDTH) { + for (int x = cu_loc->local_x ; x < x_limit; x += SCU_WIDTH) { memset(LCU_GET_CU_AT_PX(to, x, y), 0, sizeof(cu_info_t)); } } if(chroma_loc->local_y != cu_loc->local_y || chroma_loc->local_x != cu_loc->local_x && tree_type == UVG_BOTH_T) { - const int y_start = (chroma_loc->local_y >> (tree_type == UVG_CHROMA_T)) - 4; - const int x_start = (chroma_loc->local_x >> (tree_type == UVG_CHROMA_T)) - 4; + const int y_start = (chroma_loc->local_y) - 4; + const int x_start = (chroma_loc->local_x) - 4; for (int y = y_start; y < y_limit; y += SCU_WIDTH) { *LCU_GET_CU_AT_PX(to, x_start, y) = *LCU_GET_CU_AT_PX(from, x_start, y); } @@ -190,24 +190,24 @@ static INLINE void initialize_partial_work_tree( to->top_ref = from->top_ref; *LCU_GET_TOP_RIGHT_CU(to) = *LCU_GET_TOP_RIGHT_CU(from); } - if (x_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) { + if (x_limit != LCU_WIDTH) { for (int y = y_start; y < y_limit; y += SCU_WIDTH) { memset(LCU_GET_CU_AT_PX(to, x_limit, y), 0, sizeof(cu_info_t)); } } - if (y_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) { + if (y_limit != LCU_WIDTH) { for (int x = x_start; x < x_limit; x += SCU_WIDTH) { memset(LCU_GET_CU_AT_PX(to, x, y_limit), 0, sizeof(cu_info_t)); } } } else { - if (x_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) { + if (x_limit != LCU_WIDTH) { for (int y = y_start; y < y_limit; y += SCU_WIDTH) { memset(LCU_GET_CU_AT_PX(to, x_limit, y), 0, sizeof(cu_info_t)); } } - if (y_limit != LCU_WIDTH >> (tree_type == UVG_CHROMA_T)) { + if (y_limit != LCU_WIDTH) { for (int x = x_start; x < x_limit; x += SCU_WIDTH) { memset(LCU_GET_CU_AT_PX(to, x, y_limit), 0, sizeof(cu_info_t)); } @@ -222,10 +222,10 @@ static INLINE void copy_cu_pixels( enum uvg_tree_type tree_type) { - const int x_local = cu_loc->local_x >> (tree_type == UVG_CHROMA_T); - const int y_local = cu_loc->local_y >> (tree_type == UVG_CHROMA_T); + const int x_local = cu_loc->local_x; + const int y_local = cu_loc->local_y; const int luma_index = x_local + y_local * LCU_WIDTH; - const int chroma_index = tree_type == UVG_CHROMA_T ? x_local + y_local * LCU_WIDTH_C : (x_local / 2) + (y_local / 2) * LCU_WIDTH_C; + const int chroma_index = (x_local / 2) + (y_local / 2) * LCU_WIDTH_C; if(tree_type != UVG_CHROMA_T) { uvg_pixels_blit(&from->rec.y[luma_index], &to->rec.y[luma_index], @@ -372,11 +372,11 @@ static void lcu_fill_chroma_cu_info(lcu_t *lcu, const cu_loc_t * const cu_loc) static void lcu_fill_chroma_cbfs(lcu_t *lcu, const cu_loc_t * const chroma_loc, enum uvg_tree_type tree_type) { - int8_t height = tree_type == UVG_CHROMA_T ? chroma_loc->chroma_height : chroma_loc->height; - int8_t width = tree_type == UVG_CHROMA_T ? chroma_loc->chroma_width : chroma_loc->width; + int8_t height = chroma_loc->height; + int8_t width = chroma_loc->width; uint32_t x_local = chroma_loc->local_x; uint32_t y_local = chroma_loc->local_y; - const int offset = ~((TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)) - 1); + const int offset = ~((TR_MAX_WIDTH) - 1); // Set coeff flags in every CU covered by part_mode in this depth. for (uint32_t y = 0; y < height; y += SCU_WIDTH) { for (uint32_t x = 0; x < width; x += SCU_WIDTH) { @@ -728,7 +728,7 @@ static double cu_rd_cost_tr_split_accurate( const int skip_residual_coding = pred_cu->skipped || (pred_cu->type != CU_INTRA && pred_cu->cbf == 0); // cur_cu is used for TU parameters. - cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x >> (tree_type == UVG_CHROMA_T), cu_loc->local_y >> (tree_type == UVG_CHROMA_T)); + cu_info_t* const tr_cu = LCU_GET_CU_AT_PX(lcu, cu_loc->local_x, cu_loc->local_y); double coeff_bits = 0; double tr_tree_bits = 0; @@ -1132,28 +1132,28 @@ static void mark_deblocking(const cu_loc_t* const cu_loc, const cu_loc_t* const else { if (chroma_loc->x) { - for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += TR_MAX_WIDTH / 2) { - for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) { + for (int x = x_local; x < x_local + chroma_loc->width; x += TR_MAX_WIDTH) { + for (int y = y_local; y < y_local + chroma_loc->height; y += SCU_WIDTH) { LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER; } } } else if(chroma_loc->width == 64) { - for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) { - LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH / 2, y)->chroma_deblocking |= EDGE_VER; + for (int y = y_local; y < y_local + chroma_loc->height; y += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->chroma_deblocking |= EDGE_VER; } } if(chroma_loc->y) { - for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += TR_MAX_WIDTH / 2) { - for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) { + for (int y = y_local; y < y_local + chroma_loc->height; y += TR_MAX_WIDTH) { + for (int x = x_local; x < x_local + chroma_loc->width; x += SCU_WIDTH) { LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR; } } } else if (chroma_loc->height == 64) { - for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) { - LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH / 2)->chroma_deblocking |= EDGE_HOR; + for (int x = x_local; x < x_local + chroma_loc->width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->chroma_deblocking |= EDGE_HOR; } } } @@ -1218,8 +1218,8 @@ static double search_cu( const int depth = split_tree.current_depth; const encoder_control_t* ctrl = state->encoder_control; const videoframe_t * const frame = state->tile->frame; - const int cu_width = tree_type != UVG_CHROMA_T ? cu_loc->width : cu_loc->chroma_width; - const int cu_height = tree_type != UVG_CHROMA_T ? cu_loc->height : cu_loc->chroma_height; + const int cu_width = cu_loc->width; + const int cu_height = cu_loc->height; const int x = cu_loc->x; const int y = cu_loc->y; const int luma_width = cu_loc->width; @@ -1251,8 +1251,8 @@ static double search_cu( int32_t max; } pu_depth_inter, pu_depth_intra; - int x_local = SUB_SCU(x) >> (tree_type == UVG_CHROMA_T); - int y_local = SUB_SCU(y) >> (tree_type == UVG_CHROMA_T); + int x_local = SUB_SCU(x); + int y_local = SUB_SCU(y); int32_t frame_width = frame->width; int32_t frame_height = frame->height; @@ -1611,7 +1611,7 @@ static double search_cu( bits += uvg_mock_encode_coding_unit( state, cabac, - tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, + cu_loc, is_separate_tree && !has_chroma ? NULL : chroma_loc, lcu, cur_cu, @@ -1691,7 +1691,7 @@ static double search_cu( } bool can_split[6]; - bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split, false); + bool is_implicit = uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split); const int slice_type = state->frame->is_irap ? (tree_type == UVG_CHROMA_T ? 2 : 0) : 1; const int max_btd = state->encoder_control->cfg.max_btt_depth[slice_type]; @@ -1736,7 +1736,8 @@ static double search_cu( for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) { if (!can_split[split_type] || (tree_type == UVG_CHROMA_T && split_type == TT_HOR_SPLIT && cu_loc->chroma_height == 8) - || (tree_type == UVG_CHROMA_T && split_type == BT_HOR_SPLIT && cu_loc->chroma_height == 4)) + || (tree_type == UVG_CHROMA_T && split_type == BT_HOR_SPLIT && cu_loc->chroma_height == 4) + ) continue; if (completely_inside && check_for_early_termission( @@ -1788,7 +1789,7 @@ static double search_cu( &state->search_cabac, left_cu, above_cu, - tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, + cu_loc, count_tree, tree_type, &is_implicit, @@ -1834,8 +1835,8 @@ static double search_cu( if (split_type == QT_SPLIT && completely_inside) { const cu_info_t * const t = LCU_GET_CU_AT_PX( &split_lcu[0], - new_cu_loc[split].local_x >> (tree_type == UVG_CHROMA_T), - new_cu_loc[split].local_y >> (tree_type == UVG_CHROMA_T)); + new_cu_loc[split].local_x, + new_cu_loc[split].local_y); stop_to_qt |= GET_SPLITDATA(t, depth + 1) == QT_SPLIT; } @@ -2113,10 +2114,9 @@ static void copy_lcu_to_cu_data(const encoder_state_t * const state, int x_px, i // Copy non-reference CUs to picture. uvg_cu_array_copy_from_lcu( tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, - tree_type != UVG_CHROMA_T ? x_px : x_px / 2, - tree_type != UVG_CHROMA_T ? y_px : y_px / 2, - lcu, - tree_type); + x_px, + y_px, + lcu); // Copy pixels to picture. { diff --git a/src/search_intra.c b/src/search_intra.c index 83ec950d..9416f122 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -678,7 +678,7 @@ static int search_intra_chroma_rough( for (int i = 0; i < modes_count; ++i) { const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; - uvg_intra_predict(state, &refs_u, cu_loc, &loc, COLOR_U, pred, &chroma_data[i], lcu, tree_type); + uvg_intra_predict(state, &refs_u, cu_loc, &loc, COLOR_U, pred, &chroma_data[i], lcu); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); switch (width) { case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block); @@ -697,7 +697,7 @@ static int search_intra_chroma_rough( for (int i = 0; i < modes_count; ++i) { const int8_t mode_chroma = chroma_data[i].pred_cu.intra.mode_chroma; if (mode_chroma == luma_mode || mode_chroma == 0 || mode_chroma >= 81) continue; - uvg_intra_predict(state, &refs_v, cu_loc, &loc, COLOR_V, pred, &chroma_data[i], lcu, tree_type); + uvg_intra_predict(state, &refs_v, cu_loc, &loc, COLOR_V, pred, &chroma_data[i], lcu); //costs[i] += get_cost(encoder_state, pred, orig_block, satd_func, sad_func, width); switch (width) { case 4: chroma_data[i].cost += uvg_satd_4x4(pred, orig_block); @@ -1044,9 +1044,9 @@ static uint8_t search_intra_rough( int offset = 1 << state->encoder_control->cfg.intra_rough_search_levels; search_proxy.pred_cu.intra.mode = 0; - uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[0], &search_proxy, NULL); search_proxy.pred_cu.intra.mode = 1; - uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[1], &search_proxy, NULL); get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs); mode_checked[0] = true; mode_checked[1] = true; @@ -1096,7 +1096,7 @@ static uint8_t search_intra_rough( for (int i = 0; i < PARALLEL_BLKS; ++i) { if (mode + i * offset <= 66) { search_proxy.pred_cu.intra.mode = mode + i*offset; - uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[i], &search_proxy, NULL); } } @@ -1168,7 +1168,7 @@ static uint8_t search_intra_rough( for (int block = 0; block < PARALLEL_BLKS; ++block) { search_proxy.pred_cu.intra.mode = modes_to_check[block + i]; - uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL, UVG_LUMA_T); + uvg_intra_predict(state, refs, cu_loc, cu_loc, COLOR_Y, preds[block], &search_proxy, NULL); } @@ -1259,7 +1259,7 @@ static void get_rough_cost_for_2n_modes( double bits[PARALLEL_BLKS] = { 0 }; for(int mode = 0; mode < num_modes; mode += PARALLEL_BLKS) { for (int i = 0; i < PARALLEL_BLKS; ++i) { - uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL, UVG_LUMA_T); + uvg_intra_predict(state, &refs[search_data[mode + i].pred_cu.intra.multi_ref_idx], cu_loc, cu_loc, COLOR_Y, preds[i], &search_data[mode + i], NULL); } get_cost_dual(state, preds, orig_block, satd_dual_func, sad_dual_func, width, height, costs_out); @@ -1505,8 +1505,7 @@ int8_t uvg_search_intra_chroma_rdo( COLOR_U, u_pred, &chroma_data[mode_i], - lcu, - tree_type); + lcu); uvg_intra_predict( state, &refs[COLOR_V - 1], @@ -1515,8 +1514,7 @@ int8_t uvg_search_intra_chroma_rdo( COLOR_V, v_pred, &chroma_data[mode_i], - lcu, - tree_type); + lcu); uvg_generate_residual( &lcu->ref.u[offset], u_pred,