diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 7982206e..cfe69771 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -452,8 +452,8 @@ static void encode_chroma_tu( { int width_c = cu_loc->chroma_width; int height_c = cu_loc->chroma_height; - int x_local = ((cu_loc->x >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; - int y_local = ((cu_loc->y >> (tree_type != UVG_CHROMA_T)) & ~3) % LCU_WIDTH_C; + int x_local = (cu_loc->x >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C; + int y_local = (cu_loc->y >> (tree_type != UVG_CHROMA_T)) % LCU_WIDTH_C; cabac_data_t* const cabac = &state->cabac; *scan_idx = SCAN_DIAG; if(!joint_chroma){ @@ -1639,6 +1639,7 @@ double uvg_mock_encode_coding_unit( encoder_state_t* const state, cabac_data_t* cabac, const cu_loc_t* const cu_loc, + const cu_loc_t* const chroma_loc, lcu_t* lcu, cu_info_t* cur_cu, enum uvg_tree_type tree_type, @@ -1751,7 +1752,7 @@ double uvg_mock_encode_coding_unit( if(tree_type != UVG_CHROMA_T) { uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, lcu, &bits); } - if((cur_cu->log2_height + cur_cu->log2_width >= 6 || (x % 8 != 0 && y % 8 != 0) || tree_type == UVG_CHROMA_T) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { + if((chroma_loc || tree_type == UVG_CHROMA_T) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, &bits); } } diff --git a/src/encode_coding_tree.h b/src/encode_coding_tree.h index 5a9b4023..96e0cfb7 100644 --- a/src/encode_coding_tree.h +++ b/src/encode_coding_tree.h @@ -77,6 +77,7 @@ double uvg_mock_encode_coding_unit( encoder_state_t* const state, cabac_data_t* cabac, const cu_loc_t* const cu_loc, + const cu_loc_t* const chroma_loc, lcu_t* lcu, cu_info_t* cur_cu, enum uvg_tree_type tree_type, diff --git a/src/intra.c b/src/intra.c index bb696c0c..883091c9 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1121,6 +1121,7 @@ void uvg_intra_build_reference_any( else { const int num_cus = uvg_count_available_edge_cus(cu_loc, lcu, true); px_available_left = is_dual_tree || !is_chroma ? num_cus * 4 : num_cus *2; + px_available_left -= px.x % 4; } // Limit the number of available pixels based on block size and dimensions @@ -1440,7 +1441,7 @@ void uvg_intra_build_reference_inner( int i = multi_ref_index; // Offset by multi_ref_index // Do different loop for heights smaller than 4 (possible for some ISP splits) - if (lcu_px.y % 4 != 0) { + if (px.y % 4 != 0) { do { out_left_ref[i + 1] = left_border[(i + 0 - multi_ref_index) * left_stride]; out_left_ref[i + 2] = left_border[(i + 1 - multi_ref_index) * left_stride]; diff --git a/src/search.c b/src/search.c index f08a1146..9bff608a 100644 --- a/src/search.c +++ b/src/search.c @@ -187,8 +187,8 @@ static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { //const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T)); - const int chroma_x = (cu_loc->x >> 1) & ~3; - const int chroma_y = (cu_loc->y >> 1) & ~3; + const int chroma_x = (cu_loc->x >> 1); + const int chroma_y = (cu_loc->y >> 1); const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C); copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C); @@ -208,11 +208,16 @@ static void work_tree_copy_up( bool joint, enum uvg_tree_type tree_type, - const cu_loc_t* const cu_loc) + const cu_loc_t* const cu_loc, + const cu_loc_t* const chroma_loc) { copy_cu_info (from, to, cu_loc, tree_type); - copy_cu_pixels(from, to, cu_loc, tree_type); - copy_cu_coeffs(cu_loc, from, to, joint, tree_type); + copy_cu_pixels(from, to, cu_loc, cu_loc != chroma_loc && tree_type == UVG_LUMA_T ? UVG_LUMA_T : tree_type); + copy_cu_coeffs(cu_loc, from, to, joint, cu_loc != chroma_loc && tree_type == UVG_LUMA_T ? UVG_LUMA_T : tree_type); + if (cu_loc != chroma_loc && tree_type == UVG_LUMA_T) { + copy_cu_pixels(from, to, chroma_loc, UVG_CHROMA_T); + copy_cu_coeffs(chroma_loc, from, to, joint, UVG_CHROMA_T); + } } @@ -481,7 +486,7 @@ double uvg_cu_rd_cost_chroma( lcu_t *const lcu, const cu_loc_t * const cu_loc) { - const vector2d_t lcu_px = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; + const vector2d_t lcu_px = { (cu_loc->local_x) / 2, (cu_loc->local_y) / 2 }; cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); const int skip_residual_coding = pred_cu->skipped || (pred_cu->type == CU_INTER && pred_cu->cbf == 0); @@ -786,7 +791,7 @@ static double cu_rd_cost_tr_split_accurate( if (chroma_can_use_tr_skip) { CABAC_FBITS_UPDATE(cabac, &cabac->ctx.transform_skip_model_chroma, tr_cu->tr_skip & 2, tr_tree_bits, "transform_skip_flag"); } - coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); + coeff_bits += uvg_get_coeff_cost(state, lcu->coeff.joint_uv, tr_cu, &temp_chroma_loc, COLOR_U, scan_order, 0, COEFF_ORDER_CU); } } @@ -948,6 +953,7 @@ static double search_cu( const int y = cu_loc->y; const int luma_width = cu_loc->width; const int luma_height = cu_loc->height; + const bool is_separate_tree = chroma_loc == NULL || cu_loc->height != chroma_loc->height || cu_loc->width != chroma_loc->width; assert(cu_width >= 4); double cost = MAX_DOUBLE; double inter_zero_coeff_cost = MAX_DOUBLE; @@ -1150,7 +1156,7 @@ static double search_cu( bool recon_chroma = true; bool recon_luma = tree_type != UVG_CHROMA_T; - if ((cur_cu->log2_height + cur_cu->log2_width < 6) || !has_chroma || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { + if (is_separate_tree || !has_chroma || state->encoder_control->chroma_format == UVG_CSP_400 || tree_type == UVG_LUMA_T) { recon_chroma = false; } lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_height, cur_cu); @@ -1161,7 +1167,7 @@ static double search_cu( recon_luma, recon_chroma); - if((cur_cu->log2_height + cur_cu->log2_width < 6 && has_chroma && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 ) + if((is_separate_tree && has_chroma && tree_type != UVG_LUMA_T && state->encoder_control->chroma_format != UVG_CSP_400 ) || tree_type == UVG_CHROMA_T) { intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; uvg_intra_recon_cu(state, @@ -1263,6 +1269,7 @@ static double search_cu( state, cabac, tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, + is_separate_tree && !has_chroma ? NULL : chroma_loc, lcu, cur_cu, tree_type, @@ -1306,7 +1313,7 @@ static double search_cu( // Recursively split all the way to max search depth. if (can_split_cu) { - const int split_type = depth == 2 ? BT_HOR_SPLIT : QT_SPLIT; + const int split_type = depth == 2 ? TT_VER_SPLIT : QT_SPLIT; const split_tree_t new_split = { split_tree.split_tree | split_type << (split_tree.current_depth * 3), split_tree.current_depth + 1, @@ -1447,7 +1454,7 @@ static double search_cu( if (split_cost < cost) { // Copy split modes to this depth. cost = split_cost; - work_tree_copy_up(&split_lcu, lcu, state->encoder_control->cfg.jccr, tree_type, cu_loc); + work_tree_copy_up(&split_lcu, lcu, state->encoder_control->cfg.jccr, tree_type, cu_loc, is_separate_tree && !has_chroma ? NULL : chroma_loc); #if UVG_DEBUG //debug_split = 1; #endif diff --git a/src/search_inter.c b/src/search_inter.c index d7a0fb33..f9e0d8bb 100644 --- a/src/search_inter.c +++ b/src/search_inter.c @@ -2123,10 +2123,10 @@ void uvg_cu_cost_inter_rd2( const split_tree_t splitt_tree = { cur_cu->split_tree, depth, mtt_depth }; if (cur_cu->merged) { no_cbf_bits = CTX_ENTROPY_FBITS(&state->cabac.ctx.cu_skip_flag_model[skip_context], 1) + *inter_bitcost; - bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); + bits += uvg_mock_encode_coding_unit(state, cabac, cu_loc, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); } else { - no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); + no_cbf_bits = uvg_mock_encode_coding_unit(state, cabac, cu_loc, cu_loc, lcu, cur_cu, UVG_BOTH_T, splitt_tree); bits += no_cbf_bits - CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 0) + CTX_ENTROPY_FBITS(&cabac->ctx.cu_qt_root_cbf_model, 1); } double no_cbf_cost = ssd + no_cbf_bits * state->lambda; diff --git a/src/search_intra.c b/src/search_intra.c index 56e3814e..867b4ded 100644 --- a/src/search_intra.c +++ b/src/search_intra.c @@ -630,7 +630,7 @@ static int search_intra_chroma_rough( { const int_fast8_t log2_width_c = uvg_g_convert_to_log2[cu_loc->chroma_width]; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; - const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 }; + const vector2d_t luma_px = { cu_loc->x, cu_loc->y}; const int width = 1 << log2_width_c; const int height = width; // TODO: height for non-square blocks @@ -642,7 +642,7 @@ static int search_intra_chroma_rough( uvg_intra_references refs_v; uvg_intra_build_reference(state, &loc, &loc, COLOR_V, &luma_px, &pic_px, lcu, &refs_v, state->encoder_control->cfg.wpp, NULL, 0, 0); - vector2d_t lcu_cpx = { (cu_loc->local_x & ~7) / 2, (cu_loc->local_y & ~7) / 2 }; + vector2d_t lcu_cpx = { (cu_loc->local_x) / 2, (cu_loc->local_y) / 2 }; uvg_pixel* orig_u = &lcu->ref.u[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; uvg_pixel* orig_v = &lcu->ref.v[lcu_cpx.x + lcu_cpx.y * LCU_WIDTH_C]; @@ -1429,7 +1429,7 @@ int8_t uvg_search_intra_chroma_rdo( const int chroma_width = cu_loc->chroma_width; const int chroma_height = cu_loc->chroma_height; uvg_intra_references refs[2]; - const vector2d_t luma_px = { cu_loc->x & ~7, cu_loc->y & ~7 }; + const vector2d_t luma_px = { cu_loc->x, cu_loc->y }; const vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height, @@ -1444,7 +1444,7 @@ int8_t uvg_search_intra_chroma_rdo( cabac_data_t temp_cabac; memcpy(&temp_cabac, &state->search_cabac, sizeof(cabac_data_t)); - const int offset = ((cu_loc->local_x & ~7) >> 1) + ((cu_loc->local_y & ~7) >> 1)* LCU_WIDTH_C; + const int offset = ((cu_loc->local_x) >> 1) + ((cu_loc->local_y) >> 1)* LCU_WIDTH_C; int lfnst_modes_to_check[3]; if((is_separate || tree_type == UVG_CHROMA_T) && state->encoder_control->cfg.lfnst) { diff --git a/src/strategies/generic/intra-generic.c b/src/strategies/generic/intra-generic.c index 9a3cbe26..b7ab7e94 100644 --- a/src/strategies/generic/intra-generic.c +++ b/src/strategies/generic/intra-generic.c @@ -131,7 +131,7 @@ static void uvg_angular_pred_generic( const int_fast8_t mode_disp = vertical_mode ? pred_mode - 50 : -((int32_t)pred_mode - 18); // Sample displacement per column in fractions of 32. - const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; + const int16_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)]; const int side_size = vertical_mode ? log2_height : log2_width; int scale = MIN(2, side_size - pre_scale[abs(mode_disp)]); @@ -248,7 +248,7 @@ static void uvg_angular_pred_generic( // PDPC - bool PDPC_filter = ((width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) || channel_type != 0) && multi_ref_index == 0; + bool PDPC_filter = (width >= TR_MIN_WIDTH && height >= TR_MIN_WIDTH) && multi_ref_index == 0; if (pred_mode > 1 && pred_mode < 67) { if (mode_disp < 0 || multi_ref_index) { // Cannot be used with MRL. PDPC_filter = false; @@ -274,7 +274,7 @@ static void uvg_angular_pred_generic( // Do not apply PDPC if multi ref line index is other than 0 // TODO: do not do PDPC if block is in BDPCM mode - bool do_pdpc = (((width >= 4 && height >= 4) || channel_type != 0) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/); + bool do_pdpc = ((width >= 4 && height >= 4) && sample_disp >= 0 && multi_ref_index == 0 /*&& !bdpcm*/); if (do_pdpc) { int scale = (log2_width + log2_height - 2) >> 2;