diff --git a/src/cu.c b/src/cu.c index 10d8aabb..3eb7a771 100644 --- a/src/cu.c +++ b/src/cu.c @@ -399,11 +399,11 @@ int uvg_get_possible_splits(const encoder_state_t * const state, splits[NO_SPLIT] = splits[QT_SPLIT] = splits[BT_HOR_SPLIT] = splits[TT_HOR_SPLIT] = splits[BT_VER_SPLIT] = splits[TT_VER_SPLIT] = true; bool can_btt = split_tree.mtt_depth < max_btd; - const enum split_type last_split = (split_tree.split_tree >> (split_tree.current_depth * 3)) & 7; + const enum split_type last_split = (split_tree.split_tree >> (split_tree.current_depth * 3 - 3)) & 7; const enum split_type parl_split = last_split == BT_HOR_SPLIT ? BT_HOR_SPLIT : BT_VER_SPLIT; // don't allow QT-splitting below a BT split - if (split_tree.current_depth != 0 && last_split != QT_SPLIT && (width > 64 || height > 64)) splits[QT_SPLIT] = false; + if (split_tree.current_depth != 0 && last_split != QT_SPLIT /* && !(width > 64 || height > 64)*/) splits[QT_SPLIT] = false; if (width <= min_qt_size) splits[QT_SPLIT] = false; if (tree_type == UVG_CHROMA_T && width <= 4) splits[QT_SPLIT] = false; @@ -488,6 +488,7 @@ int uvg_count_available_edge_cus(const cu_loc_t* const cu_loc, const lcu_t* cons int amount = 0; if(left) { + if (cu_loc->local_y == 0 && cu_loc->local_x == 32 && cu_loc->height == 32 && cu_loc->width == 32) return 8; while (LCU_GET_CU_AT_PX(lcu, cu_loc->local_x - TR_MIN_WIDTH, cu_loc->local_y + amount)->type != CU_NOTSET && (cu_loc->local_y + amount) < LCU_WIDTH) { amount += TR_MIN_WIDTH; } diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index 54a7a639..999da9a3 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -1643,7 +1643,7 @@ void uvg_encode_coding_tree( ((is_local_dual_tree && has_chroma) || tree_type == UVG_CHROMA_T) && tree_type != UVG_LUMA_T) { - int8_t luma_dir = uvg_get_co_located_luma_mode(chroma_loc->x, chroma_loc->y, chroma_loc->width, chroma_loc->height, NULL, frame->cu_array, UVG_CHROMA_T); + int8_t luma_dir = uvg_get_co_located_luma_mode(chroma_loc, cu_loc, cur_cu, NULL, frame->cu_array, UVG_CHROMA_T); encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, luma_dir,NULL); // LFNST constraints must be reset here. Otherwise the left over values will interfere when calculating new constraints cu_info_t* tmp = (cu_info_t*)cur_cu; @@ -1793,9 +1793,9 @@ double uvg_mock_encode_coding_unit( uvg_encode_intra_luma_coding_unit(state, cabac, cur_cu, cu_loc, lcu, &bits); } if((chroma_loc || tree_type == UVG_CHROMA_T) && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) { - int8_t luma_dir = uvg_get_co_located_luma_mode(chroma_loc->x, chroma_loc->y, chroma_loc->width, chroma_loc->height, - tree_type != UVG_CHROMA_T ? lcu : NULL, - tree_type == UVG_CHROMA_T ? state->tile->frame->cu_array : NULL, is_separate_tree ? UVG_CHROMA_T : tree_type); + int8_t luma_dir = uvg_get_co_located_luma_mode(chroma_loc,cu_loc , cur_cu, tree_type != UVG_CHROMA_T ? lcu : NULL, + tree_type == UVG_CHROMA_T ? state->tile->frame->cu_array : NULL, + is_separate_tree ? UVG_CHROMA_T : tree_type); encode_chroma_intra_cu(cabac, cur_cu, state->encoder_control->cfg.cclm, luma_dir, &bits); } } diff --git a/src/intra.c b/src/intra.c index bcc04359..778b779d 100644 --- a/src/intra.c +++ b/src/intra.c @@ -1603,22 +1603,26 @@ void uvg_intra_predict( // This function works on luma coordinates int8_t uvg_get_co_located_luma_mode( - int x, - int y, - int width, - int height, + const cu_loc_t* const chroma_loc, + const cu_loc_t* const cu_loc, + const cu_info_t* luma_cu, const lcu_t* const lcu, const cu_array_t* const cu_array, enum uvg_tree_type tree_type) { + int x = chroma_loc->x; + int y = chroma_loc->y; assert((cu_array || lcu) && !(cu_array && lcu)); assert(tree_type != UVG_LUMA_T && "Luma only CU shouldn't need colocated luma CU"); if(tree_type == UVG_CHROMA_T) { - x += width >> 1; - y += height >> 1; + x += chroma_loc->width >> 1; + y += chroma_loc->height >> 1; } const cu_info_t* cu; - if(cu_array) { + if (lcu && cu_loc->x <= x && x < cu_loc->x + cu_loc->width && cu_loc->y <= y && y < cu_loc->y + cu_loc->height) { + cu = luma_cu; + } + else if(cu_array) { cu = uvg_cu_array_at_const(cu_array, x, y); } else { diff --git a/src/intra.h b/src/intra.h index 9f2986eb..fc81e645 100644 --- a/src/intra.h +++ b/src/intra.h @@ -152,10 +152,9 @@ void uvg_intra_recon_cu( bool recon_chroma); int8_t uvg_get_co_located_luma_mode( - int x, - int y, - int width, - int height, + const cu_loc_t* const chroma_loc, + const cu_loc_t* const cu_loc, + const cu_info_t* luma_cu, const lcu_t* const lcu, const cu_array_t* const cu_array, enum uvg_tree_type tree_type); diff --git a/src/search.c b/src/search.c index 6a10a83e..bc55a805 100644 --- a/src/search.c +++ b/src/search.c @@ -1107,9 +1107,10 @@ static double search_cu( intra_search.pred_cu.joint_cb_cr = 0; if(tree_type == UVG_CHROMA_T || is_separate_tree) { - intra_mode = uvg_get_co_located_luma_mode(chroma_loc->x, chroma_loc->y, chroma_loc->width, chroma_loc->height, - is_separate_tree ? lcu : NULL, - tree_type == UVG_CHROMA_T ? state->tile->frame->cu_array : NULL, UVG_CHROMA_T); + intra_mode = uvg_get_co_located_luma_mode( + chroma_loc, cu_loc, &intra_search.pred_cu, is_separate_tree ? lcu : NULL, + tree_type == UVG_CHROMA_T ? state->tile->frame->cu_array : NULL, + UVG_CHROMA_T); intra_search.pred_cu.type = CU_INTRA; } else if (intra_search.pred_cu.intra.mip_flag) { intra_mode = 0; @@ -1346,86 +1347,102 @@ static double search_cu( fwrite(&state->search_cabac.ctx, 1, sizeof(state->search_cabac.ctx), state->encoder_control->cabac_debug_file); } + bool can_split[6]; + uvg_get_possible_splits(state, cu_loc, split_tree, tree_type, can_split); + can_split_cu &= can_split[1] || can_split[2] || can_split[3] || can_split[4] || can_split[5]; + // Recursively split all the way to max search depth. if (can_split_cu) { - const int split_type = depth == 2 ? TT_VER_SPLIT : QT_SPLIT; - split_tree_t new_split = { - split_tree.split_tree | split_type << (split_tree.current_depth * 3), - split_tree.current_depth + 1, - split_tree.mtt_depth + (split_type != QT_SPLIT), - 0 - }; - - double split_cost = 0.0; - int cbf = cbf_is_set_any(cur_cu->cbf); + lcu_t * split_lcu = MALLOC(lcu_t, 5); + enum split_type best_split = 0; + double best_split_cost = MAX_DOUBLE; cabac_data_t post_seach_cabac; + cabac_data_t best_split_cabac; memcpy(&post_seach_cabac, &state->search_cabac, sizeof(post_seach_cabac)); - memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac)); + for (int split_type = QT_SPLIT; split_type <= TT_VER_SPLIT; ++split_type) { + if (!can_split[split_type] || split_type != QT_SPLIT) continue; + split_tree_t new_split = { + split_tree.split_tree | split_type << (split_tree.current_depth * 3), + split_tree.current_depth + 1, + split_tree.mtt_depth + (split_type != QT_SPLIT), + 0 + }; + + double split_cost = 0.0; + int cbf = cbf_is_set_any(cur_cu->cbf); + memcpy(&state->search_cabac, &pre_search_cabac, sizeof(post_seach_cabac)); - double split_bits = 0; - lcu_t split_lcu; + double split_bits = 0; - if (cur_cu->log2_height + cur_cu->log2_width > 4) { + if (cur_cu->log2_height + cur_cu->log2_width > 4) { - state->search_cabac.update = 1; - // Add cost of cu_split_flag. - const cu_info_t* left_cu = NULL, * above_cu = NULL; - if (x) { - if (x_local || tree_type != UVG_CHROMA_T) { - left_cu = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local); + state->search_cabac.update = 1; + // Add cost of cu_split_flag. + const cu_info_t* left_cu = NULL, * above_cu = NULL; + if (x) { + if (x_local || tree_type != UVG_CHROMA_T) { + left_cu = LCU_GET_CU_AT_PX(lcu, x_local - 1, y_local); + } + else { + left_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, (x >> 1) - 1, y >> 1); + } } - else { - left_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, (x >> 1) - 1, y >> 1); - } - } - if (y) { - if (y_local || tree_type != UVG_CHROMA_T) { - above_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local - 1); - } - else { - above_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, x >> 1, (y >> 1) - 1); - } - } - uvg_write_split_flag( - state, - &state->search_cabac, - left_cu, - above_cu, - tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, - split_tree, - tree_type, - &split_bits); - } - - state->search_cabac.update = 0; - split_cost += split_bits * state->lambda; - - // If skip mode was selected for the block, skip further search. - // Skip mode means there's no coefficients in the block, so splitting - // might not give any better results but takes more time to do. - // It is ok to interrupt the search as soon as it is known that - // the split costs at least as much as not splitting. - if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { - cu_loc_t new_cu_loc[4]; - uint8_t separate_chroma = 0; - const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc, &separate_chroma); - initialize_partial_work_tree(lcu, &split_lcu, cu_loc, tree_type); - for (int split = 0; split < splits; ++split) { - new_split.part_index = split; - split_cost += search_cu(state, - &new_cu_loc[split], separate_chroma ? chroma_loc : &new_cu_loc[split], - &split_lcu, - tree_type, new_split, - !separate_chroma || split == splits - 1); - // If there is no separate chroma the block will always have chroma, otherwise it is the last block of the split that has the chroma - if (split_cost > cost) { - break; + if (y) { + if (y_local || tree_type != UVG_CHROMA_T) { + above_cu = LCU_GET_CU_AT_PX(lcu, x_local, y_local - 1); + } + else { + above_cu = uvg_cu_array_at_const(state->tile->frame->chroma_cu_array, x >> 1, (y >> 1) - 1); + } } + split_tree_t count_tree = split_tree; + count_tree.split_tree = split_tree.split_tree | split_type << (split_tree.current_depth * 3); + uvg_write_split_flag( + state, + &state->search_cabac, + left_cu, + above_cu, + tree_type != UVG_CHROMA_T ? cu_loc : &separate_tree_chroma_loc, + count_tree, + tree_type, + &split_bits); } - } else { - split_cost = INT_MAX; + state->search_cabac.update = 0; + split_cost += split_bits * state->lambda; + + // If skip mode was selected for the block, skip further search. + // Skip mode means there's no coefficients in the block, so splitting + // might not give any better results but takes more time to do. + // It is ok to interrupt the search as soon as it is known that + // the split costs at least as much as not splitting. + if (cur_cu->type == CU_NOTSET || cbf || state->encoder_control->cfg.cu_split_termination == UVG_CU_SPLIT_TERMINATION_OFF) { + cu_loc_t new_cu_loc[4]; + uint8_t separate_chroma = 0; + const int splits = uvg_get_split_locs(cu_loc, split_type, new_cu_loc, &separate_chroma); + initialize_partial_work_tree(lcu, &split_lcu[split_type - 1], cu_loc, tree_type); + for (int split = 0; split < splits; ++split) { + new_split.part_index = split; + split_cost += search_cu(state, + &new_cu_loc[split], separate_chroma ? chroma_loc : &new_cu_loc[split], + &split_lcu[split_type -1], + tree_type, new_split, + !separate_chroma || split == splits - 1); + // If there is no separate chroma the block will always have chroma, otherwise it is the last block of the split that has the chroma + if (split_cost > cost || split_cost > best_split_cost) { + break; + } + } + + } else { + split_cost = INT_MAX; + } + if (split_cost < best_split_cost) { + best_split_cost = split_cost; + best_split = split_type; + memcpy(&best_split_cabac, &state->search_cabac, sizeof(cabac_data_t)); + } } // If no search is not performed for this depth, try just the best mode @@ -1440,7 +1457,7 @@ static double search_cu( && tree_type == UVG_BOTH_T) { - cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&split_lcu, x_local, y_local); + cu_info_t *cu_d1 = LCU_GET_CU_AT_PX(&split_lcu[best_split - 1], x_local, y_local); // If the best CU in depth+1 is intra and the biggest it can be, try it. if (cu_d1->type == CU_INTRA && (cu_d1->log2_height + 1 == cur_cu->log2_height || cu_d1->log2_width + 1 == cur_cu->log2_width)) { @@ -1488,10 +1505,14 @@ static double search_cu( } } - if (split_cost < cost) { + if (best_split_cost < cost) { // Copy split modes to this depth. - cost = split_cost; - work_tree_copy_up(&split_lcu, lcu, state->encoder_control->cfg.jccr, tree_type, cu_loc, is_separate_tree && !has_chroma ? NULL : chroma_loc); + cost = best_split_cost; + memcpy(&state->search_cabac, &best_split_cabac, sizeof(best_split_cabac)); + work_tree_copy_up(&split_lcu[best_split -1], lcu, state->encoder_control->cfg.jccr, tree_type, cu_loc, is_separate_tree && !has_chroma ? NULL : chroma_loc); + downsample_cclm_rec( + state, x, y, cu_width / 2, cu_height / 2, lcu->rec.y, lcu->left_ref.y[64] + ); #if UVG_DEBUG //debug_split = 1; #endif @@ -1522,6 +1543,7 @@ static double search_cu( state, x, y, cu_width / 2, cu_height / 2, lcu->rec.y, lcu->left_ref.y[64] ); } + FREE_POINTER(split_lcu); } else if (cur_cu->log2_height + cur_cu->log2_width > 4) { // Need to copy modes down since the lower level of the work tree is used // when searching SMP and AMP blocks. @@ -1745,11 +1767,11 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con double cost = search_cu( state, &start, - NULL, + &start, &work_tree, tree_type, split_tree, - false); + true); // Save squared cost for rate control. if(state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) { @@ -1766,10 +1788,10 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) { cost = search_cu( state, &start, - NULL, + &start, &work_tree, UVG_CHROMA_T, split_tree, - false); + true); if (state->encoder_control->cfg.rc_algorithm == UVG_LAMBDA) { uvg_get_lcu_stats(state, x / LCU_WIDTH, y / LCU_WIDTH)->weight += cost * cost;