diff --git a/src/cu.h b/src/cu.h index 751a483c..a2e2234c 100644 --- a/src/cu.h +++ b/src/cu.h @@ -131,6 +131,9 @@ typedef struct uint8_t log2_width : 3; uint8_t log2_height : 3; + uint8_t log2_chroma_width : 3; + uint8_t log2_chroma_height : 3; + uint16_t cbf; uint8_t root_cbf; @@ -150,11 +153,14 @@ typedef struct uint8_t mts_last_scan_pos : 1; uint8_t violates_lfnst_constrained_luma : 1; - uint8_t violates_lfnst_constrained_chroma; + uint8_t violates_lfnst_constrained_chroma : 1; uint8_t lfnst_last_scan_pos : 1; uint8_t lfnst_idx : 2; uint8_t cr_lfnst_idx : 2; + uint8_t luma_deblocking : 2; + uint8_t chroma_deblocking : 2; + union { struct { int8_t mode; diff --git a/src/filter.c b/src/filter.c index 2f0b6a1c..5605006b 100644 --- a/src/filter.c +++ b/src/filter.c @@ -269,6 +269,7 @@ static bool is_tu_boundary( int32_t x, int32_t y, edge_dir dir, + color_t color, enum uvg_tree_type tree_type) { x >>= tree_type == UVG_CHROMA_T; @@ -276,13 +277,13 @@ static bool is_tu_boundary( // if (x & 3 || y & 3) return false; const cu_info_t *const scu = uvg_cu_array_at_const(tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, x, y); - const int tu_width = MIN(TR_MAX_WIDTH, 1 << scu->log2_width); - const int tu_height = MIN(TR_MAX_WIDTH, 1 << scu->log2_height); if (dir == EDGE_HOR) { - return (y & (tu_height - 1)) == 0; + return color == COLOR_Y ? scu->luma_deblocking & EDGE_HOR : + scu->chroma_deblocking & EDGE_HOR; } else { - return (x & (tu_width - 1)) == 0; + return color == COLOR_Y ? 
scu->luma_deblocking & EDGE_VER : + scu->chroma_deblocking & EDGE_VER; } } @@ -321,9 +322,9 @@ static bool is_pu_boundary(const encoder_state_t *const state, static bool is_on_8x8_grid(int x, int y, edge_dir dir) { if (dir == EDGE_HOR) { - return (y & 7) == 0 && (x & 2) == 0; + return (y & 7) == 0; } else { - return (x & 7) == 0 && (y & 2) == 0; + return (x & 7) == 0; } } @@ -603,10 +604,10 @@ static INLINE void get_max_filter_length(uint8_t *filt_len_P, uint8_t *filt_len_ bool transform_edge_4x4[2] = { false, false }; bool transform_edge_8x8[2] = { false, false }; - if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, tree_type); - if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, tree_type); - if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, tree_type); - if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, tree_type); + if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, comp, tree_type); + if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, comp, tree_type); + if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, comp, tree_type); + if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, comp, tree_type); if (comp == COLOR_Y) { if (tu_size_P_side <= 4 || tu_size_Q_side <= 4){ @@ -1066,18 +1067,18 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state, uint8_t max_filter_length_P = 0; uint8_t max_filter_length_Q = 0; - const int cu_width = 1 << (cu_q->log2_width - (tree_type != UVG_CHROMA_T)); - const int cu_height = 1 << (cu_q->log2_height - (tree_type != UVG_CHROMA_T)); + const int cu_width = 1 << (cu_q->log2_chroma_width ); + const int cu_height = 1 << (cu_q->log2_chroma_height); const 
int pu_size = dir == EDGE_HOR ? cu_height : cu_width; const int pu_pos = dir == EDGE_HOR ? y_coord : x_coord; const int tu_size_p_side = dir == EDGE_HOR ? - MIN(1 << (cu_p->log2_height - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH) : - MIN(1 << (cu_p->log2_width - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH); + MIN(1 << (cu_p->log2_chroma_height), TR_MAX_WIDTH) : + MIN(1 << (cu_p->log2_chroma_width), TR_MAX_WIDTH); const int tu_size_q_side = dir == EDGE_HOR ? - MIN(1 << (cu_q->log2_height - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH) : - MIN(1 << (cu_q->log2_width - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH); + MIN(1 << (cu_q->log2_chroma_height ), TR_MAX_WIDTH) : + MIN(1 << (cu_q->log2_chroma_width ), TR_MAX_WIDTH); get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord, dir, tu_boundary, tu_size_p_side, tu_size_q_side, @@ -1216,11 +1217,12 @@ static void filter_deblock_unit( // Chroma pixel coordinates. const int32_t x_c = x >> 1; const int32_t y_c = y >> 1; - if (state->encoder_control->chroma_format != UVG_CSP_400 && - (is_on_8x8_grid(x_c, y_c, dir && (x_c + 4) % 32) - || (x == state->tile->frame->width - 8 && dir == 1 && y_c % 8 == 0)) + if (state->encoder_control->chroma_format != UVG_CSP_400 && + is_tu_boundary(state, x, y, dir, COLOR_UV, tree_type) + && (is_on_8x8_grid(x_c, y_c, dir == EDGE_HOR && (x_c + 4) % 32 ? 
EDGE_HOR : EDGE_VER) + || (x == state->tile->frame->width - 8 && dir == EDGE_HOR && y_c % 8 == 0)) && tree_type != UVG_LUMA_T) { - filter_deblock_edge_chroma(state, x_c, y_c, length, dir, tu_boundary, tree_type); + filter_deblock_edge_chroma(state, x_c, y_c, 2, dir, tu_boundary, tree_type); } } @@ -1250,11 +1252,11 @@ static void filter_deblock_lcu_inside(encoder_state_t * const state, for (int edge_y = y; edge_y < end_y; edge_y += 4) { for (int edge_x = x; edge_x < end_x; edge_x += 4) { - bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, luma_tree); + bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, COLOR_Y, luma_tree); if (tu_boundary || is_pu_boundary(state, edge_x, edge_y, dir)) { filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, luma_tree); } - if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, chroma_tree)) { + if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, COLOR_UV, chroma_tree)) { filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, chroma_tree); } } @@ -1281,7 +1283,7 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state, for (int x = x_px - 8; x < x_px; x += 4) { for (int y = y_px; y < end; y += 4) { // The top edge of the whole frame is not filtered. 
- bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, luma_tree); + bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, COLOR_Y, luma_tree); if (y > 0 && (tu_boundary || is_pu_boundary(state, x, y, EDGE_HOR))) { filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR, tu_boundary); } @@ -1292,13 +1294,15 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state, if (state->encoder_control->chroma_format != UVG_CSP_400) { const int x_px_c = x_px >> 1; const int y_px_c = y_px >> 1; - const int x_c = x_px_c - 4; - const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1); - for (int y_c = y_px_c; y_c < end_c; y_c += 8) { - // The top edge of the whole frame is not filtered. - bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, chroma_tree); - if (y_c > 0 && (tu_boundary || is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR))) { - filter_deblock_edge_chroma(state, x_c , y_c, 4, EDGE_HOR, tu_boundary, chroma_tree); + int x_c = x_px_c - 4; + const int end_c_y = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1); + for(; x_c < x_px_c; x_c += 2) { + for (int y_c = y_px_c; y_c < end_c_y; y_c += 8) { + // The top edge of the whole frame is not filtered. + bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, COLOR_UV, chroma_tree); + if (y_c > 0 && (tu_boundary || is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR))) { + filter_deblock_edge_chroma(state, x_c , y_c, 2, EDGE_HOR, tu_boundary, chroma_tree); + } } } } diff --git a/src/filter.h b/src/filter.h index 0d98eedd..2db9c871 100644 --- a/src/filter.h +++ b/src/filter.h @@ -46,8 +46,8 @@ * \brief Edge direction. 
*/ typedef enum edge_dir { - EDGE_VER = 0, // vertical - EDGE_HOR = 1, // horizontal + EDGE_VER = 1, // vertical + EDGE_HOR = 2, // horizontal } edge_dir; diff --git a/src/search.c b/src/search.c index 74b3760f..18c721c6 100644 --- a/src/search.c +++ b/src/search.c @@ -39,6 +39,7 @@ #include "cu.h" #include "encoder.h" #include "encode_coding_tree.h" +#include "filter.h" #include "imagelist.h" #include "inter.h" #include "intra.h" @@ -253,6 +254,10 @@ static void work_tree_copy_up( to_cu->intra.mode_chroma = from_cu->intra.mode_chroma; to_cu->joint_cb_cr = from_cu->joint_cb_cr; to_cu->cr_lfnst_idx = from_cu->cr_lfnst_idx; + to_cu->chroma_deblocking = from_cu->chroma_deblocking; + to_cu->log2_chroma_width = from_cu->log2_chroma_width; + to_cu->log2_chroma_height = from_cu->log2_chroma_height; + cbf_copy(&to_cu->cbf, from_cu->cbf, COLOR_U); cbf_copy(&to_cu->cbf, from_cu->cbf, COLOR_V); } @@ -282,6 +287,9 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in to->log2_height = cu->log2_height; to->log2_width = cu->log2_width; + to->log2_chroma_height = cu->log2_chroma_height; + to->log2_chroma_width = cu->log2_chroma_width; + if (cu->type == CU_INTRA) { to->intra.mode = cu->intra.mode; to->intra.mode_chroma = cu->intra.mode_chroma; @@ -315,14 +323,37 @@ static void lcu_fill_chroma_cu_info(lcu_t *lcu, const cu_loc_t * const cu_loc) cu->intra.mode_chroma = bottom_right->intra.mode_chroma; cu->joint_cb_cr = bottom_right->joint_cb_cr; cu->cr_lfnst_idx = bottom_right->cr_lfnst_idx; + cu->log2_chroma_height = bottom_right->log2_chroma_height; + cu->log2_chroma_width = bottom_right->log2_chroma_width; cu->type = bottom_right->type; cu->tr_skip |= bottom_right->tr_skip & 6; } } } - +static void lcu_fill_chroma_cbfs(lcu_t *lcu, const cu_loc_t * const chroma_loc, enum uvg_tree_type tree_type) +{ + int8_t height = tree_type == UVG_CHROMA_T ? chroma_loc->chroma_height : chroma_loc->height; + int8_t width = tree_type == UVG_CHROMA_T ? 
chroma_loc->chroma_width : chroma_loc->width; + uint32_t x_local = chroma_loc->local_x; + uint32_t y_local = chroma_loc->local_y; + const int offset = ~((TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)) - 1); + // Set coeff flags in every CU covered by part_mode in this depth. + for (uint32_t y = 0; y < height; y += SCU_WIDTH) { + for (uint32_t x = 0; x < width; x += SCU_WIDTH) { + // Use TU top-left CU to propagate coeff flags + cu_info_t* cu_from = LCU_GET_CU_AT_PX(lcu, x_local + (x & offset), y_local + (y & offset)); + cu_info_t* cu_to = LCU_GET_CU_AT_PX(lcu, x_local + x, y_local + y); + if (cu_from != cu_to) { + cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_U); + cbf_copy(&cu_to->cbf, cu_from->cbf, COLOR_V); + } + } + } + +} + static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu, enum uvg_tree_type tree_type) { @@ -996,6 +1027,97 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map) } +static void mark_deblocking(const cu_loc_t* const cu_loc, const cu_loc_t* const chroma_loc, lcu_t* lcu, enum uvg_tree_type tree_type, bool has_chroma, const bool is_separate_tree, int x_local, int y_local) +{ + if(tree_type != UVG_CHROMA_T) { + if(cu_loc->x) { + for (int x = cu_loc->local_x; x < cu_loc->local_x + cu_loc->width; x += TR_MAX_WIDTH) { + for (int y = cu_loc->local_y; y < cu_loc->local_y + cu_loc->height; y += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, y)->luma_deblocking |= EDGE_VER; + if(!is_separate_tree && tree_type == UVG_BOTH_T) LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER; + } + } + } + else if(cu_loc->width == 64) { + for (int y = cu_loc->local_y; y < cu_loc->local_y + cu_loc->height; y += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->luma_deblocking |= EDGE_VER; + if (!is_separate_tree && tree_type == UVG_BOTH_T) LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->chroma_deblocking |= EDGE_VER; + } + } + + if(cu_loc->y) { + for (int y = cu_loc->local_y; y < cu_loc->local_y + 
cu_loc->height; y += TR_MAX_WIDTH) { + for (int x = cu_loc->local_x; x < cu_loc->local_x + cu_loc->width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, y)->luma_deblocking |= EDGE_HOR; + if (!is_separate_tree && tree_type == UVG_BOTH_T) LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR; + } + } + } + else if (cu_loc->height == 64) { /* cu_loc->y == 0 on this path: flag the row at TR_MAX_WIDTH as a horizontal edge */ + for (int x = cu_loc->local_x; x < cu_loc->local_x + cu_loc->width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->luma_deblocking |= EDGE_HOR; + if (!is_separate_tree && tree_type == UVG_BOTH_T) LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->chroma_deblocking |= EDGE_HOR; + } + } + + if(is_separate_tree && has_chroma) { + if (chroma_loc->x) { + for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += TR_MAX_WIDTH) { + for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER; + } + } + } + else if(cu_loc->width == 64) { + for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->chroma_deblocking |= EDGE_VER; + } + } + + if (chroma_loc->y) { + for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += TR_MAX_WIDTH) { + for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR; + } + } + } + else if (cu_loc->height == 64) { /* chroma_loc->y == 0 on this path: flag the row at TR_MAX_WIDTH as a horizontal edge */ + for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->chroma_deblocking |= EDGE_HOR; + } + } + } + } + else { + + if (chroma_loc->x) { + for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += TR_MAX_WIDTH / 2) { + for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER; + } + } + } + else
if(chroma_loc->width == 64) { + for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH / 2, y)->chroma_deblocking |= EDGE_VER; + } + } + + if(chroma_loc->y) { + for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += TR_MAX_WIDTH / 2) { + for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR; + } + } + } + else if (chroma_loc->height == 64) { /* chroma_loc->y == 0 on this path: flag the row at TR_MAX_WIDTH / 2 as a horizontal edge */ + for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) { + LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH / 2)->chroma_deblocking |= EDGE_HOR; + } + } + } +} + /** * Search every mode from 0 to MAX_PU_DEPTH and return cost of best mode. * - The recursion is started at depth 0 and goes in Z-order to MAX_PU_DEPTH. @@ -1090,6 +1212,11 @@ static double search_cu( cur_cu->log2_width = uvg_g_convert_to_log2[cu_width]; cur_cu->log2_height = uvg_g_convert_to_log2[cu_height]; + if(chroma_loc) { + cur_cu->log2_chroma_height = uvg_g_convert_to_log2[chroma_loc->chroma_height]; + cur_cu->log2_chroma_width = uvg_g_convert_to_log2[chroma_loc->chroma_width]; + } + // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. 
if ( x + luma_width <= frame_width && y + luma_height <= frame_height) @@ -1269,23 +1396,21 @@ static double search_cu( if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) || tree_type == UVG_CHROMA_T) { intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma; - lcu_fill_chroma_cu_info( - lcu, - chroma_loc); + if(tree_type != UVG_CHROMA_T) { + lcu_fill_chroma_cu_info( + lcu, + chroma_loc); + } uvg_intra_recon_cu(state, &intra_search, chroma_loc, NULL, lcu, UVG_CHROMA_T, false, true); - lcu_fill_cbf( + lcu_fill_chroma_cbfs( lcu, - chroma_loc->local_x, - chroma_loc->local_y, - chroma_loc->width, - chroma_loc->height, - cur_cu, - UVG_CHROMA_T); + chroma_loc, + tree_type); } else { assert(cur_cu->cr_lfnst_idx == 0 && "If we don't have separate tree chroma lfnst index must be 0"); } @@ -1409,6 +1534,16 @@ static double search_cu( // lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); //} cabac->update = 0; + + mark_deblocking( + cu_loc, + chroma_loc, + lcu, + tree_type, + has_chroma, + is_separate_tree, + x_local, + y_local); } bool can_split_cu =