[mtt] fix deblock

This commit is contained in:
Joose Sainio 2022-12-13 14:51:38 +02:00 committed by Marko Viitanen
parent 09baddef17
commit 6620ba8d76
4 changed files with 189 additions and 44 deletions

View file

@ -131,6 +131,9 @@ typedef struct
uint8_t log2_width : 3;
uint8_t log2_height : 3;
uint8_t log2_chroma_width : 3;
uint8_t log2_chroma_height : 3;
uint16_t cbf;
uint8_t root_cbf;
@ -150,11 +153,14 @@ typedef struct
uint8_t mts_last_scan_pos : 1;
uint8_t violates_lfnst_constrained_luma : 1;
uint8_t violates_lfnst_constrained_chroma;
uint8_t violates_lfnst_constrained_chroma : 1;
uint8_t lfnst_last_scan_pos : 1;
uint8_t lfnst_idx : 2;
uint8_t cr_lfnst_idx : 2;
uint8_t luma_deblocking : 2;
uint8_t chroma_deblocking : 2;
union {
struct {
int8_t mode;

View file

@ -269,6 +269,7 @@ static bool is_tu_boundary(
int32_t x,
int32_t y,
edge_dir dir,
color_t color,
enum uvg_tree_type tree_type)
{
x >>= tree_type == UVG_CHROMA_T;
@ -276,13 +277,13 @@ static bool is_tu_boundary(
// if (x & 3 || y & 3) return false;
const cu_info_t *const scu =
uvg_cu_array_at_const(tree_type != UVG_CHROMA_T ? state->tile->frame->cu_array : state->tile->frame->chroma_cu_array, x, y);
const int tu_width = MIN(TR_MAX_WIDTH, 1 << scu->log2_width);
const int tu_height = MIN(TR_MAX_WIDTH, 1 << scu->log2_height);
if (dir == EDGE_HOR) {
return (y & (tu_height - 1)) == 0;
return color == COLOR_Y ? scu->luma_deblocking & EDGE_HOR :
scu->chroma_deblocking & EDGE_HOR;
} else {
return (x & (tu_width - 1)) == 0;
return color == COLOR_Y ? scu->luma_deblocking & EDGE_VER :
scu->chroma_deblocking & EDGE_VER;
}
}
@ -321,9 +322,9 @@ static bool is_pu_boundary(const encoder_state_t *const state,
/**
 * \brief Check whether an edge coordinate lies on the 8x8 deblocking grid.
 *
 * Only the coordinate perpendicular to the edge is tested: y for horizontal
 * edges, x for vertical edges. The position along the edge is not
 * restricted here.
 *
 * \param x    x-coordinate of the edge position
 * \param y    y-coordinate of the edge position
 * \param dir  direction of the edge (EDGE_HOR or EDGE_VER)
 * \return     true if the relevant coordinate is a multiple of 8
 */
static bool is_on_8x8_grid(int x, int y, edge_dir dir)
{
  if (dir == EDGE_HOR) {
    return (y & 7) == 0;
  } else {
    return (x & 7) == 0;
  }
}
@ -603,10 +604,10 @@ static INLINE void get_max_filter_length(uint8_t *filt_len_P, uint8_t *filt_len_
bool transform_edge_4x4[2] = { false, false };
bool transform_edge_8x8[2] = { false, false };
if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, tree_type);
if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, tree_type);
if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, tree_type);
if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, tree_type);
if (pos >= 4) transform_edge_4x4[0] = is_tu_boundary(state, x - x_mul * 4, y - y_mul * 4, dir, comp, tree_type);
if (pos >= 8) transform_edge_8x8[0] = is_tu_boundary(state, x - x_mul * 8, y - y_mul * 8, dir, comp, tree_type);
if (pos + 4 < len) transform_edge_4x4[1] = is_tu_boundary(state, x + x_mul * 4, y + y_mul * 4, dir, comp, tree_type);
if (pos + 8 < len) transform_edge_8x8[1] = is_tu_boundary(state, x + x_mul * 8, y + y_mul * 8, dir, comp, tree_type);
if (comp == COLOR_Y) {
if (tu_size_P_side <= 4 || tu_size_Q_side <= 4){
@ -1066,18 +1067,18 @@ static void filter_deblock_edge_chroma(encoder_state_t * const state,
uint8_t max_filter_length_P = 0;
uint8_t max_filter_length_Q = 0;
const int cu_width = 1 << (cu_q->log2_width - (tree_type != UVG_CHROMA_T));
const int cu_height = 1 << (cu_q->log2_height - (tree_type != UVG_CHROMA_T));
const int cu_width = 1 << (cu_q->log2_chroma_width );
const int cu_height = 1 << (cu_q->log2_chroma_height);
const int pu_size = dir == EDGE_HOR ? cu_height : cu_width;
const int pu_pos = dir == EDGE_HOR ? y_coord : x_coord;
const int tu_size_p_side = dir == EDGE_HOR ?
MIN(1 << (cu_p->log2_height - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH) :
MIN(1 << (cu_p->log2_width - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH);
MIN(1 << (cu_p->log2_chroma_height), TR_MAX_WIDTH) :
MIN(1 << (cu_p->log2_chroma_width), TR_MAX_WIDTH);
const int tu_size_q_side = dir == EDGE_HOR ?
MIN(1 << (cu_q->log2_height - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH) :
MIN(1 << (cu_q->log2_width - (tree_type != UVG_CHROMA_T)), TR_MAX_WIDTH);
MIN(1 << (cu_q->log2_chroma_height ), TR_MAX_WIDTH) :
MIN(1 << (cu_q->log2_chroma_width ), TR_MAX_WIDTH);
get_max_filter_length(&max_filter_length_P, &max_filter_length_Q, state, x_coord, y_coord,
dir, tu_boundary, tu_size_p_side, tu_size_q_side,
@ -1217,10 +1218,11 @@ static void filter_deblock_unit(
const int32_t x_c = x >> 1;
const int32_t y_c = y >> 1;
if (state->encoder_control->chroma_format != UVG_CSP_400 &&
(is_on_8x8_grid(x_c, y_c, dir && (x_c + 4) % 32)
|| (x == state->tile->frame->width - 8 && dir == 1 && y_c % 8 == 0))
is_tu_boundary(state, x, y, dir, COLOR_UV, tree_type)
&& (is_on_8x8_grid(x_c, y_c, dir == EDGE_HOR && (x_c + 4) % 32 ? EDGE_HOR : EDGE_VER)
|| (x == state->tile->frame->width - 8 && dir == EDGE_HOR && y_c % 8 == 0))
&& tree_type != UVG_LUMA_T) {
filter_deblock_edge_chroma(state, x_c, y_c, length, dir, tu_boundary, tree_type);
filter_deblock_edge_chroma(state, x_c, y_c, 2, dir, tu_boundary, tree_type);
}
}
@ -1250,11 +1252,11 @@ static void filter_deblock_lcu_inside(encoder_state_t * const state,
for (int edge_y = y; edge_y < end_y; edge_y += 4) {
for (int edge_x = x; edge_x < end_x; edge_x += 4) {
bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, luma_tree);
bool tu_boundary = is_tu_boundary(state, edge_x, edge_y, dir, COLOR_Y, luma_tree);
if (tu_boundary || is_pu_boundary(state, edge_x, edge_y, dir)) {
filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, luma_tree);
}
if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, chroma_tree)) {
if(chroma_tree == UVG_CHROMA_T && is_tu_boundary(state, edge_x, edge_y, dir, COLOR_UV, chroma_tree)) {
filter_deblock_unit(state, edge_x, edge_y, 4, 4, dir, tu_boundary, edge_x < x, chroma_tree);
}
}
@ -1281,7 +1283,7 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
for (int x = x_px - 8; x < x_px; x += 4) {
for (int y = y_px; y < end; y += 4) {
// The top edge of the whole frame is not filtered.
bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, luma_tree);
bool tu_boundary = is_tu_boundary(state, x, y, EDGE_HOR, COLOR_Y, luma_tree);
if (y > 0 && (tu_boundary || is_pu_boundary(state, x, y, EDGE_HOR))) {
filter_deblock_edge_luma(state, x, y, 4, EDGE_HOR, tu_boundary);
}
@ -1292,13 +1294,15 @@ static void filter_deblock_lcu_rightmost(encoder_state_t * const state,
if (state->encoder_control->chroma_format != UVG_CSP_400) {
const int x_px_c = x_px >> 1;
const int y_px_c = y_px >> 1;
const int x_c = x_px_c - 4;
const int end_c = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1);
for (int y_c = y_px_c; y_c < end_c; y_c += 8) {
// The top edge of the whole frame is not filtered.
bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, chroma_tree);
if (y_c > 0 && (tu_boundary || is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR))) {
filter_deblock_edge_chroma(state, x_c , y_c, 4, EDGE_HOR, tu_boundary, chroma_tree);
int x_c = x_px_c - 4;
const int end_c_y = MIN(y_px_c + LCU_WIDTH_C, state->tile->frame->height >> 1);
for(; x_c < x_px_c; x_c += 2) {
for (int y_c = y_px_c; y_c < end_c_y; y_c += 8) {
// The top edge of the whole frame is not filtered.
bool tu_boundary = is_tu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR, COLOR_UV, chroma_tree);
if (y_c > 0 && (tu_boundary || is_pu_boundary(state, x_c << 1, y_c << 1, EDGE_HOR))) {
filter_deblock_edge_chroma(state, x_c , y_c, 2, EDGE_HOR, tu_boundary, chroma_tree);
}
}
}
}

View file

@ -46,8 +46,8 @@
* \brief Edge direction.
*/
typedef enum edge_dir {
  // The values are distinct, non-zero bits so that a direction can be OR-ed
  // into and tested against a bit mask (e.g. `mask |= EDGE_VER`,
  // `mask & EDGE_HOR`). A value of 0 would make both operations no-ops.
  EDGE_VER = 1, // vertical
  EDGE_HOR = 2, // horizontal
} edge_dir;

View file

@ -39,6 +39,7 @@
#include "cu.h"
#include "encoder.h"
#include "encode_coding_tree.h"
#include "filter.h"
#include "imagelist.h"
#include "inter.h"
#include "intra.h"
@ -253,6 +254,10 @@ static void work_tree_copy_up(
to_cu->intra.mode_chroma = from_cu->intra.mode_chroma;
to_cu->joint_cb_cr = from_cu->joint_cb_cr;
to_cu->cr_lfnst_idx = from_cu->cr_lfnst_idx;
to_cu->chroma_deblocking = from_cu->chroma_deblocking;
to_cu->log2_chroma_width = from_cu->log2_chroma_width;
to_cu->log2_chroma_height = from_cu->log2_chroma_height;
cbf_copy(&to_cu->cbf, from_cu->cbf, COLOR_U);
cbf_copy(&to_cu->cbf, from_cu->cbf, COLOR_V);
}
@ -282,6 +287,9 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
to->log2_height = cu->log2_height;
to->log2_width = cu->log2_width;
to->log2_chroma_height = cu->log2_chroma_height;
to->log2_chroma_width = cu->log2_chroma_width;
if (cu->type == CU_INTRA) {
to->intra.mode = cu->intra.mode;
to->intra.mode_chroma = cu->intra.mode_chroma;
@ -315,6 +323,8 @@ static void lcu_fill_chroma_cu_info(lcu_t *lcu, const cu_loc_t * const cu_loc)
cu->intra.mode_chroma = bottom_right->intra.mode_chroma;
cu->joint_cb_cr = bottom_right->joint_cb_cr;
cu->cr_lfnst_idx = bottom_right->cr_lfnst_idx;
cu->log2_chroma_height = bottom_right->log2_chroma_height;
cu->log2_chroma_width = bottom_right->log2_chroma_width;
cu->type = bottom_right->type;
cu->tr_skip |= bottom_right->tr_skip & 6;
}
@ -322,6 +332,27 @@ static void lcu_fill_chroma_cu_info(lcu_t *lcu, const cu_loc_t * const cu_loc)
}
/**
 * \brief Propagate chroma coded block flags to every CU covered by a block.
 *
 * Walks the area described by chroma_loc in SCU_WIDTH steps and, for each
 * position, copies the U and V cbf bits from the CU at the top-left corner of
 * the enclosing transform-sized cell.
 *
 * \param lcu         LCU whose CU entries are updated
 * \param chroma_loc  location and dimensions of the block; the chroma
 *                    dimensions are used when tree_type is UVG_CHROMA_T,
 *                    the plain width/height otherwise
 * \param tree_type   tree type being processed
 */
static void lcu_fill_chroma_cbfs(lcu_t *lcu, const cu_loc_t * const chroma_loc, enum uvg_tree_type tree_type)
{
  int8_t height;
  int8_t width;
  if (tree_type == UVG_CHROMA_T) {
    height = chroma_loc->chroma_height;
    width = chroma_loc->chroma_width;
  } else {
    height = chroma_loc->height;
    width = chroma_loc->width;
  }

  const uint32_t base_x = chroma_loc->local_x;
  const uint32_t base_y = chroma_loc->local_y;

  // Mask that rounds an offset down to the start of its transform cell;
  // the cell size is halved for the chroma tree.
  const int tu_mask = ~((TR_MAX_WIDTH >> (tree_type == UVG_CHROMA_T)) - 1);

  for (uint32_t dy = 0; dy < height; dy += SCU_WIDTH) {
    const uint32_t src_y = base_y + (dy & tu_mask);
    for (uint32_t dx = 0; dx < width; dx += SCU_WIDTH) {
      const uint32_t src_x = base_x + (dx & tu_mask);

      // The top-left CU of the transform cell is the source of the flags.
      cu_info_t *source = LCU_GET_CU_AT_PX(lcu, src_x, src_y);
      cu_info_t *target = LCU_GET_CU_AT_PX(lcu, base_x + dx, base_y + dy);
      if (source == target) {
        continue;
      }
      cbf_copy(&target->cbf, source->cbf, COLOR_U);
      cbf_copy(&target->cbf, source->cbf, COLOR_V);
    }
  }
}
static void lcu_fill_cbf(lcu_t *lcu, int x_local, unsigned y_local, unsigned width, unsigned height, const cu_info_t *cur_cu, enum
uvg_tree_type tree_type)
@ -996,6 +1027,97 @@ void uvg_sort_keys_by_cost(unit_stats_map_t *__restrict map)
}
/**
 * \brief Flag the transform edges of a CU for the deblocking filter.
 *
 * Sets EDGE_VER / EDGE_HOR bits in the luma_deblocking and chroma_deblocking
 * masks of the CUs inside the LCU:
 *  - If the CU's x (resp. y) coordinate is non-zero, its left (resp. top)
 *    edge and every further TR_MAX_WIDTH step inside the CU is flagged.
 *  - Otherwise, if the CU is 64 wide (resp. high), only the internal split
 *    line at TR_MAX_WIDTH is flagged.
 *
 * \param cu_loc            location and size of the CU
 * \param chroma_loc        location and size of the chroma block
 * \param lcu               LCU whose CU entries are updated
 * \param tree_type         tree type being processed
 * \param has_chroma        whether this CU carries chroma
 * \param is_separate_tree  whether chroma has its own location (chroma_loc)
 *                          rather than following cu_loc
 * \param x_local           x position used for UVG_CHROMA_T marking
 * \param y_local           y position used for UVG_CHROMA_T marking
 */
static void mark_deblocking(const cu_loc_t* const cu_loc, const cu_loc_t* const chroma_loc, lcu_t* lcu, enum uvg_tree_type tree_type, bool has_chroma, const bool is_separate_tree, int x_local, int y_local)
{
  if (tree_type != UVG_CHROMA_T) {
    // Chroma gets the same flags as luma only when both are coded together.
    const bool chroma_follows_luma = !is_separate_tree && tree_type == UVG_BOTH_T;

    // Vertical edges.
    if (cu_loc->x) {
      for (int x = cu_loc->local_x; x < cu_loc->local_x + cu_loc->width; x += TR_MAX_WIDTH) {
        for (int y = cu_loc->local_y; y < cu_loc->local_y + cu_loc->height; y += SCU_WIDTH) {
          LCU_GET_CU_AT_PX(lcu, x, y)->luma_deblocking |= EDGE_VER;
          if (chroma_follows_luma) {
            LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER;
          }
        }
      }
    }
    else if (cu_loc->width == 64) {
      // x == 0: only the internal split column is flagged.
      for (int y = cu_loc->local_y; y < cu_loc->local_y + cu_loc->height; y += SCU_WIDTH) {
        LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->luma_deblocking |= EDGE_VER;
        if (chroma_follows_luma) {
          LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->chroma_deblocking |= EDGE_VER;
        }
      }
    }

    // Horizontal edges.
    if (cu_loc->y) {
      for (int y = cu_loc->local_y; y < cu_loc->local_y + cu_loc->height; y += TR_MAX_WIDTH) {
        for (int x = cu_loc->local_x; x < cu_loc->local_x + cu_loc->width; x += SCU_WIDTH) {
          LCU_GET_CU_AT_PX(lcu, x, y)->luma_deblocking |= EDGE_HOR;
          if (chroma_follows_luma) {
            LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR;
          }
        }
      }
    }
    else if (cu_loc->height == 64) {
      // y == 0: the internal split row is a horizontal edge, so it must be
      // flagged with EDGE_HOR (it was previously flagged as EDGE_VER).
      for (int x = cu_loc->local_x; x < cu_loc->local_x + cu_loc->width; x += SCU_WIDTH) {
        LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->luma_deblocking |= EDGE_HOR;
        if (chroma_follows_luma) {
          LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->chroma_deblocking |= EDGE_HOR;
        }
      }
    }

    if (is_separate_tree && has_chroma) {
      // Chroma has its own location; flag its edges from chroma_loc.
      // NOTE(review): the 64-size fallbacks below test cu_loc, not
      // chroma_loc, exactly as before - confirm this is intended.
      if (chroma_loc->x) {
        for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += TR_MAX_WIDTH) {
          for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += SCU_WIDTH) {
            LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER;
          }
        }
      }
      else if (cu_loc->width == 64) {
        for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += SCU_WIDTH) {
          LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH, y)->chroma_deblocking |= EDGE_VER;
        }
      }

      if (chroma_loc->y) {
        for (int y = chroma_loc->local_y; y < chroma_loc->local_y + chroma_loc->height; y += TR_MAX_WIDTH) {
          for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += SCU_WIDTH) {
            LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR;
          }
        }
      }
      else if (cu_loc->height == 64) {
        // Horizontal split row: EDGE_HOR (previously flagged as EDGE_VER).
        for (int x = chroma_loc->local_x; x < chroma_loc->local_x + chroma_loc->width; x += SCU_WIDTH) {
          LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH)->chroma_deblocking |= EDGE_HOR;
        }
      }
    }
  }
  else {
    // Chroma-only tree: positions come from x_local / y_local and the
    // transform step is TR_MAX_WIDTH / 2.
    if (chroma_loc->x) {
      for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += TR_MAX_WIDTH / 2) {
        for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) {
          LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_VER;
        }
      }
    }
    else if (chroma_loc->width == 64) {
      for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += SCU_WIDTH) {
        LCU_GET_CU_AT_PX(lcu, TR_MAX_WIDTH / 2, y)->chroma_deblocking |= EDGE_VER;
      }
    }

    if (chroma_loc->y) {
      for (int y = y_local; y < y_local + chroma_loc->chroma_height; y += TR_MAX_WIDTH / 2) {
        for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) {
          LCU_GET_CU_AT_PX(lcu, x, y)->chroma_deblocking |= EDGE_HOR;
        }
      }
    }
    else if (chroma_loc->height == 64) {
      // Horizontal split row: EDGE_HOR (previously flagged as EDGE_VER).
      for (int x = x_local; x < x_local + chroma_loc->chroma_width; x += SCU_WIDTH) {
        LCU_GET_CU_AT_PX(lcu, x, TR_MAX_WIDTH / 2)->chroma_deblocking |= EDGE_HOR;
      }
    }
  }
}
/**
* Search every mode from 0 to MAX_PU_DEPTH and return cost of best mode.
* - The recursion is started at depth 0 and goes in Z-order to MAX_PU_DEPTH.
@ -1090,6 +1212,11 @@ static double search_cu(
cur_cu->log2_width = uvg_g_convert_to_log2[cu_width];
cur_cu->log2_height = uvg_g_convert_to_log2[cu_height];
if(chroma_loc) {
cur_cu->log2_chroma_height = uvg_g_convert_to_log2[chroma_loc->chroma_height];
cur_cu->log2_chroma_width = uvg_g_convert_to_log2[chroma_loc->chroma_width];
}
// If the CU is completely inside the frame at this depth, search for
// prediction modes at this depth.
if ( x + luma_width <= frame_width && y + luma_height <= frame_height)
@ -1269,23 +1396,21 @@ static double search_cu(
if((!recon_chroma && state->encoder_control->chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T)
|| tree_type == UVG_CHROMA_T) {
intra_search.pred_cu.intra.mode_chroma = cur_cu->intra.mode_chroma;
lcu_fill_chroma_cu_info(
lcu,
chroma_loc);
if(tree_type != UVG_CHROMA_T) {
lcu_fill_chroma_cu_info(
lcu,
chroma_loc);
}
uvg_intra_recon_cu(state,
&intra_search, chroma_loc,
NULL, lcu,
UVG_CHROMA_T,
false,
true);
lcu_fill_cbf(
lcu_fill_chroma_cbfs(
lcu,
chroma_loc->local_x,
chroma_loc->local_y,
chroma_loc->width,
chroma_loc->height,
cur_cu,
UVG_CHROMA_T);
chroma_loc,
tree_type);
} else {
assert(cur_cu->cr_lfnst_idx == 0 && "If we don't have separate tree chroma lfnst index must be 0");
}
@ -1409,6 +1534,16 @@ static double search_cu(
// lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
//}
cabac->update = 0;
mark_deblocking(
cu_loc,
chroma_loc,
lcu,
tree_type,
has_chroma,
is_separate_tree,
x_local,
y_local);
}
bool can_split_cu =