From 7bd090272788009452355b01c0732831c15f6047 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Fri, 25 Oct 2013 17:14:20 +0300 Subject: [PATCH] Implement fast distortion estimation for sao. Add function for blitting pixels from one buffer to another. Several commits have been squashed to this one. --- src/encoder.c | 234 +++++++++++++++++++++++++++++++++++++++++++------- src/global.h | 9 ++ src/picture.c | 31 +++++++ src/picture.h | 3 + 4 files changed, 246 insertions(+), 31 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index ff07cb6b..b7c07cd0 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -772,27 +772,208 @@ void encode_slice_header(encoder_control* encoder) // TODO: move somewhere else (sao.h?) -#define SAO_TYPE_NONE 0 -#define SAO_TYPE_EDGE 1 -#define SAO_TYPE_BAND 2 #define Y_INDEX 0 #define U_INDEX 1 #define V_INDEX 2 #define YUV_INDEX_END 3 - -#define NUM_COLORS 3 #define NUM_SAO_OFFSETS 4 -typedef enum { COLOR_Y = 0, COLOR_U = 1, COLOR_V = 2 } color_index; +typedef enum { COLOR_Y = 0, COLOR_U = 1, COLOR_V = 2, NUM_COLORS } color_index; +typedef enum { SAO_TYPE_NONE = 0, SAO_TYPE_BAND, SAO_TYPE_EDGE } sao_type; +typedef enum { SAO_EO0 = 0, SAO_EO1, SAO_EO2, SAO_EO3, SAO_NUM_EO } sao_eo_class; +typedef enum { SAO_EO_CAT0 = 0, SAO_EO_CAT1, SAO_EO_CAT2, SAO_EO_CAT3, SAO_EO_CAT4, NUM_SAO_EDGE_CATEGORIES } sao_eo_cat; typedef struct { - int type; + sao_type type; + sao_eo_class eo_class; + int ddistortion; int merge_left_flag; int merge_up_flag; int offsets[NUM_SAO_OFFSETS]; - int eo_class; } sao_info; +// SIGN3(x) is -1, 0 or +1 according to the sign of x (x is evaluated twice). +#define SIGN3(x) (((x) > 0) - ((x) < 0)) +#define NUM_SAO_EDGE_DIRS 4 + +typedef struct { + int x; + int y; +} vector2d; + +// Offsets of a and b in relation to c. 
+// g_sao_edge_offsets[eo_class][a or b] as { x, y }; columns are EO classes 0..3: +// |       |   a   | a     |     a | +// | a c b |   c   |   c   |   c   | +// |       |   b   |     b | b     | +static const vector2d g_sao_edge_offsets[4][2] = { + { { -1, 0 }, { 1, 0 } }, + { { 0, -1 }, { 0, 1 } }, + { { -1, -1 }, { 1, 1 } }, + { { 1, -1 }, { -1, 1 } } +}; +// Mapping of eo_idx values to edge offset categories. +static const unsigned g_sao_eo_idx_to_eo_category[] = { 1, 2, 0, 3, 4 }; +// Mapping relationships between a, b and c to eo_idx. +#define EO_IDX(a, b, c) (2 + SIGN3((c) - (a)) + SIGN3((c) - (b))) + +/** + * \brief Accumulate error sums and sample counts per edge category for one EO class. + * \param orig_data,rec_data Original / reconstructed pixels, block_width x block_width each (64x64 for luma, 32x32 for chroma). + * \param eo_class Row of g_sao_edge_offsets giving the positions of neighbours a and b. + * \param cat_sum_cnt In/out: [0][cat] gains (orig - rec), [1][cat] gains 1 per interior pixel. + */ +void calc_sao_edge_dir(const pixel *orig_data, const pixel *rec_data, + int eo_class, int block_width, + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) +{ + int y, x; + vector2d a_ofs = g_sao_edge_offsets[eo_class][0]; + vector2d b_ofs = g_sao_edge_offsets[eo_class][1]; + // Arrays orig_data and rec_data are quarter size for chroma. + + // Don't sample the edge pixels because this function doesn't have access to + // their neighbours. + for (y = 1; y < block_width - 1; ++y) { + for (x = 1; x < block_width - 1; ++x) { + const pixel *c_data = &rec_data[y * block_width + x]; + pixel a = c_data[a_ofs.y * block_width + a_ofs.x]; + pixel c = c_data[0]; + pixel b = c_data[b_ofs.y * block_width + b_ofs.x]; + + int eo_idx = EO_IDX(a, b, c); + int eo_cat = g_sao_eo_idx_to_eo_category[eo_idx]; + + cat_sum_cnt[0][eo_cat] += orig_data[y * block_width + x] - c; + cat_sum_cnt[1][eo_cat] += 1; + } + } +} + +void sao_reconstruct_color(pixel *rec_data, const sao_info *sao, color_index color) +{ + unsigned y, x; + vector2d a_ofs = g_sao_edge_offsets[sao->eo_class][0]; + vector2d b_ofs = g_sao_edge_offsets[sao->eo_class][1]; + // The rec_data block is quarter size for chroma (see block_width). 
+ unsigned block_width = LCU_WIDTH >> !(color == COLOR_Y); + // Skip border pixels (their neighbours lie outside the buffer); the (int) casts keep negative neighbour offsets signed. + for (y = 1; y + 1 < block_width; ++y) { + for (x = 1; x + 1 < block_width; ++x) { + pixel *c_data = &rec_data[y * block_width + x]; + pixel a = c_data[a_ofs.y * (int)block_width + a_ofs.x]; + pixel c = c_data[0]; + pixel b = c_data[b_ofs.y * (int)block_width + b_ofs.x]; + // NOTE(review): offsets are applied in place, so neighbours visited earlier already carry their offset. + int eo_idx = EO_IDX(a, b, c); + int eo_cat = g_sao_eo_idx_to_eo_category[eo_idx]; + // offsets[] stores categories 1..4 at indices 0..3; category 0 gets no offset. + if (eo_cat != SAO_EO_CAT0) { c_data[0] += sao->offsets[eo_cat - 1]; } + } + } +} + +void sao_reconstruct(picture *pic, unsigned x_ctb, unsigned y_ctb, + const sao_info *sao_luma, const sao_info *sao_chroma) +{ + pixel rec_y[LCU_LUMA_SIZE]; + pixel *y_recdata = &pic->y_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)]; + // TODO: sao chroma reconstruct + + // Data to tmp buffer. + picture_blit_pixels(y_recdata, rec_y, LCU_WIDTH, LCU_WIDTH, pic->width, LCU_WIDTH); + + sao_reconstruct_color(rec_y, sao_luma, COLOR_Y); + //sao_reconstruct_color(rec_u, sao_chroma, COLOR_U); + //sao_reconstruct_color(rec_v, sao_chroma, COLOR_V); + + // Copy reconstructed block from tmp buffer to rec image. + picture_blit_pixels(rec_y, y_recdata, LCU_WIDTH, LCU_WIDTH, LCU_WIDTH, pic->width); +} +// Pick the EO class with the lowest estimated SSE change; eo_class and offsets are written only when a class beats 0. +void sao_search_best_mode(const pixel *data, const pixel *recdata, + unsigned block_width, unsigned buf_size, unsigned buf_cnt, + sao_info *sao_out) +{ + sao_eo_class edge_class; + // Per-category error sums [0] and sample counts [1]; the mean error is the best offset. + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]; + unsigned buf_i; + + sao_out->ddistortion = 0; + + for (edge_class = SAO_EO0; edge_class <= SAO_EO3; ++edge_class) { + int edge_offset[NUM_SAO_EDGE_CATEGORIES]; + int sum_ddistortion = 0; + sao_eo_cat edge_cat; + + // Gather fresh statistics for this class: one buffer for luma, two for chroma. 
+ memset(cat_sum_cnt, 0, sizeof(cat_sum_cnt)); + for (buf_i = 0; buf_i < buf_cnt; ++buf_i) { + calc_sao_edge_dir(data + buf_i * buf_size, recdata + buf_i * buf_size, + edge_class, block_width, cat_sum_cnt); + } + + for (edge_cat = SAO_EO_CAT1; edge_cat <= SAO_EO_CAT4; ++edge_cat) { + int cat_sum = cat_sum_cnt[0][edge_cat]; + int cat_cnt = cat_sum_cnt[1][edge_cat]; + + // The optimum offset can be calculated by getting the minimum of the + // fast ddistortion estimation formula. The minimum is the mean error, + // rounded to the nearest integer (0 when the category has no samples). + int offset = cat_cnt ? (cat_sum + (cat_sum < 0 ? -(cat_cnt >> 1) : (cat_cnt >> 1))) / cat_cnt : 0; + edge_offset[edge_cat] = offset; + // The ddistortion is amount by which the SSE of data changes. It should + // be non-positive for all categories, if offset was chosen correctly. + // ddistortion = N * h^2 - 2 * h * E, where N is the number of samples + // and E is the sum of errors. + // It basically says that all pixels that are not improved by offset + // increase SSE by h^2 and all pixels that are improved by + // offset decrease SSE by h*E. + sum_ddistortion += cat_cnt * offset * offset - 2 * offset * cat_sum; + } + // SAO is not applied for category 0. + edge_offset[SAO_EO_CAT0] = 0; + + // Choose the offset class that offers the least error after offset. + if (sum_ddistortion < sao_out->ddistortion) { + sao_out->eo_class = edge_class; + sao_out->ddistortion = sum_ddistortion; + memcpy(sao_out->offsets, &edge_offset[SAO_EO_CAT1], sizeof(sao_out->offsets)); + } + } +} + +sao_info sao_search_chroma(const picture *pic, unsigned x_ctb, unsigned y_ctb) +{ + sao_info sao = { SAO_TYPE_NONE }; + sao.merge_left_flag = 0; + sao.merge_up_flag = 0; + sao.type = SAO_TYPE_NONE; + return sao; +} + +sao_info sao_search_luma(const picture *pic, unsigned x_ctb, unsigned y_ctb) +{ + // These buffers are needed only until we switch to a LCU based data + // structure for pixels. Then we can give pointers directly to that structure + // without making copies. + // Each buffer is a flat LCU_WIDTH x LCU_WIDTH array with a row stride of LCU_WIDTH. 
+ pixel orig_y[LCU_LUMA_SIZE]; + pixel rec_y[LCU_LUMA_SIZE]; + pixel *y_data = &pic->y_data[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)]; + pixel *y_recdata = &pic->y_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)]; + sao_info sao_params = { SAO_TYPE_NONE }; + // All fields start zeroed: the search only writes ddistortion, eo_class and offsets, and encode_sao reads type and the merge flags. + // Fill temporary buffers with picture data. + picture_blit_pixels(y_data, orig_y, LCU_WIDTH, LCU_WIDTH, pic->width, LCU_WIDTH); + picture_blit_pixels(y_recdata, rec_y, LCU_WIDTH, LCU_WIDTH, pic->width, LCU_WIDTH); + + sao_search_best_mode(orig_y, rec_y, LCU_WIDTH, LCU_LUMA_SIZE, 1, &sao_params); + + return sao_params; +} + void encode_sao_offsets(encoder_control *encoder, sao_info *sao) { int i; @@ -859,34 +1040,20 @@ void encode_sao_merge_flags(encoder_control *encoder, sao_info *sao, /** * \brief Stub that encodes all LCU's as none type. */ -void encode_sao(encoder_control *encoder, unsigned x_lcu, uint16_t y_lcu) +void encode_sao(encoder_control *encoder, unsigned x_lcu, uint16_t y_lcu, + sao_info *sao_luma, sao_info *sao_chroma) { unsigned sao_type[3] = {SAO_TYPE_NONE, SAO_TYPE_NONE, SAO_TYPE_NONE}; picture *pic = encoder->in.cur_pic; - sao_info tmp_sao[3]; - sao_info *sao = &tmp_sao[0]; - // The tmp_sao and these assignments are temporary. The sao pointer will - // be given to this function. - sao[0].merge_left_flag = 0; - sao[0].merge_up_flag = 0; - sao[0].type = SAO_TYPE_NONE; - - sao[1].merge_left_flag = 0; - sao[1].merge_up_flag = 0; - sao[1].type = SAO_TYPE_NONE; - - sao[2].merge_left_flag = 0; - sao[2].merge_up_flag = 0; - sao[2].type = SAO_TYPE_NONE; - - encode_sao_merge_flags(encoder, sao, x_lcu, y_lcu); + // TODO: transmit merge flags outside sao_info + encode_sao_merge_flags(encoder, sao_luma, x_lcu, y_lcu); // If SAO is merged, nothing else needs to be coded. 
- if (!sao->merge_left_flag && !sao->merge_up_flag) { - encode_sao_color(encoder, &sao[COLOR_Y], COLOR_Y); - encode_sao_color(encoder, &sao[COLOR_U], COLOR_U); - encode_sao_color(encoder, &sao[COLOR_V], COLOR_V); + if (!sao_luma->merge_left_flag && !sao_luma->merge_up_flag) { + encode_sao_color(encoder, sao_luma, COLOR_Y); + encode_sao_color(encoder, sao_chroma, COLOR_U); + encode_sao_color(encoder, sao_chroma, COLOR_V); } } @@ -906,7 +1073,12 @@ void encode_slice_data(encoder_control* encoder) uint8_t depth = 0; if (encoder->sao_enable) { - encode_sao(encoder, x_ctb, y_ctb); + sao_info sao_luma = sao_search_luma(encoder->in.cur_pic, x_ctb, y_ctb); + sao_info sao_chroma = sao_search_chroma(encoder->in.cur_pic, x_ctb, y_ctb); + + // sao_do_merge(encoder, x_ctb, y_ctb, sao_luma, sao_chroma); + // sao_do_rdo(encoder, x_ctb, y_ctb, sao_luma, sao_chroma); + encode_sao(encoder, x_ctb, y_ctb, &sao_luma, &sao_chroma); } // Recursive function for looping through all the sub-blocks diff --git a/src/global.h b/src/global.h index 7e53b9a0..28fd5b47 100644 --- a/src/global.h +++ b/src/global.h @@ -65,6 +65,9 @@ typedef int16_t coefficient; /* END OF CONFIG VARIABLES */ +#define LCU_LUMA_SIZE (LCU_WIDTH * LCU_WIDTH) +#define LCU_CHROMA_SIZE (LCU_WIDTH * LCU_WIDTH >> 2) + #define MAX_REF_PIC_COUNT 5 #define AMVP_MAX_NUM_CANDS 2 @@ -80,6 +83,12 @@ typedef int16_t coefficient; #define NO_SCU_IN_LCU(no_lcu) ((no_lcu) << MAX_DEPTH) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) +#define LOG2_LCU_WIDTH 6 +// CU_TO_PIXEL = y * cu_width * pic_width + x * cu_width, where cu_width = 1 << (LOG2_LCU_WIDTH - depth) +#define CU_TO_PIXEL(x, y, depth, width) (((y) << (LOG2_LCU_WIDTH - (depth))) * (width) \ + + ((x) << (LOG2_LCU_WIDTH - (depth)))) + + #define VERSION_STRING "0.2 " #define VERSION 0.2 diff --git a/src/picture.c b/src/picture.c index a45ae8bc..38a314e0 100644 --- a/src/picture.c +++ b/src/picture.c @@ -43,6 +43,37 @@ void picture_set_block_residual(picture *pic, uint32_t x_scu, uint32_t 
y_scu, } } +/** + * \brief BLock Image Transfer from one buffer to another. + * + * It's a stupidly simple loop that copies pixels one row at a time. + * + * \param orig Start of the source buffer. + * \param dst Start of the destination buffer. + * \param width Width of the copied region in pixels. + * \param height Height of the copied region in rows. + * \param orig_stride Pixels to advance orig by after each row. + * \param dst_stride Pixels to advance dst by after each row. + * + * This should be inlined, but it's defined here for now to see if Visual + * Studio's LTCG will inline it. + */ +void picture_blit_pixels(const pixel* orig, pixel *dst, + unsigned width, unsigned height, + unsigned orig_stride, unsigned dst_stride) +{ + unsigned y, x; + + for (y = 0; y < height; ++y) { + for (x = 0; x < width; ++x) { + dst[x] = orig[x]; + } + // Move pointers to the next row. + orig += orig_stride; + dst += dst_stride; + } +} + /** * \brief Set block coded status * \param pic picture to use diff --git a/src/picture.h b/src/picture.h index dab3ad99..6d05fa91 100644 --- a/src/picture.h +++ b/src/picture.h @@ -108,6 +108,9 @@ void picture_set_block_residual(picture *pic, uint32_t x_scu, uint32_t y_scu, uint8_t depth, int8_t residual); void picture_set_block_split(picture *pic, uint32_t x_scu, uint32_t y_scu, uint8_t depth, int8_t split); +void picture_blit_pixels(const pixel* orig, pixel *dst, + unsigned width, unsigned height, + unsigned orig_stride, unsigned dst_stride); picture_list * picture_list_init(int size); int picture_list_resize(picture_list *list, int size);