From 3af65b84778105ad2d30fc26cee592e0b68aceec Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Tue, 12 Nov 2013 11:55:39 +0200 Subject: [PATCH] Add SAO searching and reconstruction for chroma. One I frame and 99 P frames encoded with SAO off and on. Processed 100 frames, 6693224 bits AVG PSNR: 30.7248 37.8978 37.8287 Processed 100 frames, 6295072 bits AVG PSNR: 32.2511 38.9373 38.9818 --- src/encoder.c | 11 +++- src/sao.c | 144 +++++++++++++++++++++++++++++++------------------- src/sao.h | 5 +- 3 files changed, 103 insertions(+), 57 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 15e2ce63..45e33f9a 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -876,7 +876,11 @@ void encode_slice_data(encoder_control* encoder) if (encoder->sao_enable) { pixel *new_y_data = MALLOC(pixel, pic->width * pic->height); + pixel *new_u_data = MALLOC(pixel, (pic->width * pic->height) >> 2); + pixel *new_v_data = MALLOC(pixel, (pic->width * pic->height) >> 2); memcpy(new_y_data, pic->y_recdata, sizeof(pixel) * pic->width * pic->height); + memcpy(new_u_data, pic->u_recdata, sizeof(pixel) * (pic->width * pic->height) >> 2); + memcpy(new_v_data, pic->v_recdata, sizeof(pixel) * (pic->width * pic->height) >> 2); for (y_ctb = 0; y_ctb < encoder->in.height_in_lcu; y_ctb++) { for (x_ctb = 0; x_ctb < encoder->in.width_in_lcu; x_ctb++) { @@ -887,13 +891,18 @@ void encode_slice_data(encoder_control* encoder) init_sao_info(sao_chroma); sao_search_luma(encoder->in.cur_pic, x_ctb, y_ctb, sao_luma); + sao_search_chroma(encoder->in.cur_pic, x_ctb, y_ctb, sao_chroma); // sao_do_merge(encoder, x_ctb, y_ctb, sao_luma, sao_chroma); // sao_do_rdo(encoder, x_ctb, y_ctb, sao_luma, sao_chroma); - sao_reconstruct(encoder->in.cur_pic, new_y_data, x_ctb, y_ctb, sao_luma, sao_chroma); + sao_reconstruct(pic, new_y_data, x_ctb, y_ctb, sao_luma, COLOR_Y); + sao_reconstruct(pic, new_u_data, x_ctb, y_ctb, sao_chroma, COLOR_U); + sao_reconstruct(pic, new_v_data, x_ctb, y_ctb, sao_chroma, COLOR_V); } } free(new_y_data); + free(new_u_data); + free(new_v_data); } init_contexts(encoder,encoder->in.cur_pic->slicetype); diff --git a/src/sao.c b/src/sao.c index c92e7c39..ff50e0ba 100644 --- a/src/sao.c +++ b/src/sao.c @@ -118,11 +118,16 @@ void sao_reconstruct_color(const pixel *rec_data, pixel *new_rec_data, const sao * \param sao Sao parameters. * \param rec Top-left corner of the LCU, modified to be top-left corner of */ -void sao_calc_block_dims(const picture *pic, const sao_info *sao, vector2d *rec, +void sao_calc_block_dims(const picture *pic, color_index color_i, + const sao_info *sao, vector2d *rec, vector2d *tl, vector2d *br, vector2d *block) { vector2d a_ofs = g_sao_edge_offsets[sao->eo_class][0]; vector2d b_ofs = g_sao_edge_offsets[sao->eo_class][1]; + const int is_chroma = (color_i != COLOR_Y ? 1 : 0); + int width = pic->width >> is_chroma; + int height = pic->height >> is_chroma; + int block_width = LCU_WIDTH >> is_chroma; // Handle top and left. if (rec->y == 0) { @@ -141,20 +146,20 @@ void sao_calc_block_dims(const picture *pic, const sao_info *sao, vector2d *rec, } // Handle right and bottom, taking care of non-LCU sized CUs. - if (rec->y + LCU_WIDTH >= pic->height) { + if (rec->y + block_width >= height) { br->y = 0; - if (rec->y + LCU_WIDTH >= pic->height) { - block->y = pic->height - rec->y; + if (rec->y + block_width >= height) { + block->y = height - rec->y; } if (a_ofs.y == 1 || b_ofs.y == 1) { block->y -= 1; br->y += 1; } } - if (rec->x + LCU_WIDTH >= pic->width) { + if (rec->x + block_width >= width) { br->x = 0; - if (rec->x + LCU_WIDTH > pic->width) { - block->x = pic->width - rec->x; + if (rec->x + block_width > width) { + block->x = width - rec->x; } if (a_ofs.x == 1 || b_ofs.x == 1) { block->x -= 1; @@ -166,58 +171,62 @@ void sao_calc_block_dims(const picture *pic, const sao_info *sao, vector2d *rec, rec->x = (rec->x == 0 ? 0 : -1); } -void sao_reconstruct(picture *pic, pixel *new_y_data, unsigned x_ctb, unsigned y_ctb, - const sao_info *sao_luma, const sao_info *sao_chroma) +void sao_reconstruct(picture *pic, const pixel *old_rec, + unsigned x_ctb, unsigned y_ctb, + const sao_info *sao, color_index color_i) { - pixel rec_y[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)]; - pixel new_rec_y[LCU_LUMA_SIZE]; - pixel *y_recdata = &pic->y_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)]; - pixel *new_y_recdata = &new_y_data[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)]; + const int is_chroma = (color_i != COLOR_Y ? 1 : 0); + const int pic_stride = pic->width >> is_chroma; + const int lcu_stride = LCU_WIDTH >> is_chroma; + const int buf_stride = lcu_stride + 2; - int x = x_ctb * LCU_WIDTH, y = y_ctb * LCU_WIDTH; - - vector2d rec; + pixel *recdata = (color_i == COLOR_Y ? pic->y_recdata : + (color_i == COLOR_U ? pic->u_recdata : pic->v_recdata)); + pixel buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)]; + pixel new_rec[LCU_WIDTH * LCU_WIDTH]; + // Calling CU_TO_PIXEL with depth 1 is the same as using block size of 32. + pixel *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)]; + const pixel *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)]; + + vector2d ofs; vector2d tl = { 1, 1 }; vector2d br = { 1, 1 }; vector2d block = { LCU_WIDTH, LCU_WIDTH }; - if (sao_luma->type == SAO_TYPE_NONE) { + if (sao->type == SAO_TYPE_NONE) { return; } - rec.x = x; - rec.y = y; - - sao_calc_block_dims(pic, sao_luma, &rec, &tl, &br, &block); + ofs.x = x_ctb * lcu_stride; + ofs.y = y_ctb * lcu_stride; + block.x = lcu_stride; + block.y = lcu_stride; + sao_calc_block_dims(pic, color_i, sao, &ofs, &tl, &br, &block); // Data to tmp buffer. - picture_blit_pixels(&new_y_data[(y + rec.y) * pic->width + x + rec.x], rec_y, + picture_blit_pixels(&old_lcu_rec[ofs.y * pic_stride + ofs.x], + buf_rec, tl.x + block.x + br.x, tl.y + block.y + br.y, - pic->width, LCU_WIDTH + 2); + pic_stride, buf_stride); - //picture_blit_pixels(y_recdata, new_rec_y, LCU_WIDTH, LCU_WIDTH, pic->width, LCU_WIDTH); - - sao_reconstruct_color(&rec_y[tl.y * (LCU_WIDTH + 2) + tl.x], - &new_rec_y[(rec.y + tl.y) * LCU_WIDTH + rec.x + tl.x], - sao_luma, - LCU_WIDTH + 2, LCU_WIDTH, + sao_reconstruct_color(&buf_rec[tl.y * buf_stride + tl.x], + &new_rec[(ofs.y + tl.y) * lcu_stride + ofs.x + tl.x], + sao, + buf_stride, lcu_stride, block.x, block.y); - //sao_reconstruct_color(rec_u, sao_chroma, COLOR_U); - //sao_reconstruct_color(rec_v, sao_chroma, COLOR_V); - + // Copy reconstructed block from tmp buffer to rec image. - // - picture_blit_pixels(&new_rec_y[(tl.y + rec.y) * LCU_WIDTH + (tl.x + rec.x)], - &y_recdata[(tl.y + rec.y) * (pic->width) + (tl.x + rec.x)], - block.x, block.y, LCU_WIDTH, pic->width); + picture_blit_pixels(&new_rec[(tl.y + ofs.y) * lcu_stride + (tl.x + ofs.x)], + &lcu_rec[(tl.y + ofs.y) * pic_stride + (tl.x + ofs.x)], + block.x, block.y, lcu_stride, pic_stride); } -void sao_search_best_mode(const pixel *data, const pixel *recdata, +void sao_search_best_mode(const pixel *data[], const pixel *recdata[], int block_width, int block_height, - unsigned buf_size, unsigned buf_cnt, + unsigned buf_cnt, sao_info *sao_out) { sao_eo_class edge_class; @@ -235,7 +244,7 @@ void sao_search_best_mode(const pixel *data, const pixel *recdata, // Call calc_sao_edge_dir once for luma and twice for chroma. for (i = 0; i < buf_cnt; ++i) { - calc_sao_edge_dir(data + i * buf_size, recdata + i * buf_size, edge_class, + calc_sao_edge_dir(data[i], recdata[i], edge_class, block_width, block_height, cat_sum_cnt); } @@ -284,17 +293,50 @@ void sao_search_best_mode(const pixel *data, const pixel *recdata, void sao_search_chroma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao) { - + pixel orig_u[LCU_CHROMA_SIZE]; + pixel rec_u[LCU_CHROMA_SIZE]; + pixel orig_v[LCU_CHROMA_SIZE]; + pixel rec_v[LCU_CHROMA_SIZE]; + pixel *orig[2] = { orig_u, orig_v }; + pixel *rec[2] = { rec_u, rec_v }; + pixel *u_data = &pic->u_data[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)]; + pixel *u_recdata = &pic->u_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)]; + pixel *v_data = &pic->v_data[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)]; + pixel *v_recdata = &pic->v_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 1, pic->width / 2)]; + int block_width = (LCU_WIDTH / 2); + int block_height = (LCU_WIDTH / 2); + + if (x_ctb * (LCU_WIDTH / 2) + (LCU_WIDTH / 2) >= (unsigned)pic->width / 2) { + block_width = (pic->width - x_ctb * LCU_WIDTH) / 2; + } + if (y_ctb * (LCU_WIDTH / 2) + (LCU_WIDTH / 2) >= (unsigned)pic->height / 2) { + block_height = (pic->height - y_ctb * LCU_WIDTH) / 2; + } + + sao->type = SAO_TYPE_EDGE; + + // Fill temporary buffers with picture data. + // These buffers are needed only until we switch to a LCU based data + // structure for pixels. Then we can give pointers directly to that structure + // without making copies. + picture_blit_pixels(u_data, orig_u, block_width, block_height, + pic->width / 2, LCU_WIDTH / 2); + picture_blit_pixels(v_data, orig_v, block_width, block_height, + pic->width / 2, LCU_WIDTH / 2); + picture_blit_pixels(u_recdata, rec_u, block_width, block_height, + pic->width / 2, LCU_WIDTH / 2); + picture_blit_pixels(v_recdata, rec_v, block_width, block_height, + pic->width / 2, LCU_WIDTH / 2); + + sao_search_best_mode(orig, rec, block_width, block_height, 2, sao); } void sao_search_luma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao) { - // These buffers are needed only until we switch to a LCU based data - // structure for pixels. Then we can give pointers directly to that structure - // without making copies. - // It's 2-dimensional because sao_search_best_mode takes arguments as arrays. pixel orig_y[LCU_LUMA_SIZE]; pixel rec_y[LCU_LUMA_SIZE]; + pixel *orig[1] = { orig_y }; + pixel *rec[1] = { rec_y }; pixel *y_data = &pic->y_data[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)]; pixel *y_recdata = &pic->y_recdata[CU_TO_PIXEL(x_ctb, y_ctb, 0, pic->width)]; int block_width = LCU_WIDTH; @@ -306,21 +348,15 @@ void sao_search_luma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_inf if (y_ctb * LCU_WIDTH + LCU_WIDTH >= (unsigned)pic->height) { block_height = pic->height - y_ctb * LCU_WIDTH; } - - /*sao->offsets[SAO_EO_CAT0] = 0; - sao->offsets[SAO_EO_CAT1] = 7; - sao->offsets[SAO_EO_CAT2] = 7; - sao->offsets[SAO_EO_CAT3] = -7; - sao->offsets[SAO_EO_CAT4] = -7; - sao->eo_class = SAO_EO0; - sao->type = SAO_TYPE_EDGE; - return;*/ sao->type = SAO_TYPE_EDGE; // Fill temporary buffers with picture data. + // These buffers are needed only until we switch to a LCU based data + // structure for pixels. Then we can give pointers directly to that structure + // without making copies. picture_blit_pixels(y_data, orig_y, block_width, block_height, pic->width, LCU_WIDTH); picture_blit_pixels(y_recdata, rec_y, block_width, block_height, pic->width, LCU_WIDTH); - sao_search_best_mode(orig_y, rec_y, block_width, block_height, LCU_LUMA_SIZE, 1, sao); + sao_search_best_mode(orig, rec, block_width, block_height, 1, sao); } diff --git a/src/sao.h b/src/sao.h index e32010fe..0be99eea 100644 --- a/src/sao.h +++ b/src/sao.h @@ -47,7 +47,8 @@ typedef struct sao_info_struct { void init_sao_info(sao_info *sao); void sao_search_chroma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao); void sao_search_luma(const picture *pic, unsigned x_ctb, unsigned y_ctb, sao_info *sao); -void sao_reconstruct(picture *pic, pixel *new_y_data, unsigned x_ctb, unsigned y_ctb, - const sao_info *sao_luma, const sao_info *sao_chroma); +void sao_reconstruct(picture *pic, const pixel *old_rec, + unsigned x_ctb, unsigned y_ctb, + const sao_info *sao, color_index color_i); #endif \ No newline at end of file