From 088dd9ab96960b1bb69c36194b2acf676f4502c8 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Wed, 9 Apr 2014 17:12:28 +0300 Subject: [PATCH 1/3] Clean up intra mode search. - This changes the bitstream a little bit, because it changes the order in which the modes are tried, and when two modes have the same cost, the first one is chosen. - Dst buffer was removed as it was no longer used. --- src/intra.c | 106 ++++++++++++++++++--------------------------------- src/intra.h | 2 +- src/search.c | 2 - 3 files changed, 39 insertions(+), 71 deletions(-) diff --git a/src/intra.c b/src/intra.c index d19767a3..eec0def8 100644 --- a/src/intra.c +++ b/src/intra.c @@ -241,29 +241,22 @@ static uint32_t intra_pred_ratecost(int16_t mode, int8_t *intra_preds) * \param xpos source x-position * \param ypos source y-position * \param width block size to predict - * \param dst destination buffer for best prediction - * \param dststride destination width * \param sad_out sad value of best mode * \returns best intra mode - - This function derives the prediction samples for planar mode (intra coding). 
*/ int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int16_t recstride, - uint8_t width, pixel *dst, int32_t dststride, uint32_t *sad_out, + uint8_t width, uint32_t *sad_out, int8_t *intra_preds, uint32_t *bitcost_out) { uint32_t best_sad = 0xffffffff; uint32_t sad = 0; int16_t best_mode = 1; uint32_t best_bitcost = 0; - int32_t x,y; - int16_t i; - uint32_t bitcost = 0; + int16_t mode; cost_16bit_nxn_func cost_func = get_sad_16bit_nxn_func(width); // Temporary block arrays - // TODO: alloc with alignment pixel pred[LCU_WIDTH * LCU_WIDTH + 1]; pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; @@ -273,79 +266,56 @@ int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int16_t re uint8_t threshold = intra_hor_ver_dist_thres[g_to_bits[width]]; //!< Intra filtering threshold - #define COPY_PRED_TO_DST() for (y = 0; y < (int32_t)width; y++) { for (x = 0; x < (int32_t)width; x++) { dst[x + y*dststride] = pred[x + y*width]; } } - #define CHECK_FOR_BEST(mode, additional_sad) sad = cost_func(pred, orig_block); \ - sad += additional_sad;\ - if(sad < best_sad)\ - {\ - best_bitcost = bitcost;\ - best_sad = sad;\ - best_mode = mode;\ - COPY_PRED_TO_DST();\ - } - // Store original block for SAD computation - i = 0; - for(y = 0; y < (int32_t)width; y++) { - for(x = 0; x < (int32_t)width; x++) { - orig_block[i++] = orig[x + y*origstride]; - } - } + picture_blit_pixels(orig, orig_block, width, width, origstride, width); - // Filtered only needs the borders - for (y = -1; y < (int32_t)recstride; y++) { - rec_filtered[y*recstride - 1] = rec[y*recstride - 1]; - } - for (x = 0; x < (int32_t)recstride; x++) { - rec_filtered[x - recstride] = rec[x - recstride]; - } - // Apply filter - intra_filter(rec_filtered,recstride,width,0); - - // Test DC mode (never filtered) + // Generate filtered reference pixels. 
{ - pixel val = intra_get_dc_pred(rec, recstride, width); - for (i = 0; i < (int32_t)(width*width); i++) { - pred[i] = val; + int16_t x, y; + for (y = -1; y < recstride; y++) { + rec_filtered[y*recstride - 1] = rec[y*recstride - 1]; } - bitcost = intra_pred_ratecost(1,intra_preds); - CHECK_FOR_BEST(1,bitcost*(int)(g_cur_lambda_cost+0.5)); + for (x = 0; x < recstride; x++) { + rec_filtered[x - recstride] = rec[x - recstride]; + } + intra_filter(rec_filtered, recstride, width, 0); } - // Check angular not requiring filtering - for (i = 2; i < 35; i++) { - int distance = MIN(abs(i - 26),abs(i - 10)); //!< Distance from top and left predictions - if(distance <= threshold) { - intra_get_angular_pred(rec, recstride, pred, width, width, i, filter); - bitcost = intra_pred_ratecost(i,intra_preds); - CHECK_FOR_BEST(i,bitcost*(int)(g_cur_lambda_cost+0.5)); + // Try all modes and select the best one. + for (mode = 0; mode < 35; mode++) { + if (mode == 0) { + intra_get_planar_pred(rec_filtered, recstride, width, pred, width); + } else if (mode == 1) { + int i; + pixel val = intra_get_dc_pred(rec, recstride, width); + for (i = 0; i < (int32_t)(width*width); i++) { + pred[i] = val; + } + } else { + int distance = MIN(abs(mode - 26),abs(mode - 10)); //!< Distance from top and left predictions + if (distance <= threshold) { + intra_get_angular_pred(rec, recstride, pred, width, width, mode, filter); + } else { + intra_get_angular_pred(rec_filtered, recstride, pred, width, width, mode, filter); + } } - } - // FROM THIS POINT FORWARD, USING FILTERED PREDICTION - - // Test planar mode (always filtered) - intra_get_planar_pred(rec_filtered, recstride, width, pred, width); - bitcost = intra_pred_ratecost(0,intra_preds); - CHECK_FOR_BEST(0,bitcost*(int)(g_cur_lambda_cost+0.5)); - - // Check angular predictions which require filtered samples - // TODO: add conditions to skip some modes on borders - // chroma can use only 26 and 10 (if not using luma-prediction) - for (i = 2; i < 35; i++) { 
- int distance = MIN(abs(i-26),abs(i-10)); //!< Distance from top and left predictions - if(distance > threshold) { - intra_get_angular_pred(rec_filtered, recstride, pred, width, width, i, filter); - bitcost = intra_pred_ratecost(i,intra_preds); - CHECK_FOR_BEST(i,bitcost*(int)(g_cur_lambda_cost+0.5)); + { + uint32_t mode_cost = intra_pred_ratecost(mode, intra_preds); + sad = cost_func(pred, orig_block); + sad += mode_cost * (int)(g_cur_lambda_cost + 0.5); + if (sad < best_sad) + { + best_bitcost = mode_cost; + best_sad = sad; + best_mode = mode; + } } } // assign final sad to output *sad_out = best_sad; *bitcost_out = best_bitcost; - #undef COPY_PRED_TO_DST - #undef CHECK_FOR_BEST return best_mode; } diff --git a/src/intra.h b/src/intra.h index 58e7d15e..94d612f7 100644 --- a/src/intra.h +++ b/src/intra.h @@ -40,7 +40,7 @@ void intra_filter(pixel* ref, int32_t stride, int32_t width, int8_t mode); /* Predictions */ int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int16_t recstride, - uint8_t width, pixel *dst, int32_t dststride, uint32_t *sad_out, + uint8_t width, uint32_t *sad_out, int8_t *intra_preds, uint32_t *bitcost_out); pixel intra_get_dc_pred(pixel* pic, uint16_t pic_width, uint8_t width); diff --git a/src/search.c b/src/search.c index 6d554335..7f062b58 100644 --- a/src/search.c +++ b/src/search.c @@ -670,7 +670,6 @@ static int search_cu_intra(encoder_control *encoder, cu_info *cur_cu = &lcu->cu[cu_index]; - pixel pred_buffer[LCU_WIDTH * LCU_WIDTH + 1]; pixel rec_buffer[(LCU_WIDTH * 2 + 1) * (LCU_WIDTH * 2 + 1)]; pixel *cu_in_rec_buffer = &rec_buffer[cu_width * 2 + 8 + 1]; @@ -706,7 +705,6 @@ static int search_cu_intra(encoder_control *encoder, unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2); mode = intra_prediction(ref_pixels, LCU_WIDTH, cu_in_rec_buffer, cu_width * 2 + 8, cu_width, - pred_buffer, cu_width, &cost, candidate_modes, &bitcost); cur_cu->intra[pu_index].mode = (int8_t)mode; cur_cu->intra[pu_index].cost = cost; From 
d5c3ad7a2b5e96d092ba3904de702eca6a3933e6 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Thu, 10 Apr 2014 11:27:15 +0300 Subject: [PATCH 2/3] Move intra prediction generation to its own function. --- src/intra.c | 58 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/intra.c b/src/intra.c index eec0def8..9be5567d 100644 --- a/src/intra.c +++ b/src/intra.c @@ -232,6 +232,36 @@ static uint32_t intra_pred_ratecost(int16_t mode, int8_t *intra_preds) return 5; } + +/** + * \param rec Reference pixel. 0 points to unfiltered and 1 to filtered. + * \param recstride Stride for rec pixel arrays. + * \param dst + */ +static void intra_get_pred(pixel *rec[2], int recstride, pixel *dst, int width, int mode) +{ + if (mode == 0) { + intra_get_planar_pred(rec[1], recstride, width, dst, width); + } else if (mode == 1) { + int i; + pixel val = intra_get_dc_pred(rec[0], recstride, width); + for (i = 0; i < width * width; i++) { + dst[i] = val; + } + } else { + // Angular modes use smoothed reference pixels, unless the mode is close + // to being either vertical or horizontal. 
+ int filter_threshold = intra_hor_ver_dist_thres[g_to_bits[width]]; + int dist_from_vert_or_hor = MIN(abs(mode - 26), abs(mode - 10)); + if (dist_from_vert_or_hor <= filter_threshold) { + intra_get_angular_pred(rec[0], recstride, dst, width, width, mode, width<32); + } else { + intra_get_angular_pred(rec[1], recstride, dst, width, width, mode, width<32); + } + } +} + + /** * \brief Function to test best intra prediction mode * \param orig original picture data @@ -261,10 +291,7 @@ int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int16_t re pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - pixel* rec_filtered = &rec_filtered_temp[recstride + 1]; //!< pointer to rec_filtered_temp with offset of (1,1) - int8_t filter = (width<32); // TODO: chroma support - - uint8_t threshold = intra_hor_ver_dist_thres[g_to_bits[width]]; //!< Intra filtering threshold + pixel *ref[2] = {rec, &rec_filtered_temp[recstride + 1]}; // Store original block for SAD computation picture_blit_pixels(orig, orig_block, width, width, origstride, width); @@ -273,32 +300,17 @@ int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int16_t re { int16_t x, y; for (y = -1; y < recstride; y++) { - rec_filtered[y*recstride - 1] = rec[y*recstride - 1]; + ref[1][y*recstride - 1] = rec[y*recstride - 1]; } for (x = 0; x < recstride; x++) { - rec_filtered[x - recstride] = rec[x - recstride]; + ref[1][x - recstride] = rec[x - recstride]; } - intra_filter(rec_filtered, recstride, width, 0); + intra_filter(ref[1], recstride, width, 0); } // Try all modes and select the best one. 
for (mode = 0; mode < 35; mode++) { - if (mode == 0) { - intra_get_planar_pred(rec_filtered, recstride, width, pred, width); - } else if (mode == 1) { - int i; - pixel val = intra_get_dc_pred(rec, recstride, width); - for (i = 0; i < (int32_t)(width*width); i++) { - pred[i] = val; - } - } else { - int distance = MIN(abs(mode - 26),abs(mode - 10)); //!< Distance from top and left predictions - if (distance <= threshold) { - intra_get_angular_pred(rec, recstride, pred, width, width, mode, filter); - } else { - intra_get_angular_pred(rec_filtered, recstride, pred, width, width, mode, filter); - } - } + intra_get_pred(ref, recstride, pred, width, mode); { uint32_t mode_cost = intra_pred_ratecost(mode, intra_preds); From 40c2fa4d46ff67c3d65e84571a84dff65aaa6fef Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Thu, 10 Apr 2014 11:55:32 +0300 Subject: [PATCH 3/3] Change intra reconstruction to use the same prediction function as search. - This fixes a bug with intra search. It sometimes used filtered reference pixels for 4x4 blocks leading to inaccurate cost estimate. --- src/intra.c | 83 +++++++++++++++++++++++++++-------------------------- src/intra.h | 2 +- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/intra.c b/src/intra.c index 9be5567d..bf1913fe 100644 --- a/src/intra.c +++ b/src/intra.c @@ -238,25 +238,41 @@ static uint32_t intra_pred_ratecost(int16_t mode, int8_t *intra_preds) * \param recstride Stride for rec pixel arrays. 
* \param dst */ -static void intra_get_pred(pixel *rec[2], int recstride, pixel *dst, int width, int mode) +static void intra_get_pred(pixel *rec[2], int recstride, pixel *dst, int width, int mode, int is_chroma) { - if (mode == 0) { - intra_get_planar_pred(rec[1], recstride, width, dst, width); - } else if (mode == 1) { - int i; - pixel val = intra_get_dc_pred(rec[0], recstride, width); - for (i = 0; i < width * width; i++) { - dst[i] = val; - } + pixel *ref_pixels = rec[0]; + if (is_chroma || mode == 1 || width == 4) { + // For chroma, DC and 4x4 blocks, always use unfiltered reference. + } else if (mode == 0) { + // Otherwise, use filtered for planar. + ref_pixels = rec[1]; } else { // Angular modes use smoothed reference pixels, unless the mode is close // to being either vertical or horizontal. int filter_threshold = intra_hor_ver_dist_thres[g_to_bits[width]]; int dist_from_vert_or_hor = MIN(abs(mode - 26), abs(mode - 10)); + if (dist_from_vert_or_hor > filter_threshold) { + ref_pixels = rec[1]; + } + } + + if (mode == 0) { + intra_get_planar_pred(ref_pixels, recstride, width, dst, width); + } else if (mode == 1) { + int i; + pixel val = intra_get_dc_pred(ref_pixels, recstride, width); + for (i = 0; i < width * width; i++) { + dst[i] = val; + } + } else { + + int filter_threshold = intra_hor_ver_dist_thres[g_to_bits[width]]; + int dist_from_vert_or_hor = MIN(abs(mode - 26), abs(mode - 10)); + int filter = !is_chroma && width < 32; if (dist_from_vert_or_hor <= filter_threshold) { - intra_get_angular_pred(rec[0], recstride, dst, width, width, mode, width<32); + intra_get_angular_pred(ref_pixels, recstride, dst, width, width, mode, filter); } else { - intra_get_angular_pred(rec[1], recstride, dst, width, width, mode, width<32); + intra_get_angular_pred(ref_pixels, recstride, dst, width, width, mode, filter); } } } @@ -310,7 +326,7 @@ int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int16_t re // Try all modes and select the best one. 
for (mode = 0; mode < 35; mode++) { - intra_get_pred(ref, recstride, pred, width, mode); + intra_get_pred(ref, recstride, pred, width, mode, 0); { uint32_t mode_cost = intra_pred_ratecost(mode, intra_preds); @@ -343,42 +359,27 @@ int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int16_t re * \param chroma chroma-block flag */ -void intra_recon(pixel* rec, uint32_t recstride, uint32_t width, pixel* dst, int32_t dststride, int8_t mode, int8_t chroma) +void intra_recon(pixel* rec, int32_t recstride, uint32_t width, pixel* dst, int32_t dststride, int8_t mode, int8_t chroma) { - int32_t x,y; pixel pred[LCU_WIDTH * LCU_WIDTH]; - int8_t filter = !chroma && width < 32; + pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; + pixel *ref[2] = {rec, &rec_filtered_temp[recstride + 1]}; - - // Filtering apply if luma and not DC - if (!chroma && mode != 1 && width > 4) { - uint8_t threshold = intra_hor_ver_dist_thres[g_to_bits[width]]; - if(MIN(abs(mode-26),abs(mode-10)) > threshold) { - intra_filter(rec,recstride,width,0); + // Generate filtered reference pixels. 
+ { + int x, y; + for (y = -1; y < recstride; y++) { + ref[1][y*recstride - 1] = rec[y*recstride - 1]; } + for (x = 0; x < recstride; x++) { + ref[1][x - recstride] = rec[x - recstride]; + } + intra_filter(ref[1], recstride, width, 0); } - // planar - if (mode == 0) { - intra_get_planar_pred(rec, recstride, width, pred, width); - } else if (mode == 1) { // DC - pixel val = intra_get_dc_pred(rec, (uint16_t)recstride, (uint8_t)width); - for (y = 0; y < (int32_t)width; y++) { - for (x = 0; x < (int32_t)width; x++) { - dst[x + y*dststride] = val; - } - } - // Assigned value directly to output, no need to stay here - return; - } else { // directional predictions - intra_get_angular_pred(rec, recstride,pred, width, width, mode, filter); - } + intra_get_pred(ref, recstride, pred, width, mode, chroma); - for(y = 0; y < (int32_t)width; y++) { - for(x = 0; x < (int32_t)width; x++) { - dst[x+y*dststride] = pred[x+y*width]; - } - } + picture_blit_pixels(pred, dst, width, width, width, dststride); } /** diff --git a/src/intra.h b/src/intra.h index 94d612f7..94e1bf45 100644 --- a/src/intra.h +++ b/src/intra.h @@ -47,7 +47,7 @@ pixel intra_get_dc_pred(pixel* pic, uint16_t pic_width, uint8_t width); void intra_get_planar_pred(pixel* src,int32_t srcstride, uint32_t width, pixel* dst, int32_t dststride); void intra_get_angular_pred(pixel* src, int32_t src_stride, pixel* p_dst, int32_t dst_stride, int32_t width, int32_t dir_mode, int8_t filter); -void intra_recon(pixel* rec, uint32_t rec_stride, uint32_t width, pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); +void intra_recon(pixel* rec, int32_t rec_stride, uint32_t width, pixel* dst, int32_t dst_stride, int8_t mode, int8_t chroma); void intra_recon_lcu(encoder_control* encoder, cabac_data *cabac, int x, int y, int depth, lcu_t *lcu, uint32_t pic_width, uint32_t pic_height);