From 46b6b1243ba14f42eb19a313b154091178edb8b6 Mon Sep 17 00:00:00 2001 From: Ari Koivula Date: Mon, 8 Sep 2014 14:01:55 +0300 Subject: [PATCH] Add --rd=3 mode and enable searching of intra depth 0. - intra_build_reference_border was overflowing at depth 0 because it uses arrays just large enough to accommodate 32x32 transforms, which is the biggest transform. - For similar reasons search_intra_rough doesn't work at depth 0. - The --rd=3 mode tries all modes with transform search. It also works without rough search so it was used to test depth 0 search. If --rd=3 is not on, intra split at depth 0 is not searched for. Conflicts: src/search.c --- src/config.c | 2 +- src/global.h | 16 ++++++++++++---- src/rdo.c | 2 +- src/search.c | 31 +++++++++++++++++++++++++------ src/transform.c | 4 ++-- 5 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/config.c b/src/config.c index 86d80958..a7d52a84 100644 --- a/src/config.c +++ b/src/config.c @@ -368,7 +368,7 @@ static int config_parse(config *cfg, const char *name, const char *value) { int rdo = 0; if (sscanf(value, "%d", &rdo)) { - if (rdo < 0 || rdo > 2) { + if (rdo < 0 || rdo > 3) { fprintf(stderr, "--rd parameter out of range [0..2], set to 1\n"); rdo = 1; } diff --git a/src/global.h b/src/global.h index 6476d3d0..df74c657 100644 --- a/src/global.h +++ b/src/global.h @@ -60,11 +60,19 @@ typedef int16_t coefficient; //spec: references to variables defined in Rec. ITU-T H.265 (04/2013) //Search depth for intra and inter. 
Block sizes: 0 => 64x64, 1 => 32x32, 2 => 16x16, 3 => 8x8, 4 => 4x4 -#define MAX_INTER_SEARCH_DEPTH 3 -#define MIN_INTER_SEARCH_DEPTH 0 +#ifndef MAX_INTER_SEARCH_DEPTH +# define MAX_INTER_SEARCH_DEPTH 3 +#endif +#ifndef MIN_INTER_SEARCH_DEPTH +# define MIN_INTER_SEARCH_DEPTH 0 +#endif -#define MAX_INTRA_SEARCH_DEPTH 4 -#define MIN_INTRA_SEARCH_DEPTH 1 +#ifndef MAX_INTRA_SEARCH_DEPTH +# define MAX_INTRA_SEARCH_DEPTH 4 +#endif +#ifndef MIN_INTRA_SEARCH_DEPTH +# define MIN_INTRA_SEARCH_DEPTH 0 +#endif // Maximum CU depth when descending form LCU level. #define MAX_DEPTH 3 /*!< spec: log2_diff_max_min_luma_coding_block_size */ diff --git a/src/rdo.c b/src/rdo.c index 480dd773..78106fe2 100644 --- a/src/rdo.c +++ b/src/rdo.c @@ -170,7 +170,7 @@ uint32_t rdo_cost_intra(encoder_state * const encoder_state, pixel *pred, pixel } cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); // Full RDO - } else if(encoder->rdo == 2) { + } else if(encoder->rdo >= 2) { coeffcost = get_coeff_cost(encoder_state, temp_coeff, width, 0, luma_scan_mode); cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5); diff --git a/src/search.c b/src/search.c index 9e7c1a1c..d83e5f1b 100644 --- a/src/search.c +++ b/src/search.c @@ -804,7 +804,7 @@ static int cu_rd_cost_luma(const encoder_state *const encoder_state, } cost += (coeff_cost + (coeff_cost >> 1)) * (int32_t)(encoder_state->global->cur_lambda_cost + 0.5); - } else if (rdo == 2) { + } else if (rdo >= 2) { int coeff_cost = 0; coefficient coeff_temp[32 * 32]; @@ -890,7 +890,7 @@ static int cu_rd_cost_chroma(const encoder_state *const encoder_state, } cost += (coeff_cost + (coeff_cost >> 1)) * (int32_t)(encoder_state->global->cur_lambda_cost + 0.5); - } else if (rdo == 2) { + } else if (rdo >= 2) { coefficient coeff_temp[16 * 16]; int8_t scan_order = get_scan_order(pred_cu->type, pred_cu->intra[0].mode_chroma, depth); @@ -1243,30 +1243,47 @@ static int search_cu_intra(encoder_state * const 
encoder_state, // Get intra predictors intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); + if (depth > 0) { // Build reconstructed block to use in prediction with extrapolated borders intra_build_reference_border(encoder_state->encoder_control, x_px, y_px, cu_width * 2 + 8, rec_buffer, cu_width * 2 + 8, 0, frame->width, frame->height, lcu); + } // Find best intra mode for 2Nx2N. { pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2); + int8_t modes[35]; uint32_t costs[35]; - int8_t number_of_modes = search_intra_rough(encoder_state, + int8_t number_of_modes; + bool skip_rough_search = (depth == 0 || encoder_state->encoder_control->rdo >= 3); + if (!skip_rough_search) { + number_of_modes = search_intra_rough(encoder_state, ref_pixels, LCU_WIDTH, cu_in_rec_buffer, cu_width * 2 + 8, cu_width, candidate_modes, modes, costs); + } else { + number_of_modes = 35; + for (int i = 0; i < number_of_modes; ++i) { + modes[i] = i; + costs[i] = MAX_INT; + } + } // Set transform depth to current depth, meaning no transform splits. lcu_set_trdepth(lcu, x_px, y_px, depth, depth); - if (encoder_state->encoder_control->rdo == 2) { - int num_modes_to_check = MIN(number_of_modes, (cu_width <= 8) ? 8 : 3); + if (encoder_state->encoder_control->rdo >= 2) { + int number_of_modes_to_search = (cu_width <= 8) ? 8 : 3; + if (encoder_state->encoder_control->rdo == 3) { + number_of_modes_to_search = 35; + } + int num_modes_to_check = MIN(number_of_modes, number_of_modes_to_search); search_intra_rdo(encoder_state, lcu_px.x, lcu_px.y, depth, ref_pixels, LCU_WIDTH, @@ -1316,6 +1333,8 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept return 0; } + lcu_t *lcu = &work_tree[depth]; + cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; // Assign correct depth cur_cu->depth = depth > MAX_DEPTH ? 
MAX_DEPTH : depth; @@ -1387,7 +1406,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept } // Recursively split all the way to max search depth. - if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) { + if (depth < MAX_INTRA_SEARCH_DEPTH || (depth < MAX_INTER_SEARCH_DEPTH && encoder_state->global->slicetype != SLICE_I)) { int half_cu = cu_width / 2; // Using Cost = lambda * 9 to compensate on the price of the split int split_cost = (int)(encoder_state->global->cur_lambda_cost + 0.5) * 9; diff --git a/src/transform.c b/src/transform.c index bdcdfdc2..21f2bb3e 100644 --- a/src/transform.c +++ b/src/transform.c @@ -540,7 +540,7 @@ int quantize_residual_trskip( // Estimate bit cost of encoding the coeffs as ~(1.5 * abs_sum). unsigned abs_coeffs = coefficients_calc_abs(noskip.coeff, 4, 4); noskip.cost += (abs_coeffs + (abs_coeffs / 2)) * bit_cost; - } else if (encoder_state->encoder_control->rdo == 2) { + } else if (encoder_state->encoder_control->rdo >= 2) { noskip.cost += get_coeff_cost(encoder_state, noskip.coeff, 4, 0, scan_order) * bit_cost; } @@ -558,7 +558,7 @@ int quantize_residual_trskip( // Estimate bit cost of encoding the coeffs as ~(1.5 * abs_sum + 1). unsigned abs_coeffs = coefficients_calc_abs(skip.coeff, 4, 4); skip.cost += (1 + abs_coeffs + (abs_coeffs / 2)) * bit_cost; - } else if (encoder_state->encoder_control->rdo == 2) { + } else if (encoder_state->encoder_control->rdo >= 2) { skip.cost += get_coeff_cost(encoder_state, skip.coeff, 4, 0, scan_order) * bit_cost; } }