Add --rd=3 mode and enable searching of intra depth 0.

- intra_build_reference_border was overflowing at depth 0 because it uses
  arrays just large enough to accommodate 32x32 transforms, which is the
  biggest transform.
- For similar reasons, search_intra_rough doesn't work at depth 0.
- The --rd=3 mode tries all modes with transform search. It also works without
  rough search, so it was used to test depth 0 search. If --rd=3 is not on,
  intra split at depth 0 is not searched for.

Conflicts:
	src/search.c
This commit is contained in:
Ari Koivula 2014-09-08 14:01:55 +03:00
parent c5fa824347
commit 46b6b1243b
5 changed files with 41 additions and 14 deletions

View file

@ -368,7 +368,7 @@ static int config_parse(config *cfg, const char *name, const char *value)
{ {
int rdo = 0; int rdo = 0;
if (sscanf(value, "%d", &rdo)) { if (sscanf(value, "%d", &rdo)) {
if (rdo < 0 || rdo > 2) { if (rdo < 0 || rdo > 3) {
fprintf(stderr, "--rd parameter out of range [0..2], set to 1\n"); fprintf(stderr, "--rd parameter out of range [0..2], set to 1\n");
rdo = 1; rdo = 1;
} }

View file

@ -60,11 +60,19 @@ typedef int16_t coefficient;
//spec: references to variables defined in Rec. ITU-T H.265 (04/2013) //spec: references to variables defined in Rec. ITU-T H.265 (04/2013)
//Search depth for intra and inter. Block sizes: 0 => 64x64, 1 => 32x32, 2 => 16x16, 3 => 8x8, 4 => 4x4 //Search depth for intra and inter. Block sizes: 0 => 64x64, 1 => 32x32, 2 => 16x16, 3 => 8x8, 4 => 4x4
#define MAX_INTER_SEARCH_DEPTH 3 #ifndef MAX_INTER_SEARCH_DEPTH
#define MIN_INTER_SEARCH_DEPTH 0 # define MAX_INTER_SEARCH_DEPTH 3
#endif
#ifndef MIN_INTER_SEARCH_DEPTH
# define MIN_INTER_SEARCH_DEPTH 0
#endif
#define MAX_INTRA_SEARCH_DEPTH 4 #ifndef MAX_INTRA_SEARCH_DEPTH
#define MIN_INTRA_SEARCH_DEPTH 1 # define MAX_INTRA_SEARCH_DEPTH 4
#endif
#ifndef MIN_INTRA_SEARCH_DEPTH
# define MIN_INTRA_SEARCH_DEPTH 0
#endif
// Maximum CU depth when descending from LCU level. // Maximum CU depth when descending from LCU level.
#define MAX_DEPTH 3 /*!< spec: log2_diff_max_min_luma_coding_block_size */ #define MAX_DEPTH 3 /*!< spec: log2_diff_max_min_luma_coding_block_size */

View file

@ -170,7 +170,7 @@ uint32_t rdo_cost_intra(encoder_state * const encoder_state, pixel *pred, pixel
} }
cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5);
// Full RDO // Full RDO
} else if(encoder->rdo == 2) { } else if(encoder->rdo >= 2) {
coeffcost = get_coeff_cost(encoder_state, temp_coeff, width, 0, luma_scan_mode); coeffcost = get_coeff_cost(encoder_state, temp_coeff, width, 0, luma_scan_mode);
cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5); cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5);

View file

@ -804,7 +804,7 @@ static int cu_rd_cost_luma(const encoder_state *const encoder_state,
} }
cost += (coeff_cost + (coeff_cost >> 1)) * (int32_t)(encoder_state->global->cur_lambda_cost + 0.5); cost += (coeff_cost + (coeff_cost >> 1)) * (int32_t)(encoder_state->global->cur_lambda_cost + 0.5);
} else if (rdo == 2) { } else if (rdo >= 2) {
int coeff_cost = 0; int coeff_cost = 0;
coefficient coeff_temp[32 * 32]; coefficient coeff_temp[32 * 32];
@ -890,7 +890,7 @@ static int cu_rd_cost_chroma(const encoder_state *const encoder_state,
} }
cost += (coeff_cost + (coeff_cost >> 1)) * (int32_t)(encoder_state->global->cur_lambda_cost + 0.5); cost += (coeff_cost + (coeff_cost >> 1)) * (int32_t)(encoder_state->global->cur_lambda_cost + 0.5);
} else if (rdo == 2) { } else if (rdo >= 2) {
coefficient coeff_temp[16 * 16]; coefficient coeff_temp[16 * 16];
int8_t scan_order = get_scan_order(pred_cu->type, pred_cu->intra[0].mode_chroma, depth); int8_t scan_order = get_scan_order(pred_cu->type, pred_cu->intra[0].mode_chroma, depth);
@ -1243,30 +1243,47 @@ static int search_cu_intra(encoder_state * const encoder_state,
// Get intra predictors // Get intra predictors
intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu); intra_get_dir_luma_predictor(x_px, y_px, candidate_modes, cur_cu, left_cu, above_cu);
if (depth > 0) {
// Build reconstructed block to use in prediction with extrapolated borders // Build reconstructed block to use in prediction with extrapolated borders
intra_build_reference_border(encoder_state->encoder_control, x_px, y_px, cu_width * 2 + 8, intra_build_reference_border(encoder_state->encoder_control, x_px, y_px, cu_width * 2 + 8,
rec_buffer, cu_width * 2 + 8, 0, rec_buffer, cu_width * 2 + 8, 0,
frame->width, frame->width,
frame->height, frame->height,
lcu); lcu);
}
// Find best intra mode for 2Nx2N. // Find best intra mode for 2Nx2N.
{ {
pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2); unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2);
int8_t modes[35]; int8_t modes[35];
uint32_t costs[35]; uint32_t costs[35];
int8_t number_of_modes = search_intra_rough(encoder_state, int8_t number_of_modes;
bool skip_rough_search = (depth == 0 || encoder_state->encoder_control->rdo >= 3);
if (!skip_rough_search) {
number_of_modes = search_intra_rough(encoder_state,
ref_pixels, LCU_WIDTH, ref_pixels, LCU_WIDTH,
cu_in_rec_buffer, cu_width * 2 + 8, cu_in_rec_buffer, cu_width * 2 + 8,
cu_width, candidate_modes, cu_width, candidate_modes,
modes, costs); modes, costs);
} else {
number_of_modes = 35;
for (int i = 0; i < number_of_modes; ++i) {
modes[i] = i;
costs[i] = MAX_INT;
}
}
// Set transform depth to current depth, meaning no transform splits. // Set transform depth to current depth, meaning no transform splits.
lcu_set_trdepth(lcu, x_px, y_px, depth, depth); lcu_set_trdepth(lcu, x_px, y_px, depth, depth);
if (encoder_state->encoder_control->rdo == 2) { if (encoder_state->encoder_control->rdo >= 2) {
int num_modes_to_check = MIN(number_of_modes, (cu_width <= 8) ? 8 : 3); int number_of_modes_to_search = (cu_width <= 8) ? 8 : 3;
if (encoder_state->encoder_control->rdo == 3) {
number_of_modes_to_search = 35;
}
int num_modes_to_check = MIN(number_of_modes, number_of_modes_to_search);
search_intra_rdo(encoder_state, search_intra_rdo(encoder_state,
lcu_px.x, lcu_px.y, depth, lcu_px.x, lcu_px.y, depth,
ref_pixels, LCU_WIDTH, ref_pixels, LCU_WIDTH,
@ -1316,6 +1333,8 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
return 0; return 0;
} }
lcu_t *lcu = &work_tree[depth];
cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
// Assign correct depth // Assign correct depth
cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth; cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth;
@ -1387,7 +1406,7 @@ static int search_cu(encoder_state * const encoder_state, int x, int y, int dept
} }
// Recursively split all the way to max search depth. // Recursively split all the way to max search depth.
if (depth < MAX_INTRA_SEARCH_DEPTH || depth < MAX_INTER_SEARCH_DEPTH) { if (depth < MAX_INTRA_SEARCH_DEPTH || (depth < MAX_INTER_SEARCH_DEPTH && encoder_state->global->slicetype != SLICE_I)) {
int half_cu = cu_width / 2; int half_cu = cu_width / 2;
// Using Cost = lambda * 9 to compensate on the price of the split // Using Cost = lambda * 9 to compensate on the price of the split
int split_cost = (int)(encoder_state->global->cur_lambda_cost + 0.5) * 9; int split_cost = (int)(encoder_state->global->cur_lambda_cost + 0.5) * 9;

View file

@ -540,7 +540,7 @@ int quantize_residual_trskip(
// Estimate bit cost of encoding the coeffs as ~(1.5 * abs_sum). // Estimate bit cost of encoding the coeffs as ~(1.5 * abs_sum).
unsigned abs_coeffs = coefficients_calc_abs(noskip.coeff, 4, 4); unsigned abs_coeffs = coefficients_calc_abs(noskip.coeff, 4, 4);
noskip.cost += (abs_coeffs + (abs_coeffs / 2)) * bit_cost; noskip.cost += (abs_coeffs + (abs_coeffs / 2)) * bit_cost;
} else if (encoder_state->encoder_control->rdo == 2) { } else if (encoder_state->encoder_control->rdo >= 2) {
noskip.cost += get_coeff_cost(encoder_state, noskip.coeff, 4, 0, scan_order) * bit_cost; noskip.cost += get_coeff_cost(encoder_state, noskip.coeff, 4, 0, scan_order) * bit_cost;
} }
@ -558,7 +558,7 @@ int quantize_residual_trskip(
// Estimate bit cost of encoding the coeffs as ~(1.5 * abs_sum + 1). // Estimate bit cost of encoding the coeffs as ~(1.5 * abs_sum + 1).
unsigned abs_coeffs = coefficients_calc_abs(skip.coeff, 4, 4); unsigned abs_coeffs = coefficients_calc_abs(skip.coeff, 4, 4);
skip.cost += (1 + abs_coeffs + (abs_coeffs / 2)) * bit_cost; skip.cost += (1 + abs_coeffs + (abs_coeffs / 2)) * bit_cost;
} else if (encoder_state->encoder_control->rdo == 2) { } else if (encoder_state->encoder_control->rdo >= 2) {
skip.cost += get_coeff_cost(encoder_state, skip.coeff, 4, 0, scan_order) * bit_cost; skip.cost += get_coeff_cost(encoder_state, skip.coeff, 4, 0, scan_order) * bit_cost;
} }
} }