Merge branch 'intra-NxN'

Conflicts:
	src/search.c
This commit is contained in:
Ari Koivula 2014-03-12 15:08:39 +02:00
commit 462b75f0f3
4 changed files with 70 additions and 138 deletions

View file

@@ -1871,7 +1871,7 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
if(cur_cu->type == CU_INTRA)
{
int pu_index = x_pu&1 + 2 * (y_pu&1);
int pu_index = PU_INDEX(x_pu, y_pu);
int luma_mode = cur_cu->intra[pu_index].mode;
scan_idx_luma = SCAN_DIAG;
@@ -1944,7 +1944,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
// Check for non-zero coeffs
cbf_y = 0;
memset(cur_cu->coeff_top_y, 0, MAX_PU_DEPTH + 4);
for (i = 0; i < width * width; i++) {
if (coeff_y[i] != 0) {
// Found one, we can break here
@@ -1955,8 +1954,12 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
cur_cu->coeff_top_y[d] = 1;
}
} else {
int pu_index = x_pu&1 + 2 * (y_pu&1);
int pu_index = (x_pu & 1) + 2 * (y_pu & 1);
int d;
cur_cu->coeff_top_y[depth + pu_index] = 1;
for (d = 0; d < depth; ++d) {
cur_cu->coeff_top_y[d] = 1;
}
}
break;
}
@@ -2014,7 +2017,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
}
transform_chroma(encoder, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block);
memset(cur_cu->coeff_top_u, 0, MAX_PU_DEPTH + 4);
for (i = 0; i < chroma_size; i++) {
if (coeff_u[i] != 0) {
int d;
@@ -2025,7 +2027,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8
}
}
transform_chroma(encoder, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block);
memset(cur_cu->coeff_top_v, 0, MAX_PU_DEPTH + 4);
for (i = 0; i < chroma_size; i++) {
if (coeff_v[i] != 0) {
int d;

View file

@@ -59,7 +59,7 @@ typedef int16_t coefficient;
#define MAX_INTER_SEARCH_DEPTH 3
#define MIN_INTER_SEARCH_DEPTH 0
#define MAX_INTRA_SEARCH_DEPTH 3 /*!< Max search depth -> min block size (3 == 8x8) */
#define MAX_INTRA_SEARCH_DEPTH 4 /*!< Max search depth -> min block size (3 == 8x8) */
#define MIN_INTRA_SEARCH_DEPTH 1 /*!< Min search depth -> max block size (0 == 64x64) */
@@ -99,6 +99,7 @@ typedef int16_t coefficient;
#define NO_SCU_IN_LCU(no_lcu) ((no_lcu) << MAX_DEPTH)
#define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val))
#define UNREFERENCED_PARAMETER(p) (p)
#define PU_INDEX(x_pu, y_pu) (((x_pu) % 2) + 2 * ((y_pu) % 2))
#define LOG2_LCU_WIDTH 6
// CU_TO_PIXEL = y * lcu_width * pic_width + x * lcu_width

View file

@@ -100,8 +100,6 @@ pixel intra_get_dc_pred(pixel *pic, uint16_t picwidth, uint8_t width)
return (pixel)((sum + width) / (width + width));
}
#define PU_INDEX(x_pu, y_pu) (((x_pu) % 2) + 2 * ((y_pu) % 2))
/**
* \brief Function for deriving intra luma predictions
* \param pic picture to use
@@ -120,24 +118,24 @@ int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
int8_t left_intra_dir = 1;
int8_t above_intra_dir = 1;
if (cur_cu->part_size == SIZE_NxN && (x & 7) == 1) {
if (x & 4) {
// If current CU is NxN and PU is on the right half, take mode from the
// left half of the same CU.
left_intra_dir = cur_cu->intra[PU_INDEX(0, y_cu<<1)].mode;
left_intra_dir = cur_cu->intra[PU_INDEX(0, y >> 2)].mode;
} else if (left_cu && left_cu->type == CU_INTRA) {
// Otherwise take the mode from the right side of the CU on the left.
left_intra_dir = left_cu->intra[PU_INDEX(1, y_cu<<1)].mode;
left_intra_dir = left_cu->intra[PU_INDEX(1, y >> 2)].mode;
}
if (cur_cu->part_size == SIZE_NxN && (y & 7) == 1) {
if (y & 4) {
// If current CU is NxN and PU is on the bottom half, take mode from the
// top half of the same CU.
above_intra_dir = cur_cu->intra[PU_INDEX(x_cu<<1, 0)].mode;
above_intra_dir = cur_cu->intra[PU_INDEX(x >> 2, 0)].mode;
} else if (above_cu && above_cu->type == CU_INTRA &&
(y_cu * (LCU_WIDTH>>MAX_DEPTH)) % LCU_WIDTH != 0)
{
// Otherwise take the mode from the bottom half of the CU above.
above_intra_dir = above_cu->intra[PU_INDEX(x_cu<<1, 1)].mode;
above_intra_dir = above_cu->intra[PU_INDEX(x >> 2, 1)].mode;
}
// If the predictions are the same, add new predictions
@@ -158,7 +156,7 @@ int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds,
// add planar mode if it's not yet present
if (left_intra_dir && above_intra_dir ) {
preds[2] = 0; // PLANAR_IDX;
} else { // else we add 26 or 1
} else { // Add DC mode if it's not present, otherwise 26.
preds[2] = (left_intra_dir+above_intra_dir)<2? 26 : 1;
}
}
@@ -778,69 +776,50 @@ void intra_recon_lcu(encoder_control* encoder, int x, int y, int depth, lcu_t *l
pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1];
int8_t width = LCU_WIDTH >> depth;
int8_t width_c = LCU_WIDTH >> (depth + 1);
int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2);
static vector2d offsets[4] = {{0,0},{1,0},{0,1},{1,1}};
int num_pu = (cur_cu->part_size == SIZE_2Nx2N ? 1 : 4);
int i;
if (cur_cu->part_size == SIZE_NxN) {
width = width_c;
}
int i = PU_INDEX(x >> 2, y >> 2);
cur_cu->intra[0].mode_chroma = 36; // TODO: Chroma intra prediction
// Reconstruct chroma
rec_shift = &rec[width_c * 2 + 8 + 1];
intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1,
pic_width/2, pic_height/2, lcu);
intra_recon(rec_shift,
width_c * 2 + 8,
width_c,
recbase_u,
rec_stride >> 1,
cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
1);
// Reconstruct chroma.
if (!(x & 4 || y & 4)) {
rec_shift = &rec[width_c * 2 + 8 + 1];
intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1,
pic_width/2, pic_height/2, lcu);
intra_recon(rec_shift,
width_c * 2 + 8,
width_c,
recbase_u,
rec_stride >> 1,
cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
1);
intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2,
pic_width/2, pic_height/2, lcu);
intra_recon(rec_shift,
width_c * 2 + 8,
width_c,
recbase_v,
rec_stride >> 1,
cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
2);
for (i = 0; i < num_pu; ++i) {
// Build reconstructed block to use in prediction with extrapolated borders
int x_off = offsets[i].x * width;
int y_off = offsets[i].y * width;
recbase_y = &lcu->rec.y[x_local + x_off + (y_local+y_off) * LCU_WIDTH];
rec_shift = &rec[width * 2 + 8 + 1];
intra_build_reference_border(x+x_off, y+y_off,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0,
pic_width, pic_height, lcu);
intra_recon(rec_shift, width * 2 + 8,
width, recbase_y, rec_stride, cur_cu->intra[i].mode, 0);
// Filter DC-prediction
if (cur_cu->intra[i].mode == 1 && width < 32) {
intra_dc_pred_filtering(rec_shift, width * 2 + 8, recbase_y,
rec_stride, width, width);
}
// Handle NxN mode by doing quant/transform and inverses for the next NxN block
if (cur_cu->part_size == SIZE_NxN) {
encode_transform_tree(encoder, x + x_off, y + y_off, depth+1, lcu);
}
intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2,
pic_width/2, pic_height/2, lcu);
intra_recon(rec_shift,
width_c * 2 + 8,
width_c,
recbase_v,
rec_stride >> 1,
cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode,
2);
}
// If we coded NxN block, fetch the coded block flags to this level
if (cur_cu->part_size == SIZE_NxN) {
cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cur_cu->coeff_top_y[depth+2] | cur_cu->coeff_top_y[depth+3] | cur_cu->coeff_top_y[depth+4];
cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1];
cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1];
return;
// Build reconstructed block to use in prediction with extrapolated borders
recbase_y = &lcu->rec.y[x_local + y_local * LCU_WIDTH];
rec_shift = &rec[width * 2 + 8 + 1];
intra_build_reference_border(x, y,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0,
pic_width, pic_height, lcu);
intra_recon(rec_shift, width * 2 + 8,
width, recbase_y, rec_stride, cur_cu->intra[i].mode, 0);
// Filter DC-prediction
if (cur_cu->intra[i].mode == 1 && width < 32) {
intra_dc_pred_filtering(rec_shift, width * 2 + 8, recbase_y,
rec_stride, width, width);
}
encode_transform_tree(encoder, x, y, depth, lcu);

View file

@@ -477,7 +477,7 @@ static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lc
/**
* Copy all non-reference CU data from depth+1 to depth.
*/
static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH])
static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
{
// Copy non-reference CUs.
{
@@ -530,14 +530,14 @@ static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX
/**
* Copy all non-reference CU data from depth to depth+1..MAX_PU_DEPTH.
*/
static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH])
static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
{
// TODO: clean up to remove the copy pasta
const int width_px = LCU_WIDTH >> depth;
int d;
for (d = depth + 1; d < MAX_PU_DEPTH; ++d) {
for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) {
const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH;
const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH;
const int width_cu = width_px >> MAX_DEPTH;
@@ -553,7 +553,7 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M
}
// Copy reconstructed pixels.
for (d = depth + 1; d < MAX_PU_DEPTH; ++d) {
for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) {
const int x = SUB_SCU(x_px);
const int y = SUB_SCU(y_px);
@@ -584,11 +584,11 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr
// NxN can only be applied to a single CU at a time.
if (part_mode == SIZE_NxN) {
cu_info *cu = &lcu_cu[x_cu + y_cu * LCU_T_CU_WIDTH];
cu->depth = depth;
cu->depth = MAX_DEPTH;
cu->type = CU_INTRA;
// It is assumed that cu->intra[].mode's are already set.
cu->part_size = part_mode;
cu->tr_depth = depth + 1;
cu->tr_depth = depth;
return;
}
@@ -704,69 +704,18 @@ static int search_cu_intra(encoder_control *encoder,
uint32_t cost = -1;
int16_t mode = -1;
pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH];
unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2);
mode = intra_prediction(ref_pixels, LCU_WIDTH,
cu_in_rec_buffer, cu_width * 2 + 8, cu_width,
pred_buffer, cu_width,
&cost, candidate_modes, &bitcost);
cur_cu->intra[0].mode = (int8_t)mode;
cur_cu->intra[0].cost = cost;
cur_cu->part_size = SIZE_2Nx2N;
cur_cu->intra[pu_index].mode = (int8_t)mode;
cur_cu->intra[pu_index].cost = cost;
}
// Do search for NxN split.
if (0 && depth == MAX_DEPTH) { //TODO: reactivate NxN when _something_ is done to make it better
static const vector2d offsets[4] = {{0,0},{4,0},{0,4},{4,4}};
const int nxn_width = 4;
cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost = bitcost;
return cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].cost;
// Save 2Nx2N information to compare with NxN.
int nn_cost = cur_cu->intra[0].cost;
int8_t nn_mode = cur_cu->intra[0].mode;
int cost = (int)(g_cur_lambda_cost * 4.5); // +0.5 to round to nearest
int nxn_i;
cu_in_rec_buffer = &rec_buffer[nxn_width * 2 + 8 + 1];
bitcost_nxn = 0;
for (nxn_i = 0; nxn_i < 4; ++nxn_i) {
const vector2d nxn_px = { x_px + offsets[nxn_i].x,
y_px + offsets[nxn_i].y };
intra_get_dir_luma_predictor(nxn_px.x, nxn_px.y, candidate_modes,
cur_cu, left_cu, above_cu);
intra_build_reference_border(nxn_px.x, nxn_px.y, nxn_width * 2 + 8,
rec_buffer, nxn_width * 2 + 8, 0,
encoder->in.cur_pic->width, encoder->in.cur_pic->height,
lcu);
{
uint32_t nxn_cost = -1;
int16_t nxn_mode = -1;
uint32_t bitcost_temp = 0;
pixel *ref_pixels = &lcu->ref.y[nxn_px.x + nxn_px.y * LCU_WIDTH];
nxn_mode = intra_prediction(ref_pixels, encoder->in.width,
cu_in_rec_buffer, nxn_width * 2 + 8, nxn_width,
pred_buffer, nxn_width,
&nxn_cost, candidate_modes, &bitcost_temp);
cur_cu->intra[nxn_i].mode = (int8_t)nxn_mode;
cost += nxn_cost;
bitcost_nxn += bitcost_temp;
}
}
// Choose between 2Nx2N and NxN.
if (nn_cost <= cost) {
cur_cu->intra[0].cost = nn_cost;
cur_cu->intra[0].mode = nn_mode;
} else {
cur_cu->intra[0].cost = cost;
cur_cu->part_size = SIZE_NxN;
bitcost = bitcost_nxn;
}
}
cur_cu->intra[0].bitcost = bitcost;
return cur_cu->intra[0].cost;
}
/**
@@ -841,8 +790,10 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH];
// Assign correct depth
cur_cu->depth = depth; cur_cu->tr_depth = depth ? depth : 1;
cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N;
cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth;
cur_cu->tr_depth = depth > 0 ? depth : 1;
cur_cu->type = CU_NOTSET;
cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N;
// If the CU is completely inside the frame at this depth, search for
// prediction modes at this depth.
if (x + cu_width <= encoder->in.width &&
@@ -874,7 +825,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo
// Reconstruct best mode because we need the reconstructed pixels for
// mode search of adjacent CUs.
if (cur_cu->type == CU_INTRA) {
lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[0].mode, cur_cu->part_size);
lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size);
intra_recon_lcu(encoder, x, y, depth,&work_tree[depth],encoder->in.cur_pic->width,encoder->in.cur_pic->height);
} else if (cur_cu->type == CU_INTER) {
inter_recon_lcu(encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]);
@@ -1117,11 +1068,11 @@ static void copy_lcu_to_cu_data(encoder_control *encoder, int x_px, int y_px, co
*/
static void search_lcu(encoder_control *encoder, int x, int y)
{
lcu_t work_tree[MAX_PU_DEPTH];
lcu_t work_tree[MAX_PU_DEPTH + 1];
int depth;
memset(work_tree, 0, sizeof(lcu_t)*MAX_PU_DEPTH);
// Initialize work tree.
for (depth = 0; depth < MAX_PU_DEPTH; ++depth) {
for (depth = 0; depth <= MAX_PU_DEPTH; ++depth) {
memset(&work_tree[depth], 0, sizeof(work_tree[depth]));
init_lcu_t(encoder, x, y, &work_tree[depth]);
}