diff --git a/src/encoder.c b/src/encoder.c index d971c824..fe16f596 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1871,7 +1871,7 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8 if(cur_cu->type == CU_INTRA) { - int pu_index = x_pu&1 + 2 * (y_pu&1); + int pu_index = PU_INDEX(x_pu, y_pu); int luma_mode = cur_cu->intra[pu_index].mode; scan_idx_luma = SCAN_DIAG; @@ -1944,7 +1944,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8 // Check for non-zero coeffs cbf_y = 0; - memset(cur_cu->coeff_top_y, 0, MAX_PU_DEPTH + 4); for (i = 0; i < width * width; i++) { if (coeff_y[i] != 0) { // Found one, we can break here @@ -1955,8 +1954,12 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8 cur_cu->coeff_top_y[d] = 1; } } else { - int pu_index = x_pu&1 + 2 * (y_pu&1); + int pu_index = (x_pu & 1) + 2 * (y_pu & 1); + int d; cur_cu->coeff_top_y[depth + pu_index] = 1; + for (d = 0; d < depth; ++d) { + cur_cu->coeff_top_y[d] = 1; + } } break; } @@ -2014,7 +2017,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8 } transform_chroma(encoder, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block); - memset(cur_cu->coeff_top_u, 0, MAX_PU_DEPTH + 4); for (i = 0; i < chroma_size; i++) { if (coeff_u[i] != 0) { int d; @@ -2025,7 +2027,6 @@ void encode_transform_tree(encoder_control* encoder, int32_t x, int32_t y, uint8 } } transform_chroma(encoder, cur_cu, chroma_depth, base_v, pred_v, coeff_v, scan_idx_chroma, pre_quant_coeff, block); - memset(cur_cu->coeff_top_v, 0, MAX_PU_DEPTH + 4); for (i = 0; i < chroma_size; i++) { if (coeff_v[i] != 0) { int d; diff --git a/src/global.h b/src/global.h index 4675ade1..a9a47d58 100644 --- a/src/global.h +++ b/src/global.h @@ -59,7 +59,7 @@ typedef int16_t coefficient; #define MAX_INTER_SEARCH_DEPTH 3 #define MIN_INTER_SEARCH_DEPTH 0 -#define MAX_INTRA_SEARCH_DEPTH 3 /*!< Max search depth -> min block size (3 == 8x8) */ +#define MAX_INTRA_SEARCH_DEPTH 4 /*!< Max search depth -> min block size (3 == 8x8) */ #define MIN_INTRA_SEARCH_DEPTH 1 /*!< Min search depth -> max block size (0 == 64x64) */ @@ -99,6 +99,7 @@ typedef int16_t coefficient; #define NO_SCU_IN_LCU(no_lcu) ((no_lcu) << MAX_DEPTH) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) #define UNREFERENCED_PARAMETER(p) (p) +#define PU_INDEX(x_pu, y_pu) (((x_pu) % 2) + 2 * ((y_pu) % 2)) #define LOG2_LCU_WIDTH 6 // CU_TO_PIXEL = y * lcu_width * pic_width + x * lcu_width diff --git a/src/intra.c b/src/intra.c index 8b68cd07..42f589ad 100644 --- a/src/intra.c +++ b/src/intra.c @@ -100,8 +100,6 @@ pixel intra_get_dc_pred(pixel *pic, uint16_t picwidth, uint8_t width) return (pixel)((sum + width) / (width + width)); } -#define PU_INDEX(x_pu, y_pu) (((x_pu) % 2) + 2 * ((y_pu) % 2)) - /** * \brief Function for deriving intra luma predictions * \param pic picture to use @@ -120,24 +118,24 @@ int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds, int8_t left_intra_dir = 1; int8_t above_intra_dir = 1; - if (cur_cu->part_size == SIZE_NxN && (x & 7) == 1) { + if (x & 4) { // If current CU is NxN and PU is on the right half, take mode from the // left half of the same CU. - left_intra_dir = cur_cu->intra[PU_INDEX(0, y_cu<<1)].mode; + left_intra_dir = cur_cu->intra[PU_INDEX(0, y >> 2)].mode; } else if (left_cu && left_cu->type == CU_INTRA) { // Otherwise take the mode from the right side of the CU on the left. - left_intra_dir = left_cu->intra[PU_INDEX(1, y_cu<<1)].mode; + left_intra_dir = left_cu->intra[PU_INDEX(1, y >> 2)].mode; } - if (cur_cu->part_size == SIZE_NxN && (y & 7) == 1) { + if (y & 4) { // If current CU is NxN and PU is on the bottom half, take mode from the // top half of the same CU. - above_intra_dir = cur_cu->intra[PU_INDEX(x_cu<<1, 0)].mode; + above_intra_dir = cur_cu->intra[PU_INDEX(x >> 2, 0)].mode; } else if (above_cu && above_cu->type == CU_INTRA && (y_cu * (LCU_WIDTH>>MAX_DEPTH)) % LCU_WIDTH != 0) { // Otherwise take the mode from the bottom half of the CU above. - above_intra_dir = above_cu->intra[PU_INDEX(x_cu<<1, 1)].mode; + above_intra_dir = above_cu->intra[PU_INDEX(x >> 2, 1)].mode; } // If the predictions are the same, add new predictions @@ -158,7 +156,7 @@ int8_t intra_get_dir_luma_predictor(uint32_t x, uint32_t y, int8_t* preds, // add planar mode if it's not yet present if (left_intra_dir && above_intra_dir ) { preds[2] = 0; // PLANAR_IDX; - } else { // else we add 26 or 1 + } else { // Add DC mode if it's not present, otherwise 26. preds[2] = (left_intra_dir+above_intra_dir)<2? 26 : 1; } } @@ -778,69 +776,50 @@ void intra_recon_lcu(encoder_control* encoder, int x, int y, int depth, lcu_t *l pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; int8_t width = LCU_WIDTH >> depth; - int8_t width_c = LCU_WIDTH >> (depth + 1); + int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); static vector2d offsets[4] = {{0,0},{1,0},{0,1},{1,1}}; int num_pu = (cur_cu->part_size == SIZE_2Nx2N ? 1 : 4); - int i; - - if (cur_cu->part_size == SIZE_NxN) { - width = width_c; - } + int i = PU_INDEX(x >> 2, y >> 2); cur_cu->intra[0].mode_chroma = 36; // TODO: Chroma intra prediction - // Reconstruct chroma - rec_shift = &rec[width_c * 2 + 8 + 1]; - intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1, - pic_width/2, pic_height/2, lcu); - intra_recon(rec_shift, - width_c * 2 + 8, - width_c, - recbase_u, - rec_stride >> 1, - cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode, - 1); + // Reconstruct chroma. + if (!(x & 4 || y & 4)) { + rec_shift = &rec[width_c * 2 + 8 + 1]; + intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 1, + pic_width/2, pic_height/2, lcu); + intra_recon(rec_shift, + width_c * 2 + 8, + width_c, + recbase_u, + rec_stride >> 1, + cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode, + 1); - intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2, - pic_width/2, pic_height/2, lcu); - intra_recon(rec_shift, - width_c * 2 + 8, - width_c, - recbase_v, - rec_stride >> 1, - cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode, - 2); - - for (i = 0; i < num_pu; ++i) { - // Build reconstructed block to use in prediction with extrapolated borders - int x_off = offsets[i].x * width; - int y_off = offsets[i].y * width; - recbase_y = &lcu->rec.y[x_local + x_off + (y_local+y_off) * LCU_WIDTH]; - - rec_shift = &rec[width * 2 + 8 + 1]; - intra_build_reference_border(x+x_off, y+y_off,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0, - pic_width, pic_height, lcu); - intra_recon(rec_shift, width * 2 + 8, - width, recbase_y, rec_stride, cur_cu->intra[i].mode, 0); - - // Filter DC-prediction - if (cur_cu->intra[i].mode == 1 && width < 32) { - intra_dc_pred_filtering(rec_shift, width * 2 + 8, recbase_y, - rec_stride, width, width); - } - - // Handle NxN mode by doing quant/transform and inverses for the next NxN block - if (cur_cu->part_size == SIZE_NxN) { - encode_transform_tree(encoder, x + x_off, y + y_off, depth+1, lcu); - } + intra_build_reference_border(x, y,(int16_t)width_c * 2 + 8, rec, (int16_t)width_c * 2 + 8, 2, + pic_width/2, pic_height/2, lcu); + intra_recon(rec_shift, + width_c * 2 + 8, + width_c, + recbase_v, + rec_stride >> 1, + cur_cu->intra[0].mode_chroma != 36 ? cur_cu->intra[0].mode_chroma : cur_cu->intra[0].mode, + 2); } - // If we coded NxN block, fetch the coded block flags to this level - if (cur_cu->part_size == SIZE_NxN) { - cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cur_cu->coeff_top_y[depth+2] | cur_cu->coeff_top_y[depth+3] | cur_cu->coeff_top_y[depth+4]; - cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1]; - cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1]; - return; + // Build reconstructed block to use in prediction with extrapolated borders + recbase_y = &lcu->rec.y[x_local + y_local * LCU_WIDTH]; + + rec_shift = &rec[width * 2 + 8 + 1]; + intra_build_reference_border(x, y,(int16_t)width * 2 + 8, rec, (int16_t)width * 2 + 8, 0, + pic_width, pic_height, lcu); + intra_recon(rec_shift, width * 2 + 8, + width, recbase_y, rec_stride, cur_cu->intra[i].mode, 0); + + // Filter DC-prediction + if (cur_cu->intra[i].mode == 1 && width < 32) { + intra_dc_pred_filtering(rec_shift, width * 2 + 8, recbase_y, + rec_stride, width, width); } encode_transform_tree(encoder, x, y, depth, lcu); diff --git a/src/search.c b/src/search.c index 01f5db02..d25b0566 100644 --- a/src/search.c +++ b/src/search.c @@ -477,7 +477,7 @@ static int search_cu_inter(encoder_control *encoder, int x, int y, int depth, lc /** * Copy all non-reference CU data from depth+1 to depth. */ -static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH]) +static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1]) { // Copy non-reference CUs. { @@ -530,14 +530,14 @@ static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX /** * Copy all non-reference CU data from depth to depth+1..MAX_PU_DEPTH. */ -static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH]) +static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1]) { // TODO: clean up to remove the copy pasta const int width_px = LCU_WIDTH >> depth; int d; - for (d = depth + 1; d < MAX_PU_DEPTH; ++d) { + for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) { const int x_cu = SUB_SCU(x_px) >> MAX_DEPTH; const int y_cu = SUB_SCU(y_px) >> MAX_DEPTH; const int width_cu = width_px >> MAX_DEPTH; @@ -553,7 +553,7 @@ static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[M } // Copy reconstructed pixels. - for (d = depth + 1; d < MAX_PU_DEPTH; ++d) { + for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) { const int x = SUB_SCU(x_px); const int y = SUB_SCU(y_px); @@ -584,11 +584,11 @@ static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pr // NxN can only be applied to a single CU at a time. if (part_mode == SIZE_NxN) { cu_info *cu = &lcu_cu[x_cu + y_cu * LCU_T_CU_WIDTH]; - cu->depth = depth; + cu->depth = MAX_DEPTH; cu->type = CU_INTRA; // It is assumed that cu->intra[].mode's are already set. cu->part_size = part_mode; - cu->tr_depth = depth + 1; + cu->tr_depth = depth; return; } @@ -704,69 +704,18 @@ static int search_cu_intra(encoder_control *encoder, uint32_t cost = -1; int16_t mode = -1; pixel *ref_pixels = &lcu->ref.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; + unsigned pu_index = PU_INDEX(x_px >> 2, y_px >> 2); mode = intra_prediction(ref_pixels, LCU_WIDTH, cu_in_rec_buffer, cu_width * 2 + 8, cu_width, pred_buffer, cu_width, &cost, candidate_modes, &bitcost); - cur_cu->intra[0].mode = (int8_t)mode; - cur_cu->intra[0].cost = cost; - cur_cu->part_size = SIZE_2Nx2N; + cur_cu->intra[pu_index].mode = (int8_t)mode; + cur_cu->intra[pu_index].cost = cost; } - // Do search for NxN split. - if (0 && depth == MAX_DEPTH) { //TODO: reactivate NxN when _something_ is done to make it better - static const vector2d offsets[4] = {{0,0},{4,0},{0,4},{4,4}}; - const int nxn_width = 4; + cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].bitcost = bitcost; + return cur_cu->intra[PU_INDEX(x_px >> 2, y_px >> 2)].cost; - // Save 2Nx2N information to compare with NxN. - int nn_cost = cur_cu->intra[0].cost; - int8_t nn_mode = cur_cu->intra[0].mode; - int cost = (int)(g_cur_lambda_cost * 4.5); // +0.5 to round to nearest - - int nxn_i; - - cu_in_rec_buffer = &rec_buffer[nxn_width * 2 + 8 + 1]; - - bitcost_nxn = 0; - - for (nxn_i = 0; nxn_i < 4; ++nxn_i) { - const vector2d nxn_px = { x_px + offsets[nxn_i].x, - y_px + offsets[nxn_i].y }; - intra_get_dir_luma_predictor(nxn_px.x, nxn_px.y, candidate_modes, - cur_cu, left_cu, above_cu); - intra_build_reference_border(nxn_px.x, nxn_px.y, nxn_width * 2 + 8, - rec_buffer, nxn_width * 2 + 8, 0, - encoder->in.cur_pic->width, encoder->in.cur_pic->height, - lcu); - { - uint32_t nxn_cost = -1; - int16_t nxn_mode = -1; - uint32_t bitcost_temp = 0; - pixel *ref_pixels = &lcu->ref.y[nxn_px.x + nxn_px.y * LCU_WIDTH]; - nxn_mode = intra_prediction(ref_pixels, encoder->in.width, - cu_in_rec_buffer, nxn_width * 2 + 8, nxn_width, - pred_buffer, nxn_width, - &nxn_cost, candidate_modes, &bitcost_temp); - cur_cu->intra[nxn_i].mode = (int8_t)nxn_mode; - cost += nxn_cost; - bitcost_nxn += bitcost_temp; - } - } - - // Choose between 2Nx2N and NxN. - if (nn_cost <= cost) { - cur_cu->intra[0].cost = nn_cost; - cur_cu->intra[0].mode = nn_mode; - } else { - cur_cu->intra[0].cost = cost; - cur_cu->part_size = SIZE_NxN; - bitcost = bitcost_nxn; - } - } - - cur_cu->intra[0].bitcost = bitcost; - - return cur_cu->intra[0].cost; } /** @@ -841,8 +790,10 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo cur_cu = &(&work_tree[depth])->cu[LCU_CU_OFFSET+(x_local>>3) + (y_local>>3)*LCU_T_CU_WIDTH]; // Assign correct depth - cur_cu->depth = depth; cur_cu->tr_depth = depth ? depth : 1; - cur_cu->type = CU_NOTSET; cur_cu->part_size = SIZE_2Nx2N; + cur_cu->depth = depth > MAX_DEPTH ? MAX_DEPTH : depth; + cur_cu->tr_depth = depth > 0 ? depth : 1; + cur_cu->type = CU_NOTSET; + cur_cu->part_size = depth > MAX_DEPTH ? SIZE_NxN : SIZE_2Nx2N; // If the CU is completely inside the frame at this depth, search for // prediction modes at this depth. if (x + cu_width <= encoder->in.width && @@ -874,7 +825,7 @@ static int search_cu(encoder_control *encoder, int x, int y, int depth, lcu_t wo // Reconstruct best mode because we need the reconstructed pixels for // mode search of adjacent CUs. if (cur_cu->type == CU_INTRA) { - lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[0].mode, cur_cu->part_size); + lcu_set_intra_mode(&work_tree[depth], x, y, depth, cur_cu->intra[PU_INDEX(x >> 2, y >> 2)].mode, cur_cu->part_size); intra_recon_lcu(encoder, x, y, depth,&work_tree[depth],encoder->in.cur_pic->width,encoder->in.cur_pic->height); } else if (cur_cu->type == CU_INTER) { inter_recon_lcu(encoder->ref->pics[cur_cu->inter.mv_ref], x, y, LCU_WIDTH>>depth, cur_cu->inter.mv, &work_tree[depth]); @@ -1117,11 +1068,11 @@ static void copy_lcu_to_cu_data(encoder_control *encoder, int x_px, int y_px, co */ static void search_lcu(encoder_control *encoder, int x, int y) { - lcu_t work_tree[MAX_PU_DEPTH]; + lcu_t work_tree[MAX_PU_DEPTH + 1]; int depth; - memset(work_tree, 0, sizeof(lcu_t)*MAX_PU_DEPTH); // Initialize work tree. - for (depth = 0; depth < MAX_PU_DEPTH; ++depth) { + for (depth = 0; depth <= MAX_PU_DEPTH; ++depth) { + memset(&work_tree[depth], 0, sizeof(work_tree[depth])); init_lcu_t(encoder, x, y, &work_tree[depth]); }