diff --git a/src/encoder.c b/src/encoder.c index a6551d59..53a5c1d7 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -2812,64 +2812,6 @@ void encode_coding_tree(encoder_state * const encoder_state, /* end coding_unit */ } -static void transform_chroma(encoder_state * const encoder_state, cu_info *cur_cu, - int depth, const pixel *base_u, pixel *pred_u, - coefficient *coeff_u, int8_t scan_idx_chroma, - coefficient *pre_quant_coeff, coefficient *block) -{ - const encoder_control * const encoder = encoder_state->encoder_control; - int base_stride = LCU_WIDTH; - int pred_stride = LCU_WIDTH; - - int8_t width_c = LCU_WIDTH >> (depth + 1); - - int i = 0; - unsigned ac_sum = 0; - - int y, x; - - for (y = 0; y < width_c; y++) { - for (x = 0; x < width_c; x++) { - block[i] = ((int16_t)base_u[x + y * (base_stride >> 1)]) - - pred_u[x + y * (pred_stride >> 1)]; - i++; - } - } - - transform2d(encoder, block, pre_quant_coeff, width_c, 65535); - if (encoder->rdoq_enable) { - rdoq(encoder_state, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, - scan_idx_chroma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth); - } else { - quant(encoder_state, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, - scan_idx_chroma, cur_cu->type); - } -} - - -static void reconstruct_chroma(const encoder_state * const encoder_state, cu_info *cur_cu, - int depth, coefficient *coeff_u, - pixel *recbase_u, pixel *pred_u, int color_type, - coefficient *pre_quant_coeff, coefficient *block) -{ - int8_t width_c = LCU_WIDTH >> (depth + 1); - - int i, y, x; - - dequant(encoder_state, coeff_u, pre_quant_coeff, width_c, width_c, (int8_t)color_type, cur_cu->type); - itransform2d(encoder_state->encoder_control, block, pre_quant_coeff, width_c, 65535); - - i = 0; - - for (y = 0; y < width_c; y++) { - for (x = 0; x < width_c; x++) { - int16_t val = block[i++] + pred_u[x + y * LCU_WIDTH_C]; - //TODO: support 10+bits - recbase_u[x + y * LCU_WIDTH_C] = (uint8_t)CLIP(0, 255, val); - } - } -} - coeff_scan_order_t get_scan_order(int8_t cu_type, int intra_mode, int depth) { @@ -2888,350 +2830,6 @@ coeff_scan_order_t get_scan_order(int8_t cu_type, int intra_mode, int depth) } -int quantize_residual_chroma(encoder_state * const encoder_state, - cu_info *cur_cu, int luma_depth, color_index color, - const pixel *base_u, pixel *recbase_u, coefficient *orig_coeff_u) -{ - pixel pred_u[LCU_WIDTH*LCU_WIDTH>>2]; - coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; - - int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; - int16_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; - - const int chroma_depth = (luma_depth == MAX_PU_DEPTH ? luma_depth - 1 : luma_depth); - const int8_t width_c = LCU_WIDTH >> (chroma_depth + 1); - - const coeff_scan_order_t scan_idx_chroma = get_scan_order(cur_cu->type, cur_cu->intra[0].mode_chroma, luma_depth); - - int has_coeffs = 0; - - { - int y, x; - for (y = 0; y < width_c; y++) { - for (x = 0; x < width_c; x++) { - pred_u[x + y * LCU_WIDTH_C] = recbase_u[x + y * LCU_WIDTH_C]; - } - } - } - - transform_chroma(encoder_state, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block); - { - int i; - for (i = 0; i < width_c * width_c; i++) { - if (coeff_u[i] != 0) { - has_coeffs = 1; - break; - } - } - } - // Copy coefficients, even if they are all zeroes. - { - int i = 0; - int y, x; - for (y = 0; y < width_c; y++) { - for (x = 0; x < width_c; x++) { - orig_coeff_u[x + y * LCU_WIDTH_C] = coeff_u[i]; - i++; - } - } - } - if (has_coeffs) { - reconstruct_chroma(encoder_state, cur_cu, chroma_depth, - coeff_u, recbase_u, pred_u, (color == COLOR_U ? 2 : 3), - pre_quant_coeff, block); - } - - return has_coeffs; -} - - -void decide_trskip(encoder_state * const encoder_state, cu_info *cur_cu, int8_t depth, int pu_index, - int16_t *residual, uint32_t *ac_sum) -{ - const encoder_control * const encoder = encoder_state->encoder_control; - const coeff_scan_order_t scan_idx_luma = get_scan_order(cur_cu->type, cur_cu->intra[pu_index].mode, depth); - const int8_t width = LCU_WIDTH >> depth; - - //int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; - int16_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; - - int i; - coefficient temp_block[16]; coefficient temp_coeff[16]; - coefficient temp_block2[16]; coefficient temp_coeff2[16]; - uint32_t cost = 0,cost2 = 0; - uint32_t coeffcost = 0,coeffcost2 = 0; - - // Test for transform skip - transformskip(encoder, residual,pre_quant_coeff, width); - if (encoder->rdoq_enable) { - rdoq(encoder_state, pre_quant_coeff, temp_coeff, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type,0); - } else { - quant(encoder_state, pre_quant_coeff, temp_coeff, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type); - } - dequant(encoder_state, temp_coeff, pre_quant_coeff, 4, 4, 0, cur_cu->type); - itransformskip(encoder, temp_block,pre_quant_coeff,width); - - transform2d(encoder, residual,pre_quant_coeff,width,0); - if (encoder->rdoq_enable) { - rdoq(encoder_state, pre_quant_coeff, temp_coeff2, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type,0); - } else { - quant(encoder_state, pre_quant_coeff, temp_coeff2, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type); - } - dequant(encoder_state, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type); - itransform2d(encoder, temp_block2,pre_quant_coeff,width,0); - - // SSD between original and reconstructed - for (i = 0; i < 16; i++) { - int diff = temp_block[i] - residual[i]; - cost += diff*diff; - - diff = temp_block2[i] - residual[i]; - cost2 += diff*diff; - } - - // Simple RDO - if(encoder->rdo == 1) { - // SSD between reconstruction and original + sum of coeffs - for (i = 0; i < 16; i++) { - coeffcost += abs((int)temp_coeff[i]); - coeffcost2 += abs((int)temp_coeff2[i]); - } - cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); - cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); - // Full RDO - } else if(encoder->rdo == 2) { - coeffcost = get_coeff_cost(encoder_state, temp_coeff, 4, 0, scan_idx_luma); - coeffcost2 = get_coeff_cost(encoder_state, temp_coeff2, 4, 0, scan_idx_luma); - - cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5); - cost2 += coeffcost2*((int)encoder_state->global->cur_lambda_cost+0.5); - } - - cur_cu->intra[pu_index].tr_skip = (cost < cost2); -} - - -/** - * This function calculates the residual coefficients for a region of the LCU - * (defined by x, y and depth) and updates the reconstruction with the - * kvantized residual. - * - * It handles recursion for transform split, but that is currently only work - * for 64x64 inter to 32x32 transform blocks. - * - * Inputs are: - * - lcu->rec pixels after prediction for the area - * - lcu->ref reference pixels for the area - * - lcu->cu for the area - * - * Outputs are: - * - lcu->rec reconstruction after quantized residual - * - lcu->coeff quantized coefficients for the area - * - lcu->cbf coded block flags for the area - * - lcu->cu.intra[].tr_skip for the area - */ -void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32_t y, const uint8_t depth, lcu_t* lcu) -{ - const encoder_control * const encoder = encoder_state->encoder_control; - // we have 64>>depth transform size - const vector2d lcu_px = {x & 0x3f, y & 0x3f}; - const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4); - cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x>>3) + (lcu_px.y>>3)*LCU_T_CU_WIDTH]; - const int8_t width = LCU_WIDTH>>depth; - - int i; - - // Tell clang-analyzer what is up. For some reason it can't figure out from - // asserting just depth. - assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64); - - // Split transform and increase depth - if (depth == 0 || cur_cu->tr_depth > depth) { - int offset = width / 2; - encode_transform_tree(encoder_state, x, y, depth+1, lcu); - encode_transform_tree(encoder_state, x + offset, y, depth+1, lcu); - encode_transform_tree(encoder_state, x, y + offset, depth+1, lcu); - encode_transform_tree(encoder_state, x + offset, y + offset, depth+1, lcu); - - // Propagate coded block flags from child CUs to parent CU. - if (depth < MAX_DEPTH) { - cu_info *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset)>>3) + (lcu_px.y>>3) *LCU_T_CU_WIDTH]; - cu_info *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x>>3) + ((lcu_px.y+offset)>>3)*LCU_T_CU_WIDTH]; - cu_info *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset)>>3) + ((lcu_px.y+offset)>>3)*LCU_T_CU_WIDTH]; - if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) { - cbf_set(&cur_cu->cbf.y, depth); - } - if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) { - cbf_set(&cur_cu->cbf.u, depth); - } - if (cbf_is_set(cu_a->cbf.v, depth+1) || cbf_is_set(cu_b->cbf.v, depth+1) || cbf_is_set(cu_c->cbf.v, depth+1)) { - cbf_set(&cur_cu->cbf.v, depth); - } - } - - return; - } - - { - const int luma_offset = lcu_px.x + lcu_px.y * LCU_WIDTH; - - // Pointers to current location in arrays with prediction. - pixel *recbase_y = &lcu->rec.y[luma_offset]; - // Pointers to current location in arrays with reference. - const pixel *base_y = &lcu->ref.y[luma_offset]; - // Pointers to current location in arrays with kvantized coefficients. - coefficient *orig_coeff_y = &lcu->coeff.y[luma_offset]; - - // Temporary buffers. Not really used for much. Possibly unnecessary. - pixel pred_y[LCU_WIDTH*LCU_WIDTH]; - // Buffers for coefficients. - coefficient coeff_y[LCU_WIDTH*LCU_WIDTH]; - - // Temporary buffers for kvantization and transformation. - int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; - int16_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; - - uint32_t ac_sum = 0; - uint8_t scan_idx_luma = SCAN_DIAG; - - #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD - uint32_t residual_sum = 0; - #endif - - // Clear coded block flag structures for depths lower than current depth. - // This should ensure that the CBF data doesn't get corrupted if this function - // is called more than once. - cbf_clear(&cur_cu->cbf.y, depth + pu_index); - if (pu_index == 0) { - cbf_clear(&cur_cu->cbf.u, depth); - cbf_clear(&cur_cu->cbf.v, depth); - } - - // Pick coeff scan mode according to intra prediction mode. - if (cur_cu->type == CU_INTRA) { - int chroma_mode = cur_cu->intra[0].mode_chroma; - if (chroma_mode == 36) { - chroma_mode = cur_cu->intra[pu_index].mode; - } - scan_idx_luma = get_scan_order(cur_cu->type, cur_cu->intra[pu_index].mode, depth); - } - - // Copy Luma and Chroma to the pred-block - for(y = 0; y < width; y++) { - for(x = 0; x < width; x++) { - pred_y[x+y*LCU_WIDTH]=recbase_y[x+y*LCU_WIDTH]; - } - } - - // Get residual by subtracting prediction - i = 0; - ac_sum = 0; - - for (y = 0; y < width; y++) { - for (x = 0; x < width; x++) { - block[i] = ((int16_t)base_y[x + y * LCU_WIDTH]) - - pred_y[x + y * LCU_WIDTH]; - #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD - residual_sum += block[i]; - #endif - i++; - } - } - #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD - #define RESIDUAL_THRESHOLD 500 - if(residual_sum < RESIDUAL_THRESHOLD/(width)) { - memset(block, 0, sizeof(int16_t)*(width)*(width)); - } - #endif - - // For 4x4 blocks, check for transform skip - if(width == 4 && encoder->trskip_enable) { - decide_trskip(encoder_state, cur_cu, depth, pu_index, block, &ac_sum); - } - - // Transform and quant residual to coeffs - if(width == 4 && cur_cu->intra[pu_index].tr_skip) { - transformskip(encoder, block,pre_quant_coeff,width); - } else { - transform2d(encoder, block,pre_quant_coeff,width,0); - } - - if (encoder->rdoq_enable) { - rdoq(encoder_state, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, - scan_idx_luma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth); - } else { - quant(encoder_state, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type); - } - - // Check for non-zero coeffs - for (i = 0; i < width * width; i++) { - if (coeff_y[i] != 0) { - // Found one, we can break here - cbf_set(&cur_cu->cbf.y, depth + pu_index); - break; - } - } - - // Copy coefficients, even if they are all zeroes. This takes care of the - // case where the original coefficients aren't already zeroed. - { - int i = 0; - for (y = 0; y < width; y++) { - for (x = 0; x < width; x++) { - orig_coeff_y[x + y * LCU_WIDTH] = coeff_y[i]; - i++; - } - } - } - - if (cbf_is_set(cur_cu->cbf.y, depth + pu_index)) { - // Combine inverese quantized coefficients with the prediction to get - // reconstructed image. - //picture_set_block_residual(cur_pic,x_cu,y_cu,depth,1); - int i; - - dequant(encoder_state, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type); - if(width == 4 && cur_cu->intra[pu_index].tr_skip) { - itransformskip(encoder, block,pre_quant_coeff,width); - } else { - itransform2d(encoder, block,pre_quant_coeff,width,0); - } - - i = 0; - - for (y = 0; y < width; y++) { - for (x = 0; x < width; x++) { - int val = block[i++] + pred_y[x + y * LCU_WIDTH]; - //TODO: support 10+bits - recbase_y[x + y * LCU_WIDTH] = (pixel)CLIP(0, 255, val); - } - } - } - } - - // If luma is 4x4, do chroma for the 8x8 luma area when handling the top - // left PU because the coordinates are correct. - if (depth <= MAX_DEPTH || pu_index == 0) { - const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH / 2; - pixel *recbase_u = &lcu->rec.u[chroma_offset]; - pixel *recbase_v = &lcu->rec.v[chroma_offset]; - const pixel *base_u = &lcu->ref.u[chroma_offset]; - const pixel *base_v = &lcu->ref.v[chroma_offset]; - coefficient *orig_coeff_u = &lcu->coeff.u[chroma_offset]; - coefficient *orig_coeff_v = &lcu->coeff.v[chroma_offset]; - - if (cur_cu->intra[0].mode_chroma == 36) { - cur_cu->intra[0].mode_chroma = cur_cu->intra[0].mode; - } - if (quantize_residual_chroma(encoder_state, cur_cu, depth, COLOR_U, base_u, recbase_u, orig_coeff_u)) { - cbf_set(&cur_cu->cbf.u, depth); - } - if (quantize_residual_chroma(encoder_state, cur_cu, depth, COLOR_V, base_v, recbase_v, orig_coeff_v)) { - cbf_set(&cur_cu->cbf.v, depth); - } - } -} - static void encode_transform_unit(encoder_state * const encoder_state, int x_pu, int y_pu, int depth, int tr_depth) { diff --git a/src/encoder.h b/src/encoder.h index 2570fb66..f69c067c 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -253,12 +253,13 @@ void encode_last_significant_xy(encoder_state *encoder, uint8_t type, uint8_t scan); void encode_coeff_nxn(encoder_state *encoder, int16_t *coeff, uint8_t width, uint8_t type, int8_t scan_mode, int8_t tr_skip); -void encode_transform_tree(encoder_state *encoder_state, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu ); void encode_transform_coeff(encoder_state *encoder_state, int32_t x_cu, int32_t y_cu, int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v); void encode_block_residual(const encoder_control * const encoder, uint16_t x_ctb, uint16_t y_ctb, uint8_t depth); +coeff_scan_order_t get_scan_order(int8_t cu_type, int intra_mode, int depth); + static const uint8_t g_group_idx[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, diff --git a/src/transform.c b/src/transform.c index dabf971f..8469ebd8 100644 --- a/src/transform.c +++ b/src/transform.c @@ -26,9 +26,11 @@ #include #include #include +#include #include "config.h" #include "nal.h" +#include "rdo.h" ////////////////////////////////////////////////////////////////////////// // INITIALIZATIONS @@ -799,3 +801,407 @@ void dequant(const encoder_state * const encoder_state, int16_t *q_coef, int16_t } } + +static void transform_chroma(encoder_state * const encoder_state, cu_info *cur_cu, + int depth, const pixel *base_u, pixel *pred_u, + coefficient *coeff_u, int8_t scan_idx_chroma, + coefficient *pre_quant_coeff, coefficient *block) +{ + const encoder_control * const encoder = encoder_state->encoder_control; + int base_stride = LCU_WIDTH; + int pred_stride = LCU_WIDTH; + + int8_t width_c = LCU_WIDTH >> (depth + 1); + + int i = 0; + unsigned ac_sum = 0; + + int y, x; + + for (y = 0; y < width_c; y++) { + for (x = 0; x < width_c; x++) { + block[i] = ((int16_t)base_u[x + y * (base_stride >> 1)]) - + pred_u[x + y * (pred_stride >> 1)]; + i++; + } + } + + transform2d(encoder, block, pre_quant_coeff, width_c, 65535); + if (encoder->rdoq_enable) { + rdoq(encoder_state, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, + scan_idx_chroma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth); + } else { + quant(encoder_state, pre_quant_coeff, coeff_u, width_c, width_c, &ac_sum, 2, + scan_idx_chroma, cur_cu->type); + } +} + + +static void reconstruct_chroma(const encoder_state * const encoder_state, cu_info *cur_cu, + int depth, coefficient *coeff_u, + pixel *recbase_u, pixel *pred_u, int color_type, + coefficient *pre_quant_coeff, coefficient *block) +{ + int8_t width_c = LCU_WIDTH >> (depth + 1); + + int i, y, x; + + dequant(encoder_state, coeff_u, pre_quant_coeff, width_c, width_c, (int8_t)color_type, cur_cu->type); + itransform2d(encoder_state->encoder_control, block, pre_quant_coeff, width_c, 65535); + + i = 0; + + for (y = 0; y < width_c; y++) { + for (x = 0; x < width_c; x++) { + int16_t val = block[i++] + pred_u[x + y * LCU_WIDTH_C]; + //TODO: support 10+bits + recbase_u[x + y * LCU_WIDTH_C] = (uint8_t)CLIP(0, 255, val); + } + } +} + + +int quantize_residual_chroma(encoder_state * const encoder_state, + cu_info *cur_cu, int luma_depth, color_index color, + const pixel *base_u, pixel *recbase_u, coefficient *orig_coeff_u) +{ + pixel pred_u[LCU_WIDTH*LCU_WIDTH>>2]; + coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; + + int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; + int16_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; + + const int chroma_depth = (luma_depth == MAX_PU_DEPTH ? luma_depth - 1 : luma_depth); + const int8_t width_c = LCU_WIDTH >> (chroma_depth + 1); + + const coeff_scan_order_t scan_idx_chroma = get_scan_order(cur_cu->type, cur_cu->intra[0].mode_chroma, luma_depth); + + int has_coeffs = 0; + + { + int y, x; + for (y = 0; y < width_c; y++) { + for (x = 0; x < width_c; x++) { + pred_u[x + y * LCU_WIDTH_C] = recbase_u[x + y * LCU_WIDTH_C]; + } + } + } + + transform_chroma(encoder_state, cur_cu, chroma_depth, base_u, pred_u, coeff_u, scan_idx_chroma, pre_quant_coeff, block); + { + int i; + for (i = 0; i < width_c * width_c; i++) { + if (coeff_u[i] != 0) { + has_coeffs = 1; + break; + } + } + } + // Copy coefficients, even if they are all zeroes. + { + int i = 0; + int y, x; + for (y = 0; y < width_c; y++) { + for (x = 0; x < width_c; x++) { + orig_coeff_u[x + y * LCU_WIDTH_C] = coeff_u[i]; + i++; + } + } + } + if (has_coeffs) { + reconstruct_chroma(encoder_state, cur_cu, chroma_depth, + coeff_u, recbase_u, pred_u, (color == COLOR_U ? 2 : 3), + pre_quant_coeff, block); + } + + return has_coeffs; +} + + +void decide_trskip(encoder_state * const encoder_state, cu_info *cur_cu, int8_t depth, int pu_index, + int16_t *residual, uint32_t *ac_sum) +{ + const encoder_control * const encoder = encoder_state->encoder_control; + const coeff_scan_order_t scan_idx_luma = get_scan_order(cur_cu->type, cur_cu->intra[pu_index].mode, depth); + const int8_t width = LCU_WIDTH >> depth; + + //int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; + int16_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; + + int i; + coefficient temp_block[16]; coefficient temp_coeff[16]; + coefficient temp_block2[16]; coefficient temp_coeff2[16]; + uint32_t cost = 0,cost2 = 0; + uint32_t coeffcost = 0,coeffcost2 = 0; + + // Test for transform skip + transformskip(encoder, residual,pre_quant_coeff, width); + if (encoder->rdoq_enable) { + rdoq(encoder_state, pre_quant_coeff, temp_coeff, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type,0); + } else { + quant(encoder_state, pre_quant_coeff, temp_coeff, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type); + } + dequant(encoder_state, temp_coeff, pre_quant_coeff, 4, 4, 0, cur_cu->type); + itransformskip(encoder, temp_block,pre_quant_coeff,width); + + transform2d(encoder, residual,pre_quant_coeff,width,0); + if (encoder->rdoq_enable) { + rdoq(encoder_state, pre_quant_coeff, temp_coeff2, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type,0); + } else { + quant(encoder_state, pre_quant_coeff, temp_coeff2, 4, 4, ac_sum, 0, scan_idx_luma, cur_cu->type); + } + dequant(encoder_state, temp_coeff2, pre_quant_coeff, 4, 4, 0, cur_cu->type); + itransform2d(encoder, temp_block2,pre_quant_coeff,width,0); + + // SSD between original and reconstructed + for (i = 0; i < 16; i++) { + int diff = temp_block[i] - residual[i]; + cost += diff*diff; + + diff = temp_block2[i] - residual[i]; + cost2 += diff*diff; + } + + // Simple RDO + if(encoder->rdo == 1) { + // SSD between reconstruction and original + sum of coeffs + for (i = 0; i < 16; i++) { + coeffcost += abs((int)temp_coeff[i]); + coeffcost2 += abs((int)temp_coeff2[i]); + } + cost += (1 + coeffcost + (coeffcost>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); + cost2 += (coeffcost2 + (coeffcost2>>1))*((int)encoder_state->global->cur_lambda_cost+0.5); + // Full RDO + } else if(encoder->rdo == 2) { + coeffcost = get_coeff_cost(encoder_state, temp_coeff, 4, 0, scan_idx_luma); + coeffcost2 = get_coeff_cost(encoder_state, temp_coeff2, 4, 0, scan_idx_luma); + + cost += coeffcost*((int)encoder_state->global->cur_lambda_cost+0.5); + cost2 += coeffcost2*((int)encoder_state->global->cur_lambda_cost+0.5); + } + + cur_cu->intra[pu_index].tr_skip = (cost < cost2); +} + + +/** + * This function calculates the residual coefficients for a region of the LCU + * (defined by x, y and depth) and updates the reconstruction with the + * kvantized residual. + * + * It handles recursion for transform split, but that is currently only work + * for 64x64 inter to 32x32 transform blocks. + * + * Inputs are: + * - lcu->rec pixels after prediction for the area + * - lcu->ref reference pixels for the area + * - lcu->cu for the area + * + * Outputs are: + * - lcu->rec reconstruction after quantized residual + * - lcu->coeff quantized coefficients for the area + * - lcu->cbf coded block flags for the area + * - lcu->cu.intra[].tr_skip for the area + */ +void encode_transform_tree(encoder_state * const encoder_state, int32_t x, int32_t y, const uint8_t depth, lcu_t* lcu) +{ + const encoder_control * const encoder = encoder_state->encoder_control; + // we have 64>>depth transform size + const vector2d lcu_px = {x & 0x3f, y & 0x3f}; + const int pu_index = PU_INDEX(lcu_px.x / 4, lcu_px.y / 4); + cu_info *cur_cu = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x>>3) + (lcu_px.y>>3)*LCU_T_CU_WIDTH]; + const int8_t width = LCU_WIDTH>>depth; + + int i; + + // Tell clang-analyzer what is up. For some reason it can't figure out from + // asserting just depth. + assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64); + + // Split transform and increase depth + if (depth == 0 || cur_cu->tr_depth > depth) { + int offset = width / 2; + encode_transform_tree(encoder_state, x, y, depth+1, lcu); + encode_transform_tree(encoder_state, x + offset, y, depth+1, lcu); + encode_transform_tree(encoder_state, x, y + offset, depth+1, lcu); + encode_transform_tree(encoder_state, x + offset, y + offset, depth+1, lcu); + + // Propagate coded block flags from child CUs to parent CU. + if (depth < MAX_DEPTH) { + cu_info *cu_a = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset)>>3) + (lcu_px.y>>3) *LCU_T_CU_WIDTH]; + cu_info *cu_b = &lcu->cu[LCU_CU_OFFSET + (lcu_px.x>>3) + ((lcu_px.y+offset)>>3)*LCU_T_CU_WIDTH]; + cu_info *cu_c = &lcu->cu[LCU_CU_OFFSET + ((lcu_px.x + offset)>>3) + ((lcu_px.y+offset)>>3)*LCU_T_CU_WIDTH]; + if (cbf_is_set(cu_a->cbf.y, depth+1) || cbf_is_set(cu_b->cbf.y, depth+1) || cbf_is_set(cu_c->cbf.y, depth+1)) { + cbf_set(&cur_cu->cbf.y, depth); + } + if (cbf_is_set(cu_a->cbf.u, depth+1) || cbf_is_set(cu_b->cbf.u, depth+1) || cbf_is_set(cu_c->cbf.u, depth+1)) { + cbf_set(&cur_cu->cbf.u, depth); + } + if (cbf_is_set(cu_a->cbf.v, depth+1) || cbf_is_set(cu_b->cbf.v, depth+1) || cbf_is_set(cu_c->cbf.v, depth+1)) { + cbf_set(&cur_cu->cbf.v, depth); + } + } + + return; + } + + { + const int luma_offset = lcu_px.x + lcu_px.y * LCU_WIDTH; + + // Pointers to current location in arrays with prediction. + pixel *recbase_y = &lcu->rec.y[luma_offset]; + // Pointers to current location in arrays with reference. + const pixel *base_y = &lcu->ref.y[luma_offset]; + // Pointers to current location in arrays with kvantized coefficients. + coefficient *orig_coeff_y = &lcu->coeff.y[luma_offset]; + + // Temporary buffers. Not really used for much. Possibly unnecessary. + pixel pred_y[LCU_WIDTH*LCU_WIDTH]; + // Buffers for coefficients. + coefficient coeff_y[LCU_WIDTH*LCU_WIDTH]; + + // Temporary buffers for kvantization and transformation. + int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; + int16_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; + + uint32_t ac_sum = 0; + uint8_t scan_idx_luma = SCAN_DIAG; + + #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD + uint32_t residual_sum = 0; + #endif + + // Clear coded block flag structures for depths lower than current depth. + // This should ensure that the CBF data doesn't get corrupted if this function + // is called more than once. + cbf_clear(&cur_cu->cbf.y, depth + pu_index); + if (pu_index == 0) { + cbf_clear(&cur_cu->cbf.u, depth); + cbf_clear(&cur_cu->cbf.v, depth); + } + + // Pick coeff scan mode according to intra prediction mode. + if (cur_cu->type == CU_INTRA) { + int chroma_mode = cur_cu->intra[0].mode_chroma; + if (chroma_mode == 36) { + chroma_mode = cur_cu->intra[pu_index].mode; + } + scan_idx_luma = get_scan_order(cur_cu->type, cur_cu->intra[pu_index].mode, depth); + } + + // Copy Luma and Chroma to the pred-block + for(y = 0; y < width; y++) { + for(x = 0; x < width; x++) { + pred_y[x+y*LCU_WIDTH]=recbase_y[x+y*LCU_WIDTH]; + } + } + + // Get residual by subtracting prediction + i = 0; + ac_sum = 0; + + for (y = 0; y < width; y++) { + for (x = 0; x < width; x++) { + block[i] = ((int16_t)base_y[x + y * LCU_WIDTH]) - + pred_y[x + y * LCU_WIDTH]; + #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD + residual_sum += block[i]; + #endif + i++; + } + } + #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD + #define RESIDUAL_THRESHOLD 500 + if(residual_sum < RESIDUAL_THRESHOLD/(width)) { + memset(block, 0, sizeof(int16_t)*(width)*(width)); + } + #endif + + // For 4x4 blocks, check for transform skip + if(width == 4 && encoder->trskip_enable) { + decide_trskip(encoder_state, cur_cu, depth, pu_index, block, &ac_sum); + } + + // Transform and quant residual to coeffs + if(width == 4 && cur_cu->intra[pu_index].tr_skip) { + transformskip(encoder, block,pre_quant_coeff,width); + } else { + transform2d(encoder, block,pre_quant_coeff,width,0); + } + + if (encoder->rdoq_enable) { + rdoq(encoder_state, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, + scan_idx_luma, cur_cu->type, cur_cu->tr_depth-cur_cu->depth); + } else { + quant(encoder_state, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type); + } + + // Check for non-zero coeffs + for (i = 0; i < width * width; i++) { + if (coeff_y[i] != 0) { + // Found one, we can break here + cbf_set(&cur_cu->cbf.y, depth + pu_index); + break; + } + } + + // Copy coefficients, even if they are all zeroes. This takes care of the + // case where the original coefficients aren't already zeroed. + { + int i = 0; + for (y = 0; y < width; y++) { + for (x = 0; x < width; x++) { + orig_coeff_y[x + y * LCU_WIDTH] = coeff_y[i]; + i++; + } + } + } + + if (cbf_is_set(cur_cu->cbf.y, depth + pu_index)) { + // Combine inverese quantized coefficients with the prediction to get + // reconstructed image. + //picture_set_block_residual(cur_pic,x_cu,y_cu,depth,1); + int i; + + dequant(encoder_state, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type); + if(width == 4 && cur_cu->intra[pu_index].tr_skip) { + itransformskip(encoder, block,pre_quant_coeff,width); + } else { + itransform2d(encoder, block,pre_quant_coeff,width,0); + } + + i = 0; + + for (y = 0; y < width; y++) { + for (x = 0; x < width; x++) { + int val = block[i++] + pred_y[x + y * LCU_WIDTH]; + //TODO: support 10+bits + recbase_y[x + y * LCU_WIDTH] = (pixel)CLIP(0, 255, val); + } + } + } + } + + // If luma is 4x4, do chroma for the 8x8 luma area when handling the top + // left PU because the coordinates are correct. + if (depth <= MAX_DEPTH || pu_index == 0) { + const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH / 2; + pixel *recbase_u = &lcu->rec.u[chroma_offset]; + pixel *recbase_v = &lcu->rec.v[chroma_offset]; + const pixel *base_u = &lcu->ref.u[chroma_offset]; + const pixel *base_v = &lcu->ref.v[chroma_offset]; + coefficient *orig_coeff_u = &lcu->coeff.u[chroma_offset]; + coefficient *orig_coeff_v = &lcu->coeff.v[chroma_offset]; + + if (cur_cu->intra[0].mode_chroma == 36) { + cur_cu->intra[0].mode_chroma = cur_cu->intra[0].mode; + } + if (quantize_residual_chroma(encoder_state, cur_cu, depth, COLOR_U, base_u, recbase_u, orig_coeff_u)) { + cbf_set(&cur_cu->cbf.u, depth); + } + if (quantize_residual_chroma(encoder_state, cur_cu, depth, COLOR_V, base_v, recbase_v, orig_coeff_v)) { + cbf_set(&cur_cu->cbf.v, depth); + } + } +} + diff --git a/src/transform.h b/src/transform.h index 4cc8870a..f54f773c 100644 --- a/src/transform.h +++ b/src/transform.h @@ -46,4 +46,6 @@ void itransform2d(const encoder_control *encoder, int16_t *block,int16_t *coeff, int32_t get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset); +void encode_transform_tree(encoder_state *encoder_state, int32_t x, int32_t y, uint8_t depth, lcu_t* lcu); + #endif