From 96a0f03298ea6d58e5c738544e80fe46dc4a0d26 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 11 Oct 2013 11:40:37 +0300 Subject: [PATCH 01/19] Refactoring encoder.c in preparation for adding merge-mode --- src/encoder.c | 187 +++++++++++++++++++++++++------------------------- 1 file changed, 93 insertions(+), 94 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 07e4a785..142de191 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -959,14 +959,14 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // parseRefFrmIdx int32_t ref_frame = cur_cu->inter.mv_ref; - cabac.ctx = &g_cu_ref_pic_model[0]; + cabac.ctx = &g_cu_ref_pic_model[0]; CABAC_BIN(&cabac, (ref_frame == 0) ? 0 : 1, "ref_frame_flag"); if (ref_frame > 0) { uint32_t i; uint32_t ref_num = encoder->ref_idx_num[ref_list_idx] - 2; - cabac.ctx = &g_cu_ref_pic_model[1]; + cabac.ctx = &g_cu_ref_pic_model[1]; ref_frame--; for (i = 0; i < ref_num; ++i) { @@ -976,14 +976,11 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, CABAC_BIN(&cabac, symbol, "ref_frame_flag2"); } else { CABAC_BIN_EP(&cabac, symbol, "ref_frame_flag2"); - } - - if (symbol == 0) { - break; - } - } } + if (symbol == 0) break; } + } + } // Get MV candidates inter_get_mv_cand(encoder, x_ctb, y_ctb, depth, mv_cand); @@ -992,145 +989,147 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, cur_cu->inter.mv_ref = 0; // Default to candidate 0 // Only check when candidates are different - if (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1]) { + if (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1]) { uint16_t cand_1_diff = abs(cur_cu->inter.mv[0] - mv_cand[0][0]) + abs( cur_cu->inter.mv[1] - mv_cand[0][1]); uint16_t cand_2_diff = abs(cur_cu->inter.mv[0] - mv_cand[1][0]) + abs( cur_cu->inter.mv[1] - mv_cand[1][1]); // Select candidate 1 if it's closer - if (cand_2_diff < cand_1_diff) { + if (cand_2_diff < cand_1_diff) { cur_cu->inter.mv_ref = 1; - } - } + 
} + } if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) { const int32_t mvd_hor = cur_cu->inter.mv[0] - mv_cand[cur_cu->inter.mv_ref][0]; const int32_t mvd_ver = cur_cu->inter.mv[1] - mv_cand[cur_cu->inter.mv_ref][1]; const int8_t hor_abs_gr0 = mvd_hor != 0; const int8_t ver_abs_gr0 = mvd_ver != 0; - const uint32_t mvd_hor_abs = abs(mvd_hor); - const uint32_t mvd_ver_abs = abs(mvd_ver); + const uint32_t mvd_hor_abs = abs(mvd_hor); + const uint32_t mvd_ver_abs = abs(mvd_ver); - cabac.ctx = &g_cu_mvd_model[0]; - CABAC_BIN(&cabac, (mvd_hor!=0)?1:0, "abs_mvd_greater0_flag_hor"); - CABAC_BIN(&cabac, (mvd_ver!=0)?1:0, "abs_mvd_greater0_flag_ver"); + cabac.ctx = &g_cu_mvd_model[0]; + CABAC_BIN(&cabac, (mvd_hor!=0)?1:0, "abs_mvd_greater0_flag_hor"); + CABAC_BIN(&cabac, (mvd_ver!=0)?1:0, "abs_mvd_greater0_flag_ver"); - cabac.ctx = &g_cu_mvd_model[1]; + cabac.ctx = &g_cu_mvd_model[1]; if (hor_abs_gr0) { - CABAC_BIN(&cabac, (mvd_hor_abs>1)?1:0, "abs_mvd_greater1_flag_hor"); - } + CABAC_BIN(&cabac, (mvd_hor_abs>1)?1:0, "abs_mvd_greater1_flag_hor"); + } if (ver_abs_gr0) { - CABAC_BIN(&cabac, (mvd_ver_abs>1)?1:0, "abs_mvd_greater1_flag_ver"); - } + CABAC_BIN(&cabac, (mvd_ver_abs>1)?1:0, "abs_mvd_greater1_flag_ver"); + } if (hor_abs_gr0) { if (mvd_hor_abs > 1) { - cabac_write_ep_ex_golomb(&cabac,mvd_hor_abs-2, 1); - } + cabac_write_ep_ex_golomb(&cabac,mvd_hor_abs-2, 1); + } - CABAC_BIN_EP(&cabac, (mvd_hor>0)?0:1, "mvd_sign_flag_hor"); - } + CABAC_BIN_EP(&cabac, (mvd_hor>0)?0:1, "mvd_sign_flag_hor"); + } if (ver_abs_gr0) { if (mvd_ver_abs > 1) { - cabac_write_ep_ex_golomb(&cabac,mvd_ver_abs-2, 1); - } - - CABAC_BIN_EP(&cabac, (mvd_ver>0)?0:1, "mvd_sign_flag_ver"); - } - - // Inter reconstruction - inter_recon(encoder->ref->pics[0], x_ctb * CU_MIN_SIZE_PIXELS, - y_ctb * CU_MIN_SIZE_PIXELS, LCU_WIDTH >> depth, cur_cu->inter.mv, - encoder->in.cur_pic); - - // Mark this block as "coded" (can be used for predictions..) 
- picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); + cabac_write_ep_ex_golomb(&cabac,mvd_ver_abs-2, 1); } + CABAC_BIN_EP(&cabac, (mvd_ver>0)?0:1, "mvd_sign_flag_ver"); + } + } + // Signal which candidate MV to use cabac_write_unary_max_symbol(&cabac, g_mvp_idx_model, cur_cu->inter.mv_ref, 1, - AMVP_MAX_NUM_CANDS - 1); - } - } + AMVP_MAX_NUM_CANDS - 1); } + } + } // for ref_list + } // if !merge + + + // Inter reconstruction + inter_recon(encoder->ref->pics[0], x_ctb * CU_MIN_SIZE_PIXELS, + y_ctb * CU_MIN_SIZE_PIXELS, LCU_WIDTH >> depth, cur_cu->inter.mv, + encoder->in.cur_pic); + + // Mark this block as "coded" (can be used for predictions..) + picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); - if (1) { + { pixel *base_y = &encoder->in.cur_pic->y_data[x_ctb*(LCU_WIDTH>>(MAX_DEPTH)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH))) *encoder->in.width]; pixel *base_u = &encoder->in.cur_pic->u_data[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; pixel *base_v = &encoder->in.cur_pic->v_data[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - uint32_t width = LCU_WIDTH>>depth; + uint32_t width = LCU_WIDTH>>depth; - /* INTRAPREDICTION VARIABLES */ - int16_t pred[LCU_WIDTH*LCU_WIDTH+1]; - int16_t predU[LCU_WIDTH*LCU_WIDTH>>2]; - int16_t predV[LCU_WIDTH*LCU_WIDTH>>2]; + /* INTRAPREDICTION VARIABLES */ + int16_t pred[LCU_WIDTH*LCU_WIDTH+1]; + int16_t predU[LCU_WIDTH*LCU_WIDTH>>2]; + int16_t predV[LCU_WIDTH*LCU_WIDTH>>2]; pixel *recbase_y = &encoder->in.cur_pic->y_recdata[x_ctb*(LCU_WIDTH>>(MAX_DEPTH)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH))) *encoder->in.width]; pixel *recbase_u = &encoder->in.cur_pic->u_recdata[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; pixel *recbase_v = &encoder->in.cur_pic->v_recdata[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - /* TODO: 
dynamic memory allocation */ - int16_t coeff_y[LCU_WIDTH*LCU_WIDTH*2]; - int16_t coeff_u[LCU_WIDTH*LCU_WIDTH>>1]; - int16_t coeff_v[LCU_WIDTH*LCU_WIDTH>>1]; - int8_t residual = 0; + /* TODO: dynamic memory allocation */ + int16_t coeff_y[LCU_WIDTH*LCU_WIDTH*2]; + int16_t coeff_u[LCU_WIDTH*LCU_WIDTH>>1]; + int16_t coeff_v[LCU_WIDTH*LCU_WIDTH>>1]; + int8_t residual = 0; - /* Initialize helper structure for transform */ - transform_info ti; - memset(&ti, 0, sizeof(transform_info)); + /* Initialize helper structure for transform */ + transform_info ti; + memset(&ti, 0, sizeof(transform_info)); - ti.x_ctb = x_ctb; ti.y_ctb = y_ctb; + ti.x_ctb = x_ctb; ti.y_ctb = y_ctb; - /* Base pointers */ - ti.base = base_y; ti.base_u = base_u; ti.base_v = base_v; - ti.base_stride = encoder->in.width; + /* Base pointers */ + ti.base = base_y; ti.base_u = base_u; ti.base_v = base_v; + ti.base_stride = encoder->in.width; - // Prediction pointers - ti.pred = pred; ti.pred_u = predU; ti.pred_v = predV; - ti.pred_stride = (LCU_WIDTH>>depth); + // Prediction pointers + ti.pred = pred; ti.pred_u = predU; ti.pred_v = predV; + ti.pred_stride = (LCU_WIDTH>>depth); - // Reconstruction pointers - ti.recbase = recbase_y; ti.recbase_u = recbase_u; ti.recbase_v = recbase_v; - ti.recbase_stride = encoder->in.width; + // Reconstruction pointers + ti.recbase = recbase_y; ti.recbase_u = recbase_u; ti.recbase_v = recbase_v; + ti.recbase_stride = encoder->in.width; - // Coeff pointers - ti.coeff[0] = coeff_y; ti.coeff[1] = coeff_u; ti.coeff[2] = coeff_v; - ti.block_type = CU_INTER; + // Coeff pointers + ti.coeff[0] = coeff_y; ti.coeff[1] = coeff_u; ti.coeff[2] = coeff_v; + ti.block_type = CU_INTER; - // Handle transforms, quant and reconstruction - ti.idx = 0; - encode_transform_tree(encoder,&ti, depth); + // Handle transforms, quant and reconstruction + ti.idx = 0; + encode_transform_tree(encoder,&ti, depth); - // Coded block pattern - ti.cb_top[0] = (ti.cb[0] & 0x1 || ti.cb[1] & 0x1 || ti.cb[2] & 0x1 || 
ti.cb[3] & 0x1)?1:0; - ti.cb_top[1] = (ti.cb[0] & 0x2 || ti.cb[1] & 0x2 || ti.cb[2] & 0x2 || ti.cb[3] & 0x2)?1:0; - ti.cb_top[2] = (ti.cb[0] & 0x4 || ti.cb[1] & 0x4 || ti.cb[2] & 0x4 || ti.cb[3] & 0x4)?1:0; + // Coded block pattern + ti.cb_top[0] = (ti.cb[0] & 0x1 || ti.cb[1] & 0x1 || ti.cb[2] & 0x1 || ti.cb[3] & 0x1)?1:0; + ti.cb_top[1] = (ti.cb[0] & 0x2 || ti.cb[1] & 0x2 || ti.cb[2] & 0x2 || ti.cb[3] & 0x2)?1:0; + ti.cb_top[2] = (ti.cb[0] & 0x4 || ti.cb[1] & 0x4 || ti.cb[2] & 0x4 || ti.cb[3] & 0x4)?1:0; - residual = ti.cb_top[0] | ti.cb_top[1] | ti.cb_top[2]; - if(depth == 0) { - picture_set_block_residual(encoder->in.cur_pic,x_ctb ,y_ctb ,depth+1,ti.cb[0] & 0x1); - picture_set_block_residual(encoder->in.cur_pic,x_ctb + 4,y_ctb ,depth+1,ti.cb[1] & 0x1); - picture_set_block_residual(encoder->in.cur_pic,x_ctb ,y_ctb + 4,depth+1,ti.cb[2] & 0x1); - picture_set_block_residual(encoder->in.cur_pic,x_ctb + 4,y_ctb + 4,depth+1,ti.cb[3] & 0x1); - } else { - picture_set_block_residual(encoder->in.cur_pic,x_ctb,y_ctb,depth,ti.cb_top[0]); - } + residual = ti.cb_top[0] | ti.cb_top[1] | ti.cb_top[2]; + if(depth == 0) { + picture_set_block_residual(encoder->in.cur_pic,x_ctb ,y_ctb ,depth+1,ti.cb[0] & 0x1); + picture_set_block_residual(encoder->in.cur_pic,x_ctb + 4,y_ctb ,depth+1,ti.cb[1] & 0x1); + picture_set_block_residual(encoder->in.cur_pic,x_ctb ,y_ctb + 4,depth+1,ti.cb[2] & 0x1); + picture_set_block_residual(encoder->in.cur_pic,x_ctb + 4,y_ctb + 4,depth+1,ti.cb[3] & 0x1); + } else { + picture_set_block_residual(encoder->in.cur_pic,x_ctb,y_ctb,depth,ti.cb_top[0]); + } - cabac.ctx = &g_cu_qt_root_cbf_model; - CABAC_BIN(&cabac, residual, "rqt_root_cbf"); - // Code (possible) coeffs to bitstream - ti.idx = 0; - if(residual) { - encode_transform_coeff(encoder, &ti,depth, 0); - } - } - } + cabac.ctx = &g_cu_qt_root_cbf_model; + CABAC_BIN(&cabac, residual, "rqt_root_cbf"); + // Code (possible) coeffs to bitstream + ti.idx = 0; + if(residual) { + encode_transform_coeff(encoder, 
&ti,depth, 0); + } + } + // END for each part } else if (cur_cu->type == CU_INTRA) { From 52335adda04180f3be95f66a256af4288beb9d2c Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 11 Oct 2013 11:59:10 +0300 Subject: [PATCH 02/19] Split merge candidate derivation to its own function --- src/inter.c | 56 ++++++++++++++++++++++++++++++++++------------------- src/inter.h | 2 ++ 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/src/inter.c b/src/inter.c index e2a26b3e..bbdc41a0 100644 --- a/src/inter.c +++ b/src/inter.c @@ -227,18 +227,21 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, const in } /** - * \brief Get MV prediction for current block + * \brief Get merge candidates for current block * \param encoder encoder control struct to use * \param x_cu block x position in SCU * \param y_cu block y position in SCU * \param depth current block depth - * \param mv_pred[2][2] 2x motion vector prediction + * \param b0 candidate b0 + * \param b1 candidate b1 + * \param b2 candidate b2 + * \param a0 candidate a0 + * \param a1 candidate a1 */ -void inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int16_t mv_cand[2][2]) +void inter_get_spatial_merge_candidates(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, + cu_info **b0, cu_info **b1,cu_info **b2,cu_info **a0,cu_info **a1) { uint8_t cur_block_in_scu = (LCU_WIDTH>>depth) / CU_MIN_SIZE_PIXELS; //!< the width of the current block on SCU - uint8_t candidates = 0; - /* Predictor block locations ____ _______ @@ -248,37 +251,50 @@ void inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int __| | |A1|_________| |A0| - */ - cu_info *b0, *b1, *b2, *a0, *a1; - - b0 = b1 = b2 = a0 = a1 = NULL; + */ // A0 and A1 availability testing if (x_cu != 0) { - a1 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu - 1 + (y_cu + cur_block_in_scu - 1) * (encoder->in.width_in_lcu<coded) a1 = NULL; + *a1 = 
&encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu - 1 + (y_cu + cur_block_in_scu - 1) * (encoder->in.width_in_lcu<coded) *a1 = NULL; if (y_cu + cur_block_in_scu < encoder->in.height_in_lcu<in.cur_pic->cu_array[MAX_DEPTH][x_cu - 1 + (y_cu + cur_block_in_scu) * (encoder->in.width_in_lcu<coded) a0 = NULL; + *a0 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu - 1 + (y_cu + cur_block_in_scu) * (encoder->in.width_in_lcu<coded) *a0 = NULL; } } // B0, B1 and B2 availability testing if (y_cu != 0) { - if (x_cu + cur_block_in_scu < encoder->in.width_in_lcu<in.cur_pic->cu_array[MAX_DEPTH][x_cu + cur_block_in_scu + (y_cu - 1) * (encoder->in.width_in_lcu<coded) b0 = NULL; + *b0 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + cur_block_in_scu + (y_cu - 1) * (encoder->in.width_in_lcu<coded) *b0 = NULL; } - b1 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + cur_block_in_scu - 1 + (y_cu - 1) * (encoder->in.width_in_lcu<coded) b1 = NULL; + *b1 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + cur_block_in_scu - 1 + (y_cu - 1) * (encoder->in.width_in_lcu<coded) *b1 = NULL; if (x_cu != 0) { - b2 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu - 1 + (y_cu - 1) * (encoder->in.width_in_lcu<coded) b2 = NULL; + *b2 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu - 1 + (y_cu - 1) * (encoder->in.width_in_lcu<coded) *b2 = NULL; } } +} + +/** + * \brief Get MV prediction for current block + * \param encoder encoder control struct to use + * \param x_cu block x position in SCU + * \param y_cu block y position in SCU + * \param depth current block depth + * \param mv_pred[2][2] 2x motion vector prediction + */ +void inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int16_t mv_cand[2][2]) +{ + uint8_t candidates = 0; + + cu_info *b0, *b1, *b2, *a0, *a1; + b0 = b1 = b2 = a0 = a1 = NULL; + inter_get_spatial_merge_candidates(encoder, x_cu, y_cu, depth, &b0, &b1, &b2, &a0, &a1); // Left predictors if (a0 && a0->type == CU_INTER) { diff --git a/src/inter.h 
b/src/inter.h index 58c35c89..4553f894 100644 --- a/src/inter.h +++ b/src/inter.h @@ -21,6 +21,8 @@ void inter_set_block(picture* pic,uint32_t x_cu, uint32_t y_cu, uint8_t depth, cu_info *cur_cu); void inter_recon(picture *ref,int32_t xpos, int32_t ypos,int32_t width, const int16_t mv[2], picture* dst); +void inter_get_spatial_merge_candidates(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, + cu_info **b0, cu_info **b1,cu_info **b2,cu_info **a0,cu_info **a1); void inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int16_t mv_cand[2][2]); #endif From db266e74ff7afe0f0d563e2d4965b2e5d278164e Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 11 Oct 2013 16:12:04 +0300 Subject: [PATCH 03/19] Added merge mode selection (NOT WORKING!) and a function to get candidates --- src/encoder.c | 82 ++++++++++++++++++++++++----------------------- src/inter.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/inter.h | 1 + 3 files changed, 130 insertions(+), 41 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 142de191..88e7841a 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -741,26 +741,26 @@ void encode_slice_header(encoder_control* encoder) for (j = 0; j < ref_negative; j++) { WRITE_UE(encoder->stream, 0, "delta_poc_s0_minus1"); WRITE_U(encoder->stream,1,1, "used_by_curr_pic_s0_flag"); - } - - //WRITE_UE(encoder->stream, 0, "short_term_ref_pic_set_idx"); } + //WRITE_UE(encoder->stream, 0, "short_term_ref_pic_set_idx"); + } + //end if //end if if (encoder->sao_enable) { WRITE_U(encoder->stream, 1,1, "slice_sao_luma_flag"); WRITE_U(encoder->stream, 0,1, "slice_sao_chroma_flag"); - } + } if (encoder->in.cur_pic->slicetype != SLICE_I) { WRITE_U(encoder->stream, 0, 1, "num_ref_idx_active_override_flag"); - WRITE_UE(encoder->stream, 0, "five_minus_max_num_merge_cand"); - } + WRITE_UE(encoder->stream, 5-MRG_MAX_NUM_CANDS, "five_minus_max_num_merge_cand"); + } if (encoder->in.cur_pic->slicetype == 
SLICE_B) { WRITE_U(encoder->stream, 0, 1, "mvd_l1_zero_flag"); - } + } // Skip flags that are not present // if !entropy_slice_flag @@ -903,53 +903,57 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // FOR each part // Mergeflag uint8_t merge_flag = 0; - cabac.ctx = &g_cu_merge_flag_ext_model; + int16_t unary_idx = 0; + int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; + int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); + for(unary_idx = 0; unary_idx < num_cand; unary_idx++) { + if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && + merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { + //merge_flag = 1; + break; + } + } + cabac.ctx = &g_cu_merge_flag_ext_model; CABAC_BIN(&cabac, merge_flag, "MergeFlag"); if (merge_flag) { //merge - // MergeIndex - int16_t unary_idx = 0; //pcCU->getMergeIndex( uiAbsPartIdx ); - int16_t num_cand = 0; //pcCU->getSlice()->getMaxNumMergeCand(); - int32_t ui; - if (num_cand > 1) { + int32_t ui; for (ui = 0; ui < num_cand - 1; ui++) { - int32_t symbol = (ui == unary_idx) ? 0 : 1; + int32_t symbol = (ui != unary_idx); if (ui == 0) { cabac.ctx = &g_cu_merge_idx_ext_model; CABAC_BIN(&cabac, symbol, "MergeIndex"); } else { CABAC_BIN_EP(&cabac,symbol,"MergeIndex"); - } - - if (symbol == 0) { - break; - } - } } + + if (symbol == 0) break; + } + } } else { uint32_t ref_list_idx; - int16_t mv_cand[2][2]; + int16_t mv_cand[2][2]; - /* - // Void TEncSbac::codeInterDir( TComDataCU* pcCU, UInt uiAbsPartIdx ) - if(encoder->in.cur_pic->slicetype == SLICE_B) - { - // Code Inter Dir - const UInt uiInterDir = pcCU->getInterDir( uiAbsPartIdx ) - 1; - const UInt uiCtx = pcCU->getCtxInterDir( uiAbsPartIdx ); - ContextModel *pCtx = m_cCUInterDirSCModel.get( 0 ); - if (pcCU->getPartitionSize(uiAbsPartIdx) == SIZE_2Nx2N || pcCU->getHeight(uiAbsPartIdx) != 8 ) - { - m_pcBinIf->encodeBin( uiInterDir == 2 ? 
1 : 0, *( pCtx + uiCtx ) ); - } - if (uiInterDir < 2) - { - m_pcBinIf->encodeBin( uiInterDir, *( pCtx + 4 ) ); - } - } - */ + /* + // Void TEncSbac::codeInterDir( TComDataCU* pcCU, UInt uiAbsPartIdx ) + if(encoder->in.cur_pic->slicetype == SLICE_B) + { + // Code Inter Dir + const UInt uiInterDir = pcCU->getInterDir( uiAbsPartIdx ) - 1; + const UInt uiCtx = pcCU->getCtxInterDir( uiAbsPartIdx ); + ContextModel *pCtx = m_cCUInterDirSCModel.get( 0 ); + if (pcCU->getPartitionSize(uiAbsPartIdx) == SIZE_2Nx2N || pcCU->getHeight(uiAbsPartIdx) != 8 ) + { + m_pcBinIf->encodeBin( uiInterDir == 2 ? 1 : 0, *( pCtx + uiCtx ) ); + } + if (uiInterDir < 2) + { + m_pcBinIf->encodeBin( uiInterDir, *( pCtx + 4 ) ); + } + } + */ for (ref_list_idx = 0; ref_list_idx < 2; ref_list_idx++) { //if(encoder->ref_idx_num[uiRefListIdx] > 0) diff --git a/src/inter.c b/src/inter.c index bbdc41a0..2e76b210 100644 --- a/src/inter.c +++ b/src/inter.c @@ -328,15 +328,99 @@ void inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int } #if ENABLE_TEMPORAL_MVP - if(candidates < 2) { + if(candidates < AMVP_MAX_NUM_CANDS) { //TODO: add temporal mv predictor } #endif // Fill with (0,0) - while (candidates < 2) { + while (candidates < AMVP_MAX_NUM_CANDS) { mv_cand[candidates][0] = 0; mv_cand[candidates][1] = 0; candidates++; } } + +/** + * \brief Get merge predictions for current block + * \param encoder encoder control struct to use + * \param x_cu block x position in SCU + * \param y_cu block y position in SCU + * \param depth current block depth + * \param mv_pred[MRG_MAX_NUM_CANDS][2] MRG_MAX_NUM_CANDS motion vector prediction + */ +uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][2]) +{ + uint8_t candidates = 0; + uint8_t i = 0; + int8_t duplicate = 0; + + cu_info *b0, *b1, *b2, *a0, *a1; + b0 = b1 = b2 = a0 = a1 = NULL; + inter_get_spatial_merge_candidates(encoder, x_cu, y_cu, depth, &b0, &b1, &b2, 
&a0, &a1); + +#define CHECK_DUPLICATE(X,Y) {duplicate = 0; for(i = 0; i < candidates; i++) { \ + if(mv_cand[i][0] == (X) && mv_cand[i][1] == (Y)) { \ + duplicate = 1; break; } }} + + if (a1 && a1->type == CU_INTER) { + mv_cand[candidates][0] = a1->inter.mv[0]; + mv_cand[candidates][1] = a1->inter.mv[1]; + candidates++; + } + + if (b1 && b1->type == CU_INTER) { + if(candidates) CHECK_DUPLICATE(b1->inter.mv[0],b1->inter.mv[1]); + if(!duplicate) { + mv_cand[candidates][0] = b1->inter.mv[0]; + mv_cand[candidates][1] = b1->inter.mv[1]; + candidates++; + } + } + + if (b0 && b0->type == CU_INTER) { + if(candidates) CHECK_DUPLICATE(b0->inter.mv[0],b0->inter.mv[1]); + if(!duplicate) { + mv_cand[candidates][0] = b0->inter.mv[0]; + mv_cand[candidates][1] = b0->inter.mv[1]; + candidates++; + } + } + + if (a0 && a0->type == CU_INTER) { + if(candidates) CHECK_DUPLICATE(a0->inter.mv[0],a0->inter.mv[1]); + if(!duplicate) { + mv_cand[candidates][0] = a0->inter.mv[0]; + mv_cand[candidates][1] = a0->inter.mv[1]; + candidates++; + } + } + + if(b2 && b2->type == CU_INTER) { + if(candidates) CHECK_DUPLICATE(b2->inter.mv[0],b2->inter.mv[1]); + if(!duplicate) { + mv_cand[candidates][0] = b2->inter.mv[0]; + mv_cand[candidates][1] = b2->inter.mv[1]; + candidates++; + } + } + + +#if ENABLE_TEMPORAL_MVP + if(candidates < AMVP_MAX_NUM_CANDS) { + //TODO: add temporal mv predictor + } +#endif + + // Fill with (0,0) + /* + i = candidates; + while (candidates < MRG_MAX_NUM_CANDS) { + mv_cand[candidates][0] = 0; + mv_cand[candidates][1] = 0; + candidates++; + } + */ + return candidates; +} + diff --git a/src/inter.h b/src/inter.h index 4553f894..e3c7f4a6 100644 --- a/src/inter.h +++ b/src/inter.h @@ -24,5 +24,6 @@ void inter_recon(picture *ref,int32_t xpos, int32_t ypos,int32_t width, const in void inter_get_spatial_merge_candidates(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, cu_info **b0, cu_info **b1,cu_info **b2,cu_info **a0,cu_info **a1); void 
inter_get_mv_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int16_t mv_cand[2][2]); +uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][2]); #endif From d9e6d8413daab40d19638f1019d7d661ee446c27 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Tue, 15 Oct 2013 17:56:50 +0300 Subject: [PATCH 04/19] Added coeff data to picture-struct --- src/encmain.c | 7 +++++++ src/picture.c | 5 +++++ src/picture.h | 2 ++ 3 files changed, 14 insertions(+) diff --git a/src/encmain.c b/src/encmain.c index 0bd39731..7e6b038d 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -154,6 +154,9 @@ int main(int argc, char *argv[]) init_encoder_input(&encoder->in, input, cfg->width, cfg->height); + // Init coeff data table + encoder->in.cur_pic->coeff = MALLOC(coefficient, cfg->width * cfg->height); + // Start coding cycle while data on input and not on the last frame while(!feof(input) && (!cfg->frames || encoder->frame < cfg->frames)) { int32_t diff; @@ -202,6 +205,10 @@ int main(int argc, char *argv[]) // TODO: reuse memory from old reference encoder->in.cur_pic = picture_init(encoder->in.width, encoder->in.height, encoder->in.width_in_lcu, encoder->in.height_in_lcu); + // Copy pointer from the last cur_pic because we don't want to reallocate it + encoder->in.cur_pic->coeff = encoder->ref->pics[0]->coeff; + encoder->ref->pics[0]->coeff = NULL; + encoder->frame++; } // Coding finished diff --git a/src/picture.c b/src/picture.c index 9b77d23c..5e89aa7b 100644 --- a/src/picture.c +++ b/src/picture.c @@ -244,6 +244,8 @@ picture *picture_init(int32_t width, int32_t height, memset(pic->cu_array[i], 0, sizeof(cu_info) * cu_array_size); } + pic->coeff = NULL; + return pic; } @@ -275,6 +277,9 @@ int picture_destroy(picture *pic) free(pic->cu_array); pic->cu_array = NULL; + free(pic->coeff); + pic->coeff = NULL; + return 1; } diff --git a/src/picture.h b/src/picture.h index 764d621c..9cf4d65d 
100644 --- a/src/picture.h +++ b/src/picture.h @@ -73,6 +73,8 @@ typedef struct pixel* u_recdata; // \brief Pointer to reconstructed U-data. pixel* v_recdata; // \brief Pointer to reconstructed V-data. + coefficient* coeff; //!< \brief coefficient pointer + int32_t width; // \brief Luma pixel array width. int32_t height; // \brief Luma pixel array height. int32_t height_in_lcu; // \brief Picture width in number of LCU's. From d236d58981fdb61d18d085731c2c072389997af6 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Thu, 17 Oct 2013 15:14:22 +0300 Subject: [PATCH 05/19] Added more data to cu_info and renamed "residual" to "coeff_y/u/v" in the struct --- src/filter.c | 4 ++-- src/picture.c | 6 +++--- src/picture.h | 40 ++++++++++++++++++++++------------------ 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/filter.c b/src/filter.c index 49883a41..e2e6c04f 100644 --- a/src/filter.c +++ b/src/filter.c @@ -198,8 +198,8 @@ void filter_deblock_edge_luma(encoder_control *encoder, // Intra blocks have strength 2 if(cu_q->type == CU_INTRA || cu_p->type == CU_INTRA) { strength = 2; - // Non-zero residual and transform boundary - } else if(cu_q->residual || cu_p->residual) { + // Non-zero residual/coeffs and transform boundary + } else if(cu_q->coeff_y || cu_p->coeff_y) { strength = 1; // Absolute motion vector diff between blocks >= 1 (Integer pixel) } else if((abs(cu_q->inter.mv[0] - cu_p->inter.mv[0]) >= 4) || (abs(cu_q->inter.mv[1] - cu_p->inter.mv[1]) >= 4)) { diff --git a/src/picture.c b/src/picture.c index 5e89aa7b..1253a083 100644 --- a/src/picture.c +++ b/src/picture.c @@ -26,10 +26,10 @@ * \param x_scu x SCU position (smallest CU) * \param y_scu y SCU position (smallest CU) * \param depth current CU depth - * \param residual residual status + * \param coeff_y residual status */ void picture_set_block_residual(picture *pic, uint32_t x_scu, uint32_t y_scu, - uint8_t depth, int8_t residual) + uint8_t depth, int8_t coeff_y) { uint32_t x, y; int 
width_in_scu = pic->width_in_lcu << MAX_DEPTH; @@ -38,7 +38,7 @@ void picture_set_block_residual(picture *pic, uint32_t x_scu, uint32_t y_scu, for (y = y_scu; y < y_scu + block_scu_width; ++y) { int cu_row = y * width_in_scu; for (x = x_scu; x < x_scu + block_scu_width; ++x) { - pic->cu_array[MAX_DEPTH][cu_row + x].residual = residual; + pic->cu_array[MAX_DEPTH][cu_row + x].coeff_y = coeff_y; } } } diff --git a/src/picture.h b/src/picture.h index 9cf4d65d..12088a89 100644 --- a/src/picture.h +++ b/src/picture.h @@ -52,10 +52,14 @@ typedef struct */ typedef struct { - int8_t type; - int8_t depth; - int8_t coded; - int8_t residual; + int8_t type; //!< \brief block type, CU_INTER / CU_INTRA + int8_t depth; //!< \brief depth / size of this block + int8_t part_size; //!< \brief Currently only 2Nx2N, TODO: AMP/SMP/NxN parts + int8_t tr_depth; //!< \brief transform depth + int8_t coded; //!< \brief flag to indicate this block is coded and reconstructed + int8_t coeff_y; //!< \brief is there coded coeffs Y + int8_t coeff_u; //!< \brief is there coded coeffs U + int8_t coeff_v; //!< \brief is there coded coeffs V cu_info_intra intra; cu_info_inter inter; } cu_info; @@ -65,22 +69,22 @@ typedef struct */ typedef struct { - pixel* y_data; // \brief Pointer to luma pixel array. - pixel* u_data; // \brief Pointer to chroma U pixel array. - pixel* v_data; // \brief Pointer to chroma V pixel array. + pixel* y_data; //!< \brief Pointer to luma pixel array. + pixel* u_data; //!< \brief Pointer to chroma U pixel array. + pixel* v_data; //!< \brief Pointer to chroma V pixel array. - pixel* y_recdata; // \brief Pointer to reconstructed Y-data. - pixel* u_recdata; // \brief Pointer to reconstructed U-data. - pixel* v_recdata; // \brief Pointer to reconstructed V-data. + pixel* y_recdata; //!< \brief Pointer to reconstructed Y-data. + pixel* u_recdata; //!< \brief Pointer to reconstructed U-data. + pixel* v_recdata; //!< \brief Pointer to reconstructed V-data. 
coefficient* coeff; //!< \brief coefficient pointer - int32_t width; // \brief Luma pixel array width. - int32_t height; // \brief Luma pixel array height. - int32_t height_in_lcu; // \brief Picture width in number of LCU's. - int32_t width_in_lcu; // \brief Picture height in number of LCU's. - uint8_t referenced; // \brief Whether this picture is referenced. - cu_info** cu_array; // \brief Info for each CU at each depth. + int32_t width; //!< \brief Luma pixel array width. + int32_t height; //!< \brief Luma pixel array height. + int32_t height_in_lcu; //!< \brief Picture height in number of LCU's. + int32_t width_in_lcu; //!< \brief Picture width in number of LCU's. + uint8_t referenced; //!< \brief Whether this picture is referenced. + cu_info** cu_array; //!< \brief Info for each CU at each depth. uint8_t type; uint8_t slicetype; } picture; @@ -90,8 +94,8 @@ typedef struct */ typedef struct { - picture** pics; // \brief Pointer to array of picture pointers. - unsigned int size; // \brief Array size. + picture** pics; //!< \brief Pointer to array of picture pointers. + unsigned int size; //!< \brief Array size. 
unsigned int used_size; } picture_list; From dda53f48a7f70fd79047e5b9d03cdf429af8c828 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 18 Oct 2013 11:39:13 +0300 Subject: [PATCH 06/19] Refactoring encoder transform/quant related functions, cu_info and picture --- src/encmain.c | 18 ++- src/encoder.c | 368 ++++++++++++++++---------------------------------- src/encoder.h | 37 +---- src/global.h | 1 + src/picture.c | 34 +++-- src/picture.h | 16 ++- 6 files changed, 167 insertions(+), 307 deletions(-) diff --git a/src/encmain.c b/src/encmain.c index 7e6b038d..d64f8860 100644 --- a/src/encmain.c +++ b/src/encmain.c @@ -155,7 +155,14 @@ int main(int argc, char *argv[]) init_encoder_input(&encoder->in, input, cfg->width, cfg->height); // Init coeff data table - encoder->in.cur_pic->coeff = MALLOC(coefficient, cfg->width * cfg->height); + encoder->in.cur_pic->coeff_y = MALLOC(coefficient, cfg->width * cfg->height); + encoder->in.cur_pic->coeff_u = MALLOC(coefficient, (cfg->width * cfg->height) >> 2); + encoder->in.cur_pic->coeff_v = MALLOC(coefficient, (cfg->width * cfg->height) >> 2); + + // Init predicted data table + encoder->in.cur_pic->pred_y = MALLOC(pixel, cfg->width * cfg->height); + encoder->in.cur_pic->pred_u = MALLOC(pixel, (cfg->width * cfg->height) >> 2); + encoder->in.cur_pic->pred_v = MALLOC(pixel, (cfg->width * cfg->height) >> 2); // Start coding cycle while data on input and not on the last frame while(!feof(input) && (!cfg->frames || encoder->frame < cfg->frames)) { @@ -206,8 +213,13 @@ int main(int argc, char *argv[]) encoder->in.cur_pic = picture_init(encoder->in.width, encoder->in.height, encoder->in.width_in_lcu, encoder->in.height_in_lcu); // Copy pointer from the last cur_pic because we don't want to reallocate it - encoder->in.cur_pic->coeff = encoder->ref->pics[0]->coeff; - encoder->ref->pics[0]->coeff = NULL; + MOVE_POINTER(encoder->in.cur_pic->coeff_y,encoder->ref->pics[0]->coeff_y); + 
MOVE_POINTER(encoder->in.cur_pic->coeff_u,encoder->ref->pics[0]->coeff_u); + MOVE_POINTER(encoder->in.cur_pic->coeff_v,encoder->ref->pics[0]->coeff_v); + + MOVE_POINTER(encoder->in.cur_pic->pred_y,encoder->ref->pics[0]->pred_y); + MOVE_POINTER(encoder->in.cur_pic->pred_u,encoder->ref->pics[0]->pred_u); + MOVE_POINTER(encoder->in.cur_pic->pred_v,encoder->ref->pics[0]->pred_v); encoder->frame++; } diff --git a/src/encoder.c b/src/encoder.c index 88e7841a..507e7d4b 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -887,7 +887,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // Prediction mode if (encoder->in.cur_pic->slicetype != SLICE_I) { cabac.ctx = &g_cu_pred_mode_model; - CABAC_BIN(&cabac, (cur_cu->type == CU_INTRA) ? 1 : 0, "PredMode"); + CABAC_BIN(&cabac, (cur_cu->type == CU_INTRA), "PredMode"); } // Signal PartSize on max depth @@ -1061,79 +1061,20 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // Mark this block as "coded" (can be used for predictions..) 
picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); - - { - pixel *base_y = &encoder->in.cur_pic->y_data[x_ctb*(LCU_WIDTH>>(MAX_DEPTH)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH))) *encoder->in.width]; - pixel *base_u = &encoder->in.cur_pic->u_data[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - pixel *base_v = &encoder->in.cur_pic->v_data[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - uint32_t width = LCU_WIDTH>>depth; - - /* INTRAPREDICTION VARIABLES */ - int16_t pred[LCU_WIDTH*LCU_WIDTH+1]; - int16_t predU[LCU_WIDTH*LCU_WIDTH>>2]; - int16_t predV[LCU_WIDTH*LCU_WIDTH>>2]; - - pixel *recbase_y = &encoder->in.cur_pic->y_recdata[x_ctb*(LCU_WIDTH>>(MAX_DEPTH)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH))) *encoder->in.width]; - pixel *recbase_u = &encoder->in.cur_pic->u_recdata[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - pixel *recbase_v = &encoder->in.cur_pic->v_recdata[x_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)) + (y_ctb*(LCU_WIDTH>>(MAX_DEPTH+1)))*(encoder->in.width>>1)]; - - /* TODO: dynamic memory allocation */ - int16_t coeff_y[LCU_WIDTH*LCU_WIDTH*2]; - int16_t coeff_u[LCU_WIDTH*LCU_WIDTH>>1]; - int16_t coeff_v[LCU_WIDTH*LCU_WIDTH>>1]; - int8_t residual = 0; - - /* Initialize helper structure for transform */ - transform_info ti; - memset(&ti, 0, sizeof(transform_info)); - - ti.x_ctb = x_ctb; ti.y_ctb = y_ctb; - - /* Base pointers */ - ti.base = base_y; ti.base_u = base_u; ti.base_v = base_v; - ti.base_stride = encoder->in.width; - - // Prediction pointers - ti.pred = pred; ti.pred_u = predU; ti.pred_v = predV; - ti.pred_stride = (LCU_WIDTH>>depth); - - // Reconstruction pointers - ti.recbase = recbase_y; ti.recbase_u = recbase_u; ti.recbase_v = recbase_v; - ti.recbase_stride = encoder->in.width; - - // Coeff pointers - ti.coeff[0] = coeff_y; ti.coeff[1] = coeff_u; ti.coeff[2] = coeff_v; - ti.block_type = CU_INTER; - - // 
Handle transforms, quant and reconstruction - ti.idx = 0; - encode_transform_tree(encoder,&ti, depth); - - // Coded block pattern - ti.cb_top[0] = (ti.cb[0] & 0x1 || ti.cb[1] & 0x1 || ti.cb[2] & 0x1 || ti.cb[3] & 0x1)?1:0; - ti.cb_top[1] = (ti.cb[0] & 0x2 || ti.cb[1] & 0x2 || ti.cb[2] & 0x2 || ti.cb[3] & 0x2)?1:0; - ti.cb_top[2] = (ti.cb[0] & 0x4 || ti.cb[1] & 0x4 || ti.cb[2] & 0x4 || ti.cb[3] & 0x4)?1:0; - - residual = ti.cb_top[0] | ti.cb_top[1] | ti.cb_top[2]; - if(depth == 0) { - picture_set_block_residual(encoder->in.cur_pic,x_ctb ,y_ctb ,depth+1,ti.cb[0] & 0x1); - picture_set_block_residual(encoder->in.cur_pic,x_ctb + 4,y_ctb ,depth+1,ti.cb[1] & 0x1); - picture_set_block_residual(encoder->in.cur_pic,x_ctb ,y_ctb + 4,depth+1,ti.cb[2] & 0x1); - picture_set_block_residual(encoder->in.cur_pic,x_ctb + 4,y_ctb + 4,depth+1,ti.cb[3] & 0x1); - } else { - picture_set_block_residual(encoder->in.cur_pic,x_ctb,y_ctb,depth,ti.cb_top[0]); - } - + encode_transform_tree(encoder,x_ctb, y_ctb, depth); + // Only need to signal coded block flag if not skipped or merged + // skip = no coded residual, merge = coded residual + if (!cur_cu->merged) { cabac.ctx = &g_cu_qt_root_cbf_model; - CABAC_BIN(&cabac, residual, "rqt_root_cbf"); - // Code (possible) coeffs to bitstream - ti.idx = 0; - if(residual) { - encode_transform_coeff(encoder, &ti,depth, 0); - } + CABAC_BIN(&cabac, cur_cu->coeff_y | cur_cu->coeff_u | cur_cu->coeff_v, "rqt_root_cbf"); } - + // Code (possible) coeffs to bitstream + + if(cur_cu->coeff_y | cur_cu->coeff_u | cur_cu->coeff_v) { + encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0); + } + // END for each part } else if (cur_cu->type == CU_INTRA) { @@ -1149,9 +1090,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, uint32_t width = LCU_WIDTH>>depth; // INTRAPREDICTION VARIABLES - int16_t pred_y[LCU_WIDTH * LCU_WIDTH + 1]; - int16_t pred_u[LCU_WIDTH * LCU_WIDTH >> 2]; - int16_t pred_v[LCU_WIDTH * LCU_WIDTH >> 2]; + int16_t pred_y[LCU_WIDTH * 
LCU_WIDTH]; pixel *recbase_y = &encoder->in.cur_pic->y_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; pixel *recbase_u = &encoder->in.cur_pic->u_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; @@ -1251,8 +1190,8 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, if (intra_pred_mode == allowed_chroma_dir[i]) { allowed_chroma_dir[i] = 34; /* VER+8 mode */ break; - } } + } for (i = 0; i < 4; i++) { if (intra_pred_mode_chroma_temp == allowed_chroma_dir[i]) { @@ -1269,54 +1208,8 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // Coeff // Transform tree - { - // TODO: dynamic memory allocation - int16_t coeff_y[LCU_WIDTH * LCU_WIDTH * 2]; - int16_t coeff_u[LCU_WIDTH * LCU_WIDTH >> 1]; - int16_t coeff_v[LCU_WIDTH * LCU_WIDTH >> 1]; - - // Initialize helper structure for transform - transform_info ti; - memset(&ti, 0, sizeof(transform_info)); - - ti.x_ctb = x_ctb; ti.y_ctb = y_ctb; - - // Base pointers - ti.base = base_y; ti.base_u = base_u; ti.base_v = base_v; - ti.base_stride = encoder->in.width; - - // Prediction pointers - ti.pred = pred_y; ti.pred_u = pred_u; ti.pred_v = pred_v; - ti.pred_stride = (LCU_WIDTH>>depth); - - // Reconstruction pointers - ti.recbase = recbase_y; ti.recbase_u = recbase_u; ti.recbase_v = recbase_v; - ti.recbase_stride = encoder->in.width; - - // Coeff pointers - ti.coeff[0] = coeff_y; ti.coeff[1] = coeff_u; ti.coeff[2] = coeff_v; - - // Prediction info - ti.intra_pred_mode = intra_pred_mode; - ti.intra_pred_mode_chroma = intra_pred_mode_chroma; - - // Handle transforms, quant and reconstruction - ti.idx = 0; - ti.block_type = CU_INTRA; - encode_transform_tree(encoder,&ti, depth); - - // Coded block pattern - ti.cb_top[0] = (ti.cb[0] & 0x1 || ti.cb[1] & 0x1 || ti.cb[2] & 0x1 - || ti.cb[3] & 0x1) ? 
1 : 0; - ti.cb_top[1] = (ti.cb[0] & 0x2 || ti.cb[1] & 0x2 || ti.cb[2] & 0x2 - || ti.cb[3] & 0x2) ? 1 : 0; - ti.cb_top[2] = (ti.cb[0] & 0x4 || ti.cb[1] & 0x4 || ti.cb[2] & 0x4 - || ti.cb[3] & 0x4) ? 1 : 0; - - // Code (possible) coeffs to bitstream - ti.idx = 0; - encode_transform_coeff(encoder, &ti,depth, 0); - } + encode_transform_tree(encoder, x_ctb, y_ctb, depth); + encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0); // end Transform tree // end Coeff @@ -1374,80 +1267,44 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, } -void encode_transform_tree(encoder_control *encoder, transform_info *ti, - uint8_t depth) +void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, uint8_t depth) { // we have 64>>depth transform size int x,y,i; int32_t width = LCU_WIDTH>>depth; - - if (depth == 0) { // Split 64x64 - // Prepare for multi-level splitting - ti->split[ti->idx] = 1<in.cur_pic->cu_array[MAX_DEPTH][x_cu + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; // Split transform and increase depth - if (ti->split[ti->idx] & (1 << depth)) { - uint8_t change = 1<<(MAX_DEPTH-1-depth); - ti->idx = 0; encode_transform_tree(encoder,ti,depth+1); - ti->x_ctb += change; - ti->idx = 1; encode_transform_tree(encoder,ti,depth+1); - ti->x_ctb -= change; ti->y_ctb += change; - ti->idx = 2; encode_transform_tree(encoder,ti,depth+1); - ti->x_ctb += change; - ti->idx = 3; encode_transform_tree(encoder,ti,depth+1); + if (depth == 0 || cur_cu->tr_depth > depth) { + uint8_t offset = 1<<(MAX_DEPTH-1-depth); + encode_transform_tree(encoder, x_cu, y_cu, depth+1); + encode_transform_tree(encoder, x_cu + offset, y_cu, depth+1); + encode_transform_tree(encoder, x_cu, y_cu + offset, depth+1); + encode_transform_tree(encoder, x_cu + offset, y_cu + offset, depth+1); return; } { - uint8_t cb_y = 0, cb_u = 0, cb_v = 0; - int32_t coeff_fourth = ((LCU_WIDTH>>(depth))*(LCU_WIDTH>>(depth)))+1; + // INTRAPREDICTION VARIABLES + pixel *recbase_y = 
&encoder->in.cur_pic->y_recdata[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; + pixel *recbase_u = &encoder->in.cur_pic->u_recdata[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + pixel *recbase_v = &encoder->in.cur_pic->v_recdata[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + int32_t recbase_stride = encoder->in.width; - int32_t base_stride = ti->base_stride; - int32_t recbase_stride = ti->recbase_stride; - int32_t pred_stride = ti->pred_stride; + pixel *base_y = &encoder->in.cur_pic->y_data[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; + pixel *base_u = &encoder->in.cur_pic->u_data[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + pixel *base_v = &encoder->in.cur_pic->v_data[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + int32_t base_stride = encoder->in.width; - int32_t recbase_offset[4] = { - 0, width, ti->recbase_stride * width, - ti->recbase_stride * width + width - }; - int32_t base_offset[4] = { - 0, width, ti->base_stride * width, - ti->base_stride * width + width - }; - int32_t pred_offset[4] = { - 0, width, ti->pred_stride * width, - ti->pred_stride * width + width - }; - - int32_t recbase_offset_c[4] = { - 0, width >> 1, (ti->recbase_stride >> 1) * (width >> 1), - (ti->recbase_stride >> 1) *(width >> 1) + (width >> 1) - }; - int32_t base_offset_c[4] = { - 0, width >> 1, (ti->base_stride >> 1) * (width >> 1), - (ti->base_stride >> 1) * (width >> 1) + (width >> 1) - }; - int32_t pred_offset_c[4] = { - 0, width >> 1, (ti->pred_stride >> 1) * (width >> 1), - (ti->pred_stride >> 1) * (width >> 1) + (width >> 1) - }; - - pixel *base_y = &ti->base[base_offset[ti->idx]]; - pixel *base_u = 
&ti->base_u[base_offset_c[ti->idx]]; - pixel *base_v = &ti->base_v[base_offset_c[ti->idx]]; - - pixel *recbase_y = &ti->recbase[recbase_offset[ti->idx]]; - pixel *recbase_u = &ti->recbase_u[recbase_offset_c[ti->idx]]; - pixel *recbase_v = &ti->recbase_v[recbase_offset_c[ti->idx]]; - - int16_t *pred_y = &ti->pred[pred_offset[ti->idx]]; - int16_t *pred_u = &ti->pred_u[pred_offset_c[ti->idx]]; - int16_t *pred_v = &ti->pred_v[pred_offset_c[ti->idx]]; - - int16_t *coeff_y = &ti->coeff[0][ti->idx * coeff_fourth]; - int16_t *coeff_u = &ti->coeff[1][ti->idx * coeff_fourth >> 1]; - int16_t *coeff_v = &ti->coeff[2][ti->idx * coeff_fourth >> 1]; + pixel *pred_y = &encoder->in.cur_pic->pred_y[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; + pixel *pred_u = &encoder->in.cur_pic->pred_u[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + pixel *pred_v = &encoder->in.cur_pic->pred_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + int32_t pred_stride = encoder->in.width; + + int16_t *coeff_y = &encoder->in.cur_pic->coeff_y[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; + int16_t *coeff_u = &encoder->in.cur_pic->coeff_u[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + int16_t *coeff_v = &encoder->in.cur_pic->coeff_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + int32_t coeff_stride = encoder->in.width; // Quant and transform here... 
int16_t block[LCU_WIDTH*LCU_WIDTH>>2]; @@ -1461,7 +1318,7 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, uint32_t ac_sum = 0; uint32_t ctx_idx; - uint32_t scan_idx_luma = SCAN_DIAG; + uint32_t scan_idx_luma = SCAN_DIAG; uint32_t scan_idx_chroma = SCAN_DIAG; uint8_t dir_mode; #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD @@ -1478,20 +1335,21 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, default: ctx_idx = 0; break; } - if(ti->block_type == CU_INTRA) + if(cur_cu->type == CU_INTRA) { //if multiple scans supported for transform size if (ctx_idx > 3 && ctx_idx < 6) { - scan_idx_luma = abs((int32_t) ti->intra_pred_mode - 26) < 5 ? 1 : (abs((int32_t)ti->intra_pred_mode - 10) < 5 ? 2 : 0); + scan_idx_luma = abs((int32_t) cur_cu->intra.mode - 26) < 5 ? 1 : (abs((int32_t)cur_cu->intra.mode - 10) < 5 ? 2 : 0); } - + // TODO : chroma intra prediction + cur_cu->intra.mode_chroma = 36; // Chroma scanmode ctx_idx++; - dir_mode = ti->intra_pred_mode_chroma; + dir_mode = cur_cu->intra.mode_chroma; if (dir_mode == 36) { // TODO: support NxN - dir_mode = ti->intra_pred_mode; + dir_mode = cur_cu->intra.mode; } if (ctx_idx > 4 && ctx_idx < 7) { // if multiple scans supported for transform size @@ -1499,51 +1357,52 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, } // Build reconstructed block to use in prediction with extrapolated borders - intra_build_reference_border(encoder->in.cur_pic, ti->x_ctb, ti->y_ctb, + intra_build_reference_border(encoder->in.cur_pic, x_cu, y_cu, (LCU_WIDTH >> (depth)) * 2 + 8, rec, (LCU_WIDTH >> (depth)) * 2 + 8, 0); intra_recon(rec_shift, (LCU_WIDTH >> (depth)) * 2 + 8, - ti->x_ctb * (LCU_WIDTH >> (MAX_DEPTH)), ti->y_ctb * (LCU_WIDTH >> (MAX_DEPTH)), - width, pred_y, pred_stride, ti->intra_pred_mode, 0); + x_cu * (LCU_WIDTH >> (MAX_DEPTH)), y_cu * (LCU_WIDTH >> (MAX_DEPTH)), + width, pred_y, pred_stride, cur_cu->intra.mode, 0); // Filter DC-prediction - if 
(ti->intra_pred_mode == 1 && width < 32) { + if (cur_cu->intra.mode == 1 && width < 32) { intra_dc_pred_filtering(rec_shift, (LCU_WIDTH >> (depth)) * 2 + 8, pred_y, width, LCU_WIDTH >> depth, LCU_WIDTH >> depth); } - - if (ti->intra_pred_mode_chroma != 36 - && ti->intra_pred_mode_chroma == ti->intra_pred_mode) { - ti->intra_pred_mode_chroma = 36; - } + + // TODO : chroma intra prediction + if (cur_cu->intra.mode_chroma != 36 + && cur_cu->intra.mode_chroma == cur_cu->intra.mode) { + cur_cu->intra.mode_chroma = 36; + } - intra_build_reference_border(encoder->in.cur_pic, ti->x_ctb, ti->y_ctb, + intra_build_reference_border(encoder->in.cur_pic, x_cu, y_cu, (LCU_WIDTH >> (depth + 1)) * 2 + 8, rec, (LCU_WIDTH >> (depth + 1)) * 2 + 8, 1); intra_recon(rec_shift_u, (LCU_WIDTH >> (depth + 1)) * 2 + 8, - ti->x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), - ti->y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), + x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), + y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), width >> 1, pred_u, pred_stride >> 1, - ti->intra_pred_mode_chroma != 36 ? ti->intra_pred_mode_chroma : ti->intra_pred_mode, + cur_cu->intra.mode_chroma != 36 ? cur_cu->intra.mode_chroma : cur_cu->intra.mode, 1); - intra_build_reference_border(encoder->in.cur_pic, ti->x_ctb, ti->y_ctb, + intra_build_reference_border(encoder->in.cur_pic, x_cu, y_cu, (LCU_WIDTH >> (depth + 1)) * 2 + 8, rec, (LCU_WIDTH >> (depth + 1)) * 2 + 8, 2); intra_recon(rec_shift_u, (LCU_WIDTH >> (depth + 1)) * 2 + 8, - ti->x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), - ti->y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), + x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), + y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), width >> 1, pred_v, pred_stride >> 1, - ti->intra_pred_mode_chroma != 36 ? ti->intra_pred_mode_chroma : ti->intra_pred_mode, + cur_cu->intra.mode_chroma != 36 ? 
cur_cu->intra.mode_chroma : cur_cu->intra.mode, 1); // This affects reconstruction, do after that - picture_set_block_coded(encoder->in.cur_pic, ti->x_ctb, ti->y_ctb, depth, 1); + picture_set_block_coded(encoder->in.cur_pic, x_cu, y_cu, depth, 1); } else { // Inter mode for(y = 0; y < LCU_WIDTH>>depth; y++) { for(x = 0; x < LCU_WIDTH>>depth; x++) { @@ -1582,21 +1441,21 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, // Transform and quant residual to coeffs transform2d(block,pre_quant_coeff,width,0); - quant(encoder, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, ti->block_type); + quant(encoder, pre_quant_coeff, coeff_y, width, width, &ac_sum, 0, scan_idx_luma, cur_cu->type); // Check for non-zero coeffs for (i = 0; i < width * width; i++) { if (coeff_y[i] != 0) { // Found one, we can break here - cb_y = 1; + cur_cu->coeff_y = 1; break; } } // if non-zero coeffs - if (cb_y) { + if (cur_cu->coeff_y) { // RECONSTRUCT for predictions - dequant(encoder, coeff_y, pre_quant_coeff, width, width, 0, ti->block_type); + dequant(encoder, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type); itransform2d(block,pre_quant_coeff,width,0); i = 0; @@ -1614,9 +1473,9 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, for (y = 0; y < LCU_WIDTH >> depth; y++) { for (x = 0; x < LCU_WIDTH >> depth; x++) { recbase_y[x + y * recbase_stride] = (uint8_t)CLIP(0, 255, pred_y[x + y * pred_stride]); - } } } + } if (encoder->in.video_format != FORMAT_400) { // Chroma U @@ -1633,12 +1492,12 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),65535); quant(encoder, pre_quant_coeff, coeff_u, width >> 1, width >> 1, &ac_sum, 2, - scan_idx_chroma, ti->block_type); + scan_idx_chroma, cur_cu->type); for (i = 0; i < width *width >> 2; i++) { if (coeff_u[i] != 0) { // Found one, we can break here - cb_u = 1; + cur_cu->coeff_u = 1; break; } } @@ 
-1657,19 +1516,19 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, transform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),65535); quant(encoder, pre_quant_coeff, coeff_v, width >> 1, width >> 1, &ac_sum, 3, - scan_idx_chroma, ti->block_type); + scan_idx_chroma, cur_cu->type); for (i = 0; i < width *width >> 2; i++) { if (coeff_v[i] != 0) { // Found one, we can break here - cb_v = 1; + cur_cu->coeff_v = 1; break; } } - if (cb_u) { + if (cur_cu->coeff_u) { // RECONSTRUCT for predictions - dequant(encoder, coeff_u, pre_quant_coeff, width >> 1, width >> 1, 2, ti->block_type); + dequant(encoder, coeff_u, pre_quant_coeff, width >> 1, width >> 1, 2, cur_cu->type); itransform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),65535); i = 0; @@ -1689,13 +1548,13 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, for (x = 0; x < LCU_WIDTH >> (depth + 1); x++) { recbase_u[x + y * (recbase_stride >> 1)] = (uint8_t)CLIP(0, 255, pred_u[x + y * (pred_stride >> 1)]); - } } } + } - if (cb_v) { + if (cur_cu->coeff_v) { // RECONSTRUCT for predictions - dequant(encoder, coeff_v, pre_quant_coeff, width >> 1, width >> 1, 3, ti->block_type); + dequant(encoder, coeff_v, pre_quant_coeff, width >> 1, width >> 1, 3, cur_cu->type); itransform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),65535); i = 0; @@ -1715,25 +1574,23 @@ void encode_transform_tree(encoder_control *encoder, transform_info *ti, for (x = 0; x < LCU_WIDTH >> (depth + 1); x++) { recbase_v[x + y * (recbase_stride >> 1)] = (uint8_t)CLIP(0, 255, pred_v[x + y * (pred_stride >> 1)]); - } } } } - - // Store coded block pattern - ti->cb[ti->idx] = cb_y | (cb_u << 1) | (cb_v << 2); - // END INTRAPREDICTION + } + return; } // end Residual Coding } -void encode_transform_coeff(encoder_control *encoder, transform_info *ti, +void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, int8_t depth, int8_t tr_depth) { + cu_info *cur_cu = 
&encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; int8_t width = LCU_WIDTH>>depth; - int8_t split = ((ti->split[ti->idx]&(1<tr_depth > depth||!depth); int8_t cb_y, cb_u, cb_v; int32_t coeff_fourth = ((LCU_WIDTH>>(depth))*(LCU_WIDTH>>(depth)))+1; @@ -1747,39 +1604,42 @@ void encode_transform_coeff(encoder_control *encoder, transform_info *ti, // Chroma data is also signaled BEFORE transform split // Chroma data is not signaled if it was set to 0 before split if (encoder->in.video_format != FORMAT_400) { + uint8_t offset = 1<<(MAX_DEPTH-1-depth); + + cu_info *cur_cu_idx_2 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; + cu_info *cur_cu_idx_3 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; + cu_info *cur_cu_idx_4 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; + // Non-zero chroma U Tcoeffs - int8_t cb_flag = (tr_depth == 0) ? ti->cb_top[1] : ((ti->cb[ti->idx] & 0x2) ? 1 - : 0); + int8_t cb_flag = (!split) ? cur_cu->coeff_u : (cur_cu->coeff_u | cur_cu_idx_2->coeff_u | cur_cu_idx_3->coeff_u | cur_cu_idx_4->coeff_u); cabac.ctx = &g_qt_cbf_model_chroma[tr_depth]; - if (tr_depth == 0 || ti->cb_top[1]) { + if (tr_depth == 0 /*|| ti->cb_top[1]*/) { CABAC_BIN(&cabac, cb_flag, "cbf_chroma_u"); } // Non-zero chroma V Tcoeffs // NOTE: Using the same ctx as before - cb_flag = (tr_depth == 0) ? ti->cb_top[2] : ((ti->cb[ti->idx] & 0x4) ? 1 : 0); + cb_flag = (!split) ? 
cur_cu->coeff_v : (cur_cu->coeff_v | cur_cu_idx_2->coeff_v | cur_cu_idx_3->coeff_v | cur_cu_idx_4->coeff_v); - if (tr_depth == 0 || ti->cb_top[2]) { + if (tr_depth == 0 /*|| ti->cb_top[2]*/) { CABAC_BIN(&cabac, cb_flag, "cbf_chroma_v"); } } if (split) { - ti->idx = 0; encode_transform_coeff(encoder, ti, depth + 1, tr_depth + 1); - ti->idx = 1; encode_transform_coeff(encoder, ti, depth + 1, tr_depth + 1); - ti->idx = 2; encode_transform_coeff(encoder, ti, depth + 1, tr_depth + 1); - ti->idx = 3; encode_transform_coeff(encoder, ti, depth + 1, tr_depth + 1); + uint8_t offset = 1<<(MAX_DEPTH-1-depth); + encode_transform_coeff(encoder, x_cu, y_cu, depth + 1, tr_depth + 1); + encode_transform_coeff(encoder, x_cu + offset, y_cu, depth + 1, tr_depth + 1); + encode_transform_coeff(encoder, x_cu, y_cu + offset, depth + 1, tr_depth + 1); + encode_transform_coeff(encoder, x_cu + offset, y_cu + offset, depth + 1, tr_depth + 1); return; } - cb_y = ti->cb[ti->idx] & 0x1; - cb_u = (ti->cb[ti->idx] & 0x2) ? 1 : 0; - cb_v = (ti->cb[ti->idx] & 0x4) ? 1 : 0; - if(ti->block_type == CU_INTRA || tr_depth || cb_u || cb_v) { + if(cur_cu->type == CU_INTRA || tr_depth || cb_u || cb_v) { // Non-zero luma Tcoeffs - cabac.ctx = &g_qt_cbf_model_luma[tr_depth ? 
0 : 1]; - CABAC_BIN(&cabac, cb_y, "cbf_luma"); + cabac.ctx = &g_qt_cbf_model_luma[!tr_depth]; + CABAC_BIN(&cabac, cur_cu->coeff_y, "cbf_luma"); } @@ -1802,8 +1662,8 @@ void encode_transform_coeff(encoder_control *encoder, transform_info *ti, // CoeffNxN // Residual Coding - if (cb_y) { - if (ti->block_type == CU_INTER) { + if (cur_cu->coeff_y) { + if (cur_cu->type == CU_INTER) { scan_idx = SCAN_DIAG; } else { // Luma (Intra) scanmode @@ -1818,18 +1678,18 @@ void encode_transform_coeff(encoder_control *encoder, transform_info *ti, encode_coeff_nxn(encoder, &ti->coeff[0][ti->idx * coeff_fourth], width, 0, scan_idx); } - if (cb_u || cb_v) { + if (cur_cu->coeff_u || cur_cu->coeff_v) { int8_t chroma_width = width >> 1; - if(ti->block_type == CU_INTER) { + if(cur_cu->type == CU_INTER) { scan_idx = SCAN_DIAG; } else { // Chroma scanmode ctx_idx++; - dir_mode = ti->intra_pred_mode_chroma; + dir_mode = cur_cu->intra.mode_chroma; if (dir_mode == 36) { // TODO: support NxN - dir_mode = ti->intra_pred_mode; + dir_mode = cur_cu->intra.mode; } scan_idx = SCAN_DIAG; @@ -1839,12 +1699,12 @@ void encode_transform_coeff(encoder_control *encoder, transform_info *ti, } } - if (cb_u) { + if (cur_cu->coeff_u) { encode_coeff_nxn(encoder, &ti->coeff[1][ti->idx * coeff_fourth >> 1], chroma_width, 2, scan_idx); } - if (cb_v) { + if (cur_cu->coeff_v) { encode_coeff_nxn(encoder, &ti->coeff[2][ti->idx * coeff_fourth >> 1], chroma_width, 2, scan_idx); } @@ -1852,7 +1712,7 @@ void encode_transform_coeff(encoder_control *encoder, transform_info *ti, } } -void encode_coeff_nxn(encoder_control *encoder, int16_t *coeff, uint8_t width, +void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t width, uint8_t type, int8_t scan_mode) { int c1 = 1; diff --git a/src/encoder.h b/src/encoder.h index 2d37686f..f88046ad 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -67,39 +67,6 @@ typedef struct int8_t tc_offset_div2; // \brief (deblocking)tc offset (div 2), range -6...6 } 
encoder_control; -typedef struct -{ - int8_t idx; - pixel *base; - pixel *base_u; - pixel *base_v; - - pixel *recbase; - pixel *recbase_u; - pixel *recbase_v; - - int16_t *pred; - int16_t *pred_u; - int16_t *pred_v; - - int32_t base_stride; - int32_t recbase_stride; - int32_t pred_stride; - - // TODO: unify luma+chroma arrays - int16_t *coeff[3]; - int8_t cb_top[3]; - int8_t cb[4]; - int8_t intra_pred_mode; - int8_t intra_pred_mode_chroma; - int32_t split[4]; - - int8_t block_type; - - int32_t x_ctb,y_ctb; - -} transform_info; - void init_tables(void); void init_encoder_control(encoder_control *control, bitstream *output); void init_encoder_input(encoder_input *input, FILE* inputfile, @@ -119,9 +86,9 @@ void encode_last_significant_xy(encoder_control *encoder, uint8_t lastpos_x, uint8_t type, uint8_t scan); void encode_coeff_nxn(encoder_control *encoder, int16_t *coeff, uint8_t width, uint8_t type, int8_t scan_mode); -void encode_transform_tree(encoder_control *encoder, transform_info *ti, +void encode_transform_tree(encoder_control *encoder, int32_t x_cu, int32_t y_cu, uint8_t depth); -void encode_transform_coeff(encoder_control *encoder, transform_info *ti, +void encode_transform_coeff(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int8_t tr_depth); extern int16_t g_lambda_cost[55]; diff --git a/src/global.h b/src/global.h index d54a322c..ced9716d 100644 --- a/src/global.h +++ b/src/global.h @@ -108,5 +108,6 @@ typedef int16_t coefficient; #endif #define FREE_POINTER(pointer) { free(pointer); pointer = NULL; } +#define MOVE_POINTER(dst_pointer,src_pointer) { dst_pointer = src_pointer; src_pointer = NULL; } #endif \ No newline at end of file diff --git a/src/picture.c b/src/picture.c index 1253a083..d88e56a9 100644 --- a/src/picture.c +++ b/src/picture.c @@ -244,7 +244,8 @@ picture *picture_init(int32_t width, int32_t height, memset(pic->cu_array[i], 0, sizeof(cu_info) * cu_array_size); } - pic->coeff = NULL; + pic->coeff_y = NULL; 
pic->coeff_u = NULL; pic->coeff_v = NULL; + pic->pred_y = NULL; pic->pred_u = NULL; pic->pred_v = NULL; return pic; } @@ -277,8 +278,13 @@ int picture_destroy(picture *pic) free(pic->cu_array); pic->cu_array = NULL; - free(pic->coeff); - pic->coeff = NULL; + FREE_POINTER(pic->coeff_y); + FREE_POINTER(pic->coeff_u); + FREE_POINTER(pic->coeff_v); + + FREE_POINTER(pic->pred_y); + FREE_POINTER(pic->pred_u); + FREE_POINTER(pic->pred_v); return 1; } @@ -414,7 +420,7 @@ unsigned satd_16bit_8x8_general(int16_t *piOrg, int32_t iStrideOrg, int16_t *piC } \ } \ return sum; \ - } + } // These macros define sadt_16bit_NxN for N = 8, 16, 32, 64 SATD_NXN(8, int16_t, 16bit) @@ -422,7 +428,7 @@ SATD_NXN(16, int16_t, 16bit) SATD_NXN(32, int16_t, 16bit) SATD_NXN(64, int16_t, 16bit) - + for (y = 0; y < 32; y += 8) { // Function macro for defining SAD calculating functions // for fixed size blocks. #define SAD_NXN(n, pixel_type, suffix) \ @@ -438,7 +444,7 @@ SATD_NXN(64, int16_t, 16bit) } \ } \ return sum; \ - } + } // These macros define sad_16bit_nxn functions for n = 4, 8, 16, 32, 64 // with function signatures of cost_16bit_nxn_func. @@ -469,9 +475,9 @@ cost_16bit_nxn_func get_satd_16bit_nxn_func(unsigned n) return &satd_16bit_64x64; default: return NULL; + } } -} - + /** * \brief Get a function that calculates SAD for NxN block. * @@ -480,7 +486,7 @@ cost_16bit_nxn_func get_satd_16bit_nxn_func(unsigned n) * \returns Pointer to cost_16bit_nxn_func. */ cost_16bit_nxn_func get_sad_16bit_nxn_func(unsigned n) -{ + { switch (n) { case 4: return &sad_16bit_4x4; @@ -494,7 +500,7 @@ cost_16bit_nxn_func get_sad_16bit_nxn_func(unsigned n) return &sad_16bit_64x64; default: return NULL; - } + } } /** @@ -510,7 +516,7 @@ unsigned satd_nxn_16bit(int16_t *block1, int16_t *block2, unsigned n) { cost_16bit_nxn_func sad_func = get_satd_16bit_nxn_func(n); return sad_func(block1, block2); -} + } /** * \brief Calculate SAD for NxN block of size N. 
@@ -532,10 +538,10 @@ unsigned sad_nxn_16bit(int16_t *block1, int16_t *block2, unsigned n) for (row = 0; row < n; row += n) { for (x = 0; x < n; ++x) { sum += abs(block1[row + x] - block2[row + x]); - } - } - return sum; } + } + return sum; +} } /** diff --git a/src/picture.h b/src/picture.h index 12088a89..1ad6eced 100644 --- a/src/picture.h +++ b/src/picture.h @@ -32,6 +32,7 @@ enum { REF_PIC_LIST_0 = 0, REF_PIC_LIST_1 = 1, REF_PIC_LIST_X = 100 }; typedef struct { int8_t mode; + int8_t mode_chroma; uint32_t cost; } cu_info_intra; @@ -57,9 +58,15 @@ typedef struct int8_t part_size; //!< \brief Currently only 2Nx2N, TODO: AMP/SMP/NxN parts int8_t tr_depth; //!< \brief transform depth int8_t coded; //!< \brief flag to indicate this block is coded and reconstructed + int8_t skipped; //!< \brief flag to indicate this block is skipped + int8_t merged; //!< \brief flag to indicate this block is merged int8_t coeff_y; //!< \brief is there coded coeffs Y int8_t coeff_u; //!< \brief is there coded coeffs U int8_t coeff_v; //!< \brief is there coded coeffs V + + int8_t coeff_top_y; //!< \brief is there coded coeffs Y in top level + int8_t coeff_top_u; //!< \brief is there coded coeffs U in top level + int8_t coeff_top_v; //!< \brief is there coded coeffs V in top level cu_info_intra intra; cu_info_inter inter; } cu_info; @@ -77,7 +84,13 @@ typedef struct pixel* u_recdata; //!< \brief Pointer to reconstructed U-data. pixel* v_recdata; //!< \brief Pointer to reconstructed V-data. - coefficient* coeff; //!< \brief coefficient pointer + pixel* pred_y; //!< \brief Pointer to predicted Y + pixel* pred_u; //!< \brief Pointer to predicted U + pixel* pred_v; //!< \brief Pointer to predicted V + + coefficient* coeff_y; //!< \brief coefficient pointer Y + coefficient* coeff_u; //!< \brief coefficient pointer U + coefficient* coeff_v; //!< \brief coefficient pointer V int32_t width; //!< \brief Luma pixel array width. int32_t height; //!< \brief Luma pixel array height. 
@@ -121,6 +134,7 @@ int picture_list_rem(picture_list *list, int n, int8_t destroy); typedef unsigned (*cost_16bit_nxn_func)(int16_t *block1, int16_t *block2); + cost_16bit_nxn_func get_satd_16bit_nxn_func(unsigned n); cost_16bit_nxn_func get_sad_16bit_nxn_func(unsigned n); From c44f0ff5406718e53ecaba3e5cdd0876c6fdf2ac Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 18 Oct 2013 14:23:21 +0300 Subject: [PATCH 07/19] Refactoring: all int16_t pixel info in intra to pixel typedef --- src/encoder.c | 91 +++++++++++++++++++++++++++++++++++++++------------ src/global.h | 2 +- src/intra.c | 49 ++++++++++++++------------- src/intra.h | 16 ++++----- src/picture.c | 27 ++++++++------- src/picture.h | 6 ++-- src/search.c | 15 ++++----- 7 files changed, 125 insertions(+), 81 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 507e7d4b..a3dd0b0a 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1090,15 +1090,15 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, uint32_t width = LCU_WIDTH>>depth; // INTRAPREDICTION VARIABLES - int16_t pred_y[LCU_WIDTH * LCU_WIDTH]; + pixel pred_y[LCU_WIDTH * LCU_WIDTH]; pixel *recbase_y = &encoder->in.cur_pic->y_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; pixel *recbase_u = &encoder->in.cur_pic->u_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; pixel *recbase_v = &encoder->in.cur_pic->v_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; // SEARCH BEST INTRA MODE (AGAIN) - int16_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - int16_t *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; + pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; + pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, (LCU_WIDTH >> (depth)) * 2 + 8, rec, (LCU_WIDTH >> (depth)) * 2 
+ 8, 0); @@ -1301,9 +1301,12 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, pixel *pred_v = &encoder->in.cur_pic->pred_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; int32_t pred_stride = encoder->in.width; - int16_t *coeff_y = &encoder->in.cur_pic->coeff_y[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; - int16_t *coeff_u = &encoder->in.cur_pic->coeff_u[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; - int16_t *coeff_v = &encoder->in.cur_pic->coeff_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + coefficient coeff_y[LCU_WIDTH*LCU_WIDTH]; + coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; + coefficient coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; + coefficient *orig_coeff_y = &encoder->in.cur_pic->coeff_y[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; + coefficient *orig_coeff_u = &encoder->in.cur_pic->coeff_u[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + coefficient *orig_coeff_v = &encoder->in.cur_pic->coeff_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; int32_t coeff_stride = encoder->in.width; // Quant and transform here... @@ -1312,9 +1315,9 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // INTRA PREDICTION // TODO: split to a function! 
- int16_t rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - int16_t *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; - int16_t *rec_shift_u = &rec[(LCU_WIDTH >> (depth + 1)) * 2 + 8 + 1]; + pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; + pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; + pixel *rec_shift_u = &rec[(LCU_WIDTH >> (depth + 1)) * 2 + 8 + 1]; uint32_t ac_sum = 0; uint32_t ctx_idx; @@ -1454,14 +1457,21 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // if non-zero coeffs if (cur_cu->coeff_y) { + i = 0; + for (y = 0; y < width; y++) { + for (x = 0; x < width; x++) { + orig_coeff_y[x + y * coeff_stride] = coeff_y[i]; + i++; + } + } // RECONSTRUCT for predictions dequant(encoder, coeff_y, pre_quant_coeff, width, width, 0, cur_cu->type); itransform2d(block,pre_quant_coeff,width,0); i = 0; - for (y = 0; y < LCU_WIDTH >> depth; y++) { - for (x = 0; x < LCU_WIDTH >> depth; x++) { + for (y = 0; y < width; y++) { + for (x = 0; x < width; x++) { int16_t val = block[i++] + pred_y[x + y * pred_stride]; //TODO: support 10+bits recbase_y[x + y * recbase_stride] = (uint8_t)CLIP(0, 255, val); @@ -1470,8 +1480,8 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // END RECONTRUCTION } else { // without coeffs, we only use the prediction - for (y = 0; y < LCU_WIDTH >> depth; y++) { - for (x = 0; x < LCU_WIDTH >> depth; x++) { + for (y = 0; y < width; y++) { + for (x = 0; x < width; x++) { recbase_y[x + y * recbase_stride] = (uint8_t)CLIP(0, 255, pred_y[x + y * pred_stride]); } } @@ -1526,7 +1536,18 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, } } - if (cur_cu->coeff_u) { + if (cur_cu->coeff_u || cur_cu->coeff_v) { + i = 0; + for (y = 0; y < width>>1; y++) { + for (x = 0; x < width>>1; x++) { + orig_coeff_u[x + y * (coeff_stride>>1)] = coeff_u[i]; + orig_coeff_v[x + y * (coeff_stride>>1)] = coeff_v[i]; + i++; + } + } + } + + if (cur_cu->coeff_u) { // RECONSTRUCT 
for predictions dequant(encoder, coeff_u, pre_quant_coeff, width >> 1, width >> 1, 2, cur_cu->type); itransform2d(block,pre_quant_coeff,LCU_WIDTH>>(depth+1),65535); @@ -1590,8 +1611,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, { cu_info *cur_cu = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; int8_t width = LCU_WIDTH>>depth; - int8_t split = (cur_cu->tr_depth > depth||!depth); - int8_t cb_y, cb_u, cb_v; + int8_t split = 0;//(cur_cu->tr_depth > depth||!depth); int32_t coeff_fourth = ((LCU_WIDTH>>(depth))*(LCU_WIDTH>>(depth)))+1; if (depth != 0 && depth != MAX_DEPTH + 1) { @@ -1636,7 +1656,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, return; } - if(cur_cu->type == CU_INTRA || tr_depth || cb_u || cb_v) { + if(cur_cu->type == CU_INTRA || tr_depth || cur_cu->coeff_u || cur_cu->coeff_v) { // Non-zero luma Tcoeffs cabac.ctx = &g_qt_cbf_model_luma[!tr_depth]; CABAC_BIN(&cabac, cur_cu->coeff_y, "cbf_luma"); @@ -1644,10 +1664,39 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, { + coefficient coeff_y[LCU_WIDTH*LCU_WIDTH]; + coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; + coefficient coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; + int32_t coeff_stride = encoder->in.width; + uint32_t ctx_idx; uint32_t scan_idx = SCAN_DIAG; uint32_t dir_mode; + if (cur_cu->coeff_y) { + int x,y; + coefficient *orig_pos = &encoder->in.cur_pic->coeff_y[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; + for (y = 0; y < width; y++) { + for (x = 0; x < width; x++) { + coeff_y[x+y*width] = orig_pos[x]; + } + orig_pos += coeff_stride; + } + } + if (cur_cu->coeff_u || cur_cu->coeff_v) { + int x,y; + coefficient *orig_pos_u = &encoder->in.cur_pic->coeff_u[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + coefficient *orig_pos_v = 
&encoder->in.cur_pic->coeff_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + for (y = 0; y < (width>>1); y++) { + for (x = 0; x < (width>>1); x++) { + coeff_u[x+y*(width>>1)] = orig_pos_u[x]; + coeff_v[x+y*(width>>1)] = orig_pos_v[x]; + } + orig_pos_u += coeff_stride>>1; + orig_pos_v += coeff_stride>>1; + } + } + switch (width) { case 2: ctx_idx = 6; break; case 4: ctx_idx = 5; break; @@ -1667,7 +1716,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, scan_idx = SCAN_DIAG; } else { // Luma (Intra) scanmode - dir_mode = ti->intra_pred_mode; + dir_mode = cur_cu->intra.mode; //if multiple scans supported for transform size if (ctx_idx > 3 && ctx_idx < 6) { @@ -1675,7 +1724,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, } } - encode_coeff_nxn(encoder, &ti->coeff[0][ti->idx * coeff_fourth], width, 0, scan_idx); + encode_coeff_nxn(encoder, coeff_y, width, 0, scan_idx); } if (cur_cu->coeff_u || cur_cu->coeff_v) { @@ -1700,12 +1749,12 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, } if (cur_cu->coeff_u) { - encode_coeff_nxn(encoder, &ti->coeff[1][ti->idx * coeff_fourth >> 1], + encode_coeff_nxn(encoder, coeff_u, chroma_width, 2, scan_idx); } if (cur_cu->coeff_v) { - encode_coeff_nxn(encoder, &ti->coeff[2][ti->idx * coeff_fourth >> 1], + encode_coeff_nxn(encoder, coeff_v, chroma_width, 2, scan_idx); } } diff --git a/src/global.h b/src/global.h index ced9716d..3c37ebf7 100644 --- a/src/global.h +++ b/src/global.h @@ -45,7 +45,7 @@ typedef int16_t coefficient; #define LCU_WIDTH 64 /*!< Largest Coding Unit (IT'S 64x64, DO NOT TOUCH!) 
*/ #define MAX_INTER_SEARCH_DEPTH 3 -#define MIN_INTER_SEARCH_DEPTH 0 +#define MIN_INTER_SEARCH_DEPTH 1 #define MAX_INTRA_SEARCH_DEPTH 3 /*!< Max search depth -> min block size (3 == 8x8) */ #define MIN_INTRA_SEARCH_DEPTH 1 /*!< Min search depth -> max block size (0 == 64x64) */ diff --git a/src/intra.c b/src/intra.c index f2953227..fc2372d8 100644 --- a/src/intra.c +++ b/src/intra.c @@ -77,7 +77,7 @@ int8_t intra_get_block_mode(picture *pic, uint32_t x_cu, uint32_t y_cu, uint8_t * \param width block width * \returns DC prediction */ -int16_t intra_get_dc_pred(int16_t *pic, uint16_t picwidth, uint32_t xpos, uint32_t ypos, uint8_t width) +int16_t intra_get_dc_pred(pixel *pic, uint16_t picwidth, uint32_t xpos, uint32_t ypos, uint8_t width) { int32_t i, sum = 0; @@ -155,11 +155,11 @@ int8_t intra_get_dir_luma_predictor(picture* pic, uint32_t x_cu, uint32_t y_cu, * \param preds output buffer for 3 predictions * \returns (predictions are found)?1:0 */ -void intra_filter(int16_t *ref, int32_t stride,int32_t width, int8_t mode) +void intra_filter(pixel *ref, int32_t stride,int32_t width, int8_t mode) { #define FWIDTH (LCU_WIDTH*2+1) - int16_t filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples - int16_t *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) + pixel filtered[FWIDTH * FWIDTH]; //!< temporary buffer for filtered samples + pixel *filteredShift = &filtered[FWIDTH+1]; //!< pointer to temporary buffer with offset (1,1) int x,y; if (!mode) { @@ -213,8 +213,8 @@ void intra_filter(int16_t *ref, int32_t stride,int32_t width, int8_t mode) This function derives the prediction samples for planar mode (intra coding). 
*/ -int16_t intra_prediction(pixel *orig, int32_t origstride, int16_t *rec, int32_t recstride, uint32_t xpos, - uint32_t ypos, uint32_t width, int16_t *dst, int32_t dststride, uint32_t *sad_out) +int16_t intra_prediction(pixel *orig, int32_t origstride, pixel *rec, int32_t recstride, uint32_t xpos, + uint32_t ypos, uint32_t width, pixel *dst, int32_t dststride, uint32_t *sad_out) { uint32_t best_sad = 0xffffffff; uint32_t sad = 0; @@ -225,11 +225,11 @@ int16_t intra_prediction(pixel *orig, int32_t origstride, int16_t *rec, int32_t // Temporary block arrays // TODO: alloc with alignment - int16_t pred[LCU_WIDTH * LCU_WIDTH + 1]; - int16_t orig_block[LCU_WIDTH * LCU_WIDTH + 1]; - int16_t rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; + pixel pred[LCU_WIDTH * LCU_WIDTH + 1]; + pixel orig_block[LCU_WIDTH * LCU_WIDTH + 1]; + pixel rec_filtered_temp[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8) + 1]; - int16_t* rec_filtered = &rec_filtered_temp[recstride + 1]; //!< pointer to rec_filtered_temp with offset of (1,1) + pixel* rec_filtered = &rec_filtered_temp[recstride + 1]; //!< pointer to rec_filtered_temp with offset of (1,1) pixel *orig_shift = &orig[xpos + ypos*origstride]; //!< pointer to orig with offset of (1,1) int8_t filter = (width<32); // TODO: chroma support @@ -318,10 +318,10 @@ int16_t intra_prediction(pixel *orig, int32_t origstride, int16_t *rec, int32_t * \param chroma chroma-block flag */ -void intra_recon(int16_t* rec,uint32_t recstride, uint32_t xpos, uint32_t ypos,uint32_t width, int16_t* dst,int32_t dststride, int8_t mode, int8_t chroma) +void intra_recon(pixel* rec,uint32_t recstride, uint32_t xpos, uint32_t ypos,uint32_t width, pixel* dst,int32_t dststride, int8_t mode, int8_t chroma) { int32_t x,y,i; - int16_t pred[LCU_WIDTH * LCU_WIDTH]; + pixel pred[LCU_WIDTH * LCU_WIDTH]; int8_t filter = !chroma&&(width<32); #define COPY_PRED_TO_DST() for(y = 0; y < (int32_t)width; y++) { for(x = 0; x < (int32_t)width; x++) { dst[x+y*dststride] = 
pred[x+y*width]; } } @@ -362,12 +362,12 @@ void intra_recon(int16_t* rec,uint32_t recstride, uint32_t xpos, uint32_t ypos,u * \param chroma signaling if chroma is used, 0 = luma, 1 = U and 2 = V * */ -void intra_build_reference_border(picture *pic, int32_t x_cu, int32_t y_cu,int16_t outwidth, int16_t *dst, int32_t dststride, int8_t chroma) +void intra_build_reference_border(picture *pic, int32_t x_cu, int32_t y_cu,int16_t outwidth, pixel *dst, int32_t dststride, int8_t chroma) { int32_t left_column; //!< left column iterator - int16_t val; //!< variable to store extrapolated value + pixel val; //!< variable to store extrapolated value int32_t i; //!< index iterator - int16_t dc_val = 1<<(g_bitdepth-1); //!< default predictor value + pixel dc_val = 1<<(g_bitdepth-1); //!< default predictor value int32_t top_row; //!< top row iterator int32_t src_width = (pic->width>>(chroma?1:0)); //!< source picture width int32_t src_height = (pic->height>>(chroma?1:0));//!< source picture height @@ -443,7 +443,7 @@ const int32_t inv_ang_table[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; * \brief this functions constructs the angular intra prediction from border samples * */ -void intra_get_angular_pred(int16_t* src, int32_t src_stride, int16_t* dst, int32_t dst_stride, int32_t width, +void intra_get_angular_pred(pixel* src, int32_t src_stride, pixel* dst, int32_t dst_stride, int32_t width, int32_t height, int32_t dir_mode, int8_t left_avail,int8_t top_avail, int8_t filter) { int32_t k,l; @@ -460,10 +460,10 @@ void intra_get_angular_pred(int16_t* src, int32_t src_stride, int16_t* dst, int3 int32_t inv_angle = inv_ang_table[abs_ang]; // Do angular predictions - int16_t *ref_main; - int16_t *ref_side; - int16_t ref_above[2 * LCU_WIDTH + 1]; - int16_t ref_left[2 * LCU_WIDTH + 1]; + pixel *ref_main; + pixel *ref_side; + pixel ref_above[2 * LCU_WIDTH + 1]; + pixel ref_left[2 * LCU_WIDTH + 1]; abs_ang = ang_table[abs_ang]; intra_pred_angle = sign_ang * abs_ang; @@ -522,7 +522,7 @@ 
void intra_get_angular_pred(int16_t* src, int32_t src_stride, int16_t* dst, int3 // Do linear filtering for (l = 0; l < blk_size; l++) { ref_main_index = l + delta_int + 1; - dst[k * dst_stride + l] = (int16_t) ( (minus_delta_fract * ref_main[ref_main_index] + dst[k * dst_stride + l] = (pixel) ( (minus_delta_fract * ref_main[ref_main_index] + delta_fract * ref_main[ref_main_index + 1] + 16) >> 5); } } else { @@ -536,7 +536,7 @@ void intra_get_angular_pred(int16_t* src, int32_t src_stride, int16_t* dst, int3 // Flip the block if this is the horizontal mode if (mode_hor) { - int16_t tmp; + pixel tmp; for (k=0;kwidth >> (chroma ? 1 : 0)); // source picture width int32_t src_height = (pic->height >> (chroma ? 1 : 0)); // source picture height @@ -325,12 +325,9 @@ void search_tree(encoder_control *encoder, uint32_t width = LCU_WIDTH >> depth; // INTRAPREDICTION - int16_t pred[LCU_WIDTH * LCU_WIDTH + 1]; - int16_t rec[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8)]; - int16_t *recShift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; - - //int16_t *pred = (int16_t*)malloc(LCU_WIDTH*LCU_WIDTH*sizeof(int16_t)); - //int16_t *rec = (int16_t*)malloc((LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)*sizeof(int16_t)); + pixel pred[LCU_WIDTH * LCU_WIDTH + 1]; + pixel rec[(LCU_WIDTH * 2 + 8) * (LCU_WIDTH * 2 + 8)]; + pixel *recShift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; // Build reconstructed block to use in prediction with extrapolated borders search_buildReferenceBorder(encoder->in.cur_pic, x_ctb, y_ctb, From 927155de2b8fae4997b3ce245c7993c9fdff238d Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 18 Oct 2013 16:23:15 +0300 Subject: [PATCH 08/19] Set correct block residual bit -> fixes deblocking --- src/encoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/encoder.c b/src/encoder.c index a3dd0b0a..185265de 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1457,6 +1457,8 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // if non-zero coeffs if 
(cur_cu->coeff_y) { + + picture_set_block_residual(encoder->in.cur_pic,x_cu,y_cu,depth,1); i = 0; for (y = 0; y < width; y++) { for (x = 0; x < width; x++) { From bc9ddb64e57a74957d2c26e76b8e703a360a91d8 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 18 Oct 2013 16:54:11 +0300 Subject: [PATCH 09/19] Fixed luma reconstruction overflow/underflow --- src/encoder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 185265de..2b51323a 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1474,9 +1474,9 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, for (y = 0; y < width; y++) { for (x = 0; x < width; x++) { - int16_t val = block[i++] + pred_y[x + y * pred_stride]; + int val = block[i++] + pred_y[x + y * pred_stride]; //TODO: support 10+bits - recbase_y[x + y * recbase_stride] = (uint8_t)CLIP(0, 255, val); + recbase_y[x + y * recbase_stride] = (pixel)CLIP(0, 255, val); } } // END RECONTRUCTION @@ -1484,7 +1484,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // without coeffs, we only use the prediction for (y = 0; y < width; y++) { for (x = 0; x < width; x++) { - recbase_y[x + y * recbase_stride] = (uint8_t)CLIP(0, 255, pred_y[x + y * pred_stride]); + recbase_y[x + y * recbase_stride] = (pixel)CLIP(0, 255, pred_y[x + y * pred_stride]); } } } From 2efcc1267882d1ce44e66b3f4db0da00dd793ccf Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Fri, 18 Oct 2013 17:42:16 +0300 Subject: [PATCH 10/19] Fixed DC-filtering stride --- src/encoder.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 2b51323a..125019b8 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1369,7 +1369,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // Filter DC-prediction if (cur_cu->intra.mode == 1 && width < 32) { intra_dc_pred_filtering(rec_shift, (LCU_WIDTH >> (depth)) 
* 2 + 8, pred_y, - width, LCU_WIDTH >> depth, LCU_WIDTH >> depth); + pred_stride, LCU_WIDTH >> depth, LCU_WIDTH >> depth); } // TODO : chroma intra prediction @@ -1628,12 +1628,12 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, if (encoder->in.video_format != FORMAT_400) { uint8_t offset = 1<<(MAX_DEPTH-1-depth); - cu_info *cur_cu_idx_2 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; - cu_info *cur_cu_idx_3 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; - cu_info *cur_cu_idx_4 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; + //cu_info *cur_cu_idx_2 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; + //cu_info *cur_cu_idx_3 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; + //cu_info *cur_cu_idx_4 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; // Non-zero chroma U Tcoeffs - int8_t cb_flag = (!split) ? cur_cu->coeff_u : (cur_cu->coeff_u | cur_cu_idx_2->coeff_u | cur_cu_idx_3->coeff_u | cur_cu_idx_4->coeff_u); + int8_t cb_flag = (!split) ? cur_cu->coeff_u : cur_cu->coeff_u;//(cur_cu->coeff_u | cur_cu_idx_2->coeff_u | cur_cu_idx_3->coeff_u | cur_cu_idx_4->coeff_u); cabac.ctx = &g_qt_cbf_model_chroma[tr_depth]; if (tr_depth == 0 /*|| ti->cb_top[1]*/) { @@ -1642,7 +1642,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // Non-zero chroma V Tcoeffs // NOTE: Using the same ctx as before - cb_flag = (!split) ? cur_cu->coeff_v : (cur_cu->coeff_v | cur_cu_idx_2->coeff_v | cur_cu_idx_3->coeff_v | cur_cu_idx_4->coeff_v); + cb_flag = (!split) ? 
cur_cu->coeff_v : cur_cu->coeff_v;//(cur_cu->coeff_v | cur_cu_idx_2->coeff_v | cur_cu_idx_3->coeff_v | cur_cu_idx_4->coeff_v); if (tr_depth == 0 /*|| ti->cb_top[2]*/) { CABAC_BIN(&cabac, cb_flag, "cbf_chroma_v"); From bcb900371f3f608799c16cd5d1fd36d815d1d3a2 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Tue, 22 Oct 2013 12:09:18 +0300 Subject: [PATCH 11/19] Added top_coeff array to cu_info and implemented derivation logic --- src/encoder.c | 58 ++++++++++++++++++++++++++++----------------------- src/picture.h | 6 +++--- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 125019b8..97e90584 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -840,8 +840,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, } if (!border || (border_split_x && border_split_y)) { encode_coding_tree(encoder, x_ctb + change, y_ctb + change, depth + 1); - } - + } return; } } @@ -1057,21 +1056,19 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, inter_recon(encoder->ref->pics[0], x_ctb * CU_MIN_SIZE_PIXELS, y_ctb * CU_MIN_SIZE_PIXELS, LCU_WIDTH >> depth, cur_cu->inter.mv, encoder->in.cur_pic); - // Mark this block as "coded" (can be used for predictions..) 
picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); - encode_transform_tree(encoder,x_ctb, y_ctb, depth); // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual if (!cur_cu->merged) { cabac.ctx = &g_cu_qt_root_cbf_model; - CABAC_BIN(&cabac, cur_cu->coeff_y | cur_cu->coeff_u | cur_cu->coeff_v, "rqt_root_cbf"); + CABAC_BIN(&cabac, cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth], "rqt_root_cbf"); } // Code (possible) coeffs to bitstream - if(cur_cu->coeff_y | cur_cu->coeff_u | cur_cu->coeff_v) { + if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) { encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0); } @@ -1277,10 +1274,23 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // Split transform and increase depth if (depth == 0 || cur_cu->tr_depth > depth) { uint8_t offset = 1<<(MAX_DEPTH-1-depth); + cu_info *cu_a = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; + cu_info *cu_b = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; + cu_info *cu_c = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; encode_transform_tree(encoder, x_cu, y_cu, depth+1); encode_transform_tree(encoder, x_cu + offset, y_cu, depth+1); encode_transform_tree(encoder, x_cu, y_cu + offset, depth+1); encode_transform_tree(encoder, x_cu + offset, y_cu + offset, depth+1); + + // Derive coded coeff flags from the next depth + cur_cu->coeff_top_y[depth] = cur_cu->coeff_top_y[depth+1] | cu_a->coeff_top_y[depth+1] | cu_b->coeff_top_y[depth+1] + | cu_c->coeff_top_y[depth+1]; + cur_cu->coeff_top_u[depth] = cur_cu->coeff_top_u[depth+1] | cu_a->coeff_top_u[depth+1] | cu_b->coeff_top_u[depth+1] + | cu_c->coeff_top_u[depth+1]; + 
cur_cu->coeff_top_v[depth] = cur_cu->coeff_top_v[depth+1] | cu_a->coeff_top_v[depth+1] | cu_b->coeff_top_v[depth+1] + | cu_c->coeff_top_v[depth+1]; + + return; } @@ -1301,7 +1311,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, pixel *pred_v = &encoder->in.cur_pic->pred_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; int32_t pred_stride = encoder->in.width; - coefficient coeff_y[LCU_WIDTH*LCU_WIDTH]; + coefficient coeff_y[LCU_WIDTH*LCU_WIDTH<<2]; coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; coefficient coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; coefficient *orig_coeff_y = &encoder->in.cur_pic->coeff_y[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; @@ -1450,7 +1460,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, for (i = 0; i < width * width; i++) { if (coeff_y[i] != 0) { // Found one, we can break here - cur_cu->coeff_y = 1; + cur_cu->coeff_top_y[depth] = cur_cu->coeff_y = 1; break; } } @@ -1509,7 +1519,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, for (i = 0; i < width *width >> 2; i++) { if (coeff_u[i] != 0) { // Found one, we can break here - cur_cu->coeff_u = 1; + cur_cu->coeff_top_u[depth] = cur_cu->coeff_u = 1; break; } } @@ -1533,7 +1543,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, for (i = 0; i < width *width >> 2; i++) { if (coeff_v[i] != 0) { // Found one, we can break here - cur_cu->coeff_v = 1; + cur_cu->coeff_top_v[depth] = cur_cu->coeff_v = 1; break; } } @@ -1628,23 +1638,19 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, if (encoder->in.video_format != FORMAT_400) { uint8_t offset = 1<<(MAX_DEPTH-1-depth); - //cu_info *cur_cu_idx_2 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; - //cu_info *cur_cu_idx_3 = 
&encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; - //cu_info *cur_cu_idx_4 = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; - // Non-zero chroma U Tcoeffs - int8_t cb_flag = (!split) ? cur_cu->coeff_u : cur_cu->coeff_u;//(cur_cu->coeff_u | cur_cu_idx_2->coeff_u | cur_cu_idx_3->coeff_u | cur_cu_idx_4->coeff_u); + int8_t cb_flag = (!split) ? cur_cu->coeff_u : cur_cu->coeff_top_u[depth]; cabac.ctx = &g_qt_cbf_model_chroma[tr_depth]; - if (tr_depth == 0 /*|| ti->cb_top[1]*/) { + if (tr_depth == 0 || cur_cu->coeff_top_u[depth]) { CABAC_BIN(&cabac, cb_flag, "cbf_chroma_u"); } // Non-zero chroma V Tcoeffs // NOTE: Using the same ctx as before - cb_flag = (!split) ? cur_cu->coeff_v : cur_cu->coeff_v;//(cur_cu->coeff_v | cur_cu_idx_2->coeff_v | cur_cu_idx_3->coeff_v | cur_cu_idx_4->coeff_v); + cb_flag = (!split) ? cur_cu->coeff_v : cur_cu->coeff_top_v[depth]; - if (tr_depth == 0 /*|| ti->cb_top[2]*/) { + if (tr_depth == 0 || cur_cu->coeff_top_v[depth]) { CABAC_BIN(&cabac, cb_flag, "cbf_chroma_v"); } } @@ -1666,7 +1672,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, { - coefficient coeff_y[LCU_WIDTH*LCU_WIDTH]; + coefficient coeff_y[LCU_WIDTH*LCU_WIDTH+1]; coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; coefficient coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; int32_t coeff_stride = encoder->in.width; @@ -1800,7 +1806,7 @@ void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t widt if (coeff[i] != 0) { num_nonzero++; } - } + } scan_cg = g_sig_last_scan[scan_mode][log2_block_size > 3 ? log2_block_size - 3 : 0]; @@ -1820,7 +1826,7 @@ void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t widt if (coeff[pos_last] != 0) { sig_coeffgroup_flag[(num_blk_side * (POSY >> shift) + (POSX >> shift))] = 1; - } + } num_nonzero -= (coeff[pos_last] != 0) ? 
1 : 0; #undef POSY @@ -1885,7 +1891,7 @@ void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t widt log2_block_size, width, type); cabac.ctx = &baseCtx[ctx_sig]; CABAC_BIN(&cabac, sig, "significant_coeff_flag"); - } + } if (sig) { abs_coeff[num_non_zero] = abs(coeff[blk_pos]); @@ -1894,14 +1900,14 @@ void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t widt if (last_nz_pos_in_cg == -1) { last_nz_pos_in_cg = scan_pos_sig; - } + } first_nz_pos_in_cg = scan_pos_sig; - } } + } } else { scan_pos_sig = sub_pos - 1; - } + } if (num_non_zero > 0) { int8_t sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= @@ -1912,7 +1918,7 @@ void encode_coeff_nxn(encoder_control *encoder, coefficient *coeff, uint8_t widt if (c1 == 0) { ctx_set++; - } + } c1 = 1; diff --git a/src/picture.h b/src/picture.h index ec892683..2ba93b2f 100644 --- a/src/picture.h +++ b/src/picture.h @@ -64,9 +64,9 @@ typedef struct int8_t coeff_u; //!< \brief is there coded coeffs U int8_t coeff_v; //!< \brief is there coded coeffs V - int8_t coeff_top_y; //!< \brief is there coded coeffs Y in top level - int8_t coeff_top_u; //!< \brief is there coded coeffs U in top level - int8_t coeff_top_v; //!< \brief is there coded coeffs V in top level + int8_t coeff_top_y[MAX_DEPTH+1]; //!< \brief is there coded coeffs Y in top level + int8_t coeff_top_u[MAX_DEPTH+1]; //!< \brief is there coded coeffs U in top level + int8_t coeff_top_v[MAX_DEPTH+1]; //!< \brief is there coded coeffs V in top level cu_info_intra intra; cu_info_inter inter; } cu_info; From 4cec2963ace35367463d305b4e937190b985a28b Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Tue, 22 Oct 2013 12:33:11 +0300 Subject: [PATCH 12/19] Fixed transform splitting to allow 64x64 inter blocks --- src/encoder.c | 19 ++++++++++++++----- src/global.h | 2 +- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 97e90584..1a0a7d9a 100644 --- a/src/encoder.c +++ 
b/src/encoder.c @@ -1623,7 +1623,7 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, { cu_info *cur_cu = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; int8_t width = LCU_WIDTH>>depth; - int8_t split = 0;//(cur_cu->tr_depth > depth||!depth); + int8_t split = (cur_cu->tr_depth > depth||!depth); int32_t coeff_fourth = ((LCU_WIDTH>>(depth))*(LCU_WIDTH>>(depth)))+1; if (depth != 0 && depth != MAX_DEPTH + 1) { @@ -1639,27 +1639,36 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, uint8_t offset = 1<<(MAX_DEPTH-1-depth); // Non-zero chroma U Tcoeffs - int8_t cb_flag = (!split) ? cur_cu->coeff_u : cur_cu->coeff_top_u[depth]; + int8_t cb_flag = !split ? cur_cu->coeff_u : cur_cu->coeff_top_u[depth]; cabac.ctx = &g_qt_cbf_model_chroma[tr_depth]; - if (tr_depth == 0 || cur_cu->coeff_top_u[depth]) { + if (tr_depth == 0 || cur_cu->coeff_top_u[depth-1]) { CABAC_BIN(&cabac, cb_flag, "cbf_chroma_u"); } // Non-zero chroma V Tcoeffs // NOTE: Using the same ctx as before - cb_flag = (!split) ? cur_cu->coeff_v : cur_cu->coeff_top_v[depth]; + cb_flag = !split ? 
cur_cu->coeff_v : cur_cu->coeff_top_v[depth]; - if (tr_depth == 0 || cur_cu->coeff_top_v[depth]) { + if (tr_depth == 0 || cur_cu->coeff_top_v[depth-1]) { CABAC_BIN(&cabac, cb_flag, "cbf_chroma_v"); } } if (split) { uint8_t offset = 1<<(MAX_DEPTH-1-depth); + cu_info *cu_a = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; + cu_info *cu_b = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; + cu_info *cu_c = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; encode_transform_coeff(encoder, x_cu, y_cu, depth + 1, tr_depth + 1); + cu_a->coeff_top_y[depth] = cur_cu->coeff_top_y[depth]; cu_a->coeff_top_u[depth] = cur_cu->coeff_top_u[depth]; + cu_a->coeff_top_v[depth] = cur_cu->coeff_top_v[depth]; encode_transform_coeff(encoder, x_cu + offset, y_cu, depth + 1, tr_depth + 1); + cu_b->coeff_top_y[depth] = cur_cu->coeff_top_y[depth]; cu_b->coeff_top_u[depth] = cur_cu->coeff_top_u[depth]; + cu_b->coeff_top_v[depth] = cur_cu->coeff_top_v[depth]; encode_transform_coeff(encoder, x_cu, y_cu + offset, depth + 1, tr_depth + 1); + cu_c->coeff_top_y[depth] = cur_cu->coeff_top_y[depth]; cu_c->coeff_top_u[depth] = cur_cu->coeff_top_u[depth]; + cu_c->coeff_top_v[depth] = cur_cu->coeff_top_v[depth]; encode_transform_coeff(encoder, x_cu + offset, y_cu + offset, depth + 1, tr_depth + 1); return; } diff --git a/src/global.h b/src/global.h index 3c37ebf7..ced9716d 100644 --- a/src/global.h +++ b/src/global.h @@ -45,7 +45,7 @@ typedef int16_t coefficient; #define LCU_WIDTH 64 /*!< Largest Coding Unit (IT'S 64x64, DO NOT TOUCH!) 
*/ #define MAX_INTER_SEARCH_DEPTH 3 -#define MIN_INTER_SEARCH_DEPTH 1 +#define MIN_INTER_SEARCH_DEPTH 0 #define MAX_INTRA_SEARCH_DEPTH 3 /*!< Max search depth -> min block size (3 == 8x8) */ #define MIN_INTRA_SEARCH_DEPTH 1 /*!< Min search depth -> max block size (0 == 64x64) */ From 0cce17453ca7f5e506f24fa812f026b66d9ef9a7 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Tue, 22 Oct 2013 13:04:58 +0300 Subject: [PATCH 13/19] Simplified chroma-coeff-coded-flag derivation on transform split --- src/encoder.c | 33 ++++++++++++++------------------- src/encoder.h | 2 +- 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 1a0a7d9a..d95b603e 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -1069,7 +1069,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // Code (possible) coeffs to bitstream if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) { - encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0); + encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0, 0, 0); } @@ -1206,7 +1206,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // Coeff // Transform tree encode_transform_tree(encoder, x_ctb, y_ctb, depth); - encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0); + encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0, 0, 0); // end Transform tree // end Coeff @@ -1619,12 +1619,15 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, } void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, - int8_t depth, int8_t tr_depth) + int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v) { cu_info *cur_cu = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; int8_t width = LCU_WIDTH>>depth; int8_t split = (cur_cu->tr_depth > depth||!depth); int32_t coeff_fourth = ((LCU_WIDTH>>(depth))*(LCU_WIDTH>>(depth)))+1; + + int8_t 
cb_flag_u = !split ? cur_cu->coeff_u : cur_cu->coeff_top_u[depth]; + int8_t cb_flag_v = !split ? cur_cu->coeff_v : cur_cu->coeff_top_v[depth]; if (depth != 0 && depth != MAX_DEPTH + 1) { cabac.ctx = &g_trans_subdiv_model[5 - ((g_convert_to_bit[LCU_WIDTH] + 2) - @@ -1639,19 +1642,17 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, uint8_t offset = 1<<(MAX_DEPTH-1-depth); // Non-zero chroma U Tcoeffs - int8_t cb_flag = !split ? cur_cu->coeff_u : cur_cu->coeff_top_u[depth]; cabac.ctx = &g_qt_cbf_model_chroma[tr_depth]; - if (tr_depth == 0 || cur_cu->coeff_top_u[depth-1]) { - CABAC_BIN(&cabac, cb_flag, "cbf_chroma_u"); + if (tr_depth == 0 || parent_coeff_u) { + CABAC_BIN(&cabac, cb_flag_u, "cbf_chroma_u"); } // Non-zero chroma V Tcoeffs // NOTE: Using the same ctx as before - cb_flag = !split ? cur_cu->coeff_v : cur_cu->coeff_top_v[depth]; - if (tr_depth == 0 || cur_cu->coeff_top_v[depth-1]) { - CABAC_BIN(&cabac, cb_flag, "cbf_chroma_v"); + if (tr_depth == 0 || parent_coeff_v) { + CABAC_BIN(&cabac, cb_flag_v, "cbf_chroma_v"); } } @@ -1660,16 +1661,10 @@ void encode_transform_coeff(encoder_control *encoder, int32_t x_cu,int32_t y_cu, cu_info *cu_a = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + y_cu * (encoder->in.width_in_lcu << MAX_DEPTH)]; cu_info *cu_b = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; cu_info *cu_c = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_cu + offset + (y_cu + offset) * (encoder->in.width_in_lcu << MAX_DEPTH)]; - encode_transform_coeff(encoder, x_cu, y_cu, depth + 1, tr_depth + 1); - cu_a->coeff_top_y[depth] = cur_cu->coeff_top_y[depth]; cu_a->coeff_top_u[depth] = cur_cu->coeff_top_u[depth]; - cu_a->coeff_top_v[depth] = cur_cu->coeff_top_v[depth]; - encode_transform_coeff(encoder, x_cu + offset, y_cu, depth + 1, tr_depth + 1); - cu_b->coeff_top_y[depth] = cur_cu->coeff_top_y[depth]; cu_b->coeff_top_u[depth] = cur_cu->coeff_top_u[depth]; - 
cu_b->coeff_top_v[depth] = cur_cu->coeff_top_v[depth]; - encode_transform_coeff(encoder, x_cu, y_cu + offset, depth + 1, tr_depth + 1); - cu_c->coeff_top_y[depth] = cur_cu->coeff_top_y[depth]; cu_c->coeff_top_u[depth] = cur_cu->coeff_top_u[depth]; - cu_c->coeff_top_v[depth] = cur_cu->coeff_top_v[depth]; - encode_transform_coeff(encoder, x_cu + offset, y_cu + offset, depth + 1, tr_depth + 1); + encode_transform_coeff(encoder, x_cu, y_cu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(encoder, x_cu + offset, y_cu, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(encoder, x_cu, y_cu + offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); + encode_transform_coeff(encoder, x_cu + offset, y_cu + offset, depth + 1, tr_depth + 1, cb_flag_u, cb_flag_v); return; } diff --git a/src/encoder.h b/src/encoder.h index f88046ad..b984f1f3 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -89,7 +89,7 @@ void encode_coeff_nxn(encoder_control *encoder, int16_t *coeff, uint8_t width, void encode_transform_tree(encoder_control *encoder, int32_t x_cu, int32_t y_cu, uint8_t depth); void encode_transform_coeff(encoder_control *encoder, int32_t x_cu, int32_t y_cu, - int8_t depth, int8_t tr_depth); + int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v); extern int16_t g_lambda_cost[55]; extern uint32_t* g_sig_last_scan[3][7]; From b20b583d9bedc516fb751cfc307de2393dee848f Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Tue, 22 Oct 2013 16:27:50 +0300 Subject: [PATCH 14/19] Moved all residual/coeff functionality to encode_block_residual() --- src/encoder.c | 252 +++++++++++++++++++++++++++++--------------------- src/encoder.h | 2 + src/search.c | 7 ++ 3 files changed, 157 insertions(+), 104 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index d95b603e..5c248de5 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -771,8 +771,7 @@ void encode_slice_header(encoder_control* encoder) void 
encode_slice_data(encoder_control* encoder) { uint16_t x_ctb, y_ctb; - - scalinglist_process(); + init_contexts(encoder,encoder->in.cur_pic->slicetype); // Loop through every LCU in the slice @@ -1052,14 +1051,6 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, } // if !merge - // Inter reconstruction - inter_recon(encoder->ref->pics[0], x_ctb * CU_MIN_SIZE_PIXELS, - y_ctb * CU_MIN_SIZE_PIXELS, LCU_WIDTH >> depth, cur_cu->inter.mv, - encoder->in.cur_pic); - // Mark this block as "coded" (can be used for predictions..) - picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); - encode_transform_tree(encoder,x_ctb, y_ctb, depth); - // Only need to signal coded block flag if not skipped or merged // skip = no coded residual, merge = coded residual if (!cur_cu->merged) { @@ -1081,35 +1072,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, int8_t mpm_preds = -1; int i; uint32_t flag; - pixel *base_y = &encoder->in.cur_pic->y_data[x_ctb * (LCU_WIDTH >> (MAX_DEPTH)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; - pixel *base_u = &encoder->in.cur_pic->u_data[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; - pixel *base_v = &encoder->in.cur_pic->v_data[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; - uint32_t width = LCU_WIDTH>>depth; - - // INTRAPREDICTION VARIABLES - pixel pred_y[LCU_WIDTH * LCU_WIDTH]; - - pixel *recbase_y = &encoder->in.cur_pic->y_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; - pixel *recbase_u = &encoder->in.cur_pic->u_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; - pixel *recbase_v = &encoder->in.cur_pic->v_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; - - // 
SEARCH BEST INTRA MODE (AGAIN) - pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; - intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, - (LCU_WIDTH >> (depth)) * 2 + 8, rec, - (LCU_WIDTH >> (depth)) * 2 + 8, 0); - cur_cu->intra.mode = (int8_t)intra_prediction(encoder->in.cur_pic->y_data, - encoder->in.width, - rec_shift, - (LCU_WIDTH >> (depth)) * 2 + 8, - x_ctb * (LCU_WIDTH >> (MAX_DEPTH)), - y_ctb * (LCU_WIDTH >> (MAX_DEPTH)), - width, pred_y, width, - &cur_cu->intra.cost); - intra_pred_mode = cur_cu->intra.mode; - intra_set_block_mode(encoder->in.cur_pic, x_ctb, y_ctb, depth, - intra_pred_mode); + uint32_t width = LCU_WIDTH>>depth; #if ENABLE_PCM == 1 // Code must start after variable initialization @@ -1204,8 +1167,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // END OF PREDINFO CODING // Coeff - // Transform tree - encode_transform_tree(encoder, x_ctb, y_ctb, depth); + // Transform tree encode_transform_coeff(encoder, x_ctb, y_ctb, depth, 0, 0, 0); // end Transform tree // end Coeff @@ -1311,7 +1273,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, pixel *pred_v = &encoder->in.cur_pic->pred_v[x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; int32_t pred_stride = encoder->in.width; - coefficient coeff_y[LCU_WIDTH*LCU_WIDTH<<2]; + coefficient coeff_y[LCU_WIDTH*LCU_WIDTH]; coefficient coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; coefficient coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; coefficient *orig_coeff_y = &encoder->in.cur_pic->coeff_y[x_cu * (LCU_WIDTH >> (MAX_DEPTH)) + (y_cu * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; @@ -1324,10 +1286,7 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, int16_t pre_quant_coeff[LCU_WIDTH*LCU_WIDTH>>2]; // INTRA PREDICTION - // TODO: split to a function! 
- pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; - pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; - pixel *rec_shift_u = &rec[(LCU_WIDTH >> (depth + 1)) * 2 + 8 + 1]; + uint32_t ac_sum = 0; uint32_t ctx_idx; @@ -1364,71 +1323,25 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu,int32_t y_cu, // TODO: support NxN dir_mode = cur_cu->intra.mode; } - if (ctx_idx > 4 && ctx_idx < 7) { // if multiple scans supported for transform size scan_idx_chroma = abs((int32_t) dir_mode - 26) < 5 ? 1 : (abs((int32_t)dir_mode - 10) < 5 ? 2 : 0); } + } - // Build reconstructed block to use in prediction with extrapolated borders - intra_build_reference_border(encoder->in.cur_pic, x_cu, y_cu, - (LCU_WIDTH >> (depth)) * 2 + 8, rec, (LCU_WIDTH >> (depth)) * 2 + 8, 0); - intra_recon(rec_shift, (LCU_WIDTH >> (depth)) * 2 + 8, - x_cu * (LCU_WIDTH >> (MAX_DEPTH)), y_cu * (LCU_WIDTH >> (MAX_DEPTH)), - width, pred_y, pred_stride, cur_cu->intra.mode, 0); - // Filter DC-prediction - if (cur_cu->intra.mode == 1 && width < 32) { - intra_dc_pred_filtering(rec_shift, (LCU_WIDTH >> (depth)) * 2 + 8, pred_y, - pred_stride, LCU_WIDTH >> depth, LCU_WIDTH >> depth); - } - - // TODO : chroma intra prediction - if (cur_cu->intra.mode_chroma != 36 - && cur_cu->intra.mode_chroma == cur_cu->intra.mode) { - cur_cu->intra.mode_chroma = 36; - } - - intra_build_reference_border(encoder->in.cur_pic, x_cu, y_cu, - (LCU_WIDTH >> (depth + 1)) * 2 + 8, rec, - (LCU_WIDTH >> (depth + 1)) * 2 + 8, - 1); - intra_recon(rec_shift_u, - (LCU_WIDTH >> (depth + 1)) * 2 + 8, - x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), - y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), - width >> 1, - pred_u, - pred_stride >> 1, - cur_cu->intra.mode_chroma != 36 ? 
cur_cu->intra.mode_chroma : cur_cu->intra.mode, - 1); - intra_build_reference_border(encoder->in.cur_pic, x_cu, y_cu, - (LCU_WIDTH >> (depth + 1)) * 2 + 8, - rec, (LCU_WIDTH >> (depth + 1)) * 2 + 8, - 2); - intra_recon(rec_shift_u, (LCU_WIDTH >> (depth + 1)) * 2 + 8, - x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), - y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)), - width >> 1, - pred_v, - pred_stride >> 1, - cur_cu->intra.mode_chroma != 36 ? cur_cu->intra.mode_chroma : cur_cu->intra.mode, - 1); - - // This affects reconstruction, do after that - picture_set_block_coded(encoder->in.cur_pic, x_cu, y_cu, depth, 1); - } else { // Inter mode - for(y = 0; y < LCU_WIDTH>>depth; y++) { - for(x = 0; x < LCU_WIDTH>>depth; x++) { - pred_y[x+y*pred_stride]=recbase_y[x+y*base_stride]; - } - } - for(y = 0; y < LCU_WIDTH>>(depth+1); y++) { - for(x = 0; x < LCU_WIDTH>>(depth+1); x++) { - pred_u[x+y*(pred_stride>>1)]=recbase_u[x+y*(base_stride>>1)]; - pred_v[x+y*(pred_stride>>1)]=recbase_v[x+y*(base_stride>>1)]; - } + // Copy Luma and Chroma to the pred-block + for(y = 0; y < LCU_WIDTH>>depth; y++) { + for(x = 0; x < LCU_WIDTH>>depth; x++) { + pred_y[x+y*pred_stride]=recbase_y[x+y*recbase_stride]; } } + for(y = 0; y < LCU_WIDTH>>(depth+1); y++) { + for(x = 0; x < LCU_WIDTH>>(depth+1); x++) { + pred_u[x+y*(pred_stride>>1)]=recbase_u[x+y*(recbase_stride>>1)]; + pred_v[x+y*(pred_stride>>1)]=recbase_v[x+y*(recbase_stride>>1)]; + } + } + // INTRA PREDICTION ENDS HERE // Get residual by subtracting prediction @@ -2060,3 +1973,134 @@ void encode_last_significant_xy(encoder_control *encoder, // end LastSignificantXY } + + +/** + * \brief This function reconstructs inter/intra predictions and produces coded residual to the buffer + */ +void encode_block_residual(encoder_control *encoder, + uint16_t x_ctb, uint16_t y_ctb, uint8_t depth) +{ + cu_info *cur_cu = &encoder->in.cur_pic->cu_array[MAX_DEPTH][x_ctb + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)]; + uint8_t split_flag = GET_SPLITDATA(cur_cu, 
depth); + uint8_t split_model = 0; + + // Check for slice border + uint8_t border_x = ((encoder->in.width) < (x_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0; + uint8_t border_y = ((encoder->in.height) < (y_ctb * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> depth))) ? 1 : 0; + uint8_t border_split_x = ((encoder->in.width) < ((x_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1; + uint8_t border_split_y = ((encoder->in.height) < ((y_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1; + uint8_t border = border_x | border_y; /*!< are we in any border CU */ + + scalinglist_process(); + + // When not in MAX_DEPTH, insert split flag and split the blocks if needed + if (depth != MAX_DEPTH) { + if (split_flag || border) { + // Split blocks and remember to change x and y block positions + uint8_t change = 1<<(MAX_DEPTH-1-depth); + encode_block_residual(encoder, x_ctb, y_ctb, depth + 1); + + if (!border_x || border_split_x) { + encode_block_residual(encoder, x_ctb + change, y_ctb, depth + 1); + } + if (!border_y || border_split_y) { + encode_block_residual(encoder, x_ctb, y_ctb + change, depth + 1); + } + if (!border || (border_split_x && border_split_y)) { + encode_block_residual(encoder, x_ctb + change, y_ctb + change, depth + 1); + } + return; + } + } + + if (cur_cu->type == CU_INTRA) { + uint32_t width = LCU_WIDTH>>depth; + + // INTRAPREDICTION VARIABLES + pixel pred_y[LCU_WIDTH * LCU_WIDTH]; + + pixel *recbase_y = &encoder->in.cur_pic->y_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH))) * encoder->in.width]; + pixel *recbase_u = &encoder->in.cur_pic->u_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + pixel *recbase_v = &encoder->in.cur_pic->v_recdata[x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)) + (y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1))) * (encoder->in.width >> 1)]; + int32_t rec_stride = encoder->in.width; + + 
// SEARCH BEST INTRA MODE (AGAIN) + pixel rec[(LCU_WIDTH*2+8)*(LCU_WIDTH*2+8)]; + pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; + pixel *rec_shift_u = &rec[(LCU_WIDTH >> (depth + 1)) * 2 + 8 + 1]; + + cur_cu->intra.mode_chroma = 36; + + intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, + (LCU_WIDTH >> (depth)) * 2 + 8, rec, + (LCU_WIDTH >> (depth)) * 2 + 8, 0); + cur_cu->intra.mode = (int8_t)intra_prediction(encoder->in.cur_pic->y_data, + encoder->in.width, + rec_shift, + (LCU_WIDTH >> (depth)) * 2 + 8, + x_ctb * (LCU_WIDTH >> (MAX_DEPTH)), + y_ctb * (LCU_WIDTH >> (MAX_DEPTH)), + width, pred_y, width, + &cur_cu->intra.cost); + intra_set_block_mode(encoder->in.cur_pic, x_ctb, y_ctb, depth, + cur_cu->intra.mode); + + // Build reconstructed block to use in prediction with extrapolated borders + intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, + (LCU_WIDTH >> (depth)) * 2 + 8, rec, (LCU_WIDTH >> (depth)) * 2 + 8, 0); + intra_recon(rec_shift, (LCU_WIDTH >> (depth)) * 2 + 8, + x_ctb * (LCU_WIDTH >> (MAX_DEPTH)), y_ctb * (LCU_WIDTH >> (MAX_DEPTH)), + width, recbase_y, rec_stride, cur_cu->intra.mode, 0); + + // Filter DC-prediction + if (cur_cu->intra.mode == 1 && width < 32) { + intra_dc_pred_filtering(rec_shift, (LCU_WIDTH >> (depth)) * 2 + 8, recbase_y, + rec_stride, LCU_WIDTH >> depth, LCU_WIDTH >> depth); + } + + // TODO : chroma intra prediction + if (cur_cu->intra.mode_chroma != 36 + && cur_cu->intra.mode_chroma == cur_cu->intra.mode) { + cur_cu->intra.mode_chroma = 36; + } + + intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, + (LCU_WIDTH >> (depth + 1)) * 2 + 8, rec, + (LCU_WIDTH >> (depth + 1)) * 2 + 8, + 1); + + intra_recon(rec_shift_u, + (LCU_WIDTH >> (depth + 1)) * 2 + 8, + x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), + y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), + width >> 1, + recbase_u, + rec_stride >> 1, + cur_cu->intra.mode_chroma != 36 ? 
cur_cu->intra.mode_chroma : cur_cu->intra.mode, + 1); + intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, + (LCU_WIDTH >> (depth + 1)) * 2 + 8, + rec, (LCU_WIDTH >> (depth + 1)) * 2 + 8, + 2); + intra_recon(rec_shift_u, (LCU_WIDTH >> (depth + 1)) * 2 + 8, + x_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), + y_ctb * (LCU_WIDTH >> (MAX_DEPTH + 1)), + width >> 1, + recbase_v, + rec_stride >> 1, + cur_cu->intra.mode_chroma != 36 ? cur_cu->intra.mode_chroma : cur_cu->intra.mode, + 1); + + } else { + // Inter reconstruction + inter_recon(encoder->ref->pics[0], x_ctb * CU_MIN_SIZE_PIXELS, + y_ctb * CU_MIN_SIZE_PIXELS, LCU_WIDTH >> depth, cur_cu->inter.mv, + encoder->in.cur_pic); + } + + // Mark this block as "coded" (can be used for predictions..) + picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); + encode_transform_tree(encoder,x_ctb, y_ctb, depth); + +} diff --git a/src/encoder.h b/src/encoder.h index b984f1f3..a44d35db 100644 --- a/src/encoder.h +++ b/src/encoder.h @@ -90,6 +90,8 @@ void encode_transform_tree(encoder_control *encoder, int32_t x_cu, int32_t y_cu, uint8_t depth); void encode_transform_coeff(encoder_control *encoder, int32_t x_cu, int32_t y_cu, int8_t depth, int8_t tr_depth, uint8_t parent_coeff_u, uint8_t parent_coeff_v); +void encode_block_residual(encoder_control *encoder, + uint16_t x_ctb, uint16_t y_ctb, uint8_t depth); extern int16_t g_lambda_cost[55]; extern uint32_t* g_sig_last_scan[3][7]; diff --git a/src/search.c b/src/search.c index e9aaac1f..2712b247 100644 --- a/src/search.c +++ b/src/search.c @@ -413,6 +413,8 @@ uint32_t search_best_mode(encoder_control *encoder, return best_cost; } + + /** * \brief */ @@ -441,9 +443,14 @@ void search_slice_data(encoder_control *encoder) if (RENDER_CU) { render_cu_file(encoder, encoder->in.cur_pic, depth, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, fp2); } + + encode_block_residual(encoder, x_lcu << MAX_DEPTH, y_lcu << MAX_DEPTH, depth); + } } + + if (RENDER_CU && fp) { 
close_cu_file(fp); fp = 0; From bb9d8ee9dd0ac22509608155b89d660ab0d9beda Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Tue, 22 Oct 2013 16:53:18 +0300 Subject: [PATCH 15/19] Fixed motion vector difference calculation --- src/encoder.c | 53 ++++++++++++++++++++++++++------------------------- src/picture.h | 2 ++ 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 5c248de5..73081690 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -932,8 +932,6 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, } } else { uint32_t ref_list_idx; - int16_t mv_cand[2][2]; - /* // Void TEncSbac::codeInterDir( TComDataCU* pcCU, UInt uiAbsPartIdx ) if(encoder->in.cur_pic->slicetype == SLICE_B) @@ -982,30 +980,11 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, if (symbol == 0) break; } } - } - - // Get MV candidates - inter_get_mv_cand(encoder, x_ctb, y_ctb, depth, mv_cand); - - // Select better candidate - cur_cu->inter.mv_ref = 0; // Default to candidate 0 - - // Only check when candidates are different - if (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1]) { - uint16_t cand_1_diff = abs(cur_cu->inter.mv[0] - mv_cand[0][0]) + abs( - cur_cu->inter.mv[1] - mv_cand[0][1]); - uint16_t cand_2_diff = abs(cur_cu->inter.mv[0] - mv_cand[1][0]) + abs( - cur_cu->inter.mv[1] - mv_cand[1][1]); - - // Select candidate 1 if it's closer - if (cand_2_diff < cand_1_diff) { - cur_cu->inter.mv_ref = 1; - } - } + } if (!(/*pcCU->getSlice()->getMvdL1ZeroFlag() &&*/ encoder->ref_list == REF_PIC_LIST_1 && cur_cu->inter.mv_dir == 3)) { - const int32_t mvd_hor = cur_cu->inter.mv[0] - mv_cand[cur_cu->inter.mv_ref][0]; - const int32_t mvd_ver = cur_cu->inter.mv[1] - mv_cand[cur_cu->inter.mv_ref][1]; + const int32_t mvd_hor = cur_cu->inter.mvd[0]; + const int32_t mvd_ver = cur_cu->inter.mvd[1]; const int8_t hor_abs_gr0 = mvd_hor != 0; const int8_t ver_abs_gr0 = mvd_ver != 0; const uint32_t mvd_hor_abs = 
abs(mvd_hor); @@ -2030,7 +2009,7 @@ void encode_block_residual(encoder_control *encoder, pixel *rec_shift = &rec[(LCU_WIDTH >> (depth)) * 2 + 8 + 1]; pixel *rec_shift_u = &rec[(LCU_WIDTH >> (depth + 1)) * 2 + 8 + 1]; - cur_cu->intra.mode_chroma = 36; + cur_cu->intra.mode_chroma = 36; // TODO: Chroma intra prediction intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, (LCU_WIDTH >> (depth)) * 2 + 8, rec, @@ -2044,7 +2023,7 @@ void encode_block_residual(encoder_control *encoder, width, pred_y, width, &cur_cu->intra.cost); intra_set_block_mode(encoder->in.cur_pic, x_ctb, y_ctb, depth, - cur_cu->intra.mode); + cur_cu->intra.mode); // Build reconstructed block to use in prediction with extrapolated borders intra_build_reference_border(encoder->in.cur_pic, x_ctb, y_ctb, @@ -2093,6 +2072,28 @@ void encode_block_residual(encoder_control *encoder, 1); } else { + int16_t mv_cand[2][2]; + // Get MV candidates + inter_get_mv_cand(encoder, x_ctb, y_ctb, depth, mv_cand); + + // Select better candidate + cur_cu->inter.mv_ref = 0; // Default to candidate 0 + + // Only check when candidates are different + if (mv_cand[0][0] != mv_cand[1][0] || mv_cand[0][1] != mv_cand[1][1]) { + uint16_t cand_1_diff = abs(cur_cu->inter.mv[0] - mv_cand[0][0]) + abs( + cur_cu->inter.mv[1] - mv_cand[0][1]); + uint16_t cand_2_diff = abs(cur_cu->inter.mv[0] - mv_cand[1][0]) + abs( + cur_cu->inter.mv[1] - mv_cand[1][1]); + + // Select candidate 1 if it's closer + if (cand_2_diff < cand_1_diff) { + cur_cu->inter.mv_ref = 1; + } + } + cur_cu->inter.mvd[0] = cur_cu->inter.mv[0] - mv_cand[cur_cu->inter.mv_ref][0]; + cur_cu->inter.mvd[1] = cur_cu->inter.mv[1] - mv_cand[cur_cu->inter.mv_ref][1]; + // Inter reconstruction inter_recon(encoder->ref->pics[0], x_ctb * CU_MIN_SIZE_PIXELS, y_ctb * CU_MIN_SIZE_PIXELS, LCU_WIDTH >> depth, cur_cu->inter.mv, diff --git a/src/picture.h b/src/picture.h index 2ba93b2f..87065ab6 100644 --- a/src/picture.h +++ b/src/picture.h @@ -43,7 +43,9 @@ typedef struct { 
int8_t mode; uint32_t cost; + int16_t mv[2]; + int16_t mvd[2]; uint8_t mv_ref; // \brief Index of the encoder_control.ref array. uint8_t mv_dir; // \brief Probably describes if mv_ref is forward, backward or both. Might not be needed? } cu_info_inter; From 8883fb27aa4678c888c8d2d447fecdb6812c8c76 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Tue, 22 Oct 2013 17:40:55 +0300 Subject: [PATCH 16/19] Implemented skip/merge mode checking, disabled for now because it's not working --- src/encoder.c | 86 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 73081690..e310e3d8 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -844,40 +844,57 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, } } - // Encode skip flag + + + // Encode skip flag if (encoder->in.cur_pic->slicetype != SLICE_I) { - int8_t ctx_skip = 0; - // uiCtxSkip = aboveskipped + leftskipped; - cabac.ctx = &g_cu_skip_flag_model[ctx_skip]; - CABAC_BIN(&cabac, (cur_cu->type == CU_SKIP) ? 1 : 0, "SkipFlag"); - } + int8_t ctx_skip = 0; // uiCtxSkip = aboveskipped + leftskipped; + int ui; + int16_t unary_idx = 0; + int8_t skipflag = 0; + int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; + int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); - // IF SKIP - if (cur_cu->type == CU_SKIP) { - // Encode merge index - //TODO: calculate/fetch merge candidates - int16_t unary_idx = 0; //pcCU->getMergeIndex( uiAbsPartIdx ); - int16_t num_cand = 0; //pcCU->getSlice()->getMaxNumMergeCand(); - int32_t ui; - - if (num_cand > 1) { - for (ui = 0; ui < num_cand - 1; ui++) { - int32_t symbol = (ui == unary_idx) ? 
0 : 1; - - if (ui == 0) { - cabac.ctx = &g_cu_merge_idx_ext_model; - CABAC_BIN(&cabac, symbol, "MergeIndex"); - } else { - CABAC_BIN_EP(&cabac,symbol,"MergeIndex"); - } - - if (symbol == 0) { + if (!cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { + // Encode merge index + for(unary_idx = 0; unary_idx < num_cand; unary_idx++) { + if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && + merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { + //cur_cu->skipped = 1; break; } } } + // Get left and top skipped flags and if they are present and true, increase model number + if (x_ctb > 0 && (&encoder->in.cur_pic->cu_array[MAX_DEPTH][x_ctb - 1 + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)])->skipped) { + ctx_skip++; + } - return; + if (y_ctb > 0 && (&encoder->in.cur_pic->cu_array[MAX_DEPTH][x_ctb + (y_ctb - 1) * (encoder->in.width_in_lcu << MAX_DEPTH)])->skipped) { + ctx_skip++; + } + + cabac.ctx = &g_cu_skip_flag_model[ctx_skip]; + CABAC_BIN(&cabac, cur_cu->skipped, "SkipFlag"); + + // IF SKIP + if (cur_cu->skipped) { + if (num_cand > 1) { + for (ui = 0; ui < num_cand - 1; ui++) { + int32_t symbol = (ui != unary_idx); + if (ui == 0) { + cabac.ctx = &g_cu_merge_idx_ext_model; + CABAC_BIN(&cabac, symbol, "MergeIndex"); + } else { + CABAC_BIN_EP(&cabac,symbol,"MergeIndex"); + } + if (symbol == 0) { + break; + } + } + } + return; + } } // ENDIF SKIP @@ -899,34 +916,31 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, //end partsize if (cur_cu->type == CU_INTER) { // FOR each part - // Mergeflag - uint8_t merge_flag = 0; + // Mergeflag int16_t unary_idx = 0; int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); for(unary_idx = 0; unary_idx < num_cand; unary_idx++) { if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { - //merge_flag = 1; + //cur_cu->merged = 1; break; } - } + } cabac.ctx = 
&g_cu_merge_flag_ext_model; - CABAC_BIN(&cabac, merge_flag, "MergeFlag"); - - if (merge_flag) { //merge + CABAC_BIN(&cabac, cur_cu->merged, "MergeFlag"); + num_cand = MRG_MAX_NUM_CANDS; + if (cur_cu->merged) { //merge if (num_cand > 1) { int32_t ui; for (ui = 0; ui < num_cand - 1; ui++) { int32_t symbol = (ui != unary_idx); - if (ui == 0) { cabac.ctx = &g_cu_merge_idx_ext_model; CABAC_BIN(&cabac, symbol, "MergeIndex"); } else { CABAC_BIN_EP(&cabac,symbol,"MergeIndex"); } - if (symbol == 0) break; } } From e1f0274b51822bba44fd6bcab8c31ba5be542848 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Wed, 23 Oct 2013 15:14:26 +0300 Subject: [PATCH 17/19] Merge mode working on blocks > 8x8 --- src/encoder.c | 51 ++++++++++++++++++++++++++++++++++++--------------- src/inter.c | 4 ++-- src/picture.c | 23 +++++++++++++++++++++++ src/picture.h | 4 +++- 4 files changed, 64 insertions(+), 18 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index e310e3d8..7999d808 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -860,12 +860,12 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, for(unary_idx = 0; unary_idx < num_cand; unary_idx++) { if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { - //cur_cu->skipped = 1; + //picture_set_block_skipped(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); break; } } } - // Get left and top skipped flags and if they are present and true, increase model number + // Get left and top skipped flags and if they are present and true, increase context number if (x_ctb > 0 && (&encoder->in.cur_pic->cu_array[MAX_DEPTH][x_ctb - 1 + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)])->skipped) { ctx_skip++; } @@ -917,16 +917,20 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, if (cur_cu->type == CU_INTER) { // FOR each part // Mergeflag - int16_t unary_idx = 0; + int16_t num_cand = 0; + /* int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; - int16_t num_cand = 
inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); - for(unary_idx = 0; unary_idx < num_cand; unary_idx++) { - if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && - merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { - //cur_cu->merged = 1; - break; + int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); + if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) { + for(unary_idx = 0; unary_idx < num_cand; unary_idx++) { + if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && + merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { + cur_cu->merged = 1; + break; + } } - } + } + */ cabac.ctx = &g_cu_merge_flag_ext_model; CABAC_BIN(&cabac, cur_cu->merged, "MergeFlag"); num_cand = MRG_MAX_NUM_CANDS; @@ -934,12 +938,12 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, if (num_cand > 1) { int32_t ui; for (ui = 0; ui < num_cand - 1; ui++) { - int32_t symbol = (ui != unary_idx); + int32_t symbol = (ui != cur_cu->merge_idx); if (ui == 0) { - cabac.ctx = &g_cu_merge_idx_ext_model; - CABAC_BIN(&cabac, symbol, "MergeIndex"); + cabac.ctx = &g_cu_merge_idx_ext_model; + CABAC_BIN(&cabac, symbol, "MergeIndex"); } else { - CABAC_BIN_EP(&cabac,symbol,"MergeIndex"); + CABAC_BIN_EP(&cabac,symbol,"MergeIndex"); } if (symbol == 0) break; } @@ -2087,6 +2091,18 @@ void encode_block_residual(encoder_control *encoder, } else { int16_t mv_cand[2][2]; + + int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; + int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); + for(cur_cu->merge_idx = 0; cur_cu->merge_idx < num_cand; cur_cu->merge_idx++) { + if(merge_cand[cur_cu->merge_idx][0] == cur_cu->inter.mv[0] && + merge_cand[cur_cu->merge_idx][1] == cur_cu->inter.mv[1]) { + cur_cu->merged = 1; + break; + } + } + + // Get MV candidates inter_get_mv_cand(encoder, x_ctb, y_ctb, depth, mv_cand); @@ -2111,11 +2127,16 @@ void encode_block_residual(encoder_control *encoder, // Inter reconstruction 
inter_recon(encoder->ref->pics[0], x_ctb * CU_MIN_SIZE_PIXELS, y_ctb * CU_MIN_SIZE_PIXELS, LCU_WIDTH >> depth, cur_cu->inter.mv, - encoder->in.cur_pic); + encoder->in.cur_pic); } // Mark this block as "coded" (can be used for predictions..) picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); encode_transform_tree(encoder,x_ctb, y_ctb, depth); + if(cur_cu->merged &&!cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { + cur_cu->merged = 0; + //cur_cu->skipped = 1; + } + } diff --git a/src/inter.c b/src/inter.c index 2e76b210..3f9846ed 100644 --- a/src/inter.c +++ b/src/inter.c @@ -412,9 +412,9 @@ uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_c } #endif - // Fill with (0,0) + // Fill with (0,0) + //i = candidates; /* - i = candidates; while (candidates < MRG_MAX_NUM_CANDS) { mv_cand[candidates][0] = 0; mv_cand[candidates][1] = 0; diff --git a/src/picture.c b/src/picture.c index 3983d392..1cebe141 100644 --- a/src/picture.c +++ b/src/picture.c @@ -20,6 +20,29 @@ #define PSNRMAX (255.0 * 255.0) +/** + * \brief Set block skipped + * \param pic picture to use + * \param x_scu x SCU position (smallest CU) + * \param y_scu y SCU position (smallest CU) + * \param depth current CU depth + * \param skipped skipped flag + */ +void picture_set_block_skipped(picture *pic, uint32_t x_scu, uint32_t y_scu, + uint8_t depth, int8_t skipped) +{ + uint32_t x, y; + int width_in_scu = pic->width_in_lcu << MAX_DEPTH; + int block_scu_width = (LCU_WIDTH >> depth) / (LCU_WIDTH >> MAX_DEPTH); + + for (y = y_scu; y < y_scu + block_scu_width; ++y) { + int cu_row = y * width_in_scu; + for (x = x_scu; x < x_scu + block_scu_width; ++x) { + pic->cu_array[MAX_DEPTH][cu_row + x].skipped = skipped; + } + } +} + /** * \brief Set block residual status * \param pic picture to use diff --git a/src/picture.h b/src/picture.h index 87065ab6..662dbcaf 100644 --- a/src/picture.h +++ b/src/picture.h @@ -62,6 +62,7 @@ 
typedef struct int8_t coded; //!< \brief flag to indicate this block is coded and reconstructed int8_t skipped; //!< \brief flag to indicate this block is skipped int8_t merged; //!< \brief flag to indicate this block is merged + int8_t merge_idx; //!< \brief merge index int8_t coeff_y; //!< \brief is there coded coeffs Y int8_t coeff_u; //!< \brief is there coded coeffs U int8_t coeff_v; //!< \brief is there coded coeffs V @@ -127,7 +128,8 @@ void picture_set_block_residual(picture *pic, uint32_t x_scu, uint32_t y_scu, uint8_t depth, int8_t residual); void picture_set_block_split(picture *pic, uint32_t x_scu, uint32_t y_scu, uint8_t depth, int8_t split); - +void picture_set_block_skipped(picture *pic, uint32_t x_scu, uint32_t y_scu, + uint8_t depth, int8_t skipped); picture_list * picture_list_init(int size); int picture_list_resize(picture_list *list, int size); int picture_list_destroy(picture_list *list); From ded4c18bf6d4c788994af6fd014d9aea7bd51cae Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Wed, 23 Oct 2013 16:50:11 +0300 Subject: [PATCH 18/19] Fixed merge candidate duplicate removal and implemented skip mode selection --- src/encoder.c | 42 ++++++++++-------------------------------- src/inter.c | 32 ++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 46 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 7999d808..133d4ac3 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -850,21 +850,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, if (encoder->in.cur_pic->slicetype != SLICE_I) { int8_t ctx_skip = 0; // uiCtxSkip = aboveskipped + leftskipped; int ui; - int16_t unary_idx = 0; - int8_t skipflag = 0; - int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; - int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); - - if (!cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { - // Encode merge index - for(unary_idx = 0; unary_idx < num_cand; unary_idx++) 
{ - if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && - merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { - //picture_set_block_skipped(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); - break; - } - } - } + int16_t num_cand = MRG_MAX_NUM_CANDS; // Get left and top skipped flags and if they are present and true, increase context number if (x_ctb > 0 && (&encoder->in.cur_pic->cu_array[MAX_DEPTH][x_ctb - 1 + y_ctb * (encoder->in.width_in_lcu << MAX_DEPTH)])->skipped) { ctx_skip++; @@ -881,7 +867,7 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, if (cur_cu->skipped) { if (num_cand > 1) { for (ui = 0; ui < num_cand - 1; ui++) { - int32_t symbol = (ui != unary_idx); + int32_t symbol = (ui != cur_cu->merge_idx); if (ui == 0) { cabac.ctx = &g_cu_merge_idx_ext_model; CABAC_BIN(&cabac, symbol, "MergeIndex"); @@ -918,19 +904,6 @@ void encode_coding_tree(encoder_control *encoder, uint16_t x_ctb, // FOR each part // Mergeflag int16_t num_cand = 0; - /* - int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; - int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); - if(cur_cu->coeff_top_y[depth] | cur_cu->coeff_top_u[depth] | cur_cu->coeff_top_v[depth]) { - for(unary_idx = 0; unary_idx < num_cand; unary_idx++) { - if(merge_cand[unary_idx][0] == cur_cu->inter.mv[0] && - merge_cand[unary_idx][1] == cur_cu->inter.mv[1]) { - cur_cu->merged = 1; - break; - } - } - } - */ cabac.ctx = &g_cu_merge_flag_ext_model; CABAC_BIN(&cabac, cur_cu->merged, "MergeFlag"); num_cand = MRG_MAX_NUM_CANDS; @@ -2092,8 +2065,11 @@ void encode_block_residual(encoder_control *encoder, } else { int16_t mv_cand[2][2]; + // Search for merge mode candidate int16_t merge_cand[MRG_MAX_NUM_CANDS][2]; - int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); + // Get list of candidates + int16_t num_cand = inter_get_merge_cand(encoder, x_ctb, y_ctb, depth, merge_cand); + // Check every candidate to find a match for(cur_cu->merge_idx = 0; 
cur_cu->merge_idx < num_cand; cur_cu->merge_idx++) { if(merge_cand[cur_cu->merge_idx][0] == cur_cu->inter.mv[0] && merge_cand[cur_cu->merge_idx][1] == cur_cu->inter.mv[1]) { @@ -2134,9 +2110,11 @@ void encode_block_residual(encoder_control *encoder, picture_set_block_coded(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); encode_transform_tree(encoder,x_ctb, y_ctb, depth); - if(cur_cu->merged &&!cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { + // if merge is selected but no coefficients to code -> skip mode + if(cur_cu->merged && !cur_cu->coeff_top_y[depth] && !cur_cu->coeff_top_u[depth] && !cur_cu->coeff_top_v[depth]) { cur_cu->merged = 0; - //cur_cu->skipped = 1; + picture_set_block_skipped(encoder->in.cur_pic, x_ctb, y_ctb, depth, 1); + cur_cu->skipped = 1; } } diff --git a/src/inter.c b/src/inter.c index 3f9846ed..d3a00a8c 100644 --- a/src/inter.c +++ b/src/inter.c @@ -359,9 +359,9 @@ uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_c b0 = b1 = b2 = a0 = a1 = NULL; inter_get_spatial_merge_candidates(encoder, x_cu, y_cu, depth, &b0, &b1, &b2, &a0, &a1); -#define CHECK_DUPLICATE(X,Y) {duplicate = 0; for(i = 0; i < candidates; i++) { \ - if(mv_cand[i][0] == (X) && mv_cand[i][1] == (Y)) { \ - duplicate = 1; break; } }} +#define CHECK_DUPLICATE(CU1,CU2) {duplicate = 0; if ((CU2) && (CU2)->type == CU_INTER && \ + (CU1)->inter.mv[0] == (CU2)->inter.mv[0] && \ + (CU1)->inter.mv[1] == (CU2)->inter.mv[1]) duplicate = 1; } if (a1 && a1->type == CU_INTER) { mv_cand[candidates][0] = a1->inter.mv[0]; @@ -370,7 +370,7 @@ uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_c } if (b1 && b1->type == CU_INTER) { - if(candidates) CHECK_DUPLICATE(b1->inter.mv[0],b1->inter.mv[1]); + if(candidates) CHECK_DUPLICATE(b1, a1); if(!duplicate) { mv_cand[candidates][0] = b1->inter.mv[0]; mv_cand[candidates][1] = b1->inter.mv[1]; @@ -379,7 +379,7 @@ uint8_t inter_get_merge_cand(encoder_control 
*encoder, int32_t x_cu, int32_t y_c } if (b0 && b0->type == CU_INTER) { - if(candidates) CHECK_DUPLICATE(b0->inter.mv[0],b0->inter.mv[1]); + if(candidates) CHECK_DUPLICATE(b0,b1); if(!duplicate) { mv_cand[candidates][0] = b0->inter.mv[0]; mv_cand[candidates][1] = b0->inter.mv[1]; @@ -388,7 +388,7 @@ uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_c } if (a0 && a0->type == CU_INTER) { - if(candidates) CHECK_DUPLICATE(a0->inter.mv[0],a0->inter.mv[1]); + if(candidates) CHECK_DUPLICATE(a0,a1); if(!duplicate) { mv_cand[candidates][0] = a0->inter.mv[0]; mv_cand[candidates][1] = a0->inter.mv[1]; @@ -396,12 +396,17 @@ uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_c } } - if(b2 && b2->type == CU_INTER) { - if(candidates) CHECK_DUPLICATE(b2->inter.mv[0],b2->inter.mv[1]); - if(!duplicate) { - mv_cand[candidates][0] = b2->inter.mv[0]; - mv_cand[candidates][1] = b2->inter.mv[1]; - candidates++; + if (candidates != 4) { + if(b2 && b2->type == CU_INTER) { + CHECK_DUPLICATE(b2,a1); + if(!duplicate) { + CHECK_DUPLICATE(b2,b1); + if(!duplicate) { + mv_cand[candidates][0] = b2->inter.mv[0]; + mv_cand[candidates][1] = b2->inter.mv[1]; + candidates++; + } + } } } @@ -412,8 +417,7 @@ uint8_t inter_get_merge_cand(encoder_control *encoder, int32_t x_cu, int32_t y_c } #endif - // Fill with (0,0) - //i = candidates; + // Fill with (0,0) /* while (candidates < MRG_MAX_NUM_CANDS) { mv_cand[candidates][0] = 0; From caa010a9729b8cb3e35c3f6287bb50c588c9b895 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 28 Oct 2013 11:47:54 +0200 Subject: [PATCH 19/19] Changed scalinglist_process() to be done on frame level --- src/encoder.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/encoder.c b/src/encoder.c index 133d4ac3..a9194a39 100644 --- a/src/encoder.c +++ b/src/encoder.c @@ -259,6 +259,7 @@ void init_encoder_input(encoder_input *input, FILE *inputfile, void encode_one_frame(encoder_control* 
encoder) { + // output parameters before first frame if (encoder->frame == 0) { // Video Parameter Set (VPS) @@ -289,8 +290,9 @@ void encode_one_frame(encoder_control* encoder) cabac_start(&cabac); encoder->in.cur_pic->slicetype = SLICE_I; encoder->in.cur_pic->type = NAL_IDR_W_RADL; - search_slice_data(encoder); - + scalinglist_process(); + search_slice_data(encoder); + encode_slice_header(encoder); bitstream_align(encoder->stream); encode_slice_data(encoder); @@ -304,8 +306,9 @@ void encode_one_frame(encoder_control* encoder) cabac_start(&cabac); encoder->in.cur_pic->slicetype = SLICE_P; encoder->in.cur_pic->type = NAL_TRAIL_R; + scalinglist_process(); search_slice_data(encoder); - + encode_slice_header(encoder); bitstream_align(encoder->stream); encode_slice_data(encoder); @@ -1962,8 +1965,6 @@ void encode_block_residual(encoder_control *encoder, uint8_t border_split_y = ((encoder->in.height) < ((y_ctb + 1) * (LCU_WIDTH >> MAX_DEPTH) + (LCU_WIDTH >> (depth + 1)))) ? 0 : 1; uint8_t border = border_x | border_y; /*!< are we in any border CU */ - scalinglist_process(); - // When not in MAX_DEPTH, insert split flag and split the blocks if needed if (depth != MAX_DEPTH) { if (split_flag || border) {