diff --git a/src/cu.h b/src/cu.h index e3555d08..ddddaf55 100644 --- a/src/cu.h +++ b/src/cu.h @@ -147,7 +147,7 @@ enum uvg_tree_type { */ typedef struct { - uint8_t type : 2; //!< \brief block type, one of cu_type_t values + uint8_t type : 3; //!< \brief block type, one of cu_type_t values uint8_t depth : 3; //!< \brief depth / size of this block uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values uint8_t tr_depth : 3; //!< \brief transform depth diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index cb27099b..fa73e08e 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -1262,95 +1262,6 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state, if (cabac->only_count && bits_out) *bits_out += bits; } -/** -static void encode_part_mode(encoder_state_t * const state, - cabac_data_t * const cabac, - const cu_info_t * const cur_cu, - int depth) -{ - // Binarization from Table 9-34 of the HEVC spec: - // - // | log2CbSize > | log2CbSize == - // | MinCbLog2SizeY | MinCbLog2SizeY - // -------+-------+----------+---------+-----------+---------- - // pred | part | AMP | AMP | | - // mode | mode | disabled | enabled | size == 8 | size > 8 - // -------+-------+----------+---------+-----------+---------- - // intra | 2Nx2N | - - | 1 1 - // | NxN | - - | 0 0 - // -------+-------+--------------------+---------------------- - // inter | 2Nx2N | 1 1 | 1 1 - // | 2NxN | 01 011 | 01 01 - // | Nx2N | 00 001 | 00 001 - // | NxN | - - | - 000 - // | 2NxnU | - 0100 | - - - // | 2NxnD | - 0101 | - - - // | nLx2N | - 0000 | - - - // | nRx2N | - 0001 | - - - // -------+-------+--------------------+---------------------- - // - // - // Context indices from Table 9-37 of the HEVC spec: - // - // binIdx - // | 0 1 2 3 - // ------------------------------+------------------ - // log2CbSize == MinCbLog2SizeY | 0 1 2 bypass - // log2CbSize > MinCbLog2SizeY | 0 1 3 bypass - // 
------------------------------+------------------ - double bits = 0; - if (cur_cu->type == CU_INTRA) { - if (depth == MAX_DEPTH) { - cabac->cur_ctx = &(cabac->ctx.part_size_model[0]); - if (cur_cu->part_size == SIZE_2Nx2N) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N"); - } else { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode NxN"); - } - } - } else { - - cabac->cur_ctx = &(cabac->ctx.part_size_model[0]); - if (cur_cu->part_size == SIZE_2Nx2N) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N"); - return bits; - } - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode split"); - - cabac->cur_ctx = &(cabac->ctx.part_size_model[1]); - if (cur_cu->part_size == SIZE_2NxN || - cur_cu->part_size == SIZE_2NxnU || - cur_cu->part_size == SIZE_2NxnD) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 1, bits, "part_mode vertical"); - } else { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 0, bits, "part_mode horizontal"); - } - - if (state->encoder_control->cfg.amp_enable && depth < MAX_DEPTH) { - cabac->cur_ctx = &(cabac->ctx.part_size_model[3]); - - if (cur_cu->part_size == SIZE_2NxN || - cur_cu->part_size == SIZE_Nx2N) { - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 1, bits, "part_mode SMP"); - return bits; - } - CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 0, bits, "part_mode AMP"); - - if (cur_cu->part_size == SIZE_2NxnU || - cur_cu->part_size == SIZE_nLx2N) { - CABAC_BINS_EP(cabac, 0, 1, "part_mode AMP"); - if(cabac->only_count) bits += 1; - } else { - CABAC_BINS_EP(cabac, 1, 1, "part_mode AMP"); - if(cabac->only_count) bits += 1; - } - } - } - return bits; -} -**/ - - bool uvg_write_split_flag( const encoder_state_t * const state, cabac_data_t* cabac, @@ -1684,7 +1595,7 @@ void uvg_encode_coding_tree( } else #endif - if (cur_cu->type == CU_INTER) { + if 
(cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { uint8_t imv_mode = UVG_IMV_OFF; const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size]; @@ -1706,10 +1617,10 @@ void uvg_encode_coding_tree( // 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel if (ctrl->cfg.amvr && non_zero_mvd) { cabac->cur_ctx = &(cabac->ctx.imv_flag[0]); - CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag"); + if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag"); if (imv_mode > UVG_IMV_OFF) { cabac->cur_ctx = &(cabac->ctx.imv_flag[4]); - CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag"); + if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag"); if (imv_mode < UVG_IMV_HPEL) { cabac->cur_ctx = &(cabac->ctx.imv_flag[1]); CABAC_BIN(cabac, (imv_mode > UVG_IMV_FPEL), "imv_flag"); // 1 indicates 4PEL, 0 FPEL diff --git a/src/encoder_state-ctors_dtors.c b/src/encoder_state-ctors_dtors.c index bb1300af..037f61d8 100644 --- a/src/encoder_state-ctors_dtors.c +++ b/src/encoder_state-ctors_dtors.c @@ -122,6 +122,18 @@ static int encoder_state_config_tile_init(encoder_state_t * const state, state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS); state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu); + if (state->encoder_control->cfg.ibc) { + // Allocate the per-LCU-row pointer tables, then one pixel buffer for each LCU row + state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); + state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); + state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu); + for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { + state->tile->frame->ibc_buffer_y[i] = (uvg_pixel*)malloc(sizeof(uvg_pixel) * IBC_BUFFER_SIZE * 3); // Y, U and V share this one allocation (sized in pixels, not bytes). ToDo: we don't need this much, but it would also support 4:4:4 + state->tile->frame->ibc_buffer_u[i] =
&state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE]; + state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2]; + } + } + state->tile->frame->rec = NULL; state->tile->frame->source = NULL; @@ -197,6 +209,15 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) { FREE_POINTER(state->tile->frame->hmvp_lut); FREE_POINTER(state->tile->frame->hmvp_size); + if (state->encoder_control->cfg.ibc) { + for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) { + FREE_POINTER(state->tile->frame->ibc_buffer_y[i]); + } + FREE_POINTER(state->tile->frame->ibc_buffer_y); + FREE_POINTER(state->tile->frame->ibc_buffer_u); + FREE_POINTER(state->tile->frame->ibc_buffer_v); + } + uvg_videoframe_free(state->tile->frame); state->tile->frame = NULL; FREE_POINTER(state->tile->wf_jobs); diff --git a/src/encoderstate.c b/src/encoderstate.c index 9bed1b86..bee55980 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -250,6 +250,58 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state, frame->rec->stride / 2, 1); } } + + // Fill IBC buffer + if (state->encoder_control->cfg.ibc) { + + uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH ? 
IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x; + uint32_t ibc_buffer_pos_x_c = ibc_buffer_pos_x >> 1; + uint32_t ibc_buffer_row = lcu->position_px.y / LCU_WIDTH; + + // If the buffer is full shift all the lines LCU_WIDTH left + if (lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH) { + for (uint32_t i = 0; i < LCU_WIDTH; i++) { + memmove( + &frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH], + &frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH + LCU_WIDTH], + sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH - LCU_WIDTH)); + } + if (state->encoder_control->chroma_format != UVG_CSP_400) { + for (uint32_t i = 0; i < LCU_WIDTH_C; i++) { + memmove( + &frame->ibc_buffer_u[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C], + &frame->ibc_buffer_u[ibc_buffer_row] + [i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C], + sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C)); + memmove( + &frame->ibc_buffer_v[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C], + &frame->ibc_buffer_v[ibc_buffer_row] + [i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C], + sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C)); + } + } + } + + const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x)); + const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y)); + + uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x], + &frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x], + ibc_block_width, ibc_block_height, + frame->rec->stride, IBC_BUFFER_WIDTH); + + if (state->encoder_control->chroma_format != UVG_CSP_400) { + uvg_pixels_blit(&frame->rec->u[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)], + &frame->ibc_buffer_u[ibc_buffer_row][ibc_buffer_pos_x_c], + ibc_block_width>>1, ibc_block_height>>1, + frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C); + uvg_pixels_blit(&frame->rec->v[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)], + 
&frame->ibc_buffer_v[ibc_buffer_row][ibc_buffer_pos_x_c], + ibc_block_width>>1, ibc_block_height>>1, + frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C); + + } + } } diff --git a/src/encoderstate.h b/src/encoderstate.h index 40e1dc24..55d265e3 100644 --- a/src/encoderstate.h +++ b/src/encoderstate.h @@ -192,9 +192,6 @@ typedef struct encoder_state_config_frame_t { double *c_para; double *k_para; - - cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row - uint8_t* hmvp_size; //!< \brief HMVP LUT size bool jccr_sign; } encoder_state_config_frame_t; diff --git a/src/global.h b/src/global.h index 1c2da76f..773f9c15 100644 --- a/src/global.h +++ b/src/global.h @@ -176,7 +176,6 @@ typedef int32_t mv_t; //! pow(2, MIN_SIZE) #define CU_MIN_SIZE_PIXELS (1 << MIN_SIZE) -//! Round frame size up to this interval (8 pixels) #define CONF_WINDOW_PAD_IN_PIXELS ((1 << MIN_SIZE)<<1) //! spec: CtbSizeY @@ -259,6 +258,9 @@ typedef int32_t mv_t; * */ #define IBC_MRG_MAX_NUM_CANDS 6 +#define IBC_BUFFER_SIZE (256*128) +#define IBC_BUFFER_WIDTH (IBC_BUFFER_SIZE / LCU_WIDTH) +#define IBC_BUFFER_WIDTH_C ((IBC_BUFFER_SIZE / LCU_WIDTH) >> 1) #define MAX_NUM_HMVP_CANDS 5 diff --git a/src/inter.c b/src/inter.c index f89ddf50..7f4c81bf 100644 --- a/src/inter.c +++ b/src/inter.c @@ -626,49 +626,63 @@ void uvg_inter_pred_pu(const encoder_state_t * const state, const int pu_h = PU_GET_H(cu->part_size, width, i_pu); cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y)); - if (pu->inter.mv_dir == 3) { - const uvg_picture *const refs[2] = { - state->frame->ref->images[ - state->frame->ref_LX[0][ - pu->inter.mv_ref[0]]], - state->frame->ref->images[ - state->frame->ref_LX[1][ - pu->inter.mv_ref[1]]], - }; - uvg_inter_recon_bipred(state, - refs[0], refs[1], - pu_x, pu_y, - pu_w, pu_h, - pu->inter.mv, - lcu, - predict_luma, predict_chroma); + if (pu->type == CU_IBC) { + const int offset = x_scu + y_scu * LCU_WIDTH; + const int offset_c = x_scu / 2 + y_scu / 2 * 
LCU_WIDTH_C; + uvg_pixels_blit(lcu->rec.y + offset, lcu->rec.y + offset, width, width, LCU_WIDTH, LCU_WIDTH); + uvg_pixels_blit(lcu->rec.u + offset_c, lcu->rec.joint_u + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(lcu->rec.v + offset_c, lcu->rec.joint_v + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); + } else { + + if (pu->inter.mv_dir == 3) { + const uvg_picture * const refs[2] = { + state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]], + state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]], + }; + uvg_inter_recon_bipred( + state, + refs[0], + refs[1], + pu_x, + pu_y, + pu_w, + pu_h, + pu->inter.mv, + lcu, + predict_luma, + predict_chroma); + } else { + const int mv_idx = pu->inter.mv_dir - 1; + const uvg_picture * const ref = + (cu->type == CU_IBC) ? + state->tile->frame->rec : + (state->frame->ref + ->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]]); + + const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x); + const unsigned offset_chroma = + SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2; + yuv_t lcu_adapter; + lcu_adapter.size = pu_w * pu_h; + lcu_adapter.y = lcu->rec.y + offset_luma, + lcu_adapter.u = lcu->rec.u + offset_chroma, + lcu_adapter.v = lcu->rec.v + offset_chroma, + + inter_recon_unipred( + state, + ref, + pu_x, + pu_y, + pu_w, + pu_h, + LCU_WIDTH, + pu->inter.mv[mv_idx], + &lcu_adapter, + NULL, + predict_luma, + predict_chroma); + } } - else { - const int mv_idx = pu->inter.mv_dir - 1; - const uvg_picture *const ref = - state->frame->ref->images[ - state->frame->ref_LX[mv_idx][ - pu->inter.mv_ref[mv_idx]]]; - - const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x); - const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2; - yuv_t lcu_adapter; - lcu_adapter.size = pu_w * pu_h; - lcu_adapter.y = lcu->rec.y + offset_luma, - lcu_adapter.u = lcu->rec.u + offset_chroma, - lcu_adapter.v = 
lcu->rec.v + offset_chroma, - - inter_recon_unipred(state, - ref, - pu_x, pu_y, - pu_w, pu_h, - LCU_WIDTH, - pu->inter.mv[mv_idx], - &lcu_adapter, - NULL, - predict_luma, predict_chroma); - } - if (predict_chroma && state->encoder_control->cfg.jccr) { const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); diff --git a/src/search.c b/src/search.c index 2e594126..59c99473 100644 --- a/src/search.c +++ b/src/search.c @@ -179,7 +179,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in } } -static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width) +static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width, uint8_t type) { const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size; const int num_pu = uvg_part_mode_num_parts[part_mode]; @@ -191,7 +191,7 @@ static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width) const int height_pu = PU_GET_H(part_mode, cu_width, i); cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu); - pu->type = CU_INTER; + pu->type = type; lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu); } } @@ -1034,7 +1034,7 @@ static double search_cu( lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu); - } else if (cur_cu->type == CU_INTER) { + } else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) { if (!cur_cu->skipped) { @@ -1080,7 +1080,7 @@ static double search_cu( inter_bitcost += cur_cu->merge_idx; } } - lcu_fill_inter(lcu, x_local, y_local, cu_width); + lcu_fill_inter(lcu, x_local, y_local, cu_width, cur_cu->type); lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu); } } diff --git a/src/videoframe.h b/src/videoframe.h index e1a82181..54f17689 100644 --- a/src/videoframe.h +++ b/src/videoframe.h @@ -78,6 +78,9 @@ typedef struct videoframe int32_t poc; //!< \brief Picture order count cu_info_t* 
hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row + uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy luma (Y) buffer, one pointer for each LCU row + uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy chroma U buffer, one pointer for each LCU row + uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy chroma V buffer, one pointer for each LCU row uint8_t* hmvp_size; //!< \brief HMVP LUT size bool source_lmcs_mapped; //!< \brief Indicate if source_lmcs is available and mapped to LMCS