[ibc] Add IBC buffers

This commit is contained in:
Marko Viitanen 2022-06-29 08:59:20 +03:00
parent 6ec4c37b47
commit b49d32af21
9 changed files with 143 additions and 143 deletions

View file

@ -147,7 +147,7 @@ enum uvg_tree_type {
*/
typedef struct
{
uint8_t type : 2; //!< \brief block type, one of cu_type_t values
uint8_t type : 3; //!< \brief block type, one of cu_type_t values
uint8_t depth : 3; //!< \brief depth / size of this block
uint8_t part_size : 3; //!< \brief partition mode, one of part_mode_t values
uint8_t tr_depth : 3; //!< \brief transform depth

View file

@ -1262,95 +1262,6 @@ void uvg_encode_intra_luma_coding_unit(const encoder_state_t * const state,
if (cabac->only_count && bits_out) *bits_out += bits;
}
/**
static void encode_part_mode(encoder_state_t * const state,
cabac_data_t * const cabac,
const cu_info_t * const cur_cu,
int depth)
{
// Binarization from Table 9-34 of the HEVC spec:
//
// | log2CbSize > | log2CbSize ==
// | MinCbLog2SizeY | MinCbLog2SizeY
// -------+-------+----------+---------+-----------+----------
// pred | part | AMP | AMP | |
// mode | mode | disabled | enabled | size == 8 | size > 8
// -------+-------+----------+---------+-----------+----------
// intra | 2Nx2N | - - | 1 1
// | NxN | - - | 0 0
// -------+-------+--------------------+----------------------
// inter | 2Nx2N | 1 1 | 1 1
// | 2NxN | 01 011 | 01 01
// | Nx2N | 00 001 | 00 001
// | NxN | - - | - 000
// | 2NxnU | - 0100 | - -
// | 2NxnD | - 0101 | - -
// | nLx2N | - 0000 | - -
// | nRx2N | - 0001 | - -
// -------+-------+--------------------+----------------------
//
//
// Context indices from Table 9-37 of the HEVC spec:
//
// binIdx
// | 0 1 2 3
// ------------------------------+------------------
// log2CbSize == MinCbLog2SizeY | 0 1 2 bypass
// log2CbSize > MinCbLog2SizeY | 0 1 3 bypass
// ------------------------------+------------------
double bits = 0;
if (cur_cu->type == CU_INTRA) {
if (depth == MAX_DEPTH) {
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
if (cur_cu->part_size == SIZE_2Nx2N) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
} else {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode NxN");
}
}
} else {
cabac->cur_ctx = &(cabac->ctx.part_size_model[0]);
if (cur_cu->part_size == SIZE_2Nx2N) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 1, bits, "part_mode 2Nx2N");
return bits;
}
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[0]), 0, bits, "part_mode split");
cabac->cur_ctx = &(cabac->ctx.part_size_model[1]);
if (cur_cu->part_size == SIZE_2NxN ||
cur_cu->part_size == SIZE_2NxnU ||
cur_cu->part_size == SIZE_2NxnD) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 1, bits, "part_mode vertical");
} else {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[1]), 0, bits, "part_mode horizontal");
}
if (state->encoder_control->cfg.amp_enable && depth < MAX_DEPTH) {
cabac->cur_ctx = &(cabac->ctx.part_size_model[3]);
if (cur_cu->part_size == SIZE_2NxN ||
cur_cu->part_size == SIZE_Nx2N) {
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 1, bits, "part_mode SMP");
return bits;
}
CABAC_FBITS_UPDATE(cabac, &(cabac->ctx.part_size_model[3]), 0, bits, "part_mode AMP");
if (cur_cu->part_size == SIZE_2NxnU ||
cur_cu->part_size == SIZE_nLx2N) {
CABAC_BINS_EP(cabac, 0, 1, "part_mode AMP");
if(cabac->only_count) bits += 1;
} else {
CABAC_BINS_EP(cabac, 1, 1, "part_mode AMP");
if(cabac->only_count) bits += 1;
}
}
}
return bits;
}
**/
bool uvg_write_split_flag(
const encoder_state_t * const state,
cabac_data_t* cabac,
@ -1684,7 +1595,7 @@ void uvg_encode_coding_tree(
} else
#endif
if (cur_cu->type == CU_INTER) {
if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
uint8_t imv_mode = UVG_IMV_OFF;
const int num_pu = uvg_part_mode_num_parts[cur_cu->part_size];
@ -1706,10 +1617,10 @@ void uvg_encode_coding_tree(
// 0 = off, 1 = fullpel, 2 = 4-pel, 3 = half-pel
if (ctrl->cfg.amvr && non_zero_mvd) {
cabac->cur_ctx = &(cabac->ctx.imv_flag[0]);
CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag");
if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode > UVG_IMV_OFF), "imv_flag");
if (imv_mode > UVG_IMV_OFF) {
cabac->cur_ctx = &(cabac->ctx.imv_flag[4]);
CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag");
if(cur_cu->type != CU_IBC) CABAC_BIN(cabac, (imv_mode < UVG_IMV_HPEL), "imv_flag");
if (imv_mode < UVG_IMV_HPEL) {
cabac->cur_ctx = &(cabac->ctx.imv_flag[1]);
CABAC_BIN(cabac, (imv_mode > UVG_IMV_FPEL), "imv_flag"); // 1 indicates 4PEL, 0 FPEL

View file

@ -122,6 +122,18 @@ static int encoder_state_config_tile_init(encoder_state_t * const state,
state->tile->frame->hmvp_lut = malloc(sizeof(cu_info_t) * height_in_lcu * MAX_NUM_HMVP_CANDS);
state->tile->frame->hmvp_size = calloc(1, sizeof(uint8_t) * height_in_lcu);
if (state->encoder_control->cfg.ibc) {
  // Allocate the Intra Block Copy buffers: one array of row pointers per
  // plane, with one entry for each LCU row of the tile.
  state->tile->frame->ibc_buffer_y = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
  state->tile->frame->ibc_buffer_u = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
  state->tile->frame->ibc_buffer_v = malloc(sizeof(uvg_pixel*) * state->tile->frame->height_in_lcu);
  // NOTE(review): like the surrounding allocations, these results are not
  // checked for NULL before use -- confirm the intended out-of-memory policy.

  for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
    // One allocation per LCU row holds three consecutive planes of
    // IBC_BUFFER_SIZE samples each (Y, U, V). The size is scaled by
    // sizeof(uvg_pixel) because the buffer is indexed and moved in
    // uvg_pixel units, so a byte count would be too small whenever
    // uvg_pixel is wider than one byte.
    // ToDo: we don't need this much, but it would also support 4:4:4
    state->tile->frame->ibc_buffer_y[i] = malloc(sizeof(uvg_pixel) * IBC_BUFFER_SIZE * 3);
    // U and V rows point into the Y allocation; they own no memory themselves.
    state->tile->frame->ibc_buffer_u[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE];
    state->tile->frame->ibc_buffer_v[i] = &state->tile->frame->ibc_buffer_y[i][IBC_BUFFER_SIZE * 2];
  }
}
state->tile->frame->rec = NULL;
state->tile->frame->source = NULL;
@ -197,6 +209,15 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) {
FREE_POINTER(state->tile->frame->hmvp_lut);
FREE_POINTER(state->tile->frame->hmvp_size);
// Release the Intra Block Copy buffers allocated in tile init.
if (state->encoder_control->cfg.ibc) {
// Only the Y row pointers are freed per row: tile init makes ibc_buffer_u[i]
// and ibc_buffer_v[i] point into the ibc_buffer_y[i] allocation, so freeing
// them separately would be a double free.
for (uint32_t i = 0; i < state->tile->frame->height_in_lcu; i++) {
FREE_POINTER(state->tile->frame->ibc_buffer_y[i]);
}
// Free the three per-plane arrays of row pointers.
FREE_POINTER(state->tile->frame->ibc_buffer_y);
FREE_POINTER(state->tile->frame->ibc_buffer_u);
FREE_POINTER(state->tile->frame->ibc_buffer_v);
}
uvg_videoframe_free(state->tile->frame);
state->tile->frame = NULL;
FREE_POINTER(state->tile->wf_jobs);

View file

@ -250,6 +250,58 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
frame->rec->stride / 2, 1);
}
}
// Fill IBC buffer
// Copies the reconstructed pixels of the current LCU from frame->rec into the
// IBC buffer of the LCU row it belongs to. Each row buffer is IBC_BUFFER_WIDTH
// luma samples wide; once the LCU no longer fits, the buffer contents are
// shifted left by one LCU and the new LCU is written into the last slot.
if (state->encoder_control->cfg.ibc) {

// Destination column in the row buffer: the LCU's own x position while it
// still fits, otherwise the last LCU-wide slot of the buffer.
uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x;
// NOTE(review): halving assumes chroma is subsampled by 2 horizontally -- confirm for other chroma formats.
uint32_t ibc_buffer_pos_x_c = ibc_buffer_pos_x >> 1;
// There is one IBC buffer per LCU row.
uint32_t ibc_buffer_row = lcu->position_px.y / LCU_WIDTH;

// If the buffer is full shift all the lines LCU_WIDTH left
if (lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH) {
// Source and destination overlap within each line, hence memmove.
for (uint32_t i = 0; i < LCU_WIDTH; i++) {
memmove(
&frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH],
&frame->ibc_buffer_y[ibc_buffer_row][i * IBC_BUFFER_WIDTH + LCU_WIDTH],
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH - LCU_WIDTH));
}
// Same shift for both chroma planes, using the chroma width and LCU size.
if (state->encoder_control->chroma_format != UVG_CSP_400) {
for (uint32_t i = 0; i < LCU_WIDTH_C; i++) {
memmove(
&frame->ibc_buffer_u[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C],
&frame->ibc_buffer_u[ibc_buffer_row]
[i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C],
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C));
memmove(
&frame->ibc_buffer_v[ibc_buffer_row][i * IBC_BUFFER_WIDTH_C],
&frame->ibc_buffer_v[ibc_buffer_row]
[i * IBC_BUFFER_WIDTH_C + LCU_WIDTH_C],
sizeof(uvg_pixel) * (IBC_BUFFER_WIDTH_C - LCU_WIDTH_C));
}
}
}

// Clip the copied block to the frame so a partial LCU at the right or bottom
// edge does not read past the reconstructed picture.
const uint32_t ibc_block_width = MIN(LCU_WIDTH, (state->tile->frame->width-lcu->position_px.x));
const uint32_t ibc_block_height = MIN(LCU_WIDTH, (state->tile->frame->height-lcu->position_px.y));

// Luma: reconstruction -> IBC row buffer (destination stride is IBC_BUFFER_WIDTH).
uvg_pixels_blit(&frame->rec->y[lcu->position_px.y * frame->rec->stride + lcu->position_px.x],
&frame->ibc_buffer_y[ibc_buffer_row][ibc_buffer_pos_x],
ibc_block_width, ibc_block_height,
frame->rec->stride, IBC_BUFFER_WIDTH);

// Chroma: skipped for 4:0:0; positions, sizes and strides are all halved.
// NOTE(review): the vertical halving likewise assumes 4:2:0 -- confirm.
if (state->encoder_control->chroma_format != UVG_CSP_400) {
uvg_pixels_blit(&frame->rec->u[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)],
&frame->ibc_buffer_u[ibc_buffer_row][ibc_buffer_pos_x_c],
ibc_block_width>>1, ibc_block_height>>1,
frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C);
uvg_pixels_blit(&frame->rec->v[(lcu->position_px.y >> 1) * (frame->rec->stride >> 1) + (lcu->position_px.x >> 1)],
&frame->ibc_buffer_v[ibc_buffer_row][ibc_buffer_pos_x_c],
ibc_block_width>>1, ibc_block_height>>1,
frame->rec->stride >> 1, IBC_BUFFER_WIDTH_C);
}
}
}

View file

@ -192,9 +192,6 @@ typedef struct encoder_state_config_frame_t {
double *c_para;
double *k_para;
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
uint8_t* hmvp_size; //!< \brief HMVP LUT size
bool jccr_sign;
} encoder_state_config_frame_t;

View file

@ -176,7 +176,6 @@ typedef int32_t mv_t;
//! pow(2, MIN_SIZE)
#define CU_MIN_SIZE_PIXELS (1 << MIN_SIZE)
//! Round frame size up to this interval (8 pixels)
#define CONF_WINDOW_PAD_IN_PIXELS ((1 << MIN_SIZE)<<1)
//! spec: CtbSizeY
@ -259,6 +258,9 @@ typedef int32_t mv_t;
*
*/
#define IBC_MRG_MAX_NUM_CANDS 6
//! Number of samples reserved for one plane of one LCU row's IBC buffer
#define IBC_BUFFER_SIZE (256*128)
//! Width of the luma IBC buffer in samples; also used as its line stride
#define IBC_BUFFER_WIDTH (IBC_BUFFER_SIZE / LCU_WIDTH)
//! Width of the chroma IBC buffers in samples: half of the luma width
#define IBC_BUFFER_WIDTH_C ((IBC_BUFFER_SIZE / LCU_WIDTH) >> 1)
#define MAX_NUM_HMVP_CANDS 5

View file

@ -626,49 +626,63 @@ void uvg_inter_pred_pu(const encoder_state_t * const state,
const int pu_h = PU_GET_H(cu->part_size, width, i_pu);
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, SUB_SCU(pu_x), SUB_SCU(pu_y));
if (pu->inter.mv_dir == 3) {
const uvg_picture *const refs[2] = {
state->frame->ref->images[
state->frame->ref_LX[0][
pu->inter.mv_ref[0]]],
state->frame->ref->images[
state->frame->ref_LX[1][
pu->inter.mv_ref[1]]],
};
uvg_inter_recon_bipred(state,
refs[0], refs[1],
pu_x, pu_y,
pu_w, pu_h,
pu->inter.mv,
lcu,
predict_luma, predict_chroma);
// Build the prediction for this PU: IBC PUs take the first branch, all other
// PUs go through regular bi- or uni-directional inter reconstruction.
if (pu->type == CU_IBC) {
const int offset = x_scu + y_scu * LCU_WIDTH;
const int offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
// NOTE(review): the luma blit passes the same pointer twice, so it copies the
// block onto itself -- looks like a placeholder for the real IBC prediction; confirm.
uvg_pixels_blit(lcu->rec.y + offset, lcu->rec.y + offset, width, width, LCU_WIDTH, LCU_WIDTH);
// NOTE(review): these write to rec.joint_u / rec.joint_v rather than producing
// a chroma prediction, and run regardless of predict_chroma -- confirm intent.
uvg_pixels_blit(lcu->rec.u + offset_c, lcu->rec.joint_u + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(lcu->rec.v + offset_c, lcu->rec.joint_v + offset_c, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
} else {
if (pu->inter.mv_dir == 3) {
// mv_dir == 3: both reference lists are used, so fetch one reference picture
// from each list and run bi-prediction.
const uvg_picture * const refs[2] = {
state->frame->ref->images[state->frame->ref_LX[0][pu->inter.mv_ref[0]]],
state->frame->ref->images[state->frame->ref_LX[1][pu->inter.mv_ref[1]]],
};
uvg_inter_recon_bipred(
state,
refs[0],
refs[1],
pu_x,
pu_y,
pu_w,
pu_h,
pu->inter.mv,
lcu,
predict_luma,
predict_chroma);
} else {
// Uni-prediction: mv_dir 1 or 2 selects reference list 0 or 1.
const int mv_idx = pu->inter.mv_dir - 1;
// NOTE(review): this tests cu->type while the enclosing branch already
// excluded pu->type == CU_IBC; the current-picture reference is only selected
// if cu and pu can differ in type -- confirm.
const uvg_picture * const ref =
(cu->type == CU_IBC) ?
state->tile->frame->rec :
(state->frame->ref
->images[state->frame->ref_LX[mv_idx][pu->inter.mv_ref[mv_idx]]]);
// Offsets of the PU inside the LCU reconstruction buffers.
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
const unsigned offset_chroma =
SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
// Adapter that makes the LCU reconstruction buffers look like a yuv_t target.
yuv_t lcu_adapter;
lcu_adapter.size = pu_w * pu_h;
// NOTE(review): the next three lines end in commas, not semicolons, so the
// assignments and the call form one comma expression; evaluation order is
// still left to right.
lcu_adapter.y = lcu->rec.y + offset_luma,
lcu_adapter.u = lcu->rec.u + offset_chroma,
lcu_adapter.v = lcu->rec.v + offset_chroma,
inter_recon_unipred(
state,
ref,
pu_x,
pu_y,
pu_w,
pu_h,
LCU_WIDTH,
pu->inter.mv[mv_idx],
&lcu_adapter,
NULL,
predict_luma,
predict_chroma);
}
}
else {
const int mv_idx = pu->inter.mv_dir - 1;
const uvg_picture *const ref =
state->frame->ref->images[
state->frame->ref_LX[mv_idx][
pu->inter.mv_ref[mv_idx]]];
const unsigned offset_luma = SUB_SCU(pu_y) * LCU_WIDTH + SUB_SCU(pu_x);
const unsigned offset_chroma = SUB_SCU(pu_y) / 2 * LCU_WIDTH_C + SUB_SCU(pu_x) / 2;
yuv_t lcu_adapter;
lcu_adapter.size = pu_w * pu_h;
lcu_adapter.y = lcu->rec.y + offset_luma,
lcu_adapter.u = lcu->rec.u + offset_chroma,
lcu_adapter.v = lcu->rec.v + offset_chroma,
inter_recon_unipred(state,
ref,
pu_x, pu_y,
pu_w, pu_h,
LCU_WIDTH,
pu->inter.mv[mv_idx],
&lcu_adapter,
NULL,
predict_luma, predict_chroma);
}
if (predict_chroma && state->encoder_control->cfg.jccr) {
const int offset = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
uvg_pixels_blit(lcu->rec.u + offset, lcu->rec.joint_u + offset, width / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);

View file

@ -179,7 +179,7 @@ static void lcu_fill_cu_info(lcu_t *lcu, int x_local, int y_local, int width, in
}
}
static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width)
static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width, uint8_t type)
{
const part_mode_t part_mode = LCU_GET_CU_AT_PX(lcu, x_local, y_local)->part_size;
const int num_pu = uvg_part_mode_num_parts[part_mode];
@ -191,7 +191,7 @@ static void lcu_fill_inter(lcu_t *lcu, int x_local, int y_local, int cu_width)
const int height_pu = PU_GET_H(part_mode, cu_width, i);
cu_info_t *pu = LCU_GET_CU_AT_PX(lcu, x_pu, y_pu);
pu->type = CU_INTER;
pu->type = type;
lcu_fill_cu_info(lcu, x_pu, y_pu, width_pu, height_pu, pu);
}
}
@ -1034,7 +1034,7 @@ static double search_cu(
lcu_fill_cu_info(lcu, x_local, y_local, cu_width, cu_width, cur_cu);
} else if (cur_cu->type == CU_INTER) {
} else if (cur_cu->type == CU_INTER || cur_cu->type == CU_IBC) {
if (!cur_cu->skipped) {
@ -1080,7 +1080,7 @@ static double search_cu(
inter_bitcost += cur_cu->merge_idx;
}
}
lcu_fill_inter(lcu, x_local, y_local, cu_width);
lcu_fill_inter(lcu, x_local, y_local, cu_width, cur_cu->type);
lcu_fill_cbf(lcu, x_local, y_local, cu_width, cur_cu);
}
}

View file

@ -78,6 +78,9 @@ typedef struct videoframe
int32_t poc; //!< \brief Picture order count
cu_info_t* hmvp_lut; //!< \brief Look-up table for HMVP, one for each LCU row
uvg_pixel **ibc_buffer_y; //!< \brief Intra Block Copy luma buffer, one pointer per LCU row; each entry owns that row's allocation
uvg_pixel **ibc_buffer_u; //!< \brief Intra Block Copy chroma U buffer, one pointer per LCU row; entries point into the ibc_buffer_y allocation
uvg_pixel **ibc_buffer_v; //!< \brief Intra Block Copy chroma V buffer, one pointer per LCU row; entries point into the ibc_buffer_y allocation
uint8_t* hmvp_size; //!< \brief HMVP LUT size
bool source_lmcs_mapped; //!< \brief Indicate if source_lmcs is available and mapped to LMCS