[ibc] Fix bugs in IBC reconstruction and add a simple search for I-frames

This commit is contained in:
Marko Viitanen 2022-07-02 18:18:42 +03:00
parent d9164f3cfe
commit cc4c757695
5 changed files with 99 additions and 14 deletions

View file

@ -131,7 +131,7 @@ void uvg_dbg_yuview_init(const encoder_control_t* const encoder, char* filename,
fprintf(yuview_output, "%%;scaleFactor;16\r\n");
fprintf(yuview_output, "%%;type;13;MVInterL0;vector\r\n");
fprintf(yuview_output, "%%;vectorColor;0;0;0;255\r\n");
fprintf(yuview_output, "%%;scaleFactor;16\r\n");
fprintf(yuview_output, "%%;scaleFactor;4\r\n");
fprintf(yuview_output, "%%;type;14;MVInterL1;vector\r\n");
fprintf(yuview_output, "%%;vectorColor;255;255;255;255\r\n");
fprintf(yuview_output, "%%;scaleFactor;16\r\n");

View file

@ -1458,7 +1458,7 @@ void uvg_encode_coding_tree(
// CABAC_BIN(cabac, 0, "split_transform_flag");
}
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, (cur_cu->type == CU_INTRA)?0:1);
DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1);
if (ctrl->cfg.lossless) {
cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass;

View file

@ -254,7 +254,7 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state,
// Fill IBC buffer
if (state->encoder_control->cfg.ibc) {
uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x;
uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH >= IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x;
uint32_t ibc_buffer_pos_x_c = ibc_buffer_pos_x >> 1;
uint32_t ibc_buffer_row = lcu->position_px.y / LCU_WIDTH;

View file

@ -612,20 +612,20 @@ static void ibc_recon_cu(const encoder_state_t * const state,
int32_t mv_y = cu->inter.mv[0][1] >> UVG_IMV_4PEL;
uint32_t ibc_row = y / LCU_WIDTH;
int32_t buffer_x = ((x - x_scu) + LCU_WIDTH < IBC_BUFFER_WIDTH ?
int32_t buffer_x = ((x - x_scu) + LCU_WIDTH <= IBC_BUFFER_WIDTH ?
x :
x - (((x - x_scu) + LCU_WIDTH) - IBC_BUFFER_WIDTH)) + mv_x;
x - (((x - x_scu)) - IBC_BUFFER_WIDTH)) + mv_x;
int32_t buffer_y = y_scu + mv_y;
// The whole block must fir to the left of the current position
// The whole block must be to the left of the current position
assert(-mv_x >= width);
// Predicted block completely outside of this LCU
if (mv_x + x_scu + width <= 0) {
if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width, width, IBC_BUFFER_WIDTH, LCU_WIDTH);
if (predict_chroma) {
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.u + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.v + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
}
} else if (mv_x + x_scu + width >= width) { // Completely in current LCU
if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x], lcu->rec.y + offset, width, width, LCU_WIDTH, LCU_WIDTH);
@ -639,15 +639,15 @@ static void ibc_recon_cu(const encoder_state_t * const state,
uint32_t width_lcu = width - width_buffer;
if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width_buffer, width, IBC_BUFFER_WIDTH, LCU_WIDTH);
if (predict_chroma) {
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.u + offset_c, width_buffer / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.v + offset_c, width_buffer / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C);
}
offset += width_buffer;
offset_c += width_buffer/2;
offset_c += width_buffer/2 + (width_buffer&1);
if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x + width_buffer], lcu->rec.y + offset, width_lcu, width, LCU_WIDTH, LCU_WIDTH);
if (predict_chroma) {
if (predict_chroma && (width_lcu / 2)) {
uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.u + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.v + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C);
}
@ -1202,8 +1202,8 @@ static void get_ibc_merge_candidates(const encoder_state_t * const state,
const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH);
const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS;
int32_t num_cand = state->tile->frame->hmvp_size_ibc[ctu_row];
for (int i = 0; i < MIN(MAX_NUM_HMVP_CANDS,num_cand); i++) {
cu_info_t* cand = &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five + num_cand - 1 - i];
for (int i = 0; i < MIN(4,num_cand); i++) {
cu_info_t* cand = &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five + i];
mv_cand[candidates][0] = cand->inter.mv[0][0];
mv_cand[candidates][1] = cand->inter.mv[0][1];
candidates++;

View file

@ -1008,6 +1008,91 @@ static double search_cu(
}
}
// Simple IBC search
if (can_use_intra && state->frame->slicetype == UVG_SLICE_I
&& state->encoder_control->cfg.ibc) {
cu_info_t cu_backup = *cur_cu;
uint32_t ibc_cost = MAX_INT;
uint32_t ibc_cost_y = MAX_INT;
uint32_t base_cost = MAX_INT;
uint32_t base_cost_y = MAX_INT;
if(cur_cu->type == CU_INTRA) {
uvg_intra_recon_cu(state,x, y,depth, &intra_search,NULL,lcu);
} else {
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, state->encoder_control->chroma_format != UVG_CSP_400);
}
bool ibc_better = false;
cur_cu->type = CU_IBC;
cur_cu->inter.mv_dir = 1;
cur_cu->skipped = false;
cur_cu->merged = false;
cur_cu->inter.mv_cand0 = 0;
optimized_sad_func_ptr_t optimized_sad = uvg_get_optimized_sad(cu_width);
uint32_t source_stride = state->tile->frame->width;
const int x_scu = SUB_SCU(x);
const int y_scu = SUB_SCU(y);
const uint32_t offset = x_scu + y_scu * LCU_WIDTH;
const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C;
mv_t best_vector[2] = {0, 0};
if (optimized_sad != NULL) {
base_cost_y = base_cost = optimized_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width, LCU_WIDTH, source_stride);
if(state->encoder_control->chroma_format != UVG_CSP_400) {
base_cost += optimized_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2);
base_cost += optimized_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2);
}
} else {
base_cost_y = base_cost = uvg_reg_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width,cu_width, LCU_WIDTH, source_stride);
if(state->encoder_control->chroma_format != UVG_CSP_400) {
base_cost += uvg_reg_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2);
base_cost += uvg_reg_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2);
}
}
for(int i = 0; i < 8; i++) {
cur_cu->inter.mv[0][0] = (-cu_width - i) << UVG_IMV_4PEL;
cur_cu->inter.mv[0][1] = 0;
if (x -cu_width - i < 0) break;
uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, state->encoder_control->chroma_format != UVG_CSP_400);
if (optimized_sad != NULL) {
ibc_cost_y = ibc_cost = optimized_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width, LCU_WIDTH, source_stride);
if(state->encoder_control->chroma_format != UVG_CSP_400) {
ibc_cost += optimized_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2);
ibc_cost += optimized_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2);
}
} else {
ibc_cost_y = ibc_cost = uvg_reg_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width,cu_width, LCU_WIDTH, source_stride);
if(state->encoder_control->chroma_format != UVG_CSP_400) {
ibc_cost += uvg_reg_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2);
ibc_cost += uvg_reg_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2);
}
}
if (ibc_cost_y < base_cost_y) {
ibc_better = true;
base_cost_y = ibc_cost_y;
best_vector[0] = cur_cu->inter.mv[0][0];
best_vector[1] = cur_cu->inter.mv[0][1];
//break;
}
}
if (!ibc_better) *cur_cu = cu_backup;
else {
cur_cu->inter.mv[0][0] = best_vector[0];
cur_cu->inter.mv[0][1] = best_vector[1];
//fprintf(stderr, "Coding IBC: %d, %d: %d, %d size: %d\r\n", x,y,cur_cu->inter.mv[0][0] / 4, cur_cu->inter.mv[0][1] / 4, cu_width);
}
}
// Reconstruct best mode because we need the reconstructed pixels for
// mode search of adjacent CUs.
if (cur_cu->type == CU_INTRA) {