From cc4c7576950117e6675df8ecf83b2b3db0857b70 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Sat, 2 Jul 2022 18:18:42 +0300 Subject: [PATCH] [ibc] Fix bugs on IBC reconstruction and add a simple search for I-frames --- src/debug.c | 2 +- src/encode_coding_tree.c | 2 +- src/encoderstate.c | 2 +- src/inter.c | 22 +++++------ src/search.c | 85 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 14 deletions(-) diff --git a/src/debug.c b/src/debug.c index eed773ee..1a2f00a0 100644 --- a/src/debug.c +++ b/src/debug.c @@ -131,7 +131,7 @@ void uvg_dbg_yuview_init(const encoder_control_t* const encoder, char* filename, fprintf(yuview_output, "%%;scaleFactor;16\r\n"); fprintf(yuview_output, "%%;type;13;MVInterL0;vector\r\n"); fprintf(yuview_output, "%%;vectorColor;0;0;0;255\r\n"); - fprintf(yuview_output, "%%;scaleFactor;16\r\n"); + fprintf(yuview_output, "%%;scaleFactor;4\r\n"); fprintf(yuview_output, "%%;type;14;MVInterL1;vector\r\n"); fprintf(yuview_output, "%%;vectorColor;255;255;255;255\r\n"); fprintf(yuview_output, "%%;scaleFactor;16\r\n"); diff --git a/src/encode_coding_tree.c b/src/encode_coding_tree.c index fa73e08e..88aec44e 100644 --- a/src/encode_coding_tree.c +++ b/src/encode_coding_tree.c @@ -1458,7 +1458,7 @@ void uvg_encode_coding_tree( // CABAC_BIN(cabac, 0, "split_transform_flag"); } - DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, (cur_cu->type == CU_INTRA)?0:1); + DBG_YUVIEW_VALUE(state->frame->poc, DBG_YUVIEW_CU_TYPE, abs_x, abs_y, cu_width, cu_width, cur_cu->type-1); if (ctrl->cfg.lossless) { cabac->cur_ctx = &cabac->ctx.cu_transquant_bypass; diff --git a/src/encoderstate.c b/src/encoderstate.c index 7bb12de8..e6f8546e 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -254,7 +254,7 @@ static void encoder_state_recdata_to_bufs(encoder_state_t * const state, // Fill IBC buffer if (state->encoder_control->cfg.ibc) { - uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH > IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x; + uint32_t ibc_buffer_pos_x = lcu->position_px.x + LCU_WIDTH >= IBC_BUFFER_WIDTH ? IBC_BUFFER_WIDTH - LCU_WIDTH: lcu->position_px.x; uint32_t ibc_buffer_pos_x_c = ibc_buffer_pos_x >> 1; uint32_t ibc_buffer_row = lcu->position_px.y / LCU_WIDTH; diff --git a/src/inter.c b/src/inter.c index 5fd8c21c..944f9c47 100644 --- a/src/inter.c +++ b/src/inter.c @@ -612,20 +612,20 @@ static void ibc_recon_cu(const encoder_state_t * const state, int32_t mv_y = cu->inter.mv[0][1] >> UVG_IMV_4PEL; uint32_t ibc_row = y / LCU_WIDTH; - int32_t buffer_x = ((x - x_scu) + LCU_WIDTH < IBC_BUFFER_WIDTH ? + int32_t buffer_x = ((x - x_scu) + LCU_WIDTH <= IBC_BUFFER_WIDTH ? x : - x - (((x - x_scu) + LCU_WIDTH) - IBC_BUFFER_WIDTH)) + mv_x; + x - (((x - x_scu)) - IBC_BUFFER_WIDTH)) + mv_x; int32_t buffer_y = y_scu + mv_y; - // The whole block must fir to the left of the current position + // The whole block must be to the left of the current position assert(-mv_x >= width); // Predicted block completely outside of this LCU if (mv_x + x_scu + width <= 0) { if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width, width, IBC_BUFFER_WIDTH, LCU_WIDTH); if (predict_chroma) { - uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.u + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); - uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.v + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); } } else if (mv_x + x_scu + width >= width) { // Completely in current LCU if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x], lcu->rec.y + offset, width, width, LCU_WIDTH, LCU_WIDTH); @@ -639,15 +639,15 @@ static void ibc_recon_cu(const encoder_state_t * const state, uint32_t width_lcu = width - width_buffer; if(predict_luma) uvg_pixels_blit(&state->tile->frame->ibc_buffer_y[ibc_row][buffer_y * IBC_BUFFER_WIDTH + buffer_x], lcu->rec.y + offset, width_buffer, width, IBC_BUFFER_WIDTH, LCU_WIDTH); if (predict_chroma) { - uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.u + offset_c, width_buffer / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); - uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y >> 1) * IBC_BUFFER_WIDTH_C + (buffer_x >> 1)], lcu->rec.v + offset_c, width_buffer / 2, width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&state->tile->frame->ibc_buffer_u[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.u + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); + uvg_pixels_blit(&state->tile->frame->ibc_buffer_v[ibc_row][(buffer_y / 2) * IBC_BUFFER_WIDTH_C + (buffer_x / 2)], lcu->rec.v + offset_c, width_buffer / 2 + (width_buffer&1), width / 2, IBC_BUFFER_WIDTH_C, LCU_WIDTH_C); } offset += width_buffer; - offset_c += width_buffer/2; + offset_c += width_buffer/2 + (width_buffer&1); if(predict_luma) uvg_pixels_blit(&lcu->rec.y[(y_scu + mv_y) * LCU_WIDTH + x_scu + mv_x + width_buffer], lcu->rec.y + offset, width_lcu, width, LCU_WIDTH, LCU_WIDTH); - if (predict_chroma) { + if (predict_chroma && (width_lcu / 2)) { uvg_pixels_blit(&lcu->rec.u[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.u + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); uvg_pixels_blit(&lcu->rec.v[((y_scu+mv_y) / 2) * LCU_WIDTH_C + (x_scu + mv_x + width_buffer) / 2], lcu->rec.v + offset_c, width_lcu / 2, width / 2, LCU_WIDTH_C, LCU_WIDTH_C); } @@ -1202,8 +1202,8 @@ static void get_ibc_merge_candidates(const encoder_state_t * const state, const uint32_t ctu_row = (y >> LOG2_LCU_WIDTH); const uint32_t ctu_row_mul_five = ctu_row * MAX_NUM_HMVP_CANDS; int32_t num_cand = state->tile->frame->hmvp_size_ibc[ctu_row]; - for (int i = 0; i < MIN(MAX_NUM_HMVP_CANDS,num_cand); i++) { - cu_info_t* cand = &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five + num_cand - 1 - i]; + for (int i = 0; i < MIN(4,num_cand); i++) { + cu_info_t* cand = &state->tile->frame->hmvp_lut_ibc[ctu_row_mul_five + i]; mv_cand[candidates][0] = cand->inter.mv[0][0]; mv_cand[candidates][1] = cand->inter.mv[0][1]; candidates++; diff --git a/src/search.c b/src/search.c index 3fefd1c2..9743905e 100644 --- a/src/search.c +++ b/src/search.c @@ -1008,6 +1008,91 @@ static double search_cu( } } + // Simple IBC search + if (can_use_intra && state->frame->slicetype == UVG_SLICE_I + && state->encoder_control->cfg.ibc) { + cu_info_t cu_backup = *cur_cu; + + uint32_t ibc_cost = MAX_INT; + uint32_t ibc_cost_y = MAX_INT; + uint32_t base_cost = MAX_INT; + uint32_t base_cost_y = MAX_INT; + + + if(cur_cu->type == CU_INTRA) { + uvg_intra_recon_cu(state,x, y,depth, &intra_search,NULL,lcu); + } else { + uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, state->encoder_control->chroma_format != UVG_CSP_400); + } + + bool ibc_better = false; + cur_cu->type = CU_IBC; + cur_cu->inter.mv_dir = 1; + cur_cu->skipped = false; + cur_cu->merged = false; + cur_cu->inter.mv_cand0 = 0; + optimized_sad_func_ptr_t optimized_sad = uvg_get_optimized_sad(cu_width); + uint32_t source_stride = state->tile->frame->width; + const int x_scu = SUB_SCU(x); + const int y_scu = SUB_SCU(y); + const uint32_t offset = x_scu + y_scu * LCU_WIDTH; + const uint32_t offset_c = x_scu / 2 + y_scu / 2 * LCU_WIDTH_C; + + mv_t best_vector[2] = {0, 0}; + + + if (optimized_sad != NULL) { + base_cost_y = base_cost = optimized_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width, LCU_WIDTH, source_stride); + if(state->encoder_control->chroma_format != UVG_CSP_400) { + base_cost += optimized_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2); + base_cost += optimized_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2); + } + } else { + base_cost_y = base_cost = uvg_reg_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width,cu_width, LCU_WIDTH, source_stride); + if(state->encoder_control->chroma_format != UVG_CSP_400) { + base_cost += uvg_reg_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2); + base_cost += uvg_reg_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2); + } + } + + for(int i = 0; i < 8; i++) { + cur_cu->inter.mv[0][0] = (-cu_width - i) << UVG_IMV_4PEL; + cur_cu->inter.mv[0][1] = 0; + + if (x -cu_width - i < 0) break; + + uvg_inter_recon_cu(state, lcu, x, y, CU_WIDTH_FROM_DEPTH(depth), true, state->encoder_control->chroma_format != UVG_CSP_400); + + if (optimized_sad != NULL) { + ibc_cost_y = ibc_cost = optimized_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width, LCU_WIDTH, source_stride); + if(state->encoder_control->chroma_format != UVG_CSP_400) { + ibc_cost += optimized_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2); + ibc_cost += optimized_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, LCU_WIDTH_C, source_stride / 2); + } + } else { + ibc_cost_y = ibc_cost = uvg_reg_sad(lcu->rec.y + offset, &state->tile->frame->source->y[y * source_stride + x], cu_width,cu_width, LCU_WIDTH, source_stride); + if(state->encoder_control->chroma_format != UVG_CSP_400) { + ibc_cost += uvg_reg_sad(lcu->rec.u + offset_c, &state->tile->frame->source->u[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2); + ibc_cost += uvg_reg_sad(lcu->rec.v + offset_c, &state->tile->frame->source->v[(y / 2) * source_stride / 2 + x / 2], cu_width / 2, cu_width / 2, LCU_WIDTH_C, source_stride / 2); + } + } + if (ibc_cost_y < base_cost_y) { + ibc_better = true; + base_cost_y = ibc_cost_y; + best_vector[0] = cur_cu->inter.mv[0][0]; + best_vector[1] = cur_cu->inter.mv[0][1]; + //break; + } + } + + if (!ibc_better) *cur_cu = cu_backup; + else { + cur_cu->inter.mv[0][0] = best_vector[0]; + cur_cu->inter.mv[0][1] = best_vector[1]; + //fprintf(stderr, "Coding IBC: %d, %d: %d, %d size: %d\r\n", x,y,cur_cu->inter.mv[0][0] / 4, cur_cu->inter.mv[0][1] / 4, cu_width); + } + } + // Reconstruct best mode because we need the reconstructed pixels for // mode search of adjacent CUs. if (cur_cu->type == CU_INTRA) {