From 4cd5bc38a1ec618caf6f142b84d35df8b3a680f9 Mon Sep 17 00:00:00 2001 From: Marko Viitanen Date: Mon, 24 May 2021 17:23:17 +0300 Subject: [PATCH] [LMCS] Luma mapping working after some rework, have to keep the reconstruction in the mapped domain --- src/encoderstate.c | 35 ++++++++++++++++++++------ src/search.c | 10 -------- src/strategies/avx2/quant-avx2.c | 14 +++++++++-- src/strategies/generic/quant-generic.c | 16 +++++++++--- 4 files changed, 53 insertions(+), 22 deletions(-) diff --git a/src/encoderstate.c b/src/encoderstate.c index 6e67d143..d70a0294 100644 --- a/src/encoderstate.c +++ b/src/encoderstate.c @@ -646,6 +646,19 @@ static void encoder_state_worker_encode_lcu(void * opaque) set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp); } + + if (state->encoder_control->cfg.lmcs_enable) { + kvz_pixel* luma = &state->tile->frame->rec->y[lcu->position_px.x + lcu->position_px.y * state->tile->frame->rec->stride]; + for (int y = 0; y < LCU_WIDTH; y++) { + if (lcu->position_px.y + y < state->tile->frame->rec->height) { + for (int x = 0; x < LCU_WIDTH; x++) { + if (lcu->position_px.x + x < state->tile->frame->rec->width) luma[x] = state->tile->frame->lmcs_aps->m_invLUT[luma[x]]; + } + } + luma += state->tile->frame->rec->stride; + } + } + if (encoder->cfg.deblock_enable) { kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y); } @@ -771,6 +784,18 @@ static void encoder_state_worker_encode_lcu_search(void * opaque) set_cu_qps(state, lcu->position_px.x, lcu->position_px.y, 0, &last_qp, &prev_qp); } + if (state->encoder_control->cfg.lmcs_enable) { + kvz_pixel* luma = &state->tile->frame->rec->y[lcu->position_px.x + lcu->position_px.y * state->tile->frame->rec->stride]; + for (int y = 0; y < LCU_WIDTH; y++) { + if (lcu->position_px.y+y < state->tile->frame->rec->height) { + for (int x = 0; x < LCU_WIDTH; x++) { + if (lcu->position_px.x+x < state->tile->frame->rec->width) luma[x] = state->tile->frame->lmcs_aps->m_invLUT[luma[x]]; + } + } + luma += state->tile->frame->rec->stride; + } + } + if (encoder->cfg.deblock_enable) { kvz_filter_deblock_lcu(state, lcu->position_px.x, lcu->position_px.y); } @@ -1343,13 +1368,6 @@ static void encoder_set_source_picture(encoder_state_t * const state, kvz_pictur } state->tile->frame->rec_lmcs = state->tile->frame->rec; - if (state->encoder_control->cfg.lmcs_enable) { - state->tile->frame->source_lmcs = kvz_image_alloc(state->encoder_control->chroma_format, frame->width, frame->height); - state->tile->frame->rec_lmcs = kvz_image_alloc(state->encoder_control->chroma_format, frame->width, frame->height); - state->tile->frame->lmcs_aps = calloc(1, sizeof(lmcs_aps)); - kvz_init_lmcs_aps(state->tile->frame->lmcs_aps, state->encoder_control->cfg.width, state->encoder_control->cfg.height, LCU_CU_WIDTH, LCU_CU_WIDTH, state->encoder_control->bitdepth); - } - kvz_videoframe_set_poc(state->tile->frame, state->frame->poc); } @@ -1618,6 +1636,9 @@ static void encoder_state_init_new_frame(encoder_state_t * const state, kvz_pict } if (state->encoder_control->cfg.lmcs_enable) { + state->tile->frame->lmcs_aps = calloc(1, sizeof(lmcs_aps)); + kvz_init_lmcs_aps(state->tile->frame->lmcs_aps, state->encoder_control->cfg.width, state->encoder_control->cfg.height, LCU_CU_WIDTH, LCU_CU_WIDTH, state->encoder_control->bitdepth); + // ToDo: support other signal types in LMCS kvz_lmcs_preanalyzer(state, state->tile->frame, state->tile->frame->lmcs_aps, RESHAPE_SIGNAL_SDR); kvz_construct_reshaper_lmcs(state->tile->frame->lmcs_aps); diff --git a/src/search.c b/src/search.c index 0c8474e2..52b891eb 100644 --- a/src/search.c +++ b/src/search.c @@ -910,9 +910,6 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i memcpy(&lcu->top_ref.y[x_min_in_lcu], &hor_buf->y[luma_offset], luma_bytes); - if(state->encoder_control->cfg.lmcs_enable) - for (int i = 0; i < luma_bytes; i++) lcu->top_ref.y[x_min_in_lcu + i] = state->tile->frame->lmcs_aps->m_fwdLUT[lcu->top_ref.y[x_min_in_lcu + i]]; - if (state->encoder_control->chroma_format != KVZ_CSP_400) { memcpy(&lcu->top_ref.u[x_min_in_lcu], &hor_buf->u[chroma_offset], chroma_bytes); memcpy(&lcu->top_ref.v[x_min_in_lcu], &hor_buf->v[chroma_offset], chroma_bytes); @@ -928,9 +925,6 @@ static void init_lcu_t(const encoder_state_t * const state, const int x, const i memcpy(&lcu->left_ref.y[y_min_in_lcu], &ver_buf->y[luma_offset], luma_bytes); - if (state->encoder_control->cfg.lmcs_enable) - for (int i = 0; i < luma_bytes; i++) lcu->left_ref.y[y_min_in_lcu + i] = state->tile->frame->lmcs_aps->m_fwdLUT[lcu->left_ref.y[y_min_in_lcu + i]]; - if (state->encoder_control->chroma_format != KVZ_CSP_400) { memcpy(&lcu->left_ref.u[y_min_in_lcu], &ver_buf->u[chroma_offset], chroma_bytes); memcpy(&lcu->left_ref.v[y_min_in_lcu], &ver_buf->v[chroma_offset], chroma_bytes); @@ -1026,11 +1020,7 @@ void kvz_search_lcu(encoder_state_t * const state, const int x, const int y, con // The best decisions through out the LCU got propagated back to depth 0, // so copy those back to the frame. - if (state->encoder_control->cfg.lmcs_enable) - for (int i = 0; i < LCU_WIDTH * LCU_WIDTH; i++) work_tree[0].rec.y[i] = state->tile->frame->lmcs_aps->m_invLUT[work_tree[0].rec.y[i]]; copy_lcu_to_cu_data(state, x, y, &work_tree[0]); - if (state->encoder_control->cfg.lmcs_enable) - for (int i = 0; i < LCU_WIDTH * LCU_WIDTH; i++) work_tree[0].rec.y[i] = state->tile->frame->lmcs_aps->m_fwdLUT[work_tree[0].rec.y[i]]; // Copy coeffs to encoder state. copy_coeffs(work_tree[0].coeff.y, state->coeff->y, LCU_WIDTH); diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index a92129da..ba2217d0 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -664,8 +664,18 @@ int kvz_quantize_residual_avx2(encoder_state_t *const state, assert(width <= TR_MAX_WIDTH); assert(width >= TR_MIN_WIDTH); - // Get residual. (ref_in - pred_in -> residual) - get_residual_avx2(ref_in, pred_in, residual, width, in_stride); + int y, x; + // ToDo: do something with the LMCS for better performance + if (state->encoder_control->cfg.lmcs_enable && color == COLOR_Y) { + for (y = 0; y < width; ++y) { + for (x = 0; x < width; ++x) { + residual[x + y * width] = (int16_t)(state->tile->frame->lmcs_aps->m_fwdLUT[ref_in[x + y * in_stride]] - pred_in[x + y * in_stride]); + } + } + } else { + // Get residual. (ref_in - pred_in -> residual) + get_residual_avx2(ref_in, pred_in, residual, width, in_stride); + } // Transform residual. (residual -> coeff) if (use_trskip) { diff --git a/src/strategies/generic/quant-generic.c b/src/strategies/generic/quant-generic.c index a171aff1..62609b34 100644 --- a/src/strategies/generic/quant-generic.c +++ b/src/strategies/generic/quant-generic.c @@ -30,6 +30,7 @@ #include "strategyselector.h" #include "transform.h" #include "fast_coeff_cost.h" +#include "reshape.h" #define QUANT_SHIFT 14 /** @@ -206,9 +207,18 @@ int kvz_quantize_residual_generic(encoder_state_t *const state, // Get residual. (ref_in - pred_in -> residual) { int y, x; - for (y = 0; y < width; ++y) { - for (x = 0; x < width; ++x) { - residual[x + y * width] = (int16_t)(ref_in[x + y * in_stride] - pred_in[x + y * in_stride]); + if (state->encoder_control->cfg.lmcs_enable && color == COLOR_Y) { + for (y = 0; y < width; ++y) { + for (x = 0; x < width; ++x) { + residual[x + y * width] = (int16_t)(state->tile->frame->lmcs_aps->m_fwdLUT[ref_in[x + y * in_stride]] - pred_in[x + y * in_stride]); + } + } + } + else { + for (y = 0; y < width; ++y) { + for (x = 0; x < width; ++x) { + residual[x + y * width] = (int16_t)(ref_in[x + y * in_stride] - pred_in[x + y * in_stride]); + } } } }