[cclm] fix cclm when deblocking is enabled

2024-11-23 18:14:06 +00:00 · 2021-11-24 08:46:08 +02:00 · 2021-11-24 08:46:08 +02:00 · 80ddb60ccf
parent f030158703
commit 80ddb60ccf
4 changed files with 22 additions and 12 deletions
--- a/src/intra.c
+++ b/src/intra.c
@ -497,17 +497,11 @@ void kvz_predict_cclm(
    for (; available_above_right < width / 2; available_above_right++) {
      int x_extension = x_scu + width * 2 + 4 * available_above_right;
      cu_info_t* pu = LCU_GET_CU_AT_PX(lcu, x_extension, y_scu - 4);
-      if (pu->type == CU_NOTSET || x_extension > LCU_WIDTH) break;
+      if (x_extension >= LCU_WIDTH || pu->type == CU_NOTSET) break;
    }
    if(y_scu == 0) {
      if(!state->encoder_control->cfg.wpp) available_above_right = MIN(width / 2, (state->tile->frame->width - x0 - width * 2) / 4);
-      for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) {
-        bool left_padding = x0 || x;
-        sampled_luma_ref.top[x / 2] = (state->tile->frame->rec->y[x0 + x + (y0 - 1) * stride] * 2 +
-          state->tile->frame->rec->y[x0 + x + 1 + (y0 - 1) * stride] +
-          state->tile->frame->rec->y[x0 + x - left_padding + (y0 - 1) * stride] + 
-          2) >> 2;
-      }
+      memcpy(sampled_luma_ref.top, &state->tile->frame->cclm_luma_rec_top_line[x0 / 2 + (y0 / 64 - 1) * (stride / 2)], sizeof(kvz_pixel) * (width + available_above_right * 2));
    }
    else {
      for (int x = 0; x < width * (available_above_right ? 4 : 2); x += 2) {
--- a/src/search.c
+++ b/src/search.c
@ -241,7 +241,7 @@ static double cu_zero_coeff_cost(const encoder_state_t *state, lcu_t *work_tree,
 }


-static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec) {
+static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width, int height, kvz_pixel *y_rec, kvz_pixel extra_pixel) {
  if (!state->encoder_control->cfg.cclm) return;
  int x_scu = SUB_SCU(x);
  int y_scu = SUB_SCU(y);
@ -265,6 +265,17 @@ static void downsample_cclm_rec(encoder_state_t *state, int x, int y, int width,
    }
    y_rec += LCU_WIDTH * 2;
  }
+  if((y + height * 2) % 64 == 0) {
+    int line = y / 64 * stride / 2;
+    y_rec -= LCU_WIDTH;
+    for (int i = 0; i < width; ++i) {
+      int s = 2;
+      s += y_rec[i * 2] * 2;
+      s += y_rec[i * 2 + 1];
+      s += !x_scu && !i && x ? extra_pixel : y_rec[i * 2 - ((i + x) > 0)] ;
+      state->tile->frame->cclm_luma_rec_top_line[i + x / 2 + line] = s >> 2;
+    }
+  }
 }


@ -739,7 +750,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
                         NULL, NULL, lcu);

      downsample_cclm_rec(
-        state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y
+        state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
      );

      // TODO: This heavily relies on square CUs
@ -945,7 +956,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
      // search.
      work_tree_copy_down(x_local, y_local, depth, work_tree);
      downsample_cclm_rec(
-        state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y
+        state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
      );

      if (state->frame->slicetype != KVZ_SLICE_I) {
@ -960,7 +971,7 @@ static double search_cu(encoder_state_t * const state, int x, int y, int depth,
    // when searching SMP and AMP blocks.
    work_tree_copy_down(x_local, y_local, depth, work_tree);
    downsample_cclm_rec(
-      state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y
+      state, x, y, cu_width / 2, cu_width / 2, lcu->rec.y, lcu->left_ref.y[64]
    );

    if (state->frame->slicetype != KVZ_SLICE_I) {
--- a/src/videoframe.c
+++ b/src/videoframe.c
@ -62,6 +62,7 @@ videoframe_t * kvz_videoframe_alloc(int32_t width,
    if (cclm) {
      assert(chroma_format == KVZ_CSP_420);
      frame->cclm_luma_rec = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) * (((height + 7) & ~7) + FRAME_PADDING_LUMA) / 4);
+      frame->cclm_luma_rec_top_line = MALLOC(kvz_pixel, (((width + 7) & ~7) + FRAME_PADDING_LUMA) / 2 * CEILDIV(height, 64));
    }
  }
  
@ -83,6 +84,9 @@ int kvz_videoframe_free(videoframe_t * const frame)
  if(frame->cclm_luma_rec) {
    FREE_POINTER(frame->cclm_luma_rec);
  }
+  if(frame->cclm_luma_rec_top_line) {
+    FREE_POINTER(frame->cclm_luma_rec_top_line);
+  }

  kvz_image_free(frame->source);
  frame->source = NULL;
--- a/src/videoframe.h
+++ b/src/videoframe.h
@ -54,6 +54,7 @@ typedef struct videoframe
  kvz_picture *rec_lmcs;       //!< \brief LMCS mapped reconstructed image, if available, otherwise points to source.

  kvz_pixel *cclm_luma_rec;    //!< \brief buffer for the downsampled luma reconstruction for cclm
+  kvz_pixel *cclm_luma_rec_top_line;    //!< \brief buffer holding the downsampled last luma line of each 64-line LCU row, read as the top reference for cclm by the LCU row below

  uint8_t* lmcs_avg_processed; //!< \brief For each LCU, indicates if already calculated average of border pixels is available
  int32_t* lmcs_avg;           //!< \brief Average of LCU border pixels