[isp] Convert functions to handle new coeff array order. Add function for getting coeff array subset. Fix assert.

2024-11-23 18:14:06 +00:00 · 2022-08-25 14:54:42 +03:00 · 2022-08-25 14:54:42 +03:00 · b8506c757c
parent 69dcb04c99
commit b8506c757c
8 changed files with 92 additions and 58 deletions
--- a/src/cu.h
+++ b/src/cu.h
@ -415,9 +415,11 @@ void uvg_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu
 */
 static INLINE void copy_coeffs(const coeff_t *__restrict src,
                               coeff_t *__restrict dest,
-                               size_t width, size_t height)
+                               size_t width, size_t height, const int lcu_width) // lcu_width: row stride, in coeffs, of both src and dest
 {
-  memcpy(dest, src, width * height * sizeof(coeff_t));
+  for (size_t j = 0; j < height; ++j) { // size_t index matches the type of height (no signed/unsigned compare)
+    memcpy(dest + j * lcu_width, src + j * lcu_width, width * sizeof(coeff_t)); // copy one row of `width` coeffs
+  }
 }


--- a/src/encode_coding_tree.c
+++ b/src/encode_coding_tree.c
@ -417,7 +417,7 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
  //ToDo: own ctx_offset and shift for X and Y 
  uint8_t ctx_offset_x = type ? 0 : prefix_ctx[index_x];
  uint8_t ctx_offset_y = type ? 0 : prefix_ctx[index_y];
-  uint8_t shift_x = type ? CLIP(0, 2, width>>3) : (index_x+1)>>2;
+  uint8_t shift_x = type ? CLIP(0, 2, width >> 3) : (index_x + 1) >> 2;
  uint8_t shift_y = type ? CLIP(0, 2, width >> 3) : (index_y + 1) >> 2;
  double bits = 0;

@ -481,11 +481,15 @@ static void encode_chroma_tu(
  cabac_data_t* const cabac = &state->cabac;
  *scan_idx = uvg_get_scan_order(cur_pu->type, cur_pu->intra.mode_chroma, depth);
  if(!joint_chroma){
-    const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
-    const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
+    // const coeff_t *coeff_u = &coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
+    // const coeff_t *coeff_v = &coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
+    coeff_t coeff_u[TR_MAX_WIDTH * TR_MAX_WIDTH];
+    coeff_t coeff_v[TR_MAX_WIDTH * TR_MAX_WIDTH];
+    uvg_get_sub_coeff(coeff_u, coeff->u, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
+    uvg_get_sub_coeff(coeff_v, coeff->v, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);

    if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) {
-      // ISP_TODO: do these checks need height?
+      // TODO: height for this check and the others below
      if(state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)){
        cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
        // HEVC only supports transform_skip for Luma
@ -504,7 +508,9 @@ static void encode_chroma_tu(
    }
  }
  else {
-    const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
+    // const coeff_t *coeff_uv = &coeff->joint_uv[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)];
+    const coeff_t coeff_uv[TR_MAX_WIDTH * TR_MAX_WIDTH];
+    uvg_get_sub_coeff(coeff_uv, coeff->joint_uv, x_local, y_local, cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
    if (state->encoder_control->cfg.trskip_enable && width_c <= (1 << state->encoder_control->cfg.trskip_max_size)) {
      cabac->cur_ctx = &cabac->ctx.transform_skip_model_chroma;
      CABAC_BIN(cabac, 0, "transform_skip_flag");
@ -544,7 +550,9 @@ static void encode_transform_unit(
  if (cbf_y && !only_chroma) {
    int x_local = x % LCU_WIDTH;
    int y_local = y % LCU_WIDTH;
-    const coeff_t *coeff_y = &coeff->y[xy_to_zorder(LCU_WIDTH, x_local, y_local)];
+    // const coeff_t *coeff_y = &coeff->y[xy_to_zorder(LCU_WIDTH, x_local, y_local)];
+    coeff_t coeff_y[TR_MAX_WIDTH * TR_MAX_WIDTH];
+    uvg_get_sub_coeff(coeff_y, coeff->y, x_local, y_local, width, height, LCU_WIDTH);

    // CoeffNxN
    // Residual Coding
@ -1849,3 +1857,26 @@ void uvg_encode_mvd(encoder_state_t * const state,

  if(bits_out) *bits_out = temp_bits_out;
 }
+
+
+/**
+ * \brief Copy a block_w x block_h block out of an LCU coeff array into dst.
+ *
+ * \param dst         Destination array. Should be coeff_t [32*32].
+ * \param src         Coeff LCU array.
+ * \param lcu_x       Local LCU x coordinate.
+ * \param lcu_y       Local LCU y coordinate.
+ * \param block_w     Block width.
+ * \param block_h     Block height.
+ * \param lcu_width   LCU_WIDTH for luma, LCU_WIDTH_C for chroma.
+ *
+ */
+void uvg_get_sub_coeff(coeff_t *dst, const coeff_t * const src, const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int lcu_width)
+{
+  // Point at the block's first coeff: src rows are lcu_width coeffs apart
+  const coeff_t* coeff_ptr = &src[lcu_x + lcu_y * lcu_width];
+  for (int j = 0; j < block_h; ++j) {
+    // Row j is read from src with stride lcu_width and written to dst with stride block_w.
+    memcpy(&dst[j * block_w], &coeff_ptr[j * lcu_width], block_w * sizeof(coeff_t));
+  }
+}
--- a/src/encode_coding_tree.h
+++ b/src/encode_coding_tree.h
@ -116,3 +116,5 @@ void uvg_encode_last_significant_xy(cabac_data_t * const cabac,
  uint8_t lastpos_x, uint8_t lastpos_y,
  uint8_t width, uint8_t height,
  uint8_t type, uint8_t scan, double* bits_out);
+// Copies a block_w x block_h block starting at (lcu_x, lcu_y) from src (row stride lcu_width) into dst (row stride block_w).
+void uvg_get_sub_coeff(coeff_t* dst, const coeff_t* const src, const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int lcu_width);
--- a/src/intra.c
+++ b/src/intra.c
@ -1520,7 +1520,7 @@ int uvg_get_isp_split_num(const int width, const int height, const int split_typ
 void uvg_get_isp_split_loc(cu_loc_t *loc, const int x, const int y, const int block_w, const int block_h, const int split_idx, const int split_type)
 {
  assert((split_idx >= 0 && split_idx <= 3) && "ISP split index must be in [0, 3].");
-  assert((split_type == ISP_MODE_NO_ISP && split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");
+  assert((split_type != ISP_MODE_NO_ISP || split_idx == 0) && "Trying to ISP split when split type = NO_ISP.");
  int part_dim = block_w;
  if (split_type != ISP_MODE_NO_ISP) {
    part_dim = uvg_get_isp_split_dim(block_w, block_h, split_type);
--- a/src/rdo.c
+++ b/src/rdo.c
@ -305,11 +305,21 @@ static INLINE double get_coeff_cabac_cost(
 {
  const int width  = cu_loc->width;
  const int height = cu_loc->height;
+  const int sub_coeff_w = color == COLOR_Y ? cu_loc->width  : cu_loc->chroma_width;
+  const int sub_coeff_h = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
+  const int lcu_width = color == COLOR_Y ? LCU_WIDTH : LCU_WIDTH_C;
+
+  int x_local = cu_loc->x % LCU_WIDTH;
+  int y_local = cu_loc->y % LCU_WIDTH;
+
  // Make sure there are coeffs present
  bool found = false;
-  // ISP_TODO: this needs to be two separate x, y loops?
-  for (int i = 0; i < width * height; i++) {
-    if (coeff[i] != 0) {
+
+  coeff_t sub_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
+  uvg_get_sub_coeff(sub_coeff, coeff, x_local, y_local, sub_coeff_w, sub_coeff_h, lcu_width);
+
+  for (int i = 0; i < sub_coeff_w * sub_coeff_h; i++) {
+    if (sub_coeff[i] != 0) {
      found = 1;
      break;
    }
@ -332,7 +342,7 @@ static INLINE double get_coeff_cabac_cost(
  if(!tr_skip) {
    uvg_encode_coeff_nxn((encoder_state_t*) state,
                         &cabac_copy,
-                         coeff,
+                         sub_coeff,
                         cu_loc,
                         color,
                         scan_mode,
@ -342,7 +352,7 @@ static INLINE double get_coeff_cabac_cost(
  else {
    uvg_encode_ts_residual((encoder_state_t* const)state,
      &cabac_copy,
-      coeff,
+      sub_coeff,
      width,
      height,
      color,
--- a/src/search.c
+++ b/src/search.c
@ -90,20 +90,27 @@ static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *fr
  }
 }

+// ISP_TODO: this needs to work with the new coeff cu order
 static INLINE void copy_cu_coeffs(const cu_loc_t *cu_loc, lcu_t *from, lcu_t *to, bool joint, enum
                                  uvg_tree_type tree_type)
 {
  if (tree_type != UVG_CHROMA_T) {
-    const int luma_z = xy_to_zorder(LCU_WIDTH, cu_loc->x, cu_loc->y);
-    copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], cu_loc->width, cu_loc->height);
+    // Row-major offset of the CU's top-left coeff inside the LCU luma array (used in place of a z-order index).
+    const int idx = (cu_loc->x % LCU_WIDTH) + ((cu_loc->y % LCU_WIDTH) * LCU_WIDTH);
+    copy_coeffs(&from->coeff.y[idx], &to->coeff.y[idx], cu_loc->width, cu_loc->height, LCU_WIDTH);
+    
  }

  if (from->rec.chroma_format != UVG_CSP_400 && tree_type != UVG_LUMA_T) {
-    const int chroma_z = xy_to_zorder(LCU_WIDTH_C, cu_loc->x >> (tree_type != UVG_CHROMA_T), cu_loc->y >> (tree_type != UVG_CHROMA_T));
-    copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
-    copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
+    // Chroma position: cu_loc x/y are shifted right by one unless tree_type is UVG_CHROMA_T.
+    const int chroma_x = cu_loc->x >> (tree_type != UVG_CHROMA_T);
+    const int chroma_y = cu_loc->y >> (tree_type != UVG_CHROMA_T);
+
+    const int idx = (chroma_x % LCU_WIDTH_C) + ((chroma_y % LCU_WIDTH_C) * LCU_WIDTH_C);
+    copy_coeffs(&from->coeff.u[idx], &to->coeff.u[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
+    copy_coeffs(&from->coeff.v[idx], &to->coeff.v[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
    if (joint) {
-      copy_coeffs(&from->coeff.joint_uv[chroma_z], &to->coeff.joint_uv[chroma_z], cu_loc->chroma_width, cu_loc->chroma_height);
+      copy_coeffs(&from->coeff.joint_uv[idx], &to->coeff.joint_uv[idx], cu_loc->chroma_width, cu_loc->chroma_height, LCU_WIDTH_C);
    }
  }
 }
@ -1672,7 +1679,7 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
  copy_lcu_to_cu_data(state, x, y, &work_tree[0], tree_type);

  // Copy coeffs to encoder state.
-  copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH, LCU_WIDTH);
+  copy_coeffs(work_tree[0].coeff.y, coeff->y, LCU_WIDTH, LCU_WIDTH, LCU_WIDTH);

  if(state->frame->slicetype == UVG_SLICE_I && state->encoder_control->cfg.dual_tree) {
    cost = search_cu(
@ -1689,9 +1696,9 @@ void uvg_search_lcu(encoder_state_t * const state, const int x, const int y, con
    copy_lcu_to_cu_data(state, x, y, &work_tree[0], UVG_CHROMA_T);
  }

-  copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C, LCU_WIDTH_C);
-  copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C, LCU_WIDTH_C);
+  copy_coeffs(work_tree[0].coeff.u, coeff->u, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C);
+  copy_coeffs(work_tree[0].coeff.v, coeff->v, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C);
  if (state->encoder_control->cfg.jccr) {
-    copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C, LCU_WIDTH_C);
+    copy_coeffs(work_tree[0].coeff.joint_uv, coeff->joint_uv, LCU_WIDTH_C, LCU_WIDTH_C, LCU_WIDTH_C);
  }
 }
--- a/src/strategies/generic/encode_coding_tree-generic.c
+++ b/src/strategies/generic/encode_coding_tree-generic.c
@ -64,6 +64,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
  const int y = cu_loc->y;
  const int width  = color == COLOR_Y ? cu_loc->width  : cu_loc->chroma_width;
  const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
+
  //const encoder_control_t * const encoder = state->encoder_control;
  //int c1 = 1;
  uint8_t last_coeff_x = 0;
@ -94,14 +95,13 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
  unsigned scan_cg_last = (unsigned)-1;
  unsigned scan_pos_last = (unsigned)-1;

-  for (int j = 0; j < height; j++) {
-    for (int i = 0; i < width; i++) {
-      if (coeff[scan[i + j * width]]) {
-        scan_pos_last = i + j * width;
-        sig_coeffgroup_flag[scan_cg[(i + j * width) >> log2_cg_size]] = 1;
-      }
+  for (int i = 0; i < (width * height); ++i) {
+    if (coeff[scan[i]]) {
+      scan_pos_last = i;
+      sig_coeffgroup_flag[scan_cg[i >> log2_cg_size]] = 1;
    }
  }
+
  scan_cg_last = scan_pos_last >> log2_cg_size;

  int pos_last = scan[scan_pos_last];
@ -139,7 +139,7 @@ void uvg_encode_coeff_nxn_generic(encoder_state_t * const state,
  int32_t temp_diag = -1;
  int32_t temp_sum = -1;

-  int32_t reg_bins = (width*width * 28) >> 4; //8 for 2x2
+  int32_t reg_bins = (width * height * 28) >> 4; //8 for 2x2

  // significant_coeff_flag
  for (i = scan_cg_last; i >= 0; i--) {
--- a/src/transform.c
+++ b/src/transform.c
@ -1102,24 +1102,13 @@ int uvg_quantize_residual_trskip(
    // we can skip this.
    uvg_pixels_blit(best->rec, rec_out, width, height, width, out_stride);
  }
-  copy_coeffs(best->coeff, coeff_out, width, height);
+  // TODO: copying coeffs here is very suspect
+  copy_coeffs(best->coeff, coeff_out, width, height, width);

  return best->has_coeffs;
 }


-static INLINE int translate_to_cu_order_idx(const int lcu_x, const int lcu_y, const int block_w, const int block_h, const int linear_idx)
-{
-  // ISP_TODO: get rid of all there temp variables after making sure this works
-  const int start_idx = lcu_x + lcu_y * LCU_WIDTH;
-  const int offset_x = linear_idx % block_w;
-  const int local_y = linear_idx / block_h;
-  const int offset_y = local_y * LCU_WIDTH;
-
-  return (start_idx + offset_x + offset_y);
-}
-
-
 /**
 * Calculate the residual coefficients for a single TU.
 *
@ -1176,23 +1165,21 @@ static void quantize_tr_residual(
  coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
  coeff_t *dst_coeff = NULL;

-  // ISP_TODO: use temp coeff array size MAX_TR_WIDTH^2 instead of coeff pointers
-  // ISP_TODO: inside temp coeff array, entries are in the old linear order. PÖTKÖ
  switch (color) {
    case COLOR_Y:
      pred      = &lcu->rec.y[offset];
      ref       = &lcu->ref.y[offset];
-      dst_coeff = &lcu->coeff.y;
+      dst_coeff = &lcu->coeff.y[lcu_px.x + lcu_px.y * lcu_width];
      break;
    case COLOR_U:
      pred      = &lcu->rec.u[offset];
      ref       = &lcu->ref.u[offset];
-      dst_coeff = &lcu->coeff.u;
+      dst_coeff = &lcu->coeff.u[lcu_px.x + lcu_px.y * lcu_width];
      break;
    case COLOR_V:
      pred      = &lcu->rec.v[offset];
      ref       = &lcu->ref.v[offset];
-      dst_coeff = &lcu->coeff.v;
+      dst_coeff = &lcu->coeff.v[lcu_px.x + lcu_px.y * lcu_width];
      break;
    default:
      break;
@ -1248,6 +1235,7 @@ static void quantize_tr_residual(
                                              lmcs_chroma_adj);
  } else {
    if(color == COLOR_UV) {
+      // ISP_TODO: fix this
      has_coeffs = uvg_quant_cbcr_residual(
        state,
        cur_pu,
@ -1289,20 +1277,14 @@ static void quantize_tr_residual(

  cbf_clear(&cur_pu->cbf, depth, color);
  if (has_coeffs) {
-    const int coeffs_to_copy = tr_width * tr_height;
-    for (int i = 0; i < coeffs_to_copy; ++i) {
-      const coeff_t c = coeff[i];
-      const idx = translate_to_cu_order_idx(lcu_px.x, lcu_px.y, tr_width, tr_height, i);
-      dst_coeff[idx] = c;
+    for (int j = 0; j < tr_height; ++j) {
+      memcpy(&dst_coeff[j * lcu_width], &coeff[j * tr_width], tr_width * sizeof(coeff_t));
    }
    cbf_set(&cur_pu->cbf, depth, color);
  }
  else {
-    // ISP_TODO: if no coeffs, mem set width * height amount of coeffs to zero
-    int idx = lcu_px.x + lcu_px.y * LCU_WIDTH;
    for (int j = 0; j < tr_height; ++j) {
-      memset(dst_coeff[idx], 0, (sizeof(coeff_t) * tr_width));
-      idx += LCU_WIDTH;
+      memset(&dst_coeff[j * lcu_width], 0, (sizeof(coeff_t) * tr_width));
    }
  }
 }