Copy generic to avx2

2024-11-27 19:24:06 +00:00 · 2015-10-02 17:50:34 +03:00 · 2015-10-02 17:50:34 +03:00 · b37cca87c8
parent cad2ea9d6e
commit b37cca87c8
1 changed files with 115 additions and 0 deletions
--- a/src/strategies/avx2/quant-avx2.c
+++ b/src/strategies/avx2/quant-avx2.c
@ -30,6 +30,7 @@
 #include "strategyselector.h"
 #include "encoder.h"
 #include "transform.h"
+#include "rdo.h"

 #if COMPILE_INTEL_AVX2
 #include <immintrin.h>
@ -194,6 +195,119 @@ void kvz_quant_flat_avx2(const encoder_state_t * const state, coeff_t *coef, coe
  }
 }

+/**
+* \brief Quantize residual and get both the reconstruction and coeffs.
+*
+* Computes the residual (ref_in - pred_in), transforms it (or applies
+* transform skip), quantizes the result with kvz_rdoq or kvz_quant and
+* copies the quantized coefficients to coeff_out. If any quantized
+* coefficient is non-zero, the coefficients are dequantized and inverse
+* transformed, and rec_out is set to pred_in plus the decoded residual,
+* passed through CLIP(0, PIXEL_MAX, ...). Otherwise rec_out receives a copy
+* of pred_in, unless rec_out and pred_in are the same pointer.
+*
+* \param state  Encoder state. state->encoder_control is handed to the
+*               transform functions and its rdoq_enable flag selects
+*               kvz_rdoq over kvz_quant.
+* \param cur_cu  Coding unit info; its type, depth, tr_depth and part_size
+*                fields are read.
+* \param width  Transform width. Asserted to be within
+*               [TR_MIN_WIDTH, TR_MAX_WIDTH].
+* \param color  Color.
+* \param scan_order  Coefficient scan order.
+* \param use_trskip  Whether transform skip is used.
+* \param in_stride  Stride for ref_in and pred_in.
+* \param out_stride  Stride for rec_out and coeff_out.
+* \param ref_in  Reference pixels.
+* \param pred_in  Predicted pixels.
+* \param rec_out  Reconstructed pixels.
+* \param coeff_out  Coefficients used for reconstruction of rec_out.
+*
+* \returns  Whether coeff_out contains any non-zero coefficients (1 or 0).
+*/
+int kvz_quantize_residual_avx2(encoder_state_t *const state,
+  const cu_info_t *const cur_cu, const int width, const color_t color,
+  const coeff_scan_order_t scan_order, const int use_trskip,
+  const int in_stride, const int out_stride,
+  const kvz_pixel *const ref_in, const kvz_pixel *const pred_in,
+  kvz_pixel *rec_out, coeff_t *coeff_out)
+{
+  // Temporary arrays to pass data to and from kvz_quant and transform functions.
+  // They are sized for the largest transform and indexed with a stride of width.
+  int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH];
+  coeff_t quant_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
+  coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH];
+
+  int has_coeffs = 0;
+
+  assert(width <= TR_MAX_WIDTH);
+  assert(width >= TR_MIN_WIDTH);
+
+  // Get residual. (ref_in - pred_in -> residual)
+  {
+    int y, x;
+    for (y = 0; y < width; ++y) {
+      for (x = 0; x < width; ++x) {
+        residual[x + y * width] = (int16_t)(ref_in[x + y * in_stride] - pred_in[x + y * in_stride]);
+      }
+    }
+  }
+
+  // Transform residual. (residual -> coeff)
+  if (use_trskip) {
+    kvz_transformskip(state->encoder_control, residual, coeff, width);
+  }
+  else {
+    kvz_transform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535));
+  }
+
+  // Quantize coeffs. (coeff -> quant_coeff)
+  if (state->encoder_control->rdoq_enable) {
+    int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; // Transform depth relative to the CU depth.
+    tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0); // One level deeper for NxN partitioning.
+    kvz_rdoq(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2),
+      scan_order, cur_cu->type, tr_depth);
+  }
+  else {
+    kvz_quant(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2),
+      scan_order, cur_cu->type);
+  }
+
+  // Check if there are any non-zero coefficients.
+  {
+    int i;
+    for (i = 0; i < width * width; ++i) {
+      if (quant_coeff[i] != 0) {
+        has_coeffs = 1;
+        break;
+      }
+    }
+  }
+
+  // Copy coefficients to coeff_out. This is done even when they are all zero.
+  kvz_coefficients_blit(quant_coeff, coeff_out, width, width, width, out_stride);
+
+  // Do the inverse quantization and transformation and the reconstruction to
+  // rec_out.
+  if (has_coeffs) {
+    int y, x;
+
+    // Get quantized residual. (quant_coeff -> coeff -> residual)
+    kvz_dequant(state, quant_coeff, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type);
+    if (use_trskip) {
+      kvz_itransformskip(state->encoder_control, residual, coeff, width);
+    }
+    else {
+      kvz_itransform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535));
+    }
+
+    // Get quantized reconstruction. (residual + pred_in -> rec_out)
+    for (y = 0; y < width; ++y) {
+      for (x = 0; x < width; ++x) {
+        int16_t val = residual[x + y * width] + pred_in[x + y * in_stride];
+        rec_out[x + y * out_stride] = (kvz_pixel)CLIP(0, PIXEL_MAX, val);
+      }
+    }
+  }
+  else if (rec_out != pred_in) {
+    // With no coeffs the reconstruction is just the prediction, so copy pred_in
+    // to rec_out. When rec_out == pred_in the copy is skipped as unnecessary.
+    int y, x;
+
+    for (y = 0; y < width; ++y) {
+      for (x = 0; x < width; ++x) {
+        rec_out[x + y * out_stride] = pred_in[x + y * in_stride];
+      }
+    }
+  }
+
+  return has_coeffs;
+}
+
 void kvz_quant_avx2(const encoder_state_t * const state, coeff_t *coef, coeff_t *q_coef, int32_t width,
  int32_t height, int8_t type, int8_t scan_idx, int8_t block_type)
 {
@ -214,6 +328,7 @@ int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth)

 #if COMPILE_INTEL_AVX2
  success &= kvz_strategyselector_register(opaque, "quant", "avx2", 40, &kvz_quant_avx2);
+  success &= kvz_strategyselector_register(opaque, "quantize_residual", "avx2", 40, &kvz_quantize_residual_avx2);
 #endif //COMPILE_INTEL_AVX2

  return success;