Stupidify the 4x4 planar loop for vectorization

2024-11-27 19:24:06 +00:00 · 2019-11-14 13:57:48 +02:00 · 2019-11-14 13:57:48 +02:00 · 8d45ab4951
parent 6f13f6525c
commit 8d45ab4951
1 changed files with 14 additions and 2 deletions
--- a/src/strategies/avx2/intra-avx2.c
+++ b/src/strategies/avx2/intra-avx2.c
@@ -503,10 +503,22 @@ static void kvz_intra_pred_planar_avx2(

  } else {
    // Unoptimized version for reference.
+    // This branch is only taken when log2_width == 2, i.e. width == 4.
    for (int y = 0; y < width; ++y) {
+      uint8_t  yp1     = y + 1;
+      uint16_t yp1_bl  = yp1 * bottom_left;
+      uint8_t  rl_curr = ref_left[yp1];
+
      for (int x = 0; x < width; ++x) {
-        int_fast16_t hor = (width - 1 - x) * ref_left[y + 1] + (x + 1) * top_right;
-        int_fast16_t ver = (width - 1 - y) * ref_top[x + 1] + (y + 1) * bottom_left;
+        uint8_t  xp1     = x + 1;
+        uint16_t xp1_tr  = xp1 * top_right;
+        uint8_t  rt_curr = ref_top[xp1];
+
+        uint8_t  rdist   = width - 1 - x;
+        uint8_t  bdist   = width - 1 - y;
+
+        int_fast16_t hor = rdist * rl_curr + xp1_tr;
+        int_fast16_t ver = bdist * rt_curr + yp1_bl;
        dst[y * width + x] = (ver + hor + width) >> (log2_width + 1);
      }
    }