[alf] Define the strategy for alf_derive_classification_blk()

2024-11-23 18:14:06 +00:00 · 2021-08-19 17:04:35 +03:00 · 2021-08-19 17:04:35 +03:00 · 3efaeede76
parent a5c27add5e
commit 3efaeede76
11 changed files with 437 additions and 243 deletions
--- a/build/kvazaar_lib/kvazaar_lib.vcxproj
+++ b/build/kvazaar_lib/kvazaar_lib.vcxproj
@ -188,10 +188,12 @@
      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
      <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\generic\alf-generic.c" />
    <ClCompile Include="..\..\src\strategies\generic\encode_coding_tree-generic.c" />
    <ClCompile Include="..\..\src\strategies\generic\intra-generic.c" />
    <ClCompile Include="..\..\src\strategies\generic\quant-generic.c" />
    <ClCompile Include="..\..\src\strategies\generic\sao-generic.c" />
+    <ClCompile Include="..\..\src\strategies\strategies-alf.c" />
    <ClCompile Include="..\..\src\strategies\strategies-encode.c" />
    <ClCompile Include="..\..\src\strategies\strategies-intra.c" />
    <ClCompile Include="..\..\src\strategies\strategies-quant.c" />
@ -263,9 +265,11 @@
    <ClInclude Include="..\..\src\search_intra.h" />
    <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" />
    <ClInclude Include="..\..\src\strategies\avx2\sao-avx2.h" />
+    <ClInclude Include="..\..\src\strategies\generic\alf-generic.h" />
    <ClInclude Include="..\..\src\strategies\generic\encode_coding_tree-generic.h" />
    <ClInclude Include="..\..\src\strategies\generic\intra-generic.h" />
    <ClInclude Include="..\..\src\strategies\generic\sao-generic.h" />
+    <ClInclude Include="..\..\src\strategies\strategies-alf.h" />
    <ClInclude Include="..\..\src\strategies\strategies-common.h" />
    <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" />
    <ClInclude Include="..\..\src\strategies\generic\quant-generic.h" />
--- a/build/kvazaar_lib/kvazaar_lib.vcxproj.filters
+++ b/build/kvazaar_lib/kvazaar_lib.vcxproj.filters
@ -254,6 +254,12 @@
      <Filter>Reconstruction</Filter>
    </ClCompile>
    <ClCompile Include="..\..\src\strategies\strategies-encode.c" />
+    <ClCompile Include="..\..\src\strategies\generic\alf-generic.c">
+      <Filter>Optimization\strategies\generic</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\strategies\strategies-alf.c">
+      <Filter>Optimization\strategies</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\..\src\bitstream.h">
@ -473,6 +479,12 @@
    <ClInclude Include="..\..\src\alf.h">
      <Filter>Reconstruction</Filter>
    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\generic\alf-generic.h">
+      <Filter>Optimization\strategies\generic</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\strategies\strategies-alf.h">
+      <Filter>Optimization\strategies</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <YASM Include="..\..\src\extras\x86inc.asm">
--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -117,6 +117,8 @@ libkvazaar_la_SOURCES = \
 	transform.h \
 	videoframe.c \
 	videoframe.h \
+  strategies/generic/alf-generic.c \
+	strategies/generic/alf-generic.h \
 	strategies/generic/dct-generic.c \
 	strategies/generic/dct-generic.h \
 	strategies/generic/intra-generic.c \
@ -137,6 +139,8 @@ libkvazaar_la_SOURCES = \
 	strategies/optimized_sad_func_ptr_t.h \
 	strategies/generic/sao_shared_generics.h \
 	strategies/strategies-common.h \
+  strategies/strategies-alf.c \
+	strategies/strategies-alf.h \
 	strategies/strategies-dct.c \
 	strategies/strategies-dct.h \
 	strategies/strategies-intra.c \
--- a/src/alf.c
+++ b/src/alf.c
@ -10,7 +10,7 @@

 #include "cabac.h"
 #include "rdo.h"
-#include "strategies/strategies-sao.h"
+#include "strategies/strategies-alf.h"
 #include "kvz_math.h"
 #include "reshape.h"

@ -5852,247 +5852,6 @@ static void alf_reconstruct(encoder_state_t * const state,
  }
 }

-static void alf_derive_classification_blk(encoder_state_t * const state,
-  const int shift,
-  const int n_height,
-  const int n_width,
-  const int blk_pos_x,
-  const int blk_pos_y,
-  const int blk_dst_x,
-  const int blk_dst_y,
-  const int vb_ctu_height,
-  int vb_pos)
-{
-  videoframe_t* const frame = state->tile->frame;
-  //int ***g_laplacian = state->tile->frame->alf_info->g_laplacian;
-  //alf_classifier **g_classifier = state->tile->frame->alf_info->g_classifier;
-  //CHECK((vb_ctu_height & (vb_ctu_height - 1)) != 0, "vb_ctu_height must be a power of 2");
-
-  static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
-  int laplacian[NUM_DIRECTIONS][CLASSIFICATION_BLK_SIZE + 5][CLASSIFICATION_BLK_SIZE + 5];
-  memset(laplacian, 0, sizeof(laplacian));
-  alf_classifier **classifier = state->tile->frame->alf_info->classifier;
-
-  const int stride = frame->rec->stride;
-  kvz_pixel *src = state->tile->frame->rec->y;
-  const int max_activity = 15;
-
-  int fl = 2;
-  int fl_p1 = fl + 1;
-  int fl2 = 2 * fl;
-
-  int main_direction, secondary_direction, dir_temp_hv, dir_temp_d;
-  int pix_y;
-
-  int height = n_height + fl2;
-  int width = n_width + fl2;
-  int pos_x = blk_pos_x;
-  int pos_y = blk_pos_y;
-  int start_height = pos_y - fl_p1;
-
-  for (int i = 0; i < height; i += 2)
-  {
-    int yoffset = (i + 1 + start_height) * stride - fl_p1;
-    const kvz_pixel *src0 = &src[yoffset - stride];
-    const kvz_pixel *src1 = &src[yoffset];
-    const kvz_pixel *src2 = &src[yoffset + stride];
-    const kvz_pixel *src3 = &src[yoffset + stride * 2];
-
-    const int y = blk_dst_y - 2 + i;
-    if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos - 2)
-    {
-      src3 = &src[yoffset + stride];
-    }
-    else if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos)
-    {
-      src0 = &src[yoffset];
-    }
-
-    int *p_y_ver = laplacian[ALF_VER][i];
-    int *p_y_hor = laplacian[ALF_HOR][i];
-    int *p_y_dig_0 = laplacian[ALF_DIAG0][i];
-    int *p_y_dig_1 = laplacian[ALF_DIAG1][i];
-
-    for (int j = 0; j < width; j += 2)
-    {
-      pix_y = j + 1 + pos_x;
-      const kvz_pixel *p_y = src1 + pix_y;
-      const kvz_pixel *p_y_down = src0 + pix_y;
-      const kvz_pixel *p_y_up = src2 + pix_y;
-      const kvz_pixel *p_y_up2 = src3 + pix_y;
-
-      const int16_t y0 = p_y[0] << 1;
-      const int16_t y_up1 = p_y_up[1] << 1;
-
-      p_y_ver[j] = abs(y0 - p_y_down[0] - p_y_up[0]) + abs(y_up1 - p_y[1] - p_y_up2[1]);
-      p_y_hor[j] = abs(y0 - p_y[1] - p_y[-1]) + abs(y_up1 - p_y_up[2] - p_y_up[0]);
-      p_y_dig_0[j] = abs(y0 - p_y_down[-1] - p_y_up[1]) + abs(y_up1 - p_y[0] - p_y_up2[2]);
-      p_y_dig_1[j] = abs(y0 - p_y_up[-1] - p_y_down[1]) + abs(y_up1 - p_y_up2[0] - p_y[2]);
-
-      if (j > 4 && (j - 6) % 4 == 0)
-      {
-        int j_m_6 = j - 6;
-        int j_m_4 = j - 4;
-        int j_m_2 = j - 2;
-
-        p_y_ver[j_m_6] += p_y_ver[j_m_4] + p_y_ver[j_m_2] + p_y_ver[j];
-        p_y_hor[j_m_6] += p_y_hor[j_m_4] + p_y_hor[j_m_2] + p_y_hor[j];
-        p_y_dig_0[j_m_6] += p_y_dig_0[j_m_4] + p_y_dig_0[j_m_2] + p_y_dig_0[j];
-        p_y_dig_1[j_m_6] += p_y_dig_1[j_m_4] + p_y_dig_1[j_m_2] + p_y_dig_1[j];
-      }
-    }
-  }
-
-  // classification block size
-  const int cls_size_y = 4;
-  const int cls_size_x = 4;
-
-  //for (int i = 0; i < blk.height; i += cls_size_y)
-  for (int i = 0; i < n_height; i += cls_size_y)
-  {
-    int* p_y_ver = laplacian[ALF_VER][i];
-    int* p_y_ver2 = laplacian[ALF_VER][i + 2];
-    int* p_y_ver4 = laplacian[ALF_VER][i + 4];
-    int* p_y_ver6 = laplacian[ALF_VER][i + 6];
-
-    int* p_y_hor = laplacian[ALF_HOR][i];
-    int* p_y_hor2 = laplacian[ALF_HOR][i + 2];
-    int* p_y_hor4 = laplacian[ALF_HOR][i + 4];
-    int* p_y_hor6 = laplacian[ALF_HOR][i + 6];
-
-    int* p_y_dig0 = laplacian[ALF_DIAG0][i];
-    int* p_y_dig02 = laplacian[ALF_DIAG0][i + 2];
-    int* p_y_dig04 = laplacian[ALF_DIAG0][i + 4];
-    int* p_y_dig06 = laplacian[ALF_DIAG0][i + 6];
-
-    int* p_y_dig1 = laplacian[ALF_DIAG1][i];
-    int* p_y_dig12 = laplacian[ALF_DIAG1][i + 2];
-    int* p_y_dig14 = laplacian[ALF_DIAG1][i + 4];
-    int* p_y_dig16 = laplacian[ALF_DIAG1][i + 6];
-
-    //for (int j = 0; j < blk.width; j += cls_size_x)
-    for (int j = 0; j < n_width; j += cls_size_x)
-    {
-      int sum_v = 0; int sum_h = 0; int sum_d0 = 0; int sum_d1 = 0;
-
-      if (((i + blk_dst_y) % vb_ctu_height) == (vb_pos - 4))
-      {
-        sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j];
-        sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j];
-        sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j];
-        sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j];
-      }
-      else if (((i + blk_dst_y) % vb_ctu_height) == vb_pos)
-      {
-        sum_v = p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
-        sum_h = p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
-        sum_d0 = p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
-        sum_d1 = p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
-      }
-      else
-      {
-        sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
-        sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
-        sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
-        sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
-      }
-
-      int temp_act = sum_v + sum_h;
-      int activity = 0;
-
-      const int y = (i + blk_dst_y) & (vb_ctu_height - 1);
-      if (y == vb_pos - 4 || y == vb_pos)
-      {
-        activity = CLIP(0, max_activity, (temp_act * 96) >> shift);
-      }
-      else
-      {
-        activity = CLIP(0, max_activity, (temp_act * 64) >> shift);
-      }
-
-      int class_idx = th[activity];
-
-      int hv1, hv0, d1, d0, hvd1, hvd0;
-
-      if (sum_v > sum_h)
-      {
-        hv1 = sum_v;
-        hv0 = sum_h;
-        dir_temp_hv = 1;
-      }
-      else
-      {
-        hv1 = sum_h;
-        hv0 = sum_v;
-        dir_temp_hv = 3;
-      }
-      if (sum_d0 > sum_d1)
-      {
-        d1 = sum_d0;
-        d0 = sum_d1;
-        dir_temp_d = 0;
-      }
-      else
-      {
-        d1 = sum_d1;
-        d0 = sum_d0;
-        dir_temp_d = 2;
-      }
-      if ((uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0)
-      {
-        hvd1 = d1;
-        hvd0 = d0;
-        main_direction = dir_temp_d;
-        secondary_direction = dir_temp_hv;
-      }
-      else
-      {
-        hvd1 = hv1;
-        hvd0 = hv0;
-        main_direction = dir_temp_hv;
-        secondary_direction = dir_temp_d;
-      }
-
-      int direction_strength = 0;
-      if (hvd1 > 2 * hvd0)
-      {
-        direction_strength = 1;
-      }
-      if (hvd1 * 2 > 9 * hvd0)
-      {
-        direction_strength = 2;
-      }
-
-      if (direction_strength)
-      {
-        class_idx += (((main_direction & 0x1) << 1) + direction_strength) * 5;
-      }
-
-      static const int transpose_table[8] = { 0, 1, 0, 2, 2, 3, 1, 3 };
-      int transpose_idx = transpose_table[main_direction * 2 + (secondary_direction >> 1)];
-
-      int y_offset = i + blk_dst_y;
-      int x_offset = j + blk_dst_x;
-
-      alf_classifier *cl0 = classifier[y_offset] + x_offset;
-      alf_classifier *cl1 = classifier[y_offset + 1] + x_offset;
-      alf_classifier *cl2 = classifier[y_offset + 2] + x_offset;
-      alf_classifier *cl3 = classifier[y_offset + 3] + x_offset;
-
-      cl0[0].class_idx = cl0[1].class_idx = cl0[2].class_idx = cl0[3].class_idx =
-        cl1[0].class_idx = cl1[1].class_idx = cl1[2].class_idx = cl1[3].class_idx =
-        cl2[0].class_idx = cl2[1].class_idx = cl2[2].class_idx = cl2[3].class_idx =
-        cl3[0].class_idx = cl3[1].class_idx = cl3[2].class_idx = cl3[3].class_idx = class_idx;
-
-      cl0[0].transpose_idx = cl0[1].transpose_idx = cl0[2].transpose_idx = cl0[3].transpose_idx =
-        cl1[0].transpose_idx = cl1[1].transpose_idx = cl1[2].transpose_idx = cl1[3].transpose_idx =
-        cl2[0].transpose_idx = cl2[1].transpose_idx = cl2[2].transpose_idx = cl2[3].transpose_idx =
-        cl3[0].transpose_idx = cl3[1].transpose_idx = cl3[2].transpose_idx = cl3[3].transpose_idx = transpose_idx;
-
-    }
-  }
-}
-
 static void alf_derive_classification(encoder_state_t * const state,
  const int width,
  const int height,
--- a/src/rdo.h
+++ b/src/rdo.h
@ -80,7 +80,6 @@ extern const uint32_t kvz_entropy_bits[512];

 // Floating point fractional bits, derived from kvz_entropy_bits
 extern const float kvz_f_entropy_bits[512];
-// ToDo: generate a new table for VVC?
 #define CTX_ENTROPY_FBITS(ctx, val) kvz_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]

 #endif
--- a/src/strategies/generic/alf-generic.c
+++ b/src/strategies/generic/alf-generic.c
@ -0,0 +1,281 @@
+/*****************************************************************************
+ * This file is part of Kvazaar HEVC encoder.
+ *
+ * Copyright (C) 2013-2021 Tampere University of Technology and others (see
+ * COPYING file).
+ *
+ * Kvazaar is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+
+#include "strategies/generic/alf-generic.h"
+
+#include "cu.h"
+#include "encoder.h"
+#include "encoderstate.h"
+#include "kvazaar.h"
+#include "alf.h"
+#include "strategyselector.h"
+
+
+static void alf_derive_classification_blk_generic(encoder_state_t * const state,
+  const int shift,
+  const int n_height,
+  const int n_width,
+  const int blk_pos_x,
+  const int blk_pos_y,
+  const int blk_dst_x,
+  const int blk_dst_y,
+  const int vb_ctu_height,
+  int vb_pos)
+{
+  videoframe_t* const frame = state->tile->frame;
+  //int ***g_laplacian = state->tile->frame->alf_info->g_laplacian;
+  //alf_classifier **g_classifier = state->tile->frame->alf_info->g_classifier;
+  // NOTE: vb_ctu_height must be a power of 2: (vb_ctu_height - 1) is used as a bit mask below.
+
+  static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
+  int laplacian[NUM_DIRECTIONS][CLASSIFICATION_BLK_SIZE + 5][CLASSIFICATION_BLK_SIZE + 5];
+  memset(laplacian, 0, sizeof(laplacian));
+  alf_classifier **classifier = state->tile->frame->alf_info->classifier;
+
+  const int stride = frame->rec->stride;
+  kvz_pixel *src = state->tile->frame->rec->y;
+  const int max_activity = 15;
+
+  int fl = 2;
+  int fl_p1 = fl + 1;
+  int fl2 = 2 * fl;
+
+  int main_direction, secondary_direction, dir_temp_hv, dir_temp_d;
+  int pix_y;
+
+  int height = n_height + fl2;
+  int width = n_width + fl2;
+  int pos_x = blk_pos_x;
+  int pos_y = blk_pos_y;
+  int start_height = pos_y - fl_p1;
+
+  for (int i = 0; i < height; i += 2)
+  {
+    int yoffset = (i + 1 + start_height) * stride - fl_p1;
+    const kvz_pixel *src0 = &src[yoffset - stride];
+    const kvz_pixel *src1 = &src[yoffset];
+    const kvz_pixel *src2 = &src[yoffset + stride];
+    const kvz_pixel *src3 = &src[yoffset + stride * 2];
+
+    const int y = blk_dst_y - 2 + i;
+    if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos - 2)
+    {
+      src3 = &src[yoffset + stride];
+    }
+    else if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos)
+    {
+      src0 = &src[yoffset];
+    }
+
+    int *p_y_ver = laplacian[ALF_VER][i];
+    int *p_y_hor = laplacian[ALF_HOR][i];
+    int *p_y_dig_0 = laplacian[ALF_DIAG0][i];
+    int *p_y_dig_1 = laplacian[ALF_DIAG1][i];
+
+    for (int j = 0; j < width; j += 2)
+    {
+      pix_y = j + 1 + pos_x;
+      const kvz_pixel *p_y = src1 + pix_y;
+      const kvz_pixel *p_y_down = src0 + pix_y;
+      const kvz_pixel *p_y_up = src2 + pix_y;
+      const kvz_pixel *p_y_up2 = src3 + pix_y;
+
+      const int16_t y0 = p_y[0] << 1;
+      const int16_t y_up1 = p_y_up[1] << 1;
+
+      p_y_ver[j] = abs(y0 - p_y_down[0] - p_y_up[0]) + abs(y_up1 - p_y[1] - p_y_up2[1]);
+      p_y_hor[j] = abs(y0 - p_y[1] - p_y[-1]) + abs(y_up1 - p_y_up[2] - p_y_up[0]);
+      p_y_dig_0[j] = abs(y0 - p_y_down[-1] - p_y_up[1]) + abs(y_up1 - p_y[0] - p_y_up2[2]);
+      p_y_dig_1[j] = abs(y0 - p_y_up[-1] - p_y_down[1]) + abs(y_up1 - p_y_up2[0] - p_y[2]);
+
+      if (j > 4 && (j - 6) % 4 == 0)
+      {
+        int j_m_6 = j - 6;
+        int j_m_4 = j - 4;
+        int j_m_2 = j - 2;
+
+        p_y_ver[j_m_6] += p_y_ver[j_m_4] + p_y_ver[j_m_2] + p_y_ver[j];
+        p_y_hor[j_m_6] += p_y_hor[j_m_4] + p_y_hor[j_m_2] + p_y_hor[j];
+        p_y_dig_0[j_m_6] += p_y_dig_0[j_m_4] + p_y_dig_0[j_m_2] + p_y_dig_0[j];
+        p_y_dig_1[j_m_6] += p_y_dig_1[j_m_4] + p_y_dig_1[j_m_2] + p_y_dig_1[j];
+      }
+    }
+  }
+
+  // classification block size
+  const int cls_size_y = 4;
+  const int cls_size_x = 4;
+
+  // Walk the block in steps of cls_size_y rows (one row of 4x4 classification units per pass).
+  for (int i = 0; i < n_height; i += cls_size_y)
+  {
+    int* p_y_ver = laplacian[ALF_VER][i];
+    int* p_y_ver2 = laplacian[ALF_VER][i + 2];
+    int* p_y_ver4 = laplacian[ALF_VER][i + 4];
+    int* p_y_ver6 = laplacian[ALF_VER][i + 6];
+
+    int* p_y_hor = laplacian[ALF_HOR][i];
+    int* p_y_hor2 = laplacian[ALF_HOR][i + 2];
+    int* p_y_hor4 = laplacian[ALF_HOR][i + 4];
+    int* p_y_hor6 = laplacian[ALF_HOR][i + 6];
+
+    int* p_y_dig0 = laplacian[ALF_DIAG0][i];
+    int* p_y_dig02 = laplacian[ALF_DIAG0][i + 2];
+    int* p_y_dig04 = laplacian[ALF_DIAG0][i + 4];
+    int* p_y_dig06 = laplacian[ALF_DIAG0][i + 6];
+
+    int* p_y_dig1 = laplacian[ALF_DIAG1][i];
+    int* p_y_dig12 = laplacian[ALF_DIAG1][i + 2];
+    int* p_y_dig14 = laplacian[ALF_DIAG1][i + 4];
+    int* p_y_dig16 = laplacian[ALF_DIAG1][i + 6];
+
+    // Walk the row in steps of cls_size_x columns (one 4x4 classification unit per pass).
+    for (int j = 0; j < n_width; j += cls_size_x)
+    {
+      int sum_v = 0; int sum_h = 0; int sum_d0 = 0; int sum_d1 = 0;
+
+      if (((i + blk_dst_y) % vb_ctu_height) == (vb_pos - 4))
+      {
+        sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j];
+        sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j];
+        sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j];
+        sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j];
+      }
+      else if (((i + blk_dst_y) % vb_ctu_height) == vb_pos)
+      {
+        sum_v = p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
+        sum_h = p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
+        sum_d0 = p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
+        sum_d1 = p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
+      }
+      else
+      {
+        sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
+        sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
+        sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
+        sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
+      }
+
+      int temp_act = sum_v + sum_h;
+      int activity = 0;
+
+      const int y = (i + blk_dst_y) & (vb_ctu_height - 1);
+      if (y == vb_pos - 4 || y == vb_pos)
+      {
+        activity = CLIP(0, max_activity, (temp_act * 96) >> shift);
+      }
+      else
+      {
+        activity = CLIP(0, max_activity, (temp_act * 64) >> shift);
+      }
+
+      int class_idx = th[activity];
+
+      int hv1, hv0, d1, d0, hvd1, hvd0;
+
+      if (sum_v > sum_h)
+      {
+        hv1 = sum_v;
+        hv0 = sum_h;
+        dir_temp_hv = 1;
+      }
+      else
+      {
+        hv1 = sum_h;
+        hv0 = sum_v;
+        dir_temp_hv = 3;
+      }
+      if (sum_d0 > sum_d1)
+      {
+        d1 = sum_d0;
+        d0 = sum_d1;
+        dir_temp_d = 0;
+      }
+      else
+      {
+        d1 = sum_d1;
+        d0 = sum_d0;
+        dir_temp_d = 2;
+      }
+      if ((uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0)
+      {
+        hvd1 = d1;
+        hvd0 = d0;
+        main_direction = dir_temp_d;
+        secondary_direction = dir_temp_hv;
+      }
+      else
+      {
+        hvd1 = hv1;
+        hvd0 = hv0;
+        main_direction = dir_temp_hv;
+        secondary_direction = dir_temp_d;
+      }
+
+      int direction_strength = 0;
+      if (hvd1 > 2 * hvd0)
+      {
+        direction_strength = 1;
+      }
+      if (hvd1 * 2 > 9 * hvd0)
+      {
+        direction_strength = 2;
+      }
+
+      if (direction_strength)
+      {
+        class_idx += (((main_direction & 0x1) << 1) + direction_strength) * 5;
+      }
+
+      static const int transpose_table[8] = { 0, 1, 0, 2, 2, 3, 1, 3 };
+      int transpose_idx = transpose_table[main_direction * 2 + (secondary_direction >> 1)];
+
+      int y_offset = i + blk_dst_y;
+      int x_offset = j + blk_dst_x;
+
+      alf_classifier *cl0 = classifier[y_offset] + x_offset;
+      alf_classifier *cl1 = classifier[y_offset + 1] + x_offset;
+      alf_classifier *cl2 = classifier[y_offset + 2] + x_offset;
+      alf_classifier *cl3 = classifier[y_offset + 3] + x_offset;
+
+      cl0[0].class_idx = cl0[1].class_idx = cl0[2].class_idx = cl0[3].class_idx =
+        cl1[0].class_idx = cl1[1].class_idx = cl1[2].class_idx = cl1[3].class_idx =
+        cl2[0].class_idx = cl2[1].class_idx = cl2[2].class_idx = cl2[3].class_idx =
+        cl3[0].class_idx = cl3[1].class_idx = cl3[2].class_idx = cl3[3].class_idx = class_idx;
+
+      cl0[0].transpose_idx = cl0[1].transpose_idx = cl0[2].transpose_idx = cl0[3].transpose_idx =
+        cl1[0].transpose_idx = cl1[1].transpose_idx = cl1[2].transpose_idx = cl1[3].transpose_idx =
+        cl2[0].transpose_idx = cl2[1].transpose_idx = cl2[2].transpose_idx = cl2[3].transpose_idx =
+        cl3[0].transpose_idx = cl3[1].transpose_idx = cl3[2].transpose_idx = cl3[3].transpose_idx = transpose_idx;
+
+    }
+  }
+}
+
+
+
+int kvz_strategy_register_alf_generic(void* opaque, uint8_t bitdepth)
+{
+  bool success = true;
+
+  success &= kvz_strategyselector_register(opaque, "alf_derive_classification_blk", "generic", 0, &alf_derive_classification_blk_generic);
+
+  return success;
+}
--- a/src/strategies/generic/alf-generic.h
+++ b/src/strategies/generic/alf-generic.h
@ -0,0 +1,31 @@
+#pragma once
+/*****************************************************************************
+ * This file is part of Kvazaar HEVC encoder.
+ *
+ * Copyright (C) 2013-2021 Tampere University of Technology and others (see
+ * COPYING file).
+ *
+ * Kvazaar is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+
+/**
+ * \ingroup Optimization
+ * \file
+ * Generic C implementations of optimized functions.
+ */
+
+#include "global.h" // IWYU pragma: keep
+
+int kvz_strategy_register_alf_generic(void* opaque, uint8_t bitdepth);
+
--- a/src/strategies/strategies-alf.c
+++ b/src/strategies/strategies-alf.c
@ -0,0 +1,41 @@
+/*****************************************************************************
+ * This file is part of Kvazaar HEVC encoder.
+ *
+ * Copyright (C) 2013-2021 Tampere University of Technology and others (see
+ * COPYING file).
+ *
+ * Kvazaar is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+
+#include "strategies/strategies-alf.h"
+//#include "strategies/avx2/alf-avx2.h"
+#include "strategies/generic/alf-generic.h"
+#include "strategyselector.h"
+
+
+// Define function pointers.
+alf_derive_classification_blk_func* alf_derive_classification_blk;
+
+
+int kvz_strategy_register_alf(void* opaque, uint8_t bitdepth) {
+  bool success = true;
+
+  success &= kvz_strategy_register_alf_generic(opaque, bitdepth);
+
+  if (kvz_g_hardware_flags.intel_flags.avx2) {
+    //success &= kvz_strategy_register_alf_avx2(opaque, bitdepth);
+  }
+
+  return success;
+}
--- a/src/strategies/strategies-alf.h
+++ b/src/strategies/strategies-alf.h
@ -0,0 +1,56 @@
+#pragma once
+/*****************************************************************************
+ * This file is part of Kvazaar HEVC encoder.
+ *
+ * Copyright (C) 2013-2021 Tampere University of Technology and others (see
+ * COPYING file).
+ *
+ * Kvazaar is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Kvazaar.  If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+
+/**
+ * \ingroup Optimization
+ * \file
+ * Interface for ALF functions.
+ */
+
+#include "encoder.h"
+#include "encoderstate.h"
+#include "global.h" // IWYU pragma: keep
+#include "kvazaar.h"
+#include "alf.h"
+
+
+// Declare function types.
+typedef void (alf_derive_classification_blk_func)(encoder_state_t * const state,
+  const int shift,
+  const int n_height,
+  const int n_width,
+  const int blk_pos_x,
+  const int blk_pos_y,
+  const int blk_dst_x,
+  const int blk_dst_y,
+  const int vb_ctu_height,
+  int vb_pos);
+
+// Declare function pointers.
+extern alf_derive_classification_blk_func * alf_derive_classification_blk;
+
+int kvz_strategy_register_alf(void* opaque, uint8_t bitdepth);
+
+
+#define STRATEGIES_ALF_EXPORTS \
+  {"alf_derive_classification_blk", (void**) &alf_derive_classification_blk}, \
+ 
+
--- a/src/strategyselector.c
+++ b/src/strategyselector.c
@ -90,6 +90,11 @@ int kvz_strategyselector_init(int32_t cpuid, uint8_t bitdepth) {
    fprintf(stderr, "kvz_strategy_register_encode failed!\n");
    return 0;
  }
+
+  if (!kvz_strategy_register_alf(&strategies, bitdepth)) {
+    fprintf(stderr, "kvz_strategy_register_alf failed!\n");
+    return 0;
+  }
  
  while(cur_strategy_to_select->fptr) {
    *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
--- a/src/strategyselector.h
+++ b/src/strategyselector.h
@ -96,6 +96,7 @@ int kvz_strategyselector_register(void *opaque, const char *type, const char *st
 #include "strategies/strategies-intra.h"
 #include "strategies/strategies-sao.h"
 #include "strategies/strategies-encode.h"
+#include "strategies/strategies-alf.h"

 static const strategy_to_select_t strategies_to_select[] = {
  STRATEGIES_NAL_EXPORTS
@ -106,6 +107,7 @@ static const strategy_to_select_t strategies_to_select[] = {
  STRATEGIES_INTRA_EXPORTS
  STRATEGIES_SAO_EXPORTS
  STRATEGIES_ENCODE_EXPORTS
+  STRATEGIES_ALF_EXPORTS
  { NULL, NULL },
 };