[alf] Define the strategy for alf_derive_classification_blk()

This commit is contained in:
Marko Viitanen 2021-08-19 17:04:35 +03:00
parent a5c27add5e
commit 3efaeede76
11 changed files with 437 additions and 243 deletions

View file

@ -188,10 +188,12 @@
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\strategies\generic\alf-generic.c" />
<ClCompile Include="..\..\src\strategies\generic\encode_coding_tree-generic.c" /> <ClCompile Include="..\..\src\strategies\generic\encode_coding_tree-generic.c" />
<ClCompile Include="..\..\src\strategies\generic\intra-generic.c" /> <ClCompile Include="..\..\src\strategies\generic\intra-generic.c" />
<ClCompile Include="..\..\src\strategies\generic\quant-generic.c" /> <ClCompile Include="..\..\src\strategies\generic\quant-generic.c" />
<ClCompile Include="..\..\src\strategies\generic\sao-generic.c" /> <ClCompile Include="..\..\src\strategies\generic\sao-generic.c" />
<ClCompile Include="..\..\src\strategies\strategies-alf.c" />
<ClCompile Include="..\..\src\strategies\strategies-encode.c" /> <ClCompile Include="..\..\src\strategies\strategies-encode.c" />
<ClCompile Include="..\..\src\strategies\strategies-intra.c" /> <ClCompile Include="..\..\src\strategies\strategies-intra.c" />
<ClCompile Include="..\..\src\strategies\strategies-quant.c" /> <ClCompile Include="..\..\src\strategies\strategies-quant.c" />
@ -263,9 +265,11 @@
<ClInclude Include="..\..\src\search_intra.h" /> <ClInclude Include="..\..\src\search_intra.h" />
<ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" /> <ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" />
<ClInclude Include="..\..\src\strategies\avx2\sao-avx2.h" /> <ClInclude Include="..\..\src\strategies\avx2\sao-avx2.h" />
<ClInclude Include="..\..\src\strategies\generic\alf-generic.h" />
<ClInclude Include="..\..\src\strategies\generic\encode_coding_tree-generic.h" /> <ClInclude Include="..\..\src\strategies\generic\encode_coding_tree-generic.h" />
<ClInclude Include="..\..\src\strategies\generic\intra-generic.h" /> <ClInclude Include="..\..\src\strategies\generic\intra-generic.h" />
<ClInclude Include="..\..\src\strategies\generic\sao-generic.h" /> <ClInclude Include="..\..\src\strategies\generic\sao-generic.h" />
<ClInclude Include="..\..\src\strategies\strategies-alf.h" />
<ClInclude Include="..\..\src\strategies\strategies-common.h" /> <ClInclude Include="..\..\src\strategies\strategies-common.h" />
<ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" /> <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" />
<ClInclude Include="..\..\src\strategies\generic\quant-generic.h" /> <ClInclude Include="..\..\src\strategies\generic\quant-generic.h" />

View file

@ -254,6 +254,12 @@
<Filter>Reconstruction</Filter> <Filter>Reconstruction</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\strategies\strategies-encode.c" /> <ClCompile Include="..\..\src\strategies\strategies-encode.c" />
<ClCompile Include="..\..\src\strategies\generic\alf-generic.c">
<Filter>Optimization\strategies\generic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\strategies\strategies-alf.c">
<Filter>Optimization\strategies</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\..\src\bitstream.h"> <ClInclude Include="..\..\src\bitstream.h">
@ -473,6 +479,12 @@
<ClInclude Include="..\..\src\alf.h"> <ClInclude Include="..\..\src\alf.h">
<Filter>Reconstruction</Filter> <Filter>Reconstruction</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\..\src\strategies\generic\alf-generic.h">
<Filter>Optimization\strategies\generic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\strategies\strategies-alf.h">
<Filter>Optimization\strategies</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<YASM Include="..\..\src\extras\x86inc.asm"> <YASM Include="..\..\src\extras\x86inc.asm">

View file

@ -117,6 +117,8 @@ libkvazaar_la_SOURCES = \
transform.h \ transform.h \
videoframe.c \ videoframe.c \
videoframe.h \ videoframe.h \
strategies/generic/alf-generic.c \
strategies/generic/alf-generic.h \
strategies/generic/dct-generic.c \ strategies/generic/dct-generic.c \
strategies/generic/dct-generic.h \ strategies/generic/dct-generic.h \
strategies/generic/intra-generic.c \ strategies/generic/intra-generic.c \
@ -137,6 +139,8 @@ libkvazaar_la_SOURCES = \
strategies/optimized_sad_func_ptr_t.h \ strategies/optimized_sad_func_ptr_t.h \
strategies/generic/sao_shared_generics.h \ strategies/generic/sao_shared_generics.h \
strategies/strategies-common.h \ strategies/strategies-common.h \
strategies/strategies-alf.c \
strategies/strategies-alf.h \
strategies/strategies-dct.c \ strategies/strategies-dct.c \
strategies/strategies-dct.h \ strategies/strategies-dct.h \
strategies/strategies-intra.c \ strategies/strategies-intra.c \

243
src/alf.c
View file

@ -10,7 +10,7 @@
#include "cabac.h" #include "cabac.h"
#include "rdo.h" #include "rdo.h"
#include "strategies/strategies-sao.h" #include "strategies/strategies-alf.h"
#include "kvz_math.h" #include "kvz_math.h"
#include "reshape.h" #include "reshape.h"
@ -5852,247 +5852,6 @@ static void alf_reconstruct(encoder_state_t * const state,
} }
} }
static void alf_derive_classification_blk(encoder_state_t * const state,
const int shift,
const int n_height,
const int n_width,
const int blk_pos_x,
const int blk_pos_y,
const int blk_dst_x,
const int blk_dst_y,
const int vb_ctu_height,
int vb_pos)
{
videoframe_t* const frame = state->tile->frame;
//int ***g_laplacian = state->tile->frame->alf_info->g_laplacian;
//alf_classifier **g_classifier = state->tile->frame->alf_info->g_classifier;
//CHECK((vb_ctu_height & (vb_ctu_height - 1)) != 0, "vb_ctu_height must be a power of 2");
static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
int laplacian[NUM_DIRECTIONS][CLASSIFICATION_BLK_SIZE + 5][CLASSIFICATION_BLK_SIZE + 5];
memset(laplacian, 0, sizeof(laplacian));
alf_classifier **classifier = state->tile->frame->alf_info->classifier;
const int stride = frame->rec->stride;
kvz_pixel *src = state->tile->frame->rec->y;
const int max_activity = 15;
int fl = 2;
int fl_p1 = fl + 1;
int fl2 = 2 * fl;
int main_direction, secondary_direction, dir_temp_hv, dir_temp_d;
int pix_y;
int height = n_height + fl2;
int width = n_width + fl2;
int pos_x = blk_pos_x;
int pos_y = blk_pos_y;
int start_height = pos_y - fl_p1;
for (int i = 0; i < height; i += 2)
{
int yoffset = (i + 1 + start_height) * stride - fl_p1;
const kvz_pixel *src0 = &src[yoffset - stride];
const kvz_pixel *src1 = &src[yoffset];
const kvz_pixel *src2 = &src[yoffset + stride];
const kvz_pixel *src3 = &src[yoffset + stride * 2];
const int y = blk_dst_y - 2 + i;
if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos - 2)
{
src3 = &src[yoffset + stride];
}
else if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos)
{
src0 = &src[yoffset];
}
int *p_y_ver = laplacian[ALF_VER][i];
int *p_y_hor = laplacian[ALF_HOR][i];
int *p_y_dig_0 = laplacian[ALF_DIAG0][i];
int *p_y_dig_1 = laplacian[ALF_DIAG1][i];
for (int j = 0; j < width; j += 2)
{
pix_y = j + 1 + pos_x;
const kvz_pixel *p_y = src1 + pix_y;
const kvz_pixel *p_y_down = src0 + pix_y;
const kvz_pixel *p_y_up = src2 + pix_y;
const kvz_pixel *p_y_up2 = src3 + pix_y;
const int16_t y0 = p_y[0] << 1;
const int16_t y_up1 = p_y_up[1] << 1;
p_y_ver[j] = abs(y0 - p_y_down[0] - p_y_up[0]) + abs(y_up1 - p_y[1] - p_y_up2[1]);
p_y_hor[j] = abs(y0 - p_y[1] - p_y[-1]) + abs(y_up1 - p_y_up[2] - p_y_up[0]);
p_y_dig_0[j] = abs(y0 - p_y_down[-1] - p_y_up[1]) + abs(y_up1 - p_y[0] - p_y_up2[2]);
p_y_dig_1[j] = abs(y0 - p_y_up[-1] - p_y_down[1]) + abs(y_up1 - p_y_up2[0] - p_y[2]);
if (j > 4 && (j - 6) % 4 == 0)
{
int j_m_6 = j - 6;
int j_m_4 = j - 4;
int j_m_2 = j - 2;
p_y_ver[j_m_6] += p_y_ver[j_m_4] + p_y_ver[j_m_2] + p_y_ver[j];
p_y_hor[j_m_6] += p_y_hor[j_m_4] + p_y_hor[j_m_2] + p_y_hor[j];
p_y_dig_0[j_m_6] += p_y_dig_0[j_m_4] + p_y_dig_0[j_m_2] + p_y_dig_0[j];
p_y_dig_1[j_m_6] += p_y_dig_1[j_m_4] + p_y_dig_1[j_m_2] + p_y_dig_1[j];
}
}
}
// classification block size
const int cls_size_y = 4;
const int cls_size_x = 4;
//for (int i = 0; i < blk.height; i += cls_size_y)
for (int i = 0; i < n_height; i += cls_size_y)
{
int* p_y_ver = laplacian[ALF_VER][i];
int* p_y_ver2 = laplacian[ALF_VER][i + 2];
int* p_y_ver4 = laplacian[ALF_VER][i + 4];
int* p_y_ver6 = laplacian[ALF_VER][i + 6];
int* p_y_hor = laplacian[ALF_HOR][i];
int* p_y_hor2 = laplacian[ALF_HOR][i + 2];
int* p_y_hor4 = laplacian[ALF_HOR][i + 4];
int* p_y_hor6 = laplacian[ALF_HOR][i + 6];
int* p_y_dig0 = laplacian[ALF_DIAG0][i];
int* p_y_dig02 = laplacian[ALF_DIAG0][i + 2];
int* p_y_dig04 = laplacian[ALF_DIAG0][i + 4];
int* p_y_dig06 = laplacian[ALF_DIAG0][i + 6];
int* p_y_dig1 = laplacian[ALF_DIAG1][i];
int* p_y_dig12 = laplacian[ALF_DIAG1][i + 2];
int* p_y_dig14 = laplacian[ALF_DIAG1][i + 4];
int* p_y_dig16 = laplacian[ALF_DIAG1][i + 6];
//for (int j = 0; j < blk.width; j += cls_size_x)
for (int j = 0; j < n_width; j += cls_size_x)
{
int sum_v = 0; int sum_h = 0; int sum_d0 = 0; int sum_d1 = 0;
if (((i + blk_dst_y) % vb_ctu_height) == (vb_pos - 4))
{
sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j];
sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j];
sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j];
sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j];
}
else if (((i + blk_dst_y) % vb_ctu_height) == vb_pos)
{
sum_v = p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
sum_h = p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
sum_d0 = p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
sum_d1 = p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
}
else
{
sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
}
int temp_act = sum_v + sum_h;
int activity = 0;
const int y = (i + blk_dst_y) & (vb_ctu_height - 1);
if (y == vb_pos - 4 || y == vb_pos)
{
activity = CLIP(0, max_activity, (temp_act * 96) >> shift);
}
else
{
activity = CLIP(0, max_activity, (temp_act * 64) >> shift);
}
int class_idx = th[activity];
int hv1, hv0, d1, d0, hvd1, hvd0;
if (sum_v > sum_h)
{
hv1 = sum_v;
hv0 = sum_h;
dir_temp_hv = 1;
}
else
{
hv1 = sum_h;
hv0 = sum_v;
dir_temp_hv = 3;
}
if (sum_d0 > sum_d1)
{
d1 = sum_d0;
d0 = sum_d1;
dir_temp_d = 0;
}
else
{
d1 = sum_d1;
d0 = sum_d0;
dir_temp_d = 2;
}
if ((uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0)
{
hvd1 = d1;
hvd0 = d0;
main_direction = dir_temp_d;
secondary_direction = dir_temp_hv;
}
else
{
hvd1 = hv1;
hvd0 = hv0;
main_direction = dir_temp_hv;
secondary_direction = dir_temp_d;
}
int direction_strength = 0;
if (hvd1 > 2 * hvd0)
{
direction_strength = 1;
}
if (hvd1 * 2 > 9 * hvd0)
{
direction_strength = 2;
}
if (direction_strength)
{
class_idx += (((main_direction & 0x1) << 1) + direction_strength) * 5;
}
static const int transpose_table[8] = { 0, 1, 0, 2, 2, 3, 1, 3 };
int transpose_idx = transpose_table[main_direction * 2 + (secondary_direction >> 1)];
int y_offset = i + blk_dst_y;
int x_offset = j + blk_dst_x;
alf_classifier *cl0 = classifier[y_offset] + x_offset;
alf_classifier *cl1 = classifier[y_offset + 1] + x_offset;
alf_classifier *cl2 = classifier[y_offset + 2] + x_offset;
alf_classifier *cl3 = classifier[y_offset + 3] + x_offset;
cl0[0].class_idx = cl0[1].class_idx = cl0[2].class_idx = cl0[3].class_idx =
cl1[0].class_idx = cl1[1].class_idx = cl1[2].class_idx = cl1[3].class_idx =
cl2[0].class_idx = cl2[1].class_idx = cl2[2].class_idx = cl2[3].class_idx =
cl3[0].class_idx = cl3[1].class_idx = cl3[2].class_idx = cl3[3].class_idx = class_idx;
cl0[0].transpose_idx = cl0[1].transpose_idx = cl0[2].transpose_idx = cl0[3].transpose_idx =
cl1[0].transpose_idx = cl1[1].transpose_idx = cl1[2].transpose_idx = cl1[3].transpose_idx =
cl2[0].transpose_idx = cl2[1].transpose_idx = cl2[2].transpose_idx = cl2[3].transpose_idx =
cl3[0].transpose_idx = cl3[1].transpose_idx = cl3[2].transpose_idx = cl3[3].transpose_idx = transpose_idx;
}
}
}
static void alf_derive_classification(encoder_state_t * const state, static void alf_derive_classification(encoder_state_t * const state,
const int width, const int width,
const int height, const int height,

View file

@ -80,7 +80,6 @@ extern const uint32_t kvz_entropy_bits[512];
// Floating point fractional bits, derived from kvz_entropy_bits // Floating point fractional bits, derived from kvz_entropy_bits
extern const float kvz_f_entropy_bits[512]; extern const float kvz_f_entropy_bits[512];
// ToDo: generate a new table for VVC?
#define CTX_ENTROPY_FBITS(ctx, val) kvz_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)] #define CTX_ENTROPY_FBITS(ctx, val) kvz_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)]
#endif #endif

View file

@ -0,0 +1,281 @@
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2021 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "strategies/generic/alf-generic.h"
#include "cu.h"
#include "encoder.h"
#include "encoderstate.h"
#include "kvazaar.h"
#include "alf.h"
#include "strategyselector.h"
static void alf_derive_classification_blk_generic(encoder_state_t * const state,
const int shift,
const int n_height,
const int n_width,
const int blk_pos_x,
const int blk_pos_y,
const int blk_dst_x,
const int blk_dst_y,
const int vb_ctu_height,
int vb_pos)
{
videoframe_t* const frame = state->tile->frame;
//int ***g_laplacian = state->tile->frame->alf_info->g_laplacian;
//alf_classifier **g_classifier = state->tile->frame->alf_info->g_classifier;
//CHECK((vb_ctu_height & (vb_ctu_height - 1)) != 0, "vb_ctu_height must be a power of 2");
static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
int laplacian[NUM_DIRECTIONS][CLASSIFICATION_BLK_SIZE + 5][CLASSIFICATION_BLK_SIZE + 5];
memset(laplacian, 0, sizeof(laplacian));
alf_classifier **classifier = state->tile->frame->alf_info->classifier;
const int stride = frame->rec->stride;
kvz_pixel *src = state->tile->frame->rec->y;
const int max_activity = 15;
int fl = 2;
int fl_p1 = fl + 1;
int fl2 = 2 * fl;
int main_direction, secondary_direction, dir_temp_hv, dir_temp_d;
int pix_y;
int height = n_height + fl2;
int width = n_width + fl2;
int pos_x = blk_pos_x;
int pos_y = blk_pos_y;
int start_height = pos_y - fl_p1;
for (int i = 0; i < height; i += 2)
{
int yoffset = (i + 1 + start_height) * stride - fl_p1;
const kvz_pixel *src0 = &src[yoffset - stride];
const kvz_pixel *src1 = &src[yoffset];
const kvz_pixel *src2 = &src[yoffset + stride];
const kvz_pixel *src3 = &src[yoffset + stride * 2];
const int y = blk_dst_y - 2 + i;
if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos - 2)
{
src3 = &src[yoffset + stride];
}
else if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos)
{
src0 = &src[yoffset];
}
int *p_y_ver = laplacian[ALF_VER][i];
int *p_y_hor = laplacian[ALF_HOR][i];
int *p_y_dig_0 = laplacian[ALF_DIAG0][i];
int *p_y_dig_1 = laplacian[ALF_DIAG1][i];
for (int j = 0; j < width; j += 2)
{
pix_y = j + 1 + pos_x;
const kvz_pixel *p_y = src1 + pix_y;
const kvz_pixel *p_y_down = src0 + pix_y;
const kvz_pixel *p_y_up = src2 + pix_y;
const kvz_pixel *p_y_up2 = src3 + pix_y;
const int16_t y0 = p_y[0] << 1;
const int16_t y_up1 = p_y_up[1] << 1;
p_y_ver[j] = abs(y0 - p_y_down[0] - p_y_up[0]) + abs(y_up1 - p_y[1] - p_y_up2[1]);
p_y_hor[j] = abs(y0 - p_y[1] - p_y[-1]) + abs(y_up1 - p_y_up[2] - p_y_up[0]);
p_y_dig_0[j] = abs(y0 - p_y_down[-1] - p_y_up[1]) + abs(y_up1 - p_y[0] - p_y_up2[2]);
p_y_dig_1[j] = abs(y0 - p_y_up[-1] - p_y_down[1]) + abs(y_up1 - p_y_up2[0] - p_y[2]);
if (j > 4 && (j - 6) % 4 == 0)
{
int j_m_6 = j - 6;
int j_m_4 = j - 4;
int j_m_2 = j - 2;
p_y_ver[j_m_6] += p_y_ver[j_m_4] + p_y_ver[j_m_2] + p_y_ver[j];
p_y_hor[j_m_6] += p_y_hor[j_m_4] + p_y_hor[j_m_2] + p_y_hor[j];
p_y_dig_0[j_m_6] += p_y_dig_0[j_m_4] + p_y_dig_0[j_m_2] + p_y_dig_0[j];
p_y_dig_1[j_m_6] += p_y_dig_1[j_m_4] + p_y_dig_1[j_m_2] + p_y_dig_1[j];
}
}
}
// classification block size
const int cls_size_y = 4;
const int cls_size_x = 4;
//for (int i = 0; i < blk.height; i += cls_size_y)
for (int i = 0; i < n_height; i += cls_size_y)
{
int* p_y_ver = laplacian[ALF_VER][i];
int* p_y_ver2 = laplacian[ALF_VER][i + 2];
int* p_y_ver4 = laplacian[ALF_VER][i + 4];
int* p_y_ver6 = laplacian[ALF_VER][i + 6];
int* p_y_hor = laplacian[ALF_HOR][i];
int* p_y_hor2 = laplacian[ALF_HOR][i + 2];
int* p_y_hor4 = laplacian[ALF_HOR][i + 4];
int* p_y_hor6 = laplacian[ALF_HOR][i + 6];
int* p_y_dig0 = laplacian[ALF_DIAG0][i];
int* p_y_dig02 = laplacian[ALF_DIAG0][i + 2];
int* p_y_dig04 = laplacian[ALF_DIAG0][i + 4];
int* p_y_dig06 = laplacian[ALF_DIAG0][i + 6];
int* p_y_dig1 = laplacian[ALF_DIAG1][i];
int* p_y_dig12 = laplacian[ALF_DIAG1][i + 2];
int* p_y_dig14 = laplacian[ALF_DIAG1][i + 4];
int* p_y_dig16 = laplacian[ALF_DIAG1][i + 6];
//for (int j = 0; j < blk.width; j += cls_size_x)
for (int j = 0; j < n_width; j += cls_size_x)
{
int sum_v = 0; int sum_h = 0; int sum_d0 = 0; int sum_d1 = 0;
if (((i + blk_dst_y) % vb_ctu_height) == (vb_pos - 4))
{
sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j];
sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j];
sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j];
sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j];
}
else if (((i + blk_dst_y) % vb_ctu_height) == vb_pos)
{
sum_v = p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
sum_h = p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
sum_d0 = p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
sum_d1 = p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
}
else
{
sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j];
sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j];
sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j];
sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j];
}
int temp_act = sum_v + sum_h;
int activity = 0;
const int y = (i + blk_dst_y) & (vb_ctu_height - 1);
if (y == vb_pos - 4 || y == vb_pos)
{
activity = CLIP(0, max_activity, (temp_act * 96) >> shift);
}
else
{
activity = CLIP(0, max_activity, (temp_act * 64) >> shift);
}
int class_idx = th[activity];
int hv1, hv0, d1, d0, hvd1, hvd0;
if (sum_v > sum_h)
{
hv1 = sum_v;
hv0 = sum_h;
dir_temp_hv = 1;
}
else
{
hv1 = sum_h;
hv0 = sum_v;
dir_temp_hv = 3;
}
if (sum_d0 > sum_d1)
{
d1 = sum_d0;
d0 = sum_d1;
dir_temp_d = 0;
}
else
{
d1 = sum_d1;
d0 = sum_d0;
dir_temp_d = 2;
}
if ((uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0)
{
hvd1 = d1;
hvd0 = d0;
main_direction = dir_temp_d;
secondary_direction = dir_temp_hv;
}
else
{
hvd1 = hv1;
hvd0 = hv0;
main_direction = dir_temp_hv;
secondary_direction = dir_temp_d;
}
int direction_strength = 0;
if (hvd1 > 2 * hvd0)
{
direction_strength = 1;
}
if (hvd1 * 2 > 9 * hvd0)
{
direction_strength = 2;
}
if (direction_strength)
{
class_idx += (((main_direction & 0x1) << 1) + direction_strength) * 5;
}
static const int transpose_table[8] = { 0, 1, 0, 2, 2, 3, 1, 3 };
int transpose_idx = transpose_table[main_direction * 2 + (secondary_direction >> 1)];
int y_offset = i + blk_dst_y;
int x_offset = j + blk_dst_x;
alf_classifier *cl0 = classifier[y_offset] + x_offset;
alf_classifier *cl1 = classifier[y_offset + 1] + x_offset;
alf_classifier *cl2 = classifier[y_offset + 2] + x_offset;
alf_classifier *cl3 = classifier[y_offset + 3] + x_offset;
cl0[0].class_idx = cl0[1].class_idx = cl0[2].class_idx = cl0[3].class_idx =
cl1[0].class_idx = cl1[1].class_idx = cl1[2].class_idx = cl1[3].class_idx =
cl2[0].class_idx = cl2[1].class_idx = cl2[2].class_idx = cl2[3].class_idx =
cl3[0].class_idx = cl3[1].class_idx = cl3[2].class_idx = cl3[3].class_idx = class_idx;
cl0[0].transpose_idx = cl0[1].transpose_idx = cl0[2].transpose_idx = cl0[3].transpose_idx =
cl1[0].transpose_idx = cl1[1].transpose_idx = cl1[2].transpose_idx = cl1[3].transpose_idx =
cl2[0].transpose_idx = cl2[1].transpose_idx = cl2[2].transpose_idx = cl2[3].transpose_idx =
cl3[0].transpose_idx = cl3[1].transpose_idx = cl3[2].transpose_idx = cl3[3].transpose_idx = transpose_idx;
}
}
}
int kvz_strategy_register_alf_generic(void* opaque, uint8_t bitdepth)
{
bool success = true;
success &= kvz_strategyselector_register(opaque, "alf_derive_classification_blk", "generic", 0, &alf_derive_classification_blk_generic);
return success;
}

View file

@ -0,0 +1,31 @@
#pragma once
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2021 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
/**
* \ingroup Optimization
* \file
* Generic C implementations of optimized functions.
*/
#include "global.h" // IWYU pragma: keep
int kvz_strategy_register_alf_generic(void* opaque, uint8_t bitdepth);

View file

@ -0,0 +1,41 @@
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2021 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include "strategies/strategies-alf.h"
//#include "strategies/avx2/alf-avx2.h"
#include "strategies/generic/alf-generic.h"
#include "strategyselector.h"
// Define function pointers.
alf_derive_classification_blk_func* alf_derive_classification_blk;
int kvz_strategy_register_alf(void* opaque, uint8_t bitdepth) {
bool success = true;
success &= kvz_strategy_register_alf_generic(opaque, bitdepth);
if (kvz_g_hardware_flags.intel_flags.avx2) {
//success &= kvz_strategy_register_alf_avx2(opaque, bitdepth);
}
return success;
}

View file

@ -0,0 +1,56 @@
#pragma once
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2021 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
/**
* \ingroup Optimization
* \file
* Interface for sao functions.
*/
#include "encoder.h"
#include "encoderstate.h"
#include "global.h" // IWYU pragma: keep
#include "kvazaar.h"
#include "alf.h"
// Declare function pointers.
typedef void (alf_derive_classification_blk_func)(encoder_state_t * const state,
const int shift,
const int n_height,
const int n_width,
const int blk_pos_x,
const int blk_pos_y,
const int blk_dst_x,
const int blk_dst_y,
const int vb_ctu_height,
int vb_pos);
// Declare function pointers.
extern alf_derive_classification_blk_func * alf_derive_classification_blk;
int kvz_strategy_register_alf(void* opaque, uint8_t bitdepth);
#define STRATEGIES_ALF_EXPORTS \
{"alf_derive_classification_blk", (void**) &alf_derive_classification_blk}, \

View file

@ -90,6 +90,11 @@ int kvz_strategyselector_init(int32_t cpuid, uint8_t bitdepth) {
fprintf(stderr, "kvz_strategy_register_encode failed!\n"); fprintf(stderr, "kvz_strategy_register_encode failed!\n");
return 0; return 0;
} }
if (!kvz_strategy_register_alf(&strategies, bitdepth)) {
fprintf(stderr, "kvz_strategy_register_encode failed!\n");
return 0;
}
while(cur_strategy_to_select->fptr) { while(cur_strategy_to_select->fptr) {
*(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type); *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);

View file

@ -96,6 +96,7 @@ int kvz_strategyselector_register(void *opaque, const char *type, const char *st
#include "strategies/strategies-intra.h" #include "strategies/strategies-intra.h"
#include "strategies/strategies-sao.h" #include "strategies/strategies-sao.h"
#include "strategies/strategies-encode.h" #include "strategies/strategies-encode.h"
#include "strategies/strategies-alf.h"
static const strategy_to_select_t strategies_to_select[] = { static const strategy_to_select_t strategies_to_select[] = {
STRATEGIES_NAL_EXPORTS STRATEGIES_NAL_EXPORTS
@ -106,6 +107,7 @@ static const strategy_to_select_t strategies_to_select[] = {
STRATEGIES_INTRA_EXPORTS STRATEGIES_INTRA_EXPORTS
STRATEGIES_SAO_EXPORTS STRATEGIES_SAO_EXPORTS
STRATEGIES_ENCODE_EXPORTS STRATEGIES_ENCODE_EXPORTS
STRATEGIES_ALF_EXPORTS
{ NULL, NULL }, { NULL, NULL },
}; };