diff --git a/build/kvazaar_lib/kvazaar_lib.vcxproj b/build/kvazaar_lib/kvazaar_lib.vcxproj index dd08a6e4..ef459fe1 100644 --- a/build/kvazaar_lib/kvazaar_lib.vcxproj +++ b/build/kvazaar_lib/kvazaar_lib.vcxproj @@ -188,10 +188,12 @@ AdvancedVectorExtensions2 AdvancedVectorExtensions2 + + @@ -263,9 +265,11 @@ + + diff --git a/build/kvazaar_lib/kvazaar_lib.vcxproj.filters b/build/kvazaar_lib/kvazaar_lib.vcxproj.filters index bc68cd26..bc207ca8 100644 --- a/build/kvazaar_lib/kvazaar_lib.vcxproj.filters +++ b/build/kvazaar_lib/kvazaar_lib.vcxproj.filters @@ -254,6 +254,12 @@ Reconstruction + + Optimization\strategies\generic + + + Optimization\strategies + @@ -473,6 +479,12 @@ Reconstruction + + Optimization\strategies\generic + + + Optimization\strategies + diff --git a/src/Makefile.am b/src/Makefile.am index 6a13c407..4cdf6a34 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -117,6 +117,8 @@ libkvazaar_la_SOURCES = \ transform.h \ videoframe.c \ videoframe.h \ + strategies/generic/alf-generic.c \ + strategies/generic/alf-generic.h \ strategies/generic/dct-generic.c \ strategies/generic/dct-generic.h \ strategies/generic/intra-generic.c \ @@ -137,6 +139,8 @@ libkvazaar_la_SOURCES = \ strategies/optimized_sad_func_ptr_t.h \ strategies/generic/sao_shared_generics.h \ strategies/strategies-common.h \ + strategies/strategies-alf.c \ + strategies/strategies-alf.h \ strategies/strategies-dct.c \ strategies/strategies-dct.h \ strategies/strategies-intra.c \ diff --git a/src/alf.c b/src/alf.c index dec4d22e..9de923b3 100644 --- a/src/alf.c +++ b/src/alf.c @@ -10,7 +10,7 @@ #include "cabac.h" #include "rdo.h" -#include "strategies/strategies-sao.h" +#include "strategies/strategies-alf.h" #include "kvz_math.h" #include "reshape.h" @@ -5852,247 +5852,6 @@ static void alf_reconstruct(encoder_state_t * const state, } } -static void alf_derive_classification_blk(encoder_state_t * const state, - const int shift, - const int n_height, - const int n_width, - const int blk_pos_x, - const int blk_pos_y, - const int blk_dst_x, - const int blk_dst_y, - const int vb_ctu_height, - int vb_pos) -{ - videoframe_t* const frame = state->tile->frame; - //int ***g_laplacian = state->tile->frame->alf_info->g_laplacian; - //alf_classifier **g_classifier = state->tile->frame->alf_info->g_classifier; - //CHECK((vb_ctu_height & (vb_ctu_height - 1)) != 0, "vb_ctu_height must be a power of 2"); - - static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 }; - int laplacian[NUM_DIRECTIONS][CLASSIFICATION_BLK_SIZE + 5][CLASSIFICATION_BLK_SIZE + 5]; - memset(laplacian, 0, sizeof(laplacian)); - alf_classifier **classifier = state->tile->frame->alf_info->classifier; - - const int stride = frame->rec->stride; - kvz_pixel *src = state->tile->frame->rec->y; - const int max_activity = 15; - - int fl = 2; - int fl_p1 = fl + 1; - int fl2 = 2 * fl; - - int main_direction, secondary_direction, dir_temp_hv, dir_temp_d; - int pix_y; - - int height = n_height + fl2; - int width = n_width + fl2; - int pos_x = blk_pos_x; - int pos_y = blk_pos_y; - int start_height = pos_y - fl_p1; - - for (int i = 0; i < height; i += 2) - { - int yoffset = (i + 1 + start_height) * stride - fl_p1; - const kvz_pixel *src0 = &src[yoffset - stride]; - const kvz_pixel *src1 = &src[yoffset]; - const kvz_pixel *src2 = &src[yoffset + stride]; - const kvz_pixel *src3 = &src[yoffset + stride * 2]; - - const int y = blk_dst_y - 2 + i; - if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos - 2) - { - src3 = &src[yoffset + stride]; - } - else if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos) - { - src0 = &src[yoffset]; - } - - int *p_y_ver = laplacian[ALF_VER][i]; - int *p_y_hor = laplacian[ALF_HOR][i]; - int *p_y_dig_0 = laplacian[ALF_DIAG0][i]; - int *p_y_dig_1 = laplacian[ALF_DIAG1][i]; - - for (int j = 0; j < width; j += 2) - { - pix_y = j + 1 + pos_x; - const kvz_pixel *p_y = src1 + pix_y; - const kvz_pixel *p_y_down = src0 + pix_y; - const kvz_pixel *p_y_up = src2 + pix_y; - const kvz_pixel *p_y_up2 = src3 + pix_y; - - const int16_t y0 = p_y[0] << 1; - const int16_t y_up1 = p_y_up[1] << 1; - - p_y_ver[j] = abs(y0 - p_y_down[0] - p_y_up[0]) + abs(y_up1 - p_y[1] - p_y_up2[1]); - p_y_hor[j] = abs(y0 - p_y[1] - p_y[-1]) + abs(y_up1 - p_y_up[2] - p_y_up[0]); - p_y_dig_0[j] = abs(y0 - p_y_down[-1] - p_y_up[1]) + abs(y_up1 - p_y[0] - p_y_up2[2]); - p_y_dig_1[j] = abs(y0 - p_y_up[-1] - p_y_down[1]) + abs(y_up1 - p_y_up2[0] - p_y[2]); - - if (j > 4 && (j - 6) % 4 == 0) - { - int j_m_6 = j - 6; - int j_m_4 = j - 4; - int j_m_2 = j - 2; - - p_y_ver[j_m_6] += p_y_ver[j_m_4] + p_y_ver[j_m_2] + p_y_ver[j]; - p_y_hor[j_m_6] += p_y_hor[j_m_4] + p_y_hor[j_m_2] + p_y_hor[j]; - p_y_dig_0[j_m_6] += p_y_dig_0[j_m_4] + p_y_dig_0[j_m_2] + p_y_dig_0[j]; - p_y_dig_1[j_m_6] += p_y_dig_1[j_m_4] + p_y_dig_1[j_m_2] + p_y_dig_1[j]; - } - } - } - - // classification block size - const int cls_size_y = 4; - const int cls_size_x = 4; - - //for (int i = 0; i < blk.height; i += cls_size_y) - for (int i = 0; i < n_height; i += cls_size_y) - { - int* p_y_ver = laplacian[ALF_VER][i]; - int* p_y_ver2 = laplacian[ALF_VER][i + 2]; - int* p_y_ver4 = laplacian[ALF_VER][i + 4]; - int* p_y_ver6 = laplacian[ALF_VER][i + 6]; - - int* p_y_hor = laplacian[ALF_HOR][i]; - int* p_y_hor2 = laplacian[ALF_HOR][i + 2]; - int* p_y_hor4 = laplacian[ALF_HOR][i + 4]; - int* p_y_hor6 = laplacian[ALF_HOR][i + 6]; - - int* p_y_dig0 = laplacian[ALF_DIAG0][i]; - int* p_y_dig02 = laplacian[ALF_DIAG0][i + 2]; - int* p_y_dig04 = laplacian[ALF_DIAG0][i + 4]; - int* p_y_dig06 = laplacian[ALF_DIAG0][i + 6]; - - int* p_y_dig1 = laplacian[ALF_DIAG1][i]; - int* p_y_dig12 = laplacian[ALF_DIAG1][i + 2]; - int* p_y_dig14 = laplacian[ALF_DIAG1][i + 4]; - int* p_y_dig16 = laplacian[ALF_DIAG1][i + 6]; - - //for (int j = 0; j < blk.width; j += cls_size_x) - for (int j = 0; j < n_width; j += cls_size_x) - { - int sum_v = 0; int sum_h = 0; int sum_d0 = 0; int sum_d1 = 0; - - if (((i + blk_dst_y) % vb_ctu_height) == (vb_pos - 4)) - { - sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j]; - sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j]; - sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j]; - sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j]; - } - else if (((i + blk_dst_y) % vb_ctu_height) == vb_pos) - { - sum_v = p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j]; - sum_h = p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j]; - sum_d0 = p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j]; - sum_d1 = p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j]; - } - else - { - sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j]; - sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j]; - sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j]; - sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j]; - } - - int temp_act = sum_v + sum_h; - int activity = 0; - - const int y = (i + blk_dst_y) & (vb_ctu_height - 1); - if (y == vb_pos - 4 || y == vb_pos) - { - activity = CLIP(0, max_activity, (temp_act * 96) >> shift); - } - else - { - activity = CLIP(0, max_activity, (temp_act * 64) >> shift); - } - - int class_idx = th[activity]; - - int hv1, hv0, d1, d0, hvd1, hvd0; - - if (sum_v > sum_h) - { - hv1 = sum_v; - hv0 = sum_h; - dir_temp_hv = 1; - } - else - { - hv1 = sum_h; - hv0 = sum_v; - dir_temp_hv = 3; - } - if (sum_d0 > sum_d1) - { - d1 = sum_d0; - d0 = sum_d1; - dir_temp_d = 0; - } - else - { - d1 = sum_d1; - d0 = sum_d0; - dir_temp_d = 2; - } - if ((uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0) - { - hvd1 = d1; - hvd0 = d0; - main_direction = dir_temp_d; - secondary_direction = dir_temp_hv; - } - else - { - hvd1 = hv1; - hvd0 = hv0; - main_direction = dir_temp_hv; - secondary_direction = dir_temp_d; - } - - int direction_strength = 0; - if (hvd1 > 2 * hvd0) - { - direction_strength = 1; - } - if (hvd1 * 2 > 9 * hvd0) - { - direction_strength = 2; - } - - if (direction_strength) - { - class_idx += (((main_direction & 0x1) << 1) + direction_strength) * 5; - } - - static const int transpose_table[8] = { 0, 1, 0, 2, 2, 3, 1, 3 }; - int transpose_idx = transpose_table[main_direction * 2 + (secondary_direction >> 1)]; - - int y_offset = i + blk_dst_y; - int x_offset = j + blk_dst_x; - - alf_classifier *cl0 = classifier[y_offset] + x_offset; - alf_classifier *cl1 = classifier[y_offset + 1] + x_offset; - alf_classifier *cl2 = classifier[y_offset + 2] + x_offset; - alf_classifier *cl3 = classifier[y_offset + 3] + x_offset; - - cl0[0].class_idx = cl0[1].class_idx = cl0[2].class_idx = cl0[3].class_idx = - cl1[0].class_idx = cl1[1].class_idx = cl1[2].class_idx = cl1[3].class_idx = - cl2[0].class_idx = cl2[1].class_idx = cl2[2].class_idx = cl2[3].class_idx = - cl3[0].class_idx = cl3[1].class_idx = cl3[2].class_idx = cl3[3].class_idx = class_idx; - - cl0[0].transpose_idx = cl0[1].transpose_idx = cl0[2].transpose_idx = cl0[3].transpose_idx = - cl1[0].transpose_idx = cl1[1].transpose_idx = cl1[2].transpose_idx = cl1[3].transpose_idx = - cl2[0].transpose_idx = cl2[1].transpose_idx = cl2[2].transpose_idx = cl2[3].transpose_idx = - cl3[0].transpose_idx = cl3[1].transpose_idx = cl3[2].transpose_idx = cl3[3].transpose_idx = transpose_idx; - - } - } -} - static void alf_derive_classification(encoder_state_t * const state, const int width, const int height, diff --git a/src/rdo.h b/src/rdo.h index 70055fee..91db4d34 100644 --- a/src/rdo.h +++ b/src/rdo.h @@ -80,7 +80,6 @@ extern const uint32_t kvz_entropy_bits[512]; // Floating point fractional bits, derived from kvz_entropy_bits extern const float kvz_f_entropy_bits[512]; -// ToDo: generate a new table for VVC? #define CTX_ENTROPY_FBITS(ctx, val) kvz_f_entropy_bits[(CTX_STATE(ctx)<<1) ^ (val)] #endif diff --git a/src/strategies/generic/alf-generic.c b/src/strategies/generic/alf-generic.c new file mode 100644 index 00000000..35d622a7 --- /dev/null +++ b/src/strategies/generic/alf-generic.c @@ -0,0 +1,281 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2021 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see . + ****************************************************************************/ + +#include "strategies/generic/alf-generic.h" + +#include "cu.h" +#include "encoder.h" +#include "encoderstate.h" +#include "kvazaar.h" +#include "alf.h" +#include "strategyselector.h" + + +static void alf_derive_classification_blk_generic(encoder_state_t * const state, + const int shift, + const int n_height, + const int n_width, + const int blk_pos_x, + const int blk_pos_y, + const int blk_dst_x, + const int blk_dst_y, + const int vb_ctu_height, + int vb_pos) +{ + videoframe_t* const frame = state->tile->frame; + //int ***g_laplacian = state->tile->frame->alf_info->g_laplacian; + //alf_classifier **g_classifier = state->tile->frame->alf_info->g_classifier; + //CHECK((vb_ctu_height & (vb_ctu_height - 1)) != 0, "vb_ctu_height must be a power of 2"); + + static const int th[16] = { 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 }; + int laplacian[NUM_DIRECTIONS][CLASSIFICATION_BLK_SIZE + 5][CLASSIFICATION_BLK_SIZE + 5]; + memset(laplacian, 0, sizeof(laplacian)); + alf_classifier **classifier = state->tile->frame->alf_info->classifier; + + const int stride = frame->rec->stride; + kvz_pixel *src = state->tile->frame->rec->y; + const int max_activity = 15; + + int fl = 2; + int fl_p1 = fl + 1; + int fl2 = 2 * fl; + + int main_direction, secondary_direction, dir_temp_hv, dir_temp_d; + int pix_y; + + int height = n_height + fl2; + int width = n_width + fl2; + int pos_x = blk_pos_x; + int pos_y = blk_pos_y; + int start_height = pos_y - fl_p1; + + for (int i = 0; i < height; i += 2) + { + int yoffset = (i + 1 + start_height) * stride - fl_p1; + const kvz_pixel *src0 = &src[yoffset - stride]; + const kvz_pixel *src1 = &src[yoffset]; + const kvz_pixel *src2 = &src[yoffset + stride]; + const kvz_pixel *src3 = &src[yoffset + stride * 2]; + + const int y = blk_dst_y - 2 + i; + if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos - 2) + { + src3 = &src[yoffset + stride]; + } + else if (y > 0 && (y & (vb_ctu_height - 1)) == vb_pos) + { + src0 = &src[yoffset]; + } + + int *p_y_ver = laplacian[ALF_VER][i]; + int *p_y_hor = laplacian[ALF_HOR][i]; + int *p_y_dig_0 = laplacian[ALF_DIAG0][i]; + int *p_y_dig_1 = laplacian[ALF_DIAG1][i]; + + for (int j = 0; j < width; j += 2) + { + pix_y = j + 1 + pos_x; + const kvz_pixel *p_y = src1 + pix_y; + const kvz_pixel *p_y_down = src0 + pix_y; + const kvz_pixel *p_y_up = src2 + pix_y; + const kvz_pixel *p_y_up2 = src3 + pix_y; + + const int16_t y0 = p_y[0] << 1; + const int16_t y_up1 = p_y_up[1] << 1; + + p_y_ver[j] = abs(y0 - p_y_down[0] - p_y_up[0]) + abs(y_up1 - p_y[1] - p_y_up2[1]); + p_y_hor[j] = abs(y0 - p_y[1] - p_y[-1]) + abs(y_up1 - p_y_up[2] - p_y_up[0]); + p_y_dig_0[j] = abs(y0 - p_y_down[-1] - p_y_up[1]) + abs(y_up1 - p_y[0] - p_y_up2[2]); + p_y_dig_1[j] = abs(y0 - p_y_up[-1] - p_y_down[1]) + abs(y_up1 - p_y_up2[0] - p_y[2]); + + if (j > 4 && (j - 6) % 4 == 0) + { + int j_m_6 = j - 6; + int j_m_4 = j - 4; + int j_m_2 = j - 2; + + p_y_ver[j_m_6] += p_y_ver[j_m_4] + p_y_ver[j_m_2] + p_y_ver[j]; + p_y_hor[j_m_6] += p_y_hor[j_m_4] + p_y_hor[j_m_2] + p_y_hor[j]; + p_y_dig_0[j_m_6] += p_y_dig_0[j_m_4] + p_y_dig_0[j_m_2] + p_y_dig_0[j]; + p_y_dig_1[j_m_6] += p_y_dig_1[j_m_4] + p_y_dig_1[j_m_2] + p_y_dig_1[j]; + } + } + } + + // classification block size + const int cls_size_y = 4; + const int cls_size_x = 4; + + //for (int i = 0; i < blk.height; i += cls_size_y) + for (int i = 0; i < n_height; i += cls_size_y) + { + int* p_y_ver = laplacian[ALF_VER][i]; + int* p_y_ver2 = laplacian[ALF_VER][i + 2]; + int* p_y_ver4 = laplacian[ALF_VER][i + 4]; + int* p_y_ver6 = laplacian[ALF_VER][i + 6]; + + int* p_y_hor = laplacian[ALF_HOR][i]; + int* p_y_hor2 = laplacian[ALF_HOR][i + 2]; + int* p_y_hor4 = laplacian[ALF_HOR][i + 4]; + int* p_y_hor6 = laplacian[ALF_HOR][i + 6]; + + int* p_y_dig0 = laplacian[ALF_DIAG0][i]; + int* p_y_dig02 = laplacian[ALF_DIAG0][i + 2]; + int* p_y_dig04 = laplacian[ALF_DIAG0][i + 4]; + int* p_y_dig06 = laplacian[ALF_DIAG0][i + 6]; + + int* p_y_dig1 = laplacian[ALF_DIAG1][i]; + int* p_y_dig12 = laplacian[ALF_DIAG1][i + 2]; + int* p_y_dig14 = laplacian[ALF_DIAG1][i + 4]; + int* p_y_dig16 = laplacian[ALF_DIAG1][i + 6]; + + //for (int j = 0; j < blk.width; j += cls_size_x) + for (int j = 0; j < n_width; j += cls_size_x) + { + int sum_v = 0; int sum_h = 0; int sum_d0 = 0; int sum_d1 = 0; + + if (((i + blk_dst_y) % vb_ctu_height) == (vb_pos - 4)) + { + sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j]; + sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j]; + sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j]; + sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j]; + } + else if (((i + blk_dst_y) % vb_ctu_height) == vb_pos) + { + sum_v = p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j]; + sum_h = p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j]; + sum_d0 = p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j]; + sum_d1 = p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j]; + } + else + { + sum_v = p_y_ver[j] + p_y_ver2[j] + p_y_ver4[j] + p_y_ver6[j]; + sum_h = p_y_hor[j] + p_y_hor2[j] + p_y_hor4[j] + p_y_hor6[j]; + sum_d0 = p_y_dig0[j] + p_y_dig02[j] + p_y_dig04[j] + p_y_dig06[j]; + sum_d1 = p_y_dig1[j] + p_y_dig12[j] + p_y_dig14[j] + p_y_dig16[j]; + } + + int temp_act = sum_v + sum_h; + int activity = 0; + + const int y = (i + blk_dst_y) & (vb_ctu_height - 1); + if (y == vb_pos - 4 || y == vb_pos) + { + activity = CLIP(0, max_activity, (temp_act * 96) >> shift); + } + else + { + activity = CLIP(0, max_activity, (temp_act * 64) >> shift); + } + + int class_idx = th[activity]; + + int hv1, hv0, d1, d0, hvd1, hvd0; + + if (sum_v > sum_h) + { + hv1 = sum_v; + hv0 = sum_h; + dir_temp_hv = 1; + } + else + { + hv1 = sum_h; + hv0 = sum_v; + dir_temp_hv = 3; + } + if (sum_d0 > sum_d1) + { + d1 = sum_d0; + d0 = sum_d1; + dir_temp_d = 0; + } + else + { + d1 = sum_d1; + d0 = sum_d0; + dir_temp_d = 2; + } + if ((uint32_t)d1 * (uint32_t)hv0 > (uint32_t)hv1 * (uint32_t)d0) + { + hvd1 = d1; + hvd0 = d0; + main_direction = dir_temp_d; + secondary_direction = dir_temp_hv; + } + else + { + hvd1 = hv1; + hvd0 = hv0; + main_direction = dir_temp_hv; + secondary_direction = dir_temp_d; + } + + int direction_strength = 0; + if (hvd1 > 2 * hvd0) + { + direction_strength = 1; + } + if (hvd1 * 2 > 9 * hvd0) + { + direction_strength = 2; + } + + if (direction_strength) + { + class_idx += (((main_direction & 0x1) << 1) + direction_strength) * 5; + } + + static const int transpose_table[8] = { 0, 1, 0, 2, 2, 3, 1, 3 }; + int transpose_idx = transpose_table[main_direction * 2 + (secondary_direction >> 1)]; + + int y_offset = i + blk_dst_y; + int x_offset = j + blk_dst_x; + + alf_classifier *cl0 = classifier[y_offset] + x_offset; + alf_classifier *cl1 = classifier[y_offset + 1] + x_offset; + alf_classifier *cl2 = classifier[y_offset + 2] + x_offset; + alf_classifier *cl3 = classifier[y_offset + 3] + x_offset; + + cl0[0].class_idx = cl0[1].class_idx = cl0[2].class_idx = cl0[3].class_idx = + cl1[0].class_idx = cl1[1].class_idx = cl1[2].class_idx = cl1[3].class_idx = + cl2[0].class_idx = cl2[1].class_idx = cl2[2].class_idx = cl2[3].class_idx = + cl3[0].class_idx = cl3[1].class_idx = cl3[2].class_idx = cl3[3].class_idx = class_idx; + + cl0[0].transpose_idx = cl0[1].transpose_idx = cl0[2].transpose_idx = cl0[3].transpose_idx = + cl1[0].transpose_idx = cl1[1].transpose_idx = cl1[2].transpose_idx = cl1[3].transpose_idx = + cl2[0].transpose_idx = cl2[1].transpose_idx = cl2[2].transpose_idx = cl2[3].transpose_idx = + cl3[0].transpose_idx = cl3[1].transpose_idx = cl3[2].transpose_idx = cl3[3].transpose_idx = transpose_idx; + + } + } +} + + + +int kvz_strategy_register_alf_generic(void* opaque, uint8_t bitdepth) +{ + bool success = true; + + success &= kvz_strategyselector_register(opaque, "alf_derive_classification_blk", "generic", 0, &alf_derive_classification_blk_generic); + + return success; +} diff --git a/src/strategies/generic/alf-generic.h b/src/strategies/generic/alf-generic.h new file mode 100644 index 00000000..edbb9d94 --- /dev/null +++ b/src/strategies/generic/alf-generic.h @@ -0,0 +1,31 @@ +#pragma once +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2021 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see . + ****************************************************************************/ + +/** + * \ingroup Optimization + * \file + * Generic C implementations of optimized functions. + */ + +#include "global.h" // IWYU pragma: keep + +int kvz_strategy_register_alf_generic(void* opaque, uint8_t bitdepth); + diff --git a/src/strategies/strategies-alf.c b/src/strategies/strategies-alf.c new file mode 100644 index 00000000..d6da9332 --- /dev/null +++ b/src/strategies/strategies-alf.c @@ -0,0 +1,41 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2021 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see . + ****************************************************************************/ + +#include "strategies/strategies-alf.h" +//#include "strategies/avx2/alf-avx2.h" +#include "strategies/generic/alf-generic.h" +#include "strategyselector.h" + + +// Define function pointers. +alf_derive_classification_blk_func* alf_derive_classification_blk; + + +int kvz_strategy_register_alf(void* opaque, uint8_t bitdepth) { + bool success = true; + + success &= kvz_strategy_register_alf_generic(opaque, bitdepth); + + if (kvz_g_hardware_flags.intel_flags.avx2) { + //success &= kvz_strategy_register_alf_avx2(opaque, bitdepth); + } + + return success; +} \ No newline at end of file diff --git a/src/strategies/strategies-alf.h b/src/strategies/strategies-alf.h new file mode 100644 index 00000000..563697c4 --- /dev/null +++ b/src/strategies/strategies-alf.h @@ -0,0 +1,56 @@ +#pragma once +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2021 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. + * + * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with Kvazaar. If not, see . + ****************************************************************************/ + +/** + * \ingroup Optimization + * \file + * Interface for sao functions. + */ + +#include "encoder.h" +#include "encoderstate.h" +#include "global.h" // IWYU pragma: keep +#include "kvazaar.h" +#include "alf.h" + + +// Declare function pointers. +typedef void (alf_derive_classification_blk_func)(encoder_state_t * const state, + const int shift, + const int n_height, + const int n_width, + const int blk_pos_x, + const int blk_pos_y, + const int blk_dst_x, + const int blk_dst_y, + const int vb_ctu_height, + int vb_pos); + +// Declare function pointers. +extern alf_derive_classification_blk_func * alf_derive_classification_blk; + +int kvz_strategy_register_alf(void* opaque, uint8_t bitdepth); + + +#define STRATEGIES_ALF_EXPORTS \ + {"alf_derive_classification_blk", (void**) &alf_derive_classification_blk}, \ + + diff --git a/src/strategyselector.c b/src/strategyselector.c index fc97635b..80281940 100644 --- a/src/strategyselector.c +++ b/src/strategyselector.c @@ -90,6 +90,11 @@ int kvz_strategyselector_init(int32_t cpuid, uint8_t bitdepth) { fprintf(stderr, "kvz_strategy_register_encode failed!\n"); return 0; } + + if (!kvz_strategy_register_alf(&strategies, bitdepth)) { + fprintf(stderr, "kvz_strategy_register_encode failed!\n"); + return 0; + } while(cur_strategy_to_select->fptr) { *(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type); diff --git a/src/strategyselector.h b/src/strategyselector.h index 575accc3..c4820153 100644 --- a/src/strategyselector.h +++ b/src/strategyselector.h @@ -96,6 +96,7 @@ int kvz_strategyselector_register(void *opaque, const char *type, const char *st #include "strategies/strategies-intra.h" #include "strategies/strategies-sao.h" #include "strategies/strategies-encode.h" +#include "strategies/strategies-alf.h" static const strategy_to_select_t strategies_to_select[] = { STRATEGIES_NAL_EXPORTS @@ -106,6 +107,7 @@ static const strategy_to_select_t strategies_to_select[] = { STRATEGIES_INTRA_EXPORTS STRATEGIES_SAO_EXPORTS STRATEGIES_ENCODE_EXPORTS + STRATEGIES_ALF_EXPORTS { NULL, NULL }, };