diff --git a/build/kvazaar_lib/kvazaar_lib.vcxproj b/build/kvazaar_lib/kvazaar_lib.vcxproj index 95a011d5..a4af9567 100644 --- a/build/kvazaar_lib/kvazaar_lib.vcxproj +++ b/build/kvazaar_lib/kvazaar_lib.vcxproj @@ -153,6 +153,12 @@ + + AdvancedVectorExtensions2 + AdvancedVectorExtensions2 + AdvancedVectorExtensions2 + AdvancedVectorExtensions2 + AdvancedVectorExtensions2 AdvancedVectorExtensions2 @@ -211,6 +217,7 @@ + diff --git a/build/kvazaar_lib/kvazaar_lib.vcxproj.filters b/build/kvazaar_lib/kvazaar_lib.vcxproj.filters index 013a89b1..c874bea8 100644 --- a/build/kvazaar_lib/kvazaar_lib.vcxproj.filters +++ b/build/kvazaar_lib/kvazaar_lib.vcxproj.filters @@ -222,6 +222,9 @@ Source Files\strategies\generic + + Source Files\strategies\avx2 + @@ -407,6 +410,9 @@ Header Files\strategies\generic + + Header Files\strategies\avx2 + diff --git a/src/Makefile b/src/Makefile index 93aba11b..ea20ba0b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -221,7 +221,8 @@ OBJS = \ strategies/avx2/ipol-avx2.o \ strategies/generic/quant-generic.o \ strategies/avx2/quant-avx2.o \ - strategies/generic/intra-generic.o + strategies/generic/intra-generic.o \ + strategies/avx2/intra-avx2.o ifndef KVZ_DISABLE_ASM # Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause diff --git a/src/strategies/avx2/intra-avx2.c b/src/strategies/avx2/intra-avx2.c new file mode 100644 index 00000000..258d0364 --- /dev/null +++ b/src/strategies/avx2/intra-avx2.c @@ -0,0 +1,158 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2013-2015 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the + * Free Software Foundation; either version 2.1 of the License, or (at your + * option) any later version. 
+ *
+ * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+
+/*
+ * \file
+ */
+
+#include // NOTE(review): header name is missing here (looks stripped); abs() used below needs <stdlib.h> -- confirm.
+
+#include "intra-avx2.h"
+#include "strategyselector.h"
+
+#if COMPILE_INTEL_AVX2
+#include // NOTE(review): header name is missing here (looks stripped); presumably the intrinsics header <immintrin.h> -- confirm.
+
+ /**
+ * \brief Generate angular predictions. Plain scalar C: no AVX2 intrinsics are used in this function.
+ * \param log2_width    Log2 of width, range 2..5 (asserted).
+ * \param intra_mode    Angular mode in range 2..34 (asserted); modes >= 18 are treated as vertical.
+ * \param in_ref_above  Pointer to -1 index of above reference, length=width*2+1.
+ * \param in_ref_left   Pointer to -1 index of left reference, length=width*2+1.
+ * \param dst           Buffer of size width*width, written as dst[y * width + x].
+ */
+static void kvz_angular_pred_avx2(
+  const int_fast8_t log2_width,
+  const int_fast8_t intra_mode,
+  const kvz_pixel *const in_ref_above,
+  const kvz_pixel *const in_ref_left,
+  kvz_pixel *const dst)
+{
+  assert(log2_width >= 2 && log2_width <= 5);
+  assert(intra_mode >= 2 && intra_mode <= 34);
+
+  static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
+  static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp
+
+  // Temporary buffer for modes 11-25 (the modes for which sample_disp is negative).
+  // It only needs to be big enough to hold indices from -width to width-1.
+  kvz_pixel tmp_ref[2 * 32];
+  const int_fast8_t width = 1 << log2_width;
+
+  // Whether to swap references to always project on the left reference row.
+  const bool vertical_mode = intra_mode >= 18;
+  // Mode's distance to the horizontal (10) or vertical (26) mode.
+  const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
+  // Sample displacement per column in fractions of 32.
+  const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
+
+  // Pointer for the reference we are interpolating from.
+  const kvz_pixel *ref_main;
+  // Pointer for the other reference.
+  const kvz_pixel *ref_side;
+
+  // Set ref_main and ref_side such that, when indexed with 0, they point to
+  // index 0 in block coordinates.
+  if (sample_disp < 0) {
+    // Negative sample_disp means we need to use both references.
+
+    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
+    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
+
+    // Copy main reference indices -1..width-1 into the later half of
+    // the tmp_ref, so there is room for negative indices.
+    for (int_fast8_t x = -1; x < width; ++x) {
+      tmp_ref[x + width] = ref_main[x];
+    }
+    // Get a pointer to block index 0 in tmp_ref.
+    ref_main = &tmp_ref[width];
+
+    // Extend the side reference to the negative indices of main reference.
+    int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
+    int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
+    int_fast8_t most_negative_index = (width * sample_disp) >> 5;
+    for (int_fast8_t x = -2; x >= most_negative_index; --x) {
+      col_sample_disp += inv_abs_sample_disp;
+      int_fast8_t side_index = col_sample_disp >> 8;
+      tmp_ref[x + width] = ref_side[side_index - 1];
+    }
+  }
+  else {
+    // sample_disp >= 0 means we don't need to refer to negative indices,
+    // which means we can just use the references as is.
+    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
+    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
+  }
+
+  if (sample_disp != 0) {
+    // The mode is not horizontal or vertical, we have to do interpolation.
+
+    int_fast16_t delta_pos = 0;
+    for (int_fast8_t y = 0; y < width; ++y) {
+      delta_pos += sample_disp;
+      int_fast8_t delta_int = delta_pos >> 5; // whole-sample part of the offset
+      int_fast8_t delta_fract = delta_pos & (32 - 1); // fractional part, in 1/32 units
+
+      if (delta_fract) {
+        // Do linear filtering between two neighbouring reference samples.
+        for (int_fast8_t x = 0; x < width; ++x) {
+          kvz_pixel ref1 = ref_main[x + delta_int];
+          kvz_pixel ref2 = ref_main[x + delta_int + 1];
+          dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5; // weights sum to 32; +16 rounds
+        }
+      }
+      else {
+        // Just copy the integer samples
+        for (int_fast8_t x = 0; x < width; x++) {
+          dst[y * width + x] = ref_main[x + delta_int];
+        }
+      }
+    }
+  }
+  else {
+    // Mode is horizontal or vertical, just copy the pixels.
+
+    for (int_fast8_t y = 0; y < width; ++y) {
+      for (int_fast8_t x = 0; x < width; ++x) {
+        dst[y * width + x] = ref_main[x];
+      }
+    }
+  }
+
+  // Flip (transpose in place) the block if this was a horizontal mode.
+  if (!vertical_mode) {
+    for (int_fast8_t y = 0; y < width - 1; ++y) {
+      for (int_fast8_t x = y + 1; x < width; ++x) {
+        SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel);
+      }
+    }
+  }
+}
+
+#endif //COMPILE_INTEL_AVX2
+
+int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth) // bitdepth is not read in this function
+{
+  bool success = true;
+#if COMPILE_INTEL_AVX2
+  success &= kvz_strategyselector_register(opaque, "angular_pred", "avx2", 0, &kvz_angular_pred_avx2); // NOTE(review): the 0 is presumably the strategy priority -- confirm
+#endif //COMPILE_INTEL_AVX2
+  return success; // still true when COMPILE_INTEL_AVX2 is off (nothing is registered)
+}
diff --git a/src/strategies/avx2/intra-avx2.h b/src/strategies/avx2/intra-avx2.h
new file mode 100644
index 00000000..26b1aec6
--- /dev/null
+++ b/src/strategies/avx2/intra-avx2.h
@@ -0,0 +1,27 @@
+#ifndef STRATEGIES_INTRA_AVX2_H_
+#define STRATEGIES_INTRA_AVX2_H_
+/*****************************************************************************
+ * This file is part of Kvazaar HEVC encoder.
+ *
+ * Copyright (C) 2013-2015 Tampere University of Technology and others (see
+ * COPYING file).
+ *
+ * Kvazaar is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the
+ * Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
+ ****************************************************************************/
+#include // NOTE(review): header name is missing here (looks stripped); presumably <stdint.h> for uint8_t below -- confirm.
+#include "encoderstate.h"
+
+int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth); // Registers the "avx2" intra strategies via the strategy selector; returns the success flag.
+
+#endif //STRATEGIES_INTRA_AVX2_H_