Add AVX2 strategy. Copy generic implementation there.

This commit is contained in:
Ari Lemmetti 2015-10-08 10:02:29 +03:00
parent 54e8b346a3
commit f4fe3dca5e
5 changed files with 200 additions and 1 deletions

View file

@ -153,6 +153,12 @@
<ClCompile Include="..\..\src\search.c" /> <ClCompile Include="..\..\src\search.c" />
<ClCompile Include="..\..\src\search_inter.c" /> <ClCompile Include="..\..\src\search_inter.c" />
<ClCompile Include="..\..\src\search_intra.c" /> <ClCompile Include="..\..\src\search_intra.c" />
<ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
</ClCompile>
<ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c"> <ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet> <EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
@ -211,6 +217,7 @@
<ClInclude Include="..\..\src\kvazaar_internal.h" /> <ClInclude Include="..\..\src\kvazaar_internal.h" />
<ClInclude Include="..\..\src\search_inter.h" /> <ClInclude Include="..\..\src\search_inter.h" />
<ClInclude Include="..\..\src\search_intra.h" /> <ClInclude Include="..\..\src\search_intra.h" />
<ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" />
<ClInclude Include="..\..\src\strategies\generic\intra-generic.h" /> <ClInclude Include="..\..\src\strategies\generic\intra-generic.h" />
<ClInclude Include="..\..\src\strategies\strategies-common.h" /> <ClInclude Include="..\..\src\strategies\strategies-common.h" />
<ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" /> <ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" />

View file

@ -222,6 +222,9 @@
<ClCompile Include="..\..\src\strategies\generic\intra-generic.c"> <ClCompile Include="..\..\src\strategies\generic\intra-generic.c">
<Filter>Source Files\strategies\generic</Filter> <Filter>Source Files\strategies\generic</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
<Filter>Source Files\strategies\avx2</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\..\src\global.h"> <ClInclude Include="..\..\src\global.h">
@ -407,6 +410,9 @@
<ClInclude Include="..\..\src\strategies\generic\intra-generic.h"> <ClInclude Include="..\..\src\strategies\generic\intra-generic.h">
<Filter>Header Files\strategies\generic</Filter> <Filter>Header Files\strategies\generic</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h">
<Filter>Header Files\strategies\avx2</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<YASM Include="..\..\src\extras\x86inc.asm"> <YASM Include="..\..\src\extras\x86inc.asm">

View file

@ -221,7 +221,8 @@ OBJS = \
strategies/avx2/ipol-avx2.o \ strategies/avx2/ipol-avx2.o \
strategies/generic/quant-generic.o \ strategies/generic/quant-generic.o \
strategies/avx2/quant-avx2.o \ strategies/avx2/quant-avx2.o \
strategies/generic/intra-generic.o strategies/generic/intra-generic.o \
strategies/avx2/intra-avx2.o
ifndef KVZ_DISABLE_ASM ifndef KVZ_DISABLE_ASM
# Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause # Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause

View file

@ -0,0 +1,158 @@
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
/**
 * \file
 * AVX2 strategy for intra prediction (angular prediction).
 */
#include <stdlib.h>
#include "intra-avx2.h"
#include "strategyselector.h"
#if COMPILE_INTEL_AVX2
#include <immintrin.h>
/**
 * \brief Generate angular predictions.
 *
 * This version is written in scalar C only; it contains no AVX2 intrinsics.
 *
 * \param log2_width    Log2 of width, range 2..5.
 * \param intra_mode    Angular mode in range 2..34.
 * \param in_ref_above  Pointer to -1 index of above reference, length=width*2+1.
 * \param in_ref_left   Pointer to -1 index of left reference, length=width*2+1.
 * \param dst           Buffer of size width*width.
 */
static void kvz_angular_pred_avx2(
  const int_fast8_t log2_width,
  const int_fast8_t intra_mode,
  const kvz_pixel *const in_ref_above,
  const kvz_pixel *const in_ref_left,
  kvz_pixel *const dst)
{
  assert(log2_width >= 2 && log2_width <= 5);
  assert(intra_mode >= 2 && intra_mode <= 34);

  // Sample displacement per line (in 1/32 samples), indexed by the distance
  // of the mode from the pure horizontal / vertical mode.
  static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
  // (256 * 32) / sampledisp
  static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 };

  // Scratch reference used by modes 11-25. It has to hold indices from
  // -width to width-1 relative to its middle element.
  kvz_pixel tmp_ref[2 * 32];

  const int_fast8_t width = 1 << log2_width;

  // Modes >= 18 predict from the above row. Horizontal modes are predicted
  // the same way from the left column and transposed at the end.
  const bool vertical_mode = intra_mode >= 18;

  // Signed distance of the mode from the pure vertical (26) or horizontal (10) mode.
  int_fast8_t mode_disp;
  if (vertical_mode) {
    mode_disp = intra_mode - 26;
  } else {
    mode_disp = 10 - intra_mode;
  }
  const int abs_mode_disp = abs(mode_disp);

  // Displacement per line in fractions of 32, carrying the sign of mode_disp.
  int_fast8_t sample_disp = modedisp2sampledisp[abs_mode_disp];
  if (mode_disp < 0) {
    sample_disp = -sample_disp;
  }

  // ref_main is the reference we interpolate from, ref_side is the other one.
  // Both are offset by one so that index 0 matches block coordinate 0.
  const kvz_pixel *ref_main;
  const kvz_pixel *ref_side;
  if (vertical_mode) {
    ref_main = in_ref_above + 1;
    ref_side = in_ref_left + 1;
  } else {
    ref_main = in_ref_left + 1;
    ref_side = in_ref_above + 1;
  }

  if (sample_disp < 0) {
    // A negative displacement reaches to negative indices of the main
    // reference, so build an extended copy in tmp_ref. Block index 0 is
    // placed in the middle of the buffer to leave room below it.
    kvz_pixel *const origin = &tmp_ref[width];

    for (int_fast8_t i = -1; i < width; ++i) {
      origin[i] = ref_main[i];
    }

    // Project the side reference onto indices -2 .. most_negative_index.
    const int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs_mode_disp];
    const int_fast8_t most_negative_index = (width * sample_disp) >> 5;
    int_fast32_t projected = 128; // rounding for the ">> 8"
    int_fast8_t i = -2;
    while (i >= most_negative_index) {
      projected += inv_abs_sample_disp;
      const int_fast8_t side_index = projected >> 8;
      origin[i] = ref_side[side_index - 1];
      --i;
    }

    ref_main = origin;
  }

  // Predict line by line. When sample_disp is zero (pure horizontal or
  // vertical mode) both the integer offset and the fraction stay zero, so
  // every line is a plain copy of the main reference.
  int_fast16_t delta_pos = 0;
  for (int_fast8_t line = 0; line < width; ++line) {
    delta_pos += sample_disp;
    const int_fast8_t delta_int = delta_pos >> 5;
    const int_fast8_t delta_fract = delta_pos & (32 - 1);

    const kvz_pixel *const src = ref_main + delta_int;
    kvz_pixel *const out = dst + line * width;

    if (delta_fract == 0) {
      // Displacement hits an integer sample position: copy.
      for (int_fast8_t i = 0; i < width; ++i) {
        out[i] = src[i];
      }
    } else {
      // Linear interpolation between the two neighbouring samples.
      for (int_fast8_t i = 0; i < width; ++i) {
        const kvz_pixel ref1 = src[i];
        const kvz_pixel ref2 = src[i + 1];
        out[i] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5;
      }
    }
  }

  if (vertical_mode) {
    return;
  }

  // Horizontal modes were predicted as if vertical: transpose the block in
  // place by swapping every element above the diagonal with its mirror.
  for (int_fast8_t row = 0; row < width - 1; ++row) {
    for (int_fast8_t col = row + 1; col < width; ++col) {
      SWAP(dst[row * width + col], dst[col * width + row], kvz_pixel);
    }
  }
}
#endif //COMPILE_INTEL_AVX2
/**
 * \brief Register the AVX2 intra strategies with the strategy selector.
 *
 * When COMPILE_INTEL_AVX2 is enabled, registers kvz_angular_pred_avx2 as the
 * "avx2" implementation of "angular_pred" with priority 0. Otherwise nothing
 * is registered and the call succeeds trivially.
 *
 * \param opaque    Passed through unchanged to kvz_strategyselector_register.
 * \param bitdepth  Not used by this function.
 * \return  Non-zero on success, zero if registration failed.
 */
int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth)
{
#if COMPILE_INTEL_AVX2
  bool all_registered = true;
  all_registered &= kvz_strategyselector_register(opaque, "angular_pred", "avx2", 0, &kvz_angular_pred_avx2);
  return all_registered;
#else
  // Nothing to register without AVX2 support compiled in.
  return true;
#endif //COMPILE_INTEL_AVX2
}

View file

@ -0,0 +1,27 @@
#ifndef STRATEGIES_INTRA_AVX2_H_
#define STRATEGIES_INTRA_AVX2_H_
/*****************************************************************************
* This file is part of Kvazaar HEVC encoder.
*
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
* COPYING file).
*
* Kvazaar is free software: you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the
* Free Software Foundation; either version 2.1 of the License, or (at your
* option) any later version.
*
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
****************************************************************************/
#include <stdint.h>
#include "encoderstate.h"
/**
 * \brief Register the AVX2 intra strategies.
 * \param opaque    Handle forwarded to the strategy selector.
 * \param bitdepth  Bit depth argument; NOTE(review): appears unused by the
 *                  current implementation, confirm before relying on it.
 * \return  Non-zero on success, zero on failure.
 */
int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth);
#endif //STRATEGIES_INTRA_AVX2_H_