mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 10:34:05 +00:00
Merge branch 'angular_pred_avx2'
This commit is contained in:
commit
14eae9ee86
|
@ -153,13 +153,21 @@
|
||||||
<ClCompile Include="..\..\src\search.c" />
|
<ClCompile Include="..\..\src\search.c" />
|
||||||
<ClCompile Include="..\..\src\search_inter.c" />
|
<ClCompile Include="..\..\src\search_inter.c" />
|
||||||
<ClCompile Include="..\..\src\search_intra.c" />
|
<ClCompile Include="..\..\src\search_intra.c" />
|
||||||
|
<ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
|
||||||
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
|
<ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
|
||||||
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|x64'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
<EnableEnhancedInstructionSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\..\src\strategies\generic\intra-generic.c" />
|
||||||
<ClCompile Include="..\..\src\strategies\generic\quant-generic.c" />
|
<ClCompile Include="..\..\src\strategies\generic\quant-generic.c" />
|
||||||
|
<ClCompile Include="..\..\src\strategies\strategies-intra.c" />
|
||||||
<ClCompile Include="..\..\src\strategies\strategies-quant.c" />
|
<ClCompile Include="..\..\src\strategies\strategies-quant.c" />
|
||||||
<ClCompile Include="..\..\src\yuv_io.c" />
|
<ClCompile Include="..\..\src\yuv_io.c" />
|
||||||
<ClInclude Include="..\..\src\checkpoint.h" />
|
<ClInclude Include="..\..\src\checkpoint.h" />
|
||||||
|
@ -209,9 +217,12 @@
|
||||||
<ClInclude Include="..\..\src\kvazaar_internal.h" />
|
<ClInclude Include="..\..\src\kvazaar_internal.h" />
|
||||||
<ClInclude Include="..\..\src\search_inter.h" />
|
<ClInclude Include="..\..\src\search_inter.h" />
|
||||||
<ClInclude Include="..\..\src\search_intra.h" />
|
<ClInclude Include="..\..\src\search_intra.h" />
|
||||||
|
<ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h" />
|
||||||
|
<ClInclude Include="..\..\src\strategies\generic\intra-generic.h" />
|
||||||
<ClInclude Include="..\..\src\strategies\strategies-common.h" />
|
<ClInclude Include="..\..\src\strategies\strategies-common.h" />
|
||||||
<ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" />
|
<ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h" />
|
||||||
<ClInclude Include="..\..\src\strategies\generic\quant-generic.h" />
|
<ClInclude Include="..\..\src\strategies\generic\quant-generic.h" />
|
||||||
|
<ClInclude Include="..\..\src\strategies\strategies-intra.h" />
|
||||||
<ClInclude Include="..\..\src\strategies\strategies-quant.h" />
|
<ClInclude Include="..\..\src\strategies\strategies-quant.h" />
|
||||||
<ClInclude Include="..\..\src\yuv_io.h" />
|
<ClInclude Include="..\..\src\yuv_io.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
|
@ -216,6 +216,15 @@
|
||||||
<ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
|
<ClCompile Include="..\..\src\strategies\avx2\quant-avx2.c">
|
||||||
<Filter>Source Files\strategies\avx2</Filter>
|
<Filter>Source Files\strategies\avx2</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\..\src\strategies\strategies-intra.c">
|
||||||
|
<Filter>Source Files\strategies</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\..\src\strategies\generic\intra-generic.c">
|
||||||
|
<Filter>Source Files\strategies\generic</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\..\src\strategies\avx2\intra-avx2.c">
|
||||||
|
<Filter>Source Files\strategies\avx2</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="..\..\src\global.h">
|
<ClInclude Include="..\..\src\global.h">
|
||||||
|
@ -395,6 +404,15 @@
|
||||||
<ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h">
|
<ClInclude Include="..\..\src\strategies\avx2\quant-avx2.h">
|
||||||
<Filter>Header Files\strategies\avx2</Filter>
|
<Filter>Header Files\strategies\avx2</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="..\..\src\strategies\strategies-intra.h">
|
||||||
|
<Filter>Header Files\strategies</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="..\..\src\strategies\generic\intra-generic.h">
|
||||||
|
<Filter>Header Files\strategies\generic</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="..\..\src\strategies\avx2\intra-avx2.h">
|
||||||
|
<Filter>Header Files\strategies\avx2</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<YASM Include="..\..\src\extras\x86inc.asm">
|
<YASM Include="..\..\src\extras\x86inc.asm">
|
||||||
|
|
|
@ -207,6 +207,7 @@ OBJS = \
|
||||||
strategies/strategies-dct.o \
|
strategies/strategies-dct.o \
|
||||||
strategies/strategies-ipol.o \
|
strategies/strategies-ipol.o \
|
||||||
strategies/strategies-quant.o \
|
strategies/strategies-quant.o \
|
||||||
|
strategies/strategies-intra.o \
|
||||||
strategies/generic/nal-generic.o \
|
strategies/generic/nal-generic.o \
|
||||||
strategies/generic/picture-generic.o \
|
strategies/generic/picture-generic.o \
|
||||||
strategies/sse2/picture-sse2.o \
|
strategies/sse2/picture-sse2.o \
|
||||||
|
@ -220,6 +221,8 @@ OBJS = \
|
||||||
strategies/avx2/ipol-avx2.o \
|
strategies/avx2/ipol-avx2.o \
|
||||||
strategies/generic/quant-generic.o \
|
strategies/generic/quant-generic.o \
|
||||||
strategies/avx2/quant-avx2.o \
|
strategies/avx2/quant-avx2.o \
|
||||||
|
strategies/generic/intra-generic.o \
|
||||||
|
strategies/avx2/intra-avx2.o
|
||||||
|
|
||||||
ifndef KVZ_DISABLE_ASM
|
ifndef KVZ_DISABLE_ASM
|
||||||
# Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause
|
# Compile C files in x86_asm folder with KVZ_COMPILE_ASM, which will cause
|
||||||
|
|
117
src/intra.c
117
src/intra.c
|
@ -31,6 +31,7 @@
|
||||||
|
|
||||||
#include "encoder.h"
|
#include "encoder.h"
|
||||||
#include "transform.h"
|
#include "transform.h"
|
||||||
|
#include "strategies/strategies-intra.h"
|
||||||
|
|
||||||
|
|
||||||
int8_t kvz_intra_get_dir_luma_predictor(
|
int8_t kvz_intra_get_dir_luma_predictor(
|
||||||
|
@ -140,120 +141,6 @@ static void intra_post_process_angular(
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Generage angular predictions.
|
|
||||||
* \param log2_width Log2 of width, range 2..5.
|
|
||||||
* \param intra_mode Angular mode in range 2..34.
|
|
||||||
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
|
||||||
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
|
||||||
* \param dst Buffer of size width*width.
|
|
||||||
*/
|
|
||||||
static void intra_pred_angular(
|
|
||||||
const int_fast8_t log2_width,
|
|
||||||
const int_fast8_t intra_mode,
|
|
||||||
const kvz_pixel *const in_ref_above,
|
|
||||||
const kvz_pixel *const in_ref_left,
|
|
||||||
kvz_pixel *const dst)
|
|
||||||
{
|
|
||||||
assert(log2_width >= 2 && log2_width <= 5);
|
|
||||||
assert(intra_mode >= 2 && intra_mode <= 34);
|
|
||||||
|
|
||||||
static const int8_t modedisp2sampledisp[9] = {0, 2, 5, 9, 13, 17, 21, 26, 32};
|
|
||||||
static const int16_t modedisp2invsampledisp[9] = {0, 4096, 1638, 910, 630, 482, 390, 315, 256}; // (256 * 32) / sampledisp
|
|
||||||
|
|
||||||
// Temporary buffer for modes 11-25.
|
|
||||||
// It only needs to be big enough to hold indices from -width to width-1.
|
|
||||||
kvz_pixel tmp_ref[2 * 32];
|
|
||||||
const int_fast8_t width = 1 << log2_width;
|
|
||||||
|
|
||||||
// Whether to swap references to always project on the left reference row.
|
|
||||||
const bool vertical_mode = intra_mode >= 18;
|
|
||||||
// Modes distance to horizontal or vertical mode.
|
|
||||||
const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
|
|
||||||
// Sample displacement per column in fractions of 32.
|
|
||||||
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
|
|
||||||
|
|
||||||
// Pointer for the reference we are interpolating from.
|
|
||||||
const kvz_pixel *ref_main;
|
|
||||||
// Pointer for the other reference.
|
|
||||||
const kvz_pixel *ref_side;
|
|
||||||
|
|
||||||
// Set ref_main and ref_side such that, when indexed with 0, they point to
|
|
||||||
// index 0 in block coordinates.
|
|
||||||
if (sample_disp < 0) {
|
|
||||||
// Negative sample_disp means, we need to use both references.
|
|
||||||
|
|
||||||
ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
|
||||||
ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
|
||||||
|
|
||||||
// Move the reference pixels to start from the middle to the later half of
|
|
||||||
// the tmp_ref, so there is room for negative indices.
|
|
||||||
for (int_fast8_t x = -1; x < width; ++x) {
|
|
||||||
tmp_ref[x + width] = ref_main[x];
|
|
||||||
}
|
|
||||||
// Get a pointer to block index 0 in tmp_ref.
|
|
||||||
ref_main = &tmp_ref[width];
|
|
||||||
|
|
||||||
// Extend the side reference to the negative indices of main reference.
|
|
||||||
int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
|
|
||||||
int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
|
|
||||||
int_fast8_t most_negative_index = (width * sample_disp) >> 5;
|
|
||||||
for (int_fast8_t x = -2; x >= most_negative_index; --x) {
|
|
||||||
col_sample_disp += inv_abs_sample_disp;
|
|
||||||
int_fast8_t side_index = col_sample_disp >> 8;
|
|
||||||
tmp_ref[x + width] = ref_side[side_index - 1];
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// sample_disp >= 0 means we don't need to refer to negative indices,
|
|
||||||
// which means we can just use the references as is.
|
|
||||||
ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
|
||||||
ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_disp != 0) {
|
|
||||||
// The mode is not horizontal or vertical, we have to do interpolation.
|
|
||||||
|
|
||||||
int_fast16_t delta_pos = 0;
|
|
||||||
for (int_fast8_t y = 0; y < width; ++y) {
|
|
||||||
delta_pos += sample_disp;
|
|
||||||
int_fast8_t delta_int = delta_pos >> 5;
|
|
||||||
int_fast8_t delta_fract = delta_pos & (32 - 1);
|
|
||||||
|
|
||||||
if (delta_fract) {
|
|
||||||
// Do linear filtering
|
|
||||||
for (int_fast8_t x = 0; x < width; ++x) {
|
|
||||||
kvz_pixel ref1 = ref_main[x + delta_int];
|
|
||||||
kvz_pixel ref2 = ref_main[x + delta_int + 1];
|
|
||||||
dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Just copy the integer samples
|
|
||||||
for (int_fast8_t x = 0; x < width; x++) {
|
|
||||||
dst[y * width + x] = ref_main[x + delta_int];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Mode is horizontal or vertical, just copy the pixels.
|
|
||||||
|
|
||||||
for (int_fast8_t y = 0; y < width; ++y) {
|
|
||||||
for (int_fast8_t x = 0; x < width; ++x) {
|
|
||||||
dst[y * width + x] = ref_main[x];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flip the block if this is was a horizontal mode.
|
|
||||||
if (!vertical_mode) {
|
|
||||||
for (int_fast8_t y = 0; y < width - 1; ++y) {
|
|
||||||
for (int_fast8_t x = y + 1; x < width; ++x) {
|
|
||||||
SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Generate planar prediction.
|
* \brief Generate planar prediction.
|
||||||
* \param log2_width Log2 of width, range 2..5.
|
* \param log2_width Log2 of width, range 2..5.
|
||||||
|
@ -411,7 +298,7 @@ void kvz_intra_predict(
|
||||||
intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst);
|
intra_pred_dc(log2_width, used_ref->top, used_ref->left, dst);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
intra_pred_angular(log2_width, mode, used_ref->top, used_ref->left, dst);
|
kvz_angular_pred(log2_width, mode, used_ref->top, used_ref->left, dst);
|
||||||
if (color == COLOR_Y && width < 32) {
|
if (color == COLOR_Y && width < 32) {
|
||||||
if (mode == 10) {
|
if (mode == 10) {
|
||||||
intra_post_process_angular(width, 1, used_ref->top, dst);
|
intra_post_process_angular(width, 1, used_ref->top, dst);
|
||||||
|
|
176
src/strategies/avx2/intra-avx2.c
Normal file
176
src/strategies/avx2/intra-avx2.c
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
||||||
|
* the terms of the GNU Lesser General Public License as published by the
|
||||||
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \file
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
#include <string.h>
|
||||||
|
|
||||||
|
#include "intra-avx2.h"
|
||||||
|
#include "strategyselector.h"
|
||||||
|
|
||||||
|
#if COMPILE_INTEL_AVX2
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Generage angular predictions.
|
||||||
|
* \param log2_width Log2 of width, range 2..5.
|
||||||
|
* \param intra_mode Angular mode in range 2..34.
|
||||||
|
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||||
|
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||||
|
* \param dst Buffer of size width*width.
|
||||||
|
*/
|
||||||
|
static void kvz_angular_pred_avx2(
|
||||||
|
const int_fast8_t log2_width,
|
||||||
|
const int_fast8_t intra_mode,
|
||||||
|
const kvz_pixel *const in_ref_above,
|
||||||
|
const kvz_pixel *const in_ref_left,
|
||||||
|
kvz_pixel *const dst)
|
||||||
|
{
|
||||||
|
assert(log2_width >= 2 && log2_width <= 5);
|
||||||
|
assert(intra_mode >= 2 && intra_mode <= 34);
|
||||||
|
|
||||||
|
static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
|
||||||
|
static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp
|
||||||
|
|
||||||
|
// Temporary buffer for modes 11-25.
|
||||||
|
// It only needs to be big enough to hold indices from -width to width-1.
|
||||||
|
kvz_pixel tmp_ref[2 * 32];
|
||||||
|
const int_fast8_t width = 1 << log2_width;
|
||||||
|
|
||||||
|
// Whether to swap references to always project on the left reference row.
|
||||||
|
const bool vertical_mode = intra_mode >= 18;
|
||||||
|
// Modes distance to horizontal or vertical mode.
|
||||||
|
const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
|
||||||
|
// Sample displacement per column in fractions of 32.
|
||||||
|
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
|
||||||
|
|
||||||
|
// Pointer for the reference we are interpolating from.
|
||||||
|
const kvz_pixel *ref_main;
|
||||||
|
// Pointer for the other reference.
|
||||||
|
const kvz_pixel *ref_side;
|
||||||
|
|
||||||
|
// Set ref_main and ref_side such that, when indexed with 0, they point to
|
||||||
|
// index 0 in block coordinates.
|
||||||
|
if (sample_disp < 0) {
|
||||||
|
// Negative sample_disp means, we need to use both references.
|
||||||
|
|
||||||
|
ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
||||||
|
ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
||||||
|
|
||||||
|
// Move the reference pixels to start from the middle to the later half of
|
||||||
|
// the tmp_ref, so there is room for negative indices.
|
||||||
|
for (int_fast8_t x = -1; x < width; ++x) {
|
||||||
|
tmp_ref[x + width] = ref_main[x];
|
||||||
|
}
|
||||||
|
// Get a pointer to block index 0 in tmp_ref.
|
||||||
|
ref_main = &tmp_ref[width];
|
||||||
|
|
||||||
|
// Extend the side reference to the negative indices of main reference.
|
||||||
|
int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
|
||||||
|
int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
|
||||||
|
int_fast8_t most_negative_index = (width * sample_disp) >> 5;
|
||||||
|
for (int_fast8_t x = -2; x >= most_negative_index; --x) {
|
||||||
|
col_sample_disp += inv_abs_sample_disp;
|
||||||
|
int_fast8_t side_index = col_sample_disp >> 8;
|
||||||
|
tmp_ref[x + width] = ref_side[side_index - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// sample_disp >= 0 means we don't need to refer to negative indices,
|
||||||
|
// which means we can just use the references as is.
|
||||||
|
ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
||||||
|
ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sample_disp != 0) {
|
||||||
|
// The mode is not horizontal or vertical, we have to do interpolation.
|
||||||
|
|
||||||
|
int_fast16_t delta_pos = 0;
|
||||||
|
for (int_fast8_t y = 0; y < width; ++y) {
|
||||||
|
delta_pos += sample_disp;
|
||||||
|
int_fast8_t delta_int = delta_pos >> 5;
|
||||||
|
int_fast8_t delta_fract = delta_pos & (32 - 1);
|
||||||
|
|
||||||
|
if (delta_fract) {
|
||||||
|
// Do linear filtering
|
||||||
|
if (width < 8) {
|
||||||
|
for (int_fast8_t x = 0; x < width; ++x) {
|
||||||
|
kvz_pixel ref1 = ref_main[x + delta_int];
|
||||||
|
kvz_pixel ref2 = ref_main[x + delta_int + 1];
|
||||||
|
dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
struct { uint8_t w1; uint8_t w2; } packed_weights = { 32 - delta_fract, delta_fract };
|
||||||
|
__m128i v_weights = _mm_set1_epi16(*(int16_t*)&packed_weights);
|
||||||
|
|
||||||
|
for (int_fast8_t x = 0; x < width; x += 8) {
|
||||||
|
__m128i v_ref1 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int]));
|
||||||
|
__m128i v_ref2 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int + 1]));
|
||||||
|
__m128i v_refs = _mm_unpacklo_epi8(v_ref1, v_ref2);
|
||||||
|
__m128i v_tmp = _mm_maddubs_epi16(v_refs, v_weights);
|
||||||
|
v_tmp = _mm_add_epi16(v_tmp, _mm_set1_epi16(16));
|
||||||
|
v_tmp = _mm_srli_epi16(v_tmp, 5);
|
||||||
|
v_tmp = _mm_packus_epi16(v_tmp, v_tmp);
|
||||||
|
_mm_storel_epi64((__m128i*)(dst + y * width + x), v_tmp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Just copy the integer samples
|
||||||
|
for (int_fast8_t x = 0; x < width; x+=4) {
|
||||||
|
*(int32_t*)(&dst[y * width + x]) = *(int32_t*)(&ref_main[x + delta_int]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Mode is horizontal or vertical, just copy the pixels.
|
||||||
|
|
||||||
|
for (int_fast8_t y = 0; y < width; ++y) {
|
||||||
|
for (int_fast8_t x = 0; x < width; x+=4) {
|
||||||
|
*(int32_t*)&(dst[y * width + x]) = *(int32_t*)&(ref_main[x]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flip the block if this is was a horizontal mode.
|
||||||
|
if (!vertical_mode) {
|
||||||
|
for (int_fast8_t y = 0; y < width - 1; ++y) {
|
||||||
|
for (int_fast8_t x = y + 1; x < width; ++x) {
|
||||||
|
SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif //COMPILE_INTEL_AVX2
|
||||||
|
|
||||||
|
/**
 * \brief Register the AVX2 intra strategies with the strategy selector.
 *
 * Nothing is registered when the file is compiled without
 * COMPILE_INTEL_AVX2 or when bitdepth is not 8.
 *
 * \param opaque    Passed through to kvz_strategyselector_register.
 * \param bitdepth  Bit depth; the AVX2 function is registered only for 8.
 * \return Non-zero if nothing had to be registered or registration succeeded.
 */
int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth)
{
#if COMPILE_INTEL_AVX2
  bool registered = true;

  if (bitdepth == 8) {
    // 40 is presumably the selection priority (the generic version registers
    // with 0) -- confirm against strategyselector.
    registered &= kvz_strategyselector_register(
      opaque, "angular_pred", "avx2", 40, &kvz_angular_pred_avx2);
  }

  return registered;
#else
  // No AVX2 code compiled in: nothing to register, which is not an error.
  return true;
#endif //COMPILE_INTEL_AVX2
}
|
27
src/strategies/avx2/intra-avx2.h
Normal file
27
src/strategies/avx2/intra-avx2.h
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#ifndef STRATEGIES_INTRA_AVX2_H_
|
||||||
|
#define STRATEGIES_INTRA_AVX2_H_
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
||||||
|
* the terms of the GNU Lesser General Public License as published by the
|
||||||
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "encoderstate.h"
|
||||||
|
|
||||||
|
int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth);
|
||||||
|
|
||||||
|
#endif //STRATEGIES_INTRA_AVX2_H_
|
154
src/strategies/generic/intra-generic.c
Normal file
154
src/strategies/generic/intra-generic.c
Normal file
|
@ -0,0 +1,154 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
||||||
|
* the terms of the GNU Lesser General Public License as published by the
|
||||||
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* \file
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "intra-generic.h"
|
||||||
|
#include "strategyselector.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Generage angular predictions.
|
||||||
|
* \param log2_width Log2 of width, range 2..5.
|
||||||
|
* \param intra_mode Angular mode in range 2..34.
|
||||||
|
* \param in_ref_above Pointer to -1 index of above reference, length=width*2+1.
|
||||||
|
* \param in_ref_left Pointer to -1 index of left reference, length=width*2+1.
|
||||||
|
* \param dst Buffer of size width*width.
|
||||||
|
*/
|
||||||
|
static void kvz_angular_pred_generic(
|
||||||
|
const int_fast8_t log2_width,
|
||||||
|
const int_fast8_t intra_mode,
|
||||||
|
const kvz_pixel *const in_ref_above,
|
||||||
|
const kvz_pixel *const in_ref_left,
|
||||||
|
kvz_pixel *const dst)
|
||||||
|
{
|
||||||
|
assert(log2_width >= 2 && log2_width <= 5);
|
||||||
|
assert(intra_mode >= 2 && intra_mode <= 34);
|
||||||
|
|
||||||
|
static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
|
||||||
|
static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp
|
||||||
|
|
||||||
|
// Temporary buffer for modes 11-25.
|
||||||
|
// It only needs to be big enough to hold indices from -width to width-1.
|
||||||
|
kvz_pixel tmp_ref[2 * 32];
|
||||||
|
const int_fast8_t width = 1 << log2_width;
|
||||||
|
|
||||||
|
// Whether to swap references to always project on the left reference row.
|
||||||
|
const bool vertical_mode = intra_mode >= 18;
|
||||||
|
// Modes distance to horizontal or vertical mode.
|
||||||
|
const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
|
||||||
|
// Sample displacement per column in fractions of 32.
|
||||||
|
const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];
|
||||||
|
|
||||||
|
// Pointer for the reference we are interpolating from.
|
||||||
|
const kvz_pixel *ref_main;
|
||||||
|
// Pointer for the other reference.
|
||||||
|
const kvz_pixel *ref_side;
|
||||||
|
|
||||||
|
// Set ref_main and ref_side such that, when indexed with 0, they point to
|
||||||
|
// index 0 in block coordinates.
|
||||||
|
if (sample_disp < 0) {
|
||||||
|
// Negative sample_disp means, we need to use both references.
|
||||||
|
|
||||||
|
ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
||||||
|
ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
||||||
|
|
||||||
|
// Move the reference pixels to start from the middle to the later half of
|
||||||
|
// the tmp_ref, so there is room for negative indices.
|
||||||
|
for (int_fast8_t x = -1; x < width; ++x) {
|
||||||
|
tmp_ref[x + width] = ref_main[x];
|
||||||
|
}
|
||||||
|
// Get a pointer to block index 0 in tmp_ref.
|
||||||
|
ref_main = &tmp_ref[width];
|
||||||
|
|
||||||
|
// Extend the side reference to the negative indices of main reference.
|
||||||
|
int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
|
||||||
|
int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
|
||||||
|
int_fast8_t most_negative_index = (width * sample_disp) >> 5;
|
||||||
|
for (int_fast8_t x = -2; x >= most_negative_index; --x) {
|
||||||
|
col_sample_disp += inv_abs_sample_disp;
|
||||||
|
int_fast8_t side_index = col_sample_disp >> 8;
|
||||||
|
tmp_ref[x + width] = ref_side[side_index - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// sample_disp >= 0 means we don't need to refer to negative indices,
|
||||||
|
// which means we can just use the references as is.
|
||||||
|
ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
|
||||||
|
ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sample_disp != 0) {
|
||||||
|
// The mode is not horizontal or vertical, we have to do interpolation.
|
||||||
|
|
||||||
|
int_fast16_t delta_pos = 0;
|
||||||
|
for (int_fast8_t y = 0; y < width; ++y) {
|
||||||
|
delta_pos += sample_disp;
|
||||||
|
int_fast8_t delta_int = delta_pos >> 5;
|
||||||
|
int_fast8_t delta_fract = delta_pos & (32 - 1);
|
||||||
|
|
||||||
|
if (delta_fract) {
|
||||||
|
// Do linear filtering
|
||||||
|
for (int_fast8_t x = 0; x < width; ++x) {
|
||||||
|
kvz_pixel ref1 = ref_main[x + delta_int];
|
||||||
|
kvz_pixel ref2 = ref_main[x + delta_int + 1];
|
||||||
|
dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Just copy the integer samples
|
||||||
|
for (int_fast8_t x = 0; x < width; x++) {
|
||||||
|
dst[y * width + x] = ref_main[x + delta_int];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Mode is horizontal or vertical, just copy the pixels.
|
||||||
|
|
||||||
|
for (int_fast8_t y = 0; y < width; ++y) {
|
||||||
|
for (int_fast8_t x = 0; x < width; ++x) {
|
||||||
|
dst[y * width + x] = ref_main[x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flip the block if this is was a horizontal mode.
|
||||||
|
if (!vertical_mode) {
|
||||||
|
for (int_fast8_t y = 0; y < width - 1; ++y) {
|
||||||
|
for (int_fast8_t x = y + 1; x < width; ++x) {
|
||||||
|
SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth)
|
||||||
|
{
|
||||||
|
bool success = true;
|
||||||
|
|
||||||
|
success &= kvz_strategyselector_register(opaque, "angular_pred", "generic", 0, &kvz_angular_pred_generic);
|
||||||
|
|
||||||
|
return success;
|
||||||
|
}
|
27
src/strategies/generic/intra-generic.h
Normal file
27
src/strategies/generic/intra-generic.h
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#ifndef STRATEGIES_INTRA_GENERIC_H_
|
||||||
|
#define STRATEGIES_INTRA_GENERIC_H_
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
||||||
|
* the terms of the GNU Lesser General Public License as published by the
|
||||||
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "encoderstate.h"
|
||||||
|
|
||||||
|
int kvz_strategy_register_intra_generic(void* opaque, uint8_t bitdepth);
|
||||||
|
|
||||||
|
#endif //STRATEGIES_INTRA_GENERIC_H_
|
41
src/strategies/strategies-intra.c
Normal file
41
src/strategies/strategies-intra.c
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
||||||
|
* the terms of the GNU Lesser General Public License as published by the
|
||||||
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include "strategies-intra.h"
|
||||||
|
#include "strategyselector.h"
|
||||||
|
|
||||||
|
// Define function pointers.
|
||||||
|
angular_pred_func *kvz_angular_pred;
|
||||||
|
|
||||||
|
// Headers for platform optimizations.
|
||||||
|
#include "generic/intra-generic.h"
|
||||||
|
#include "avx2/intra-avx2.h"
|
||||||
|
|
||||||
|
|
||||||
|
int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth) {
|
||||||
|
bool success = true;
|
||||||
|
|
||||||
|
success &= kvz_strategy_register_intra_generic(opaque, bitdepth);
|
||||||
|
|
||||||
|
if (kvz_g_hardware_flags.intel_flags.avx2) {
|
||||||
|
success &= kvz_strategy_register_intra_avx2(opaque, bitdepth);
|
||||||
|
}
|
||||||
|
return success;
|
||||||
|
}
|
43
src/strategies/strategies-intra.h
Normal file
43
src/strategies/strategies-intra.h
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
#ifndef STRATEGIES_INTRA_H_
|
||||||
|
#define STRATEGIES_INTRA_H_
|
||||||
|
/*****************************************************************************
|
||||||
|
* This file is part of Kvazaar HEVC encoder.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
||||||
|
* COPYING file).
|
||||||
|
*
|
||||||
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
||||||
|
* the terms of the GNU Lesser General Public License as published by the
|
||||||
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
||||||
|
* option) any later version.
|
||||||
|
*
|
||||||
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along
|
||||||
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include "encoderstate.h"
|
||||||
|
|
||||||
|
// Function type shared by the angular intra prediction strategy
// implementations. The kvz_angular_pred pointer declared below has this type
// and is exported to the strategy selector under the name "angular_pred".
//
// NOTE(review): parameter meanings are not shown in this header. Judging by
// the names only: log2_width is presumably log2 of the block width,
// intra_mode the prediction mode, in_ref_above / in_ref_left the reference
// pixels above and to the left of the block, and dst the output block.
// Confirm against the implementations before relying on this.
typedef void (angular_pred_func)(
|
||||||
|
const int_fast8_t log2_width,
|
||||||
|
const int_fast8_t intra_mode,
|
||||||
|
const kvz_pixel *const in_ref_above,
|
||||||
|
const kvz_pixel *const in_ref_left,
|
||||||
|
kvz_pixel *const dst);
|
||||||
|
|
||||||
|
// Declare function pointers.
|
||||||
|
extern angular_pred_func * kvz_angular_pred;
|
||||||
|
|
||||||
|
int kvz_strategy_register_intra(void* opaque, uint8_t bitdepth);
|
||||||
|
|
||||||
|
|
||||||
|
#define STRATEGIES_INTRA_EXPORTS \
|
||||||
|
{"angular_pred", (void**) &kvz_angular_pred}, \
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif //STRATEGIES_INTRA_H_
|
|
@ -75,6 +75,11 @@ int kvz_strategyselector_init(int32_t cpuid, uint8_t bitdepth) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!kvz_strategy_register_intra(&strategies, bitdepth)) {
|
||||||
|
fprintf(stderr, "kvz_strategy_register_intra failed!\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
while(cur_strategy_to_select->fptr) {
|
while(cur_strategy_to_select->fptr) {
|
||||||
*(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
|
*(cur_strategy_to_select->fptr) = strategyselector_choose_for(&strategies, cur_strategy_to_select->strategy_type);
|
||||||
|
|
||||||
|
|
|
@ -149,6 +149,7 @@ int kvz_strategyselector_register(void *opaque, const char *type, const char *st
|
||||||
#include "strategies/strategies-dct.h"
|
#include "strategies/strategies-dct.h"
|
||||||
#include "strategies/strategies-ipol.h"
|
#include "strategies/strategies-ipol.h"
|
||||||
#include "strategies/strategies-quant.h"
|
#include "strategies/strategies-quant.h"
|
||||||
|
#include "strategies/strategies-intra.h"
|
||||||
|
|
||||||
static const strategy_to_select_t strategies_to_select[] = {
|
static const strategy_to_select_t strategies_to_select[] = {
|
||||||
STRATEGIES_NAL_EXPORTS
|
STRATEGIES_NAL_EXPORTS
|
||||||
|
@ -156,6 +157,7 @@ static const strategy_to_select_t strategies_to_select[] = {
|
||||||
STRATEGIES_DCT_EXPORTS
|
STRATEGIES_DCT_EXPORTS
|
||||||
STRATEGIES_IPOL_EXPORTS
|
STRATEGIES_IPOL_EXPORTS
|
||||||
STRATEGIES_QUANT_EXPORTS
|
STRATEGIES_QUANT_EXPORTS
|
||||||
|
STRATEGIES_INTRA_EXPORTS
|
||||||
{ NULL, NULL },
|
{ NULL, NULL },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue