/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (C) 2013-2015 Tampere University of Technology and others (see
 * COPYING file).
 *
 * Kvazaar is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
 ****************************************************************************/

/*
 * \file
 */

#include <assert.h>
#include <stdlib.h>

#include "intra-avx2.h"
#include "strategyselector.h"

#if COMPILE_INTEL_AVX2
#include <immintrin.h>

/**
 * \brief Generate angular predictions.
 * \param log2_width    Log2 of width, range 2..5.
 * \param intra_mode    Angular mode in range 2..34.
 * \param in_ref_above  Pointer to -1 index of above reference, length=width*2+1.
 * \param in_ref_left   Pointer to -1 index of left reference, length=width*2+1.
 * \param dst           Buffer of size width*width.
 */
static void kvz_angular_pred_avx2(
        const int_fast8_t log2_width,
        const int_fast8_t intra_mode,
        const kvz_pixel *const in_ref_above,
        const kvz_pixel *const in_ref_left,
        kvz_pixel *const dst)
{
  assert(log2_width >= 2 && log2_width <= 5);
  assert(intra_mode >= 2 && intra_mode <= 34);

  static const int8_t modedisp2sampledisp[9] = { 0, 2, 5, 9, 13, 17, 21, 26, 32 };
  static const int16_t modedisp2invsampledisp[9] = { 0, 4096, 1638, 910, 630, 482, 390, 315, 256 }; // (256 * 32) / sampledisp

  // Temporary buffer for modes 11-25.
  // It only needs to be big enough to hold indices from -width to width-1.
  kvz_pixel tmp_ref[2 * 32];
  const int_fast8_t width = 1 << log2_width;

  // Whether to swap references to always project on the left reference row.
  const bool vertical_mode = intra_mode >= 18;
  // The mode's distance to the horizontal or vertical mode.
  const int_fast8_t mode_disp = vertical_mode ? intra_mode - 26 : 10 - intra_mode;
  // Sample displacement per column in fractions of 32.
  const int_fast8_t sample_disp = (mode_disp < 0 ? -1 : 1) * modedisp2sampledisp[abs(mode_disp)];

  // Pointer for the reference we are interpolating from.
  const kvz_pixel *ref_main;
  // Pointer for the other reference.
  const kvz_pixel *ref_side;

  // Set ref_main and ref_side such that, when indexed with 0, they point to
  // index 0 in block coordinates.
  if (sample_disp < 0) {
    // Negative sample_disp means we need to use both references.

    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;

    // Copy the main reference pixels into the latter half of tmp_ref, so
    // there is room for negative indices.
    for (int_fast8_t x = -1; x < width; ++x) {
      tmp_ref[x + width] = ref_main[x];
    }
    // Get a pointer to block index 0 in tmp_ref.
    ref_main = &tmp_ref[width];

    // Extend the side reference to the negative indices of main reference.
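    // The side reference is projected onto those negative indices using the
    // inverse sample displacement (modedisp2invsampledisp). The accumulator
    // col_sample_disp is in 1/256-sample units and starts at 128 so that the
    // ">> 8" below rounds to the nearest side reference sample.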
    int_fast32_t col_sample_disp = 128; // rounding for the ">> 8"
    int_fast16_t inv_abs_sample_disp = modedisp2invsampledisp[abs(mode_disp)];
    int_fast8_t most_negative_index = (width * sample_disp) >> 5;
    for (int_fast8_t x = -2; x >= most_negative_index; --x) {
      col_sample_disp += inv_abs_sample_disp;
      int_fast8_t side_index = col_sample_disp >> 8;
      tmp_ref[x + width] = ref_side[side_index - 1];
    }
  } else {
    // sample_disp >= 0 means we don't need to refer to negative indices,
    // which means we can just use the references as is.
    ref_main = (vertical_mode ? in_ref_above : in_ref_left) + 1;
    ref_side = (vertical_mode ? in_ref_left : in_ref_above) + 1;
  }

  if (sample_disp != 0) {
    // The mode is not horizontal or vertical, we have to do interpolation.

    int_fast16_t delta_pos = 0;
    for (int_fast8_t y = 0; y < width; ++y) {
      delta_pos += sample_disp;
      int_fast8_t delta_int = delta_pos >> 5;
      int_fast8_t delta_fract = delta_pos & (32 - 1);

      if (delta_fract) {
        // Do linear filtering
        if (width < 8) {
          for (int_fast8_t x = 0; x < width; ++x) {
            kvz_pixel ref1 = ref_main[x + delta_int];
            kvz_pixel ref2 = ref_main[x + delta_int + 1];
            dst[y * width + x] = ((32 - delta_fract) * ref1 + delta_fract * ref2 + 16) >> 5;
          }
        } else {
          struct { uint8_t w1; uint8_t w2; } packed_weights = { 32 - delta_fract, delta_fract };
          __m128i v_weights = _mm_set1_epi16(*(int16_t*)&packed_weights);

          // Interleave the two references and let _mm_maddubs_epi16 compute
          // eight weighted sums of unsigned 8-bit pairs at once.
          for (int_fast8_t x = 0; x < width; x += 8) {
            __m128i v_ref1 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int]));
            __m128i v_ref2 = _mm_loadl_epi64((__m128i*)&(ref_main[x + delta_int + 1]));
            __m128i v_refs = _mm_unpacklo_epi8(v_ref1, v_ref2);
            __m128i v_tmp = _mm_maddubs_epi16(v_refs, v_weights);
            v_tmp = _mm_add_epi16(v_tmp, _mm_set1_epi16(16));
            v_tmp = _mm_srli_epi16(v_tmp, 5);
            v_tmp = _mm_packus_epi16(v_tmp, v_tmp);
            _mm_storel_epi64((__m128i*)(dst + y * width + x), v_tmp);
          }
        }
      } else {
        // Just copy the integer samples
        for (int_fast8_t x = 0; x < width; x += 4) {
          *(int32_t*)(&dst[y * width + x]) = *(int32_t*)(&ref_main[x + delta_int]);
        }
      }
    }
  } else {
    // Mode is horizontal or vertical, just copy the pixels.
    for (int_fast8_t y = 0; y < width; ++y) {
      for (int_fast8_t x = 0; x < width; x += 4) {
        *(int32_t*)&(dst[y * width + x]) = *(int32_t*)&(ref_main[x]);
      }
    }
  }

  // Flip the block if this was a horizontal mode.
  if (!vertical_mode) {
    for (int_fast8_t y = 0; y < width - 1; ++y) {
      for (int_fast8_t x = y + 1; x < width; ++x) {
        SWAP(dst[y * width + x], dst[x * width + y], kvz_pixel);
      }
    }
  }
}

#endif //COMPILE_INTEL_AVX2

int kvz_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth)
{
  bool success = true;
#if COMPILE_INTEL_AVX2
  if (bitdepth == 8) {
    success &= kvz_strategyselector_register(opaque, "angular_pred", "avx2", 40, &kvz_angular_pred_avx2);
  }
#endif //COMPILE_INTEL_AVX2
  return success;
}