2014-11-20 16:38:54 +00:00
|
|
|
/*****************************************************************************
|
|
|
|
* This file is part of Kvazaar HEVC encoder.
|
|
|
|
*
|
2015-02-23 11:18:48 +00:00
|
|
|
* Copyright (C) 2013-2015 Tampere University of Technology and others (see
|
2014-11-20 16:38:54 +00:00
|
|
|
* COPYING file).
|
|
|
|
*
|
2015-02-23 11:18:48 +00:00
|
|
|
* Kvazaar is free software: you can redistribute it and/or modify it under
|
|
|
|
* the terms of the GNU Lesser General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 2.1 of the License, or (at your
|
|
|
|
* option) any later version.
|
2014-11-20 16:38:54 +00:00
|
|
|
*
|
2015-02-23 11:18:48 +00:00
|
|
|
* Kvazaar is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
|
|
|
* more details.
|
2014-11-20 16:38:54 +00:00
|
|
|
*
|
2015-02-23 11:18:48 +00:00
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
* with Kvazaar. If not, see <http://www.gnu.org/licenses/>.
|
2014-11-20 16:38:54 +00:00
|
|
|
****************************************************************************/
|
|
|
|
|
2016-03-31 10:34:32 +00:00
|
|
|
#include "strategies/generic/ipol-generic.h"
|
2016-04-01 14:14:23 +00:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
2014-11-20 16:38:54 +00:00
|
|
|
#include "encoder.h"
|
2016-03-31 10:34:32 +00:00
|
|
|
#include "strategies/generic/picture-generic.h"
|
2016-04-01 14:14:23 +00:00
|
|
|
#include "strategies/strategies-ipol.h"
|
|
|
|
#include "strategyselector.h"
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
// Interpolation filter coefficient tables; declared here, defined in another
// translation unit. The luma table holds four 8-tap filters and is indexed in
// this file by (mv & 3); the chroma table holds eight 4-tap filters and is
// indexed by (mv & 7).
extern int8_t kvz_g_luma_filter[4][8];
extern int8_t kvz_g_chroma_filter[8][4];
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply an 8-tap FIR filter to eight consecutive pixels.
 *
 * \param filter  Eight filter coefficients.
 * \param data    First of the eight consecutive input pixels.
 * \return Sum of filter[i] * data[i] for i = 0..7. No rounding, shifting or
 *         clipping is done here; normalization is left to the caller.
 */
int32_t kvz_eight_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 8; ++tap) {
    sum += filter[tap] * data[tap];
  }
  return sum;
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply an 8-tap FIR filter to eight consecutive 16-bit samples.
 *
 * \param filter  Eight filter coefficients.
 * \param data    First of the eight consecutive 16-bit input samples.
 * \return Sum of filter[i] * data[i] for i = 0..7. No rounding, shifting or
 *         clipping is done here; normalization is left to the caller.
 */
int32_t kvz_eight_tap_filter_hor_16bit_generic(int8_t *filter, int16_t *data)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 8; ++tap) {
    sum += filter[tap] * data[tap];
  }
  return sum;
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply an 8-tap FIR filter to eight pixels spaced `stride` apart.
 *
 * \param filter  Eight filter coefficients.
 * \param data    First input pixel; tap i reads data[stride * i].
 * \param stride  Distance in elements between consecutive taps.
 * \return Sum of filter[i] * data[stride * i] for i = 0..7. No rounding,
 *         shifting or clipping is done here.
 */
int32_t kvz_eight_tap_filter_ver_generic(int8_t *filter, kvz_pixel *data, int16_t stride)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 8; ++tap) {
    sum += filter[tap] * data[stride * tap];
  }
  return sum;
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply an 8-tap FIR filter to eight 16-bit samples spaced `stride` apart.
 *
 * \param filter  Eight filter coefficients.
 * \param data    First input sample; tap i reads data[stride * i].
 * \param stride  Distance in elements between consecutive taps.
 * \return Sum of filter[i] * data[stride * i] for i = 0..7. No rounding,
 *         shifting or clipping is done here.
 */
int32_t kvz_eight_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int16_t stride)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 8; ++tap) {
    sum += filter[tap] * data[stride * tap];
  }
  return sum;
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply a 4-tap FIR filter to four consecutive pixels.
 *
 * \param filter  Four filter coefficients.
 * \param data    First of the four consecutive input pixels.
 * \return Sum of filter[i] * data[i] for i = 0..3. No rounding, shifting or
 *         clipping is done here; normalization is left to the caller.
 */
int32_t kvz_four_tap_filter_hor_generic(int8_t *filter, kvz_pixel *data)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 4; ++tap) {
    sum += filter[tap] * data[tap];
  }
  return sum;
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply a 4-tap FIR filter to four consecutive 16-bit samples.
 *
 * \param filter  Four filter coefficients.
 * \param data    First of the four consecutive 16-bit input samples.
 * \return Sum of filter[i] * data[i] for i = 0..3. No rounding, shifting or
 *         clipping is done here; normalization is left to the caller.
 */
int32_t kvz_four_tap_filter_hor_16bit_generic(int8_t *filter, int16_t *data)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 4; ++tap) {
    sum += filter[tap] * data[tap];
  }
  return sum;
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply a 4-tap FIR filter to four pixels spaced `stride` apart.
 *
 * \param filter  Four filter coefficients.
 * \param data    First input pixel; tap i reads data[stride * i].
 * \param stride  Distance in elements between consecutive taps.
 * \return Sum of filter[i] * data[stride * i] for i = 0..3. No rounding,
 *         shifting or clipping is done here.
 */
int32_t kvz_four_tap_filter_ver_generic(int8_t *filter, kvz_pixel *data, int16_t stride)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 4; ++tap) {
    sum += filter[tap] * data[stride * tap];
  }
  return sum;
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Apply a 4-tap FIR filter to four 16-bit samples spaced `stride` apart.
 *
 * \param filter  Four filter coefficients.
 * \param data    First input sample; tap i reads data[stride * i].
 * \param stride  Distance in elements between consecutive taps.
 * \return Sum of filter[i] * data[stride * i] for i = 0..3. No rounding,
 *         shifting or clipping is done here.
 */
int32_t kvz_four_tap_filter_ver_16bit_generic(int8_t *filter, int16_t *data, int16_t stride)
{
  int32_t sum = 0;
  for (int tap = 0; tap < 4; ++tap) {
    sum += filter[tap] * data[stride * tap];
  }
  return sum;
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
/**
 * \brief Interpolate a luma block at a fractional position and write pixels.
 *
 * The block is filtered horizontally into a 16-bit intermediate buffer and
 * then vertically; the result is rounded, shifted back to pixel precision
 * and clipped with kvz_fast_clip_32bit_to_pixel.
 *
 * \param encoder     Not used by this implementation.
 * \param src         Source pixels. Reads start KVZ_LUMA_FILTER_OFFSET rows
 *                    above and KVZ_LUMA_FILTER_OFFSET columns to the left of
 *                    src[0], so the caller must provide that margin (and the
 *                    matching margin for the remaining filter taps).
 * \param src_stride  Distance in elements between rows of src.
 * \param width       Block width in samples.
 * \param height      Block height in samples.
 * \param dst         Destination pixels.
 * \param dst_stride  Distance in elements between rows of dst.
 * \param hor_flag    Not used by this implementation.
 * \param ver_flag    Not used by this implementation.
 * \param mv          Motion vector; the two lowest bits of mv[0] and mv[1]
 *                    select the horizontal and vertical filter.
 */
void kvz_sample_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
{
  //TODO: horizontal and vertical only filtering
  int32_t x, y;

  // Interpolation filter shifts
  int16_t shift1 = KVZ_BIT_DEPTH - 8;
  int32_t shift2 = 6;

  // Weighted prediction offset and shift
  int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
  int32_t wp_offset1 = 1 << (wp_shift1 - 1);

  // Select filters according to the fractional part of the x and y mv components
  int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 3];
  int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 3];

  // Horizontally filtered rows; the vertical pass reads eight consecutive
  // rows of this buffer for every output row.
  int16_t hor_filtered[KVZ_EXT_BLOCK_W_LUMA][LCU_WIDTH];
  int16_t hor_stride = LCU_WIDTH;

  // Filter horizontally
  for (y = 0; y < height + KVZ_EXT_PADDING_LUMA; ++y) {
    int ypos = y - KVZ_LUMA_FILTER_OFFSET;
    for (x = 0; x < width; ++x) {
      int xpos = x - KVZ_LUMA_FILTER_OFFSET;
      hor_filtered[y][x] = kvz_eight_tap_filter_hor_generic(hor_filter, &src[src_stride * ypos + xpos]) >> shift1;
    }
  }

  // Filter vertically
  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x) {
      int32_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_filter, &hor_filtered[y][x], hor_stride) >> shift2;
      dst[y * dst_stride + x] = kvz_fast_clip_32bit_to_pixel((sample + wp_offset1) >> wp_shift1);
    }
  }
}
|
2014-12-11 14:38:43 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
/**
 * \brief Interpolate a luma block at a fractional position, keeping the
 *        intermediate 16-bit precision in the output.
 *
 * The block is filtered horizontally into a 16-bit intermediate buffer and
 * then vertically. Unlike the pixel-output variant, the result is only
 * shifted by the second filter shift; it is neither rounded down to pixel
 * precision nor clipped.
 *
 * \param encoder     Not used by this implementation.
 * \param src         Source pixels. Reads start KVZ_LUMA_FILTER_OFFSET rows
 *                    above and KVZ_LUMA_FILTER_OFFSET columns to the left of
 *                    src[0], so the caller must provide that margin (and the
 *                    matching margin for the remaining filter taps).
 * \param src_stride  Distance in elements between rows of src.
 * \param width       Block width in samples.
 * \param height      Block height in samples.
 * \param dst         Destination 16-bit samples.
 * \param dst_stride  Distance in elements between rows of dst.
 * \param hor_flag    Not used by this implementation.
 * \param ver_flag    Not used by this implementation.
 * \param mv          Motion vector; the two lowest bits of mv[0] and mv[1]
 *                    select the horizontal and vertical filter.
 */
void kvz_sample_14bit_quarterpel_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
{
  //TODO: horizontal and vertical only filtering
  int32_t x, y;

  // Interpolation filter shifts
  int16_t shift1 = KVZ_BIT_DEPTH - 8;
  int32_t shift2 = 6;

  // Select filters according to the fractional part of the x and y mv components
  int8_t *hor_filter = kvz_g_luma_filter[mv[0] & 3];
  int8_t *ver_filter = kvz_g_luma_filter[mv[1] & 3];

  // Horizontally filtered rows; the vertical pass reads eight consecutive
  // rows of this buffer for every output row.
  int16_t hor_filtered[KVZ_EXT_BLOCK_W_LUMA][LCU_WIDTH];
  int16_t hor_stride = LCU_WIDTH;

  // Filter horizontally
  for (y = 0; y < height + KVZ_EXT_PADDING_LUMA; ++y) {
    int ypos = y - KVZ_LUMA_FILTER_OFFSET;
    for (x = 0; x < width; ++x) {
      int xpos = x - KVZ_LUMA_FILTER_OFFSET;
      hor_filtered[y][x] = kvz_eight_tap_filter_hor_generic(hor_filter, &src[src_stride * ypos + xpos]) >> shift1;
    }
  }

  // Filter vertically
  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x) {
      dst[y * dst_stride + x] = kvz_eight_tap_filter_ver_16bit_generic(ver_filter, &hor_filtered[y][x], hor_stride) >> shift2;
    }
  }
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
void kvz_filter_hpel_blocks_hor_ver_luma_generic(const encoder_control_t * encoder,
|
|
|
|
kvz_pixel *src,
|
|
|
|
int16_t src_stride,
|
|
|
|
int width,
|
|
|
|
int height,
|
|
|
|
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
|
|
|
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
|
|
|
int8_t fme_level,
|
|
|
|
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
|
|
|
int8_t hpel_off_x, int8_t hpel_off_y)
|
2015-05-20 12:54:04 +00:00
|
|
|
{
|
2018-11-04 19:04:17 +00:00
|
|
|
int x, y, first_y;
|
|
|
|
|
|
|
|
// Interpolation filter shifts
|
2015-05-20 12:54:04 +00:00
|
|
|
int16_t shift1 = KVZ_BIT_DEPTH - 8;
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Weighted prediction offset and shift
|
|
|
|
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
|
|
|
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
2015-05-20 12:54:04 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
int8_t *fir0 = kvz_g_luma_filter[0];
|
|
|
|
int8_t *fir2 = kvz_g_luma_filter[2];
|
2015-05-20 12:54:04 +00:00
|
|
|
|
2018-12-19 19:01:22 +00:00
|
|
|
int16_t dst_stride = LCU_WIDTH;
|
|
|
|
int16_t hor_stride = LCU_WIDTH;
|
2018-11-04 19:04:17 +00:00
|
|
|
int32_t first_row_offset = (KVZ_LUMA_FILTER_OFFSET + 1) * hor_stride;
|
|
|
|
|
|
|
|
int16_t *col_pos0 = hor_first_cols[0];
|
|
|
|
int16_t *col_pos2 = hor_first_cols[2];
|
|
|
|
|
|
|
|
// Horizontally filtered samples from the top row are
|
|
|
|
// not needed unless samples for diagonal positions are filtered later.
|
|
|
|
first_y = fme_level > 1 ? 0 : 1;
|
|
|
|
|
|
|
|
// HORIZONTAL STEP
|
|
|
|
// Integer pixels
|
|
|
|
for (y = 0; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET + 1;
|
|
|
|
hor_intermediate[0][y * hor_stride + x] = kvz_eight_tap_filter_hor_generic(fir0, &src[src_stride*ypos + xpos]) >> shift1;
|
2015-05-20 12:54:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Write the first column in contiguous memory
|
|
|
|
x = 0;
|
|
|
|
for (y = 0; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
col_pos0[y] = kvz_eight_tap_filter_hor_generic(fir0, &src[src_stride*ypos + xpos]) >> shift1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Half pixels
|
|
|
|
for (y = first_y; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET + 1;
|
|
|
|
hor_intermediate[1][y * hor_stride + x] = kvz_eight_tap_filter_hor_generic(fir2, &src[src_stride*ypos + xpos]) >> shift1;
|
2015-05-20 12:54:04 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Write the first column in contiguous memory
|
|
|
|
x = 0;
|
|
|
|
for (y = first_y; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
col_pos2[y] = kvz_eight_tap_filter_hor_generic(fir2, &src[src_stride*ypos + xpos]) >> shift1;
|
|
|
|
}
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// VERTICAL STEP
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Right
|
|
|
|
// Only horizontal filter
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
filtered[1][y * dst_stride + x] = kvz_fast_clip_16bit_to_pixel((hor_intermediate[1][first_row_offset + y * hor_stride + x] + wp_offset1) >> wp_shift1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Left
|
|
|
|
// Copy from the right filtered block and the extra column
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
x = 0;
|
|
|
|
filtered[0][y * dst_stride + x] = kvz_fast_clip_16bit_to_pixel((col_pos2[y + KVZ_LUMA_FILTER_OFFSET + 1] + wp_offset1) >> wp_shift1);
|
|
|
|
for (x = 1; x < width; ++x) filtered[0][y * dst_stride + x] = filtered[1][y * dst_stride + x - 1];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Top
|
|
|
|
// Only vertical filter
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int xpos = x;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_generic(fir2, &src[src_stride*ypos + xpos + 1], src_stride) >> shift1;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[2][y * dst_stride + x] = sample;
|
2014-11-20 16:38:54 +00:00
|
|
|
}
|
|
|
|
}
|
2018-11-04 19:04:17 +00:00
|
|
|
|
|
|
|
// Bottom
|
|
|
|
// Copy what can be copied from the top filtered values.
|
|
|
|
// Then filter the last row from horizontal intermediate buffer.
|
|
|
|
for (y = 0; y < height - 1; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) filtered[3][y * dst_stride + x] = filtered[2][(y + 1) * dst_stride + x];
|
|
|
|
}
|
|
|
|
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int xpos = x;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_generic(fir2, &src[src_stride*(ypos + 1) + xpos + 1], src_stride) >> shift1;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[3][y * dst_stride + x] = sample;
|
|
|
|
}
|
2014-11-20 16:38:54 +00:00
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
void kvz_filter_hpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
|
|
|
|
kvz_pixel *src,
|
|
|
|
int16_t src_stride,
|
|
|
|
int width,
|
|
|
|
int height,
|
|
|
|
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
|
|
|
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
|
|
|
int8_t fme_level,
|
|
|
|
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
|
|
|
int8_t hpel_off_x, int8_t hpel_off_y)
|
2014-11-20 16:38:54 +00:00
|
|
|
{
|
2018-11-04 19:04:17 +00:00
|
|
|
int x, y;
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Interpolation filter shifts
|
2014-11-20 16:38:54 +00:00
|
|
|
int32_t shift2 = 6;
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Weighted prediction offset and shift
|
|
|
|
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
|
|
|
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
int8_t *fir2 = kvz_g_luma_filter[2];
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-12-19 19:01:22 +00:00
|
|
|
int16_t dst_stride = LCU_WIDTH;
|
|
|
|
int16_t hor_stride = LCU_WIDTH;
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Horizontal positions
|
|
|
|
int16_t *col_pos2 = hor_first_cols[2];
|
2015-02-04 16:23:57 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// VERTICAL STEP
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Top-right
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(fir2, &hor_intermediate[1][y * hor_stride + x], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[1][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
}
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
x = 0;
|
|
|
|
filtered[0][y * dst_stride + x] = kvz_fast_clip_16bit_to_pixel((col_pos2[y + KVZ_LUMA_FILTER_OFFSET + 1] + wp_offset1) >> wp_shift1);
|
|
|
|
for (x = 1; x < width; ++x) filtered[0][y * dst_stride + x] = filtered[1][y * dst_stride + x - 1];
|
|
|
|
}
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Top-left
|
|
|
|
// Copy what can be copied from top-right filtered values. Filter the first column from the column array.
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(fir2, &col_pos2[y]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[0][y * dst_stride + x] = sample;
|
|
|
|
for (x = 1; x < width; ++x) filtered[0][y * dst_stride + x] = filtered[1][y * dst_stride + x - 1];
|
|
|
|
}
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Bottom-right
|
|
|
|
// Copy what can be copied from top-right filtered values. Filter the last row.
|
|
|
|
for (y = 0; y < height - 1; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) filtered[3][y* dst_stride + x] = filtered[1][(y + 1) * dst_stride + x];
|
|
|
|
}
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(fir2, &hor_intermediate[1][(y + 1) * hor_stride + x], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[3][y * dst_stride + x] = sample;
|
|
|
|
}
|
2014-11-20 16:38:54 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Bottom-left
|
|
|
|
// Copy what can be copied from the top-left filtered values.
|
|
|
|
// Copy what can be copied from the bottom-right filtered values.
|
|
|
|
// Finally filter the last pixel from the column array.
|
|
|
|
for (y = 0; y < height - 1; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) filtered[2][y * dst_stride + x] = filtered[0][(y + 1) * dst_stride + x];
|
2014-11-20 16:38:54 +00:00
|
|
|
}
|
2018-11-04 19:04:17 +00:00
|
|
|
for (x = 1; x < width; ++x) filtered[2][y * dst_stride + x] = filtered[3][y * dst_stride + x - 1];
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(fir2, &col_pos2[(y + 1)]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[2][y * dst_stride + x] = sample;
|
2014-11-20 16:38:54 +00:00
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
void kvz_filter_qpel_blocks_hor_ver_luma_generic(const encoder_control_t * encoder,
|
|
|
|
kvz_pixel *src,
|
|
|
|
int16_t src_stride,
|
|
|
|
int width,
|
|
|
|
int height,
|
|
|
|
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
|
|
|
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
|
|
|
int8_t fme_level,
|
|
|
|
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
|
|
|
int8_t hpel_off_x, int8_t hpel_off_y)
|
2016-07-12 15:03:52 +00:00
|
|
|
{
|
|
|
|
int x, y;
|
2018-11-04 19:04:17 +00:00
|
|
|
|
|
|
|
// Interpolation filter shifts
|
2016-07-12 15:03:52 +00:00
|
|
|
int16_t shift1 = KVZ_BIT_DEPTH - 8;
|
|
|
|
int32_t shift2 = 6;
|
2018-11-04 19:04:17 +00:00
|
|
|
|
|
|
|
// Weighted prediction offset and shift
|
|
|
|
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
|
|
|
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
2016-07-12 15:03:52 +00:00
|
|
|
|
|
|
|
int8_t *fir0 = kvz_g_luma_filter[0];
|
|
|
|
int8_t *fir2 = kvz_g_luma_filter[2];
|
2018-11-04 19:04:17 +00:00
|
|
|
int8_t *fir1 = kvz_g_luma_filter[1];
|
|
|
|
int8_t *fir3 = kvz_g_luma_filter[3];
|
2016-07-12 15:03:52 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Horiziontal positions. Positions 0 and 2 have already been calculated in filtered.
|
|
|
|
int16_t *hor_pos0 = hor_intermediate[0];
|
|
|
|
int16_t *hor_pos2 = hor_intermediate[1];
|
|
|
|
int16_t *hor_pos_l = hor_intermediate[3];
|
|
|
|
int16_t *hor_pos_r = hor_intermediate[4];
|
|
|
|
int8_t *hor_fir_l = hpel_off_x != 0 ? fir1 : fir3;
|
|
|
|
int8_t *hor_fir_r = hpel_off_x != 0 ? fir3 : fir1;
|
|
|
|
int16_t *col_pos_l = hor_first_cols[1];
|
|
|
|
int16_t *col_pos_r = hor_first_cols[3];
|
|
|
|
|
2018-12-19 19:01:22 +00:00
|
|
|
int16_t dst_stride = LCU_WIDTH;
|
|
|
|
int16_t hor_stride = LCU_WIDTH;
|
2018-11-04 19:04:17 +00:00
|
|
|
|
|
|
|
int16_t *hor_hpel_pos = hpel_off_x != 0 ? hor_pos2 : hor_pos0;
|
|
|
|
int16_t *col_pos_hor = hpel_off_x != 0 ? hor_first_cols[2] : hor_first_cols[0];
|
|
|
|
|
|
|
|
// Specify if integer pixels are filtered from left or/and top integer samples
|
|
|
|
int off_x_fir_l = hpel_off_x < 1 ? 0 : 1;
|
|
|
|
int off_x_fir_r = hpel_off_x < 0 ? 0 : 1;
|
|
|
|
int off_y_fir_t = hpel_off_y < 1 ? 0 : 1;
|
|
|
|
int off_y_fir_b = hpel_off_y < 0 ? 0 : 1;
|
|
|
|
|
|
|
|
// HORIZONTAL STEP
|
|
|
|
// Left QPEL
|
|
|
|
int sample_off_y = hpel_off_y < 0 ? 0 : 1;
|
|
|
|
for (y = 0; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET + 1;
|
|
|
|
hor_pos_l[y * hor_stride + x] = kvz_eight_tap_filter_hor_generic(hor_fir_l, &src[src_stride*ypos + xpos]) >> shift1;
|
2016-07-12 15:03:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Write the first column in contiguous memory
|
|
|
|
x = 0;
|
|
|
|
for (y = 0; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
col_pos_l[y] = kvz_eight_tap_filter_hor_generic(hor_fir_l, &src[src_stride*ypos + xpos]) >> shift1;
|
2016-07-12 15:03:52 +00:00
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Right QPEL
|
|
|
|
for (y = 0; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
for (x = 0; x < width; ++x) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET + 1;
|
|
|
|
hor_pos_r[y * hor_stride + x] = kvz_eight_tap_filter_hor_generic(hor_fir_r, &src[src_stride*ypos + xpos]) >> shift1;
|
|
|
|
}
|
|
|
|
}
|
2016-07-12 15:03:52 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Write the first column in contiguous memory
|
|
|
|
x = 0;
|
|
|
|
for (y = 0; y < height + KVZ_EXT_PADDING_LUMA + 1; ++y) {
|
|
|
|
int ypos = y - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
int xpos = x - KVZ_LUMA_FILTER_OFFSET;
|
|
|
|
col_pos_r[y] = kvz_eight_tap_filter_hor_generic(hor_fir_r, &src[src_stride*ypos + xpos]) >> shift1;
|
|
|
|
}
|
2016-07-12 15:03:52 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// VERTICAL STEP
|
|
|
|
int8_t *ver_fir_l = hpel_off_y != 0 ? fir2 : fir0;
|
|
|
|
int8_t *ver_fir_r = hpel_off_y != 0 ? fir2 : fir0;
|
|
|
|
int8_t *ver_fir_t = hpel_off_y != 0 ? fir1 : fir3;
|
|
|
|
int8_t *ver_fir_b = hpel_off_y != 0 ? fir3 : fir1;
|
|
|
|
|
|
|
|
// Left QPEL (1/4 or 3/4 x positions)
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!off_x_fir_l) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_l, &col_pos_l[y + sample_off_y]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[0][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !off_x_fir_l; x < width; ++x) {
|
|
|
|
int ypos = y + sample_off_y;
|
|
|
|
int xpos = x - !off_x_fir_l;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_l, &hor_pos_l[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[0][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
}
|
2016-07-12 15:03:52 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Right QPEL (3/4 or 1/4 x positions)
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!off_x_fir_r) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_r, &col_pos_r[y + sample_off_y]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[1][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !off_x_fir_r; x < width; ++x) {
|
|
|
|
int ypos = y + sample_off_y;
|
|
|
|
int xpos = x - !off_x_fir_r;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_r, &hor_pos_r[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[1][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
}
|
2016-07-12 15:03:52 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Top QPEL (1/4 or 3/4 y positions)
|
|
|
|
int sample_off_x = (hpel_off_x > -1 ? 1 : 0);
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!sample_off_x) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_t, &col_pos_hor[y + off_y_fir_t]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[2][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !sample_off_x; x < width; ++x) {
|
|
|
|
int ypos = y + off_y_fir_t;
|
|
|
|
int xpos = x - !sample_off_x;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_t, &hor_hpel_pos[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[2][y * dst_stride + x] = sample;
|
2016-07-12 15:03:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Bottom QPEL (3/4 or 1/4 y positions)
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!sample_off_x) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_b, &col_pos_hor[y + off_y_fir_b]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[3][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !sample_off_x; x < width; ++x) {
|
|
|
|
int ypos = y + off_y_fir_b;
|
|
|
|
int xpos = x - !sample_off_x;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_b, &hor_hpel_pos[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[3][y * dst_stride + x] = sample;
|
2016-07-12 15:03:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
void kvz_filter_qpel_blocks_diag_luma_generic(const encoder_control_t * encoder,
|
|
|
|
kvz_pixel *src,
|
|
|
|
int16_t src_stride,
|
|
|
|
int width,
|
|
|
|
int height,
|
|
|
|
kvz_pixel filtered[4][LCU_WIDTH * LCU_WIDTH],
|
|
|
|
int16_t hor_intermediate[5][(KVZ_EXT_BLOCK_W_LUMA + 1) * LCU_WIDTH],
|
|
|
|
int8_t fme_level,
|
|
|
|
int16_t hor_first_cols[5][KVZ_EXT_BLOCK_W_LUMA + 1],
|
|
|
|
int8_t hpel_off_x, int8_t hpel_off_y)
|
2016-07-12 15:03:52 +00:00
|
|
|
{
|
|
|
|
int x, y;
|
2018-11-04 19:04:17 +00:00
|
|
|
|
|
|
|
// Interpolation filter shifts
|
2016-07-12 15:03:52 +00:00
|
|
|
int32_t shift2 = 6;
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Weighted prediction offset and shift
|
|
|
|
int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
|
|
|
|
int32_t wp_offset1 = 1 << (wp_shift1 - 1);
|
|
|
|
|
2016-07-12 15:03:52 +00:00
|
|
|
int8_t *fir1 = kvz_g_luma_filter[1];
|
|
|
|
int8_t *fir3 = kvz_g_luma_filter[3];
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Horiziontal positions.
|
|
|
|
int16_t *hor_pos_l = hor_intermediate[3];
|
|
|
|
int16_t *hor_pos_r = hor_intermediate[4];
|
|
|
|
|
|
|
|
int16_t *col_pos_l = hor_first_cols[1];
|
|
|
|
int16_t *col_pos_r = hor_first_cols[3];
|
|
|
|
|
2018-12-19 19:01:22 +00:00
|
|
|
int16_t dst_stride = LCU_WIDTH;
|
|
|
|
int16_t hor_stride = LCU_WIDTH;
|
2018-11-04 19:04:17 +00:00
|
|
|
|
|
|
|
// VERTICAL STEP
|
|
|
|
int8_t *ver_fir_t = hpel_off_y != 0 ? fir1 : fir3;
|
|
|
|
int8_t *ver_fir_b = hpel_off_y != 0 ? fir3 : fir1;
|
|
|
|
|
|
|
|
// Specify if integer pixels are filtered from left or/and top integer samples
|
|
|
|
int off_x_fir_l = hpel_off_x < 1 ? 0 : 1;
|
|
|
|
int off_x_fir_r = hpel_off_x < 0 ? 0 : 1;
|
|
|
|
int off_y_fir_t = hpel_off_y < 1 ? 0 : 1;
|
|
|
|
int off_y_fir_b = hpel_off_y < 0 ? 0 : 1;
|
|
|
|
|
|
|
|
// Top-left QPEL
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!off_x_fir_l) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_t, &col_pos_l[y + off_y_fir_t]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[0][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !off_x_fir_l; x < width; ++x) {
|
|
|
|
int ypos = y + off_y_fir_t;
|
|
|
|
int xpos = x - !off_x_fir_l;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_t, &hor_pos_l[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[0][y * dst_stride + x] = sample;
|
2016-07-12 15:03:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Top-right QPEL
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!off_x_fir_r) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_t, &col_pos_r[y + off_y_fir_t]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[1][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !off_x_fir_r; x < width; ++x) {
|
|
|
|
int ypos = y + off_y_fir_t;
|
|
|
|
int xpos = x - !off_x_fir_r;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_t, &hor_pos_r[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[1][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
}
|
2016-07-12 15:03:52 +00:00
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Bottom-left QPEL
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!off_x_fir_l) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_b, &col_pos_l[y + off_y_fir_b]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[2][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !off_x_fir_l; x < width; ++x) {
|
|
|
|
int ypos = y + off_y_fir_b;
|
|
|
|
int xpos = x - !off_x_fir_l;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_b, &hor_pos_l[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[2][y * dst_stride + x] = sample;
|
2016-07-12 15:03:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
// Bottom-right QPEL
|
|
|
|
for (y = 0; y < height; ++y) {
|
|
|
|
if (!off_x_fir_r) {
|
|
|
|
x = 0;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_hor_16bit_generic(ver_fir_b, &col_pos_r[y + off_y_fir_b]) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[3][y * dst_stride + x] = sample;
|
|
|
|
}
|
|
|
|
for (x = !off_x_fir_r; x < width; ++x) {
|
|
|
|
int ypos = y + off_y_fir_b;
|
|
|
|
int xpos = x - !off_x_fir_r;
|
|
|
|
int16_t sample = kvz_eight_tap_filter_ver_16bit_generic(ver_fir_b, &hor_pos_r[ypos * hor_stride + xpos], hor_stride) >> shift2;
|
|
|
|
sample = kvz_fast_clip_16bit_to_pixel((sample + wp_offset1) >> wp_shift1);
|
|
|
|
filtered[3][y * dst_stride + x] = sample;
|
|
|
|
}
|
2016-07-12 15:03:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Interpolate a chroma block at a fractional position and write pixels.
 *
 * The block is filtered horizontally with a 4-tap filter into a 16-bit
 * intermediate buffer and then vertically; the result is rounded, shifted
 * back to pixel precision and clipped with kvz_fast_clip_32bit_to_pixel.
 *
 * \param encoder     Not used by this implementation.
 * \param src         Source pixels. Reads start KVZ_CHROMA_FILTER_OFFSET rows
 *                    above and KVZ_CHROMA_FILTER_OFFSET columns to the left
 *                    of src[0], so the caller must provide that margin (and
 *                    the matching margin for the remaining filter taps).
 * \param src_stride  Distance in elements between rows of src.
 * \param width       Block width in samples.
 * \param height      Block height in samples.
 * \param dst         Destination pixels.
 * \param dst_stride  Distance in elements between rows of dst.
 * \param hor_flag    Not used by this implementation.
 * \param ver_flag    Not used by this implementation.
 * \param mv          Motion vector; the three lowest bits of mv[0] and mv[1]
 *                    select the horizontal and vertical filter.
 */
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height,kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
{
  //TODO: horizontal and vertical only filtering
  int32_t x, y;

  // Interpolation filter shifts
  int16_t shift1 = KVZ_BIT_DEPTH - 8;
  int32_t shift2 = 6;

  // Weighted prediction offset and shift
  int32_t wp_shift1 = 14 - KVZ_BIT_DEPTH;
  int32_t wp_offset1 = 1 << (wp_shift1 - 1);

  // Select filters according to the fractional part of the x and y mv components
  int8_t *hor_filter = kvz_g_chroma_filter[mv[0] & 7];
  int8_t *ver_filter = kvz_g_chroma_filter[mv[1] & 7];

  // Horizontally filtered rows; the vertical pass reads four consecutive
  // rows of this buffer for every output row.
  int16_t hor_filtered[KVZ_EXT_BLOCK_W_CHROMA][LCU_WIDTH_C];
  int16_t hor_stride = LCU_WIDTH_C;

  // Filter horizontally
  for (y = 0; y < height + KVZ_EXT_PADDING_CHROMA; ++y) {
    int ypos = y - KVZ_CHROMA_FILTER_OFFSET;
    for (x = 0; x < width; ++x) {
      int xpos = x - KVZ_CHROMA_FILTER_OFFSET;
      hor_filtered[y][x] = kvz_four_tap_filter_hor_generic(hor_filter, &src[src_stride * ypos + xpos]) >> shift1;
    }
  }

  // Filter vertically
  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x) {
      int32_t sample = kvz_four_tap_filter_ver_16bit_generic(ver_filter, &hor_filtered[y][x], hor_stride) >> shift2;
      dst[y * dst_stride + x] = kvz_fast_clip_32bit_to_pixel((sample + wp_offset1) >> wp_shift1);
    }
  }
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Interpolate a chroma block at a fractional position and store the
 *        result as unclipped 16-bit samples.
 *
 * Works like the pixel-output variant (horizontal 4-tap pass into a
 * temporary buffer followed by a vertical 4-tap pass), except that the
 * vertically filtered value is only shifted right by 6 and written to dst
 * without any rounding offset or clipping.
 *
 * \param encoder     Not used by this implementation.
 * \param src         Pointer to the block in the source picture. Samples
 *                    starting KVZ_CHROMA_FILTER_OFFSET rows above and columns
 *                    to the left of this position are read.
 * \param src_stride  Distance between consecutive rows of src, in samples.
 * \param width       Width of the output block.
 * \param height      Height of the output block.
 * \param dst         Output buffer for the 16-bit samples.
 * \param dst_stride  Distance between consecutive rows of dst, in samples.
 * \param hor_flag    Not used by this implementation.
 * \param ver_flag    Not used by this implementation.
 * \param mv          Motion vector; only mv[0] & 7 and mv[1] & 7 are used.
 */
void kvz_sample_14bit_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
{
  //TODO: horizontal and vertical only filtering

  // Shifts applied to the output of the first (horizontal) and the
  // second (vertical) filter pass.
  const int16_t first_pass_shift = KVZ_BIT_DEPTH - 8;
  const int32_t second_pass_shift = 6;

  // Filter coefficients for the fractional part of each mv component.
  int8_t *filter_x = kvz_g_chroma_filter[mv[0] & 7];
  int8_t *filter_y = kvz_g_chroma_filter[mv[1] & 7];

  // Horizontally filtered rows, including the extra rows that the
  // vertical pass reads.
  int16_t tmp_rows[KVZ_EXT_BLOCK_W_CHROMA][LCU_WIDTH_C];
  const int16_t tmp_stride = LCU_WIDTH_C;

  // First pass: horizontal filtering of every row the second pass needs.
  for (int32_t row = 0; row < height + KVZ_EXT_PADDING_CHROMA; ++row) {
    const int src_row_offset = src_stride * (row - KVZ_CHROMA_FILTER_OFFSET);

    for (int32_t col = 0; col < width; ++col) {
      const int src_col = col - KVZ_CHROMA_FILTER_OFFSET;
      tmp_rows[row][col] =
        kvz_four_tap_filter_hor_generic(filter_x, &src[src_row_offset + src_col]) >> first_pass_shift;
    }
  }

  // Second pass: vertical filtering; the result is stored without clipping.
  for (int32_t row = 0; row < height; ++row) {
    for (int32_t col = 0; col < width; ++col) {
      dst[row * dst_stride + col] =
        kvz_four_tap_filter_ver_16bit_generic(filter_y, &tmp_rows[row][col], tmp_stride) >> second_pass_shift;
    }
  }
}
|
|
|
|
|
2015-08-14 15:39:39 +00:00
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
/**
 * \brief Get a block of reference pixels extended by the filter margin.
 *
 * The requested area starts half a filter length above and to the left of
 * the displaced block position (xpos + off_x + mv_x, ypos + off_y + mv_y).
 *
 * If the area passes the (conservative) bounds test below, out points
 * directly into ref and nothing is allocated. Otherwise a buffer of
 * (width + filter_size) * (height + filter_size) pixels is allocated and
 * filled from ref, with coordinates clamped to the frame so that border
 * pixels are repeated.
 *
 * On return:
 *  - out->buffer        first pixel of the extended block
 *  - out->stride        row stride of out->buffer, in pixels
 *  - out->orig_topleft  pixel half_filter_size rows and columns into buffer
 *  - out->malloc_used   1 if out->buffer was allocated here, 0 otherwise
 *
 * If the allocation fails, a message is printed and assert(0) fires. In
 * builds where assert is compiled out, the function returns with
 * out->buffer and out->orig_topleft set to NULL, out->stride set to 0 and
 * out->malloc_used set to 0 instead of writing through a NULL pointer.
 *
 * \param xpos         Block x position.
 * \param ypos         Block y position.
 * \param mv_x         Displacement added to the x position.
 * \param mv_y         Displacement added to the y position.
 * \param off_x        Additional x offset added to the position.
 * \param off_y        Additional y offset added to the position.
 * \param ref          Reference pixels, ref_width pixels per row.
 * \param ref_width    Width of ref; also used as its row stride.
 * \param ref_height   Height of ref.
 * \param filter_size  Length of the interpolation filter.
 * \param width        Block width.
 * \param height       Block height.
 * \param out          Receives the description of the extended block.
 */
void kvz_get_extended_block_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
  int filter_size, int width, int height, kvz_extended_block *out) {

  int half_filter_size = filter_size >> 1;

  // Bounds of the extended block in the reference frame. The test is
  // conservative: the far edge is taken as min + size + filter_size and
  // compared with >=.
  int min_y = ypos - half_filter_size + off_y + mv_y;
  int max_y = min_y + height + filter_size;
  int out_of_bounds_y = (min_y < 0) || (max_y >= ref_height);

  int min_x = xpos - half_filter_size + off_x + mv_x;
  int max_x = min_x + width + filter_size;
  int out_of_bounds_x = (min_x < 0) || (max_x >= ref_width);

  if (!out_of_bounds_y && !out_of_bounds_x) {
    // Whole area is inside the frame: reference it in place. The pointer is
    // formed only here, after the bounds test, so that no pointer outside
    // of ref is ever computed.
    out->buffer = ref + min_y * ref_width + min_x;
    out->stride = ref_width;
    out->orig_topleft = out->buffer + out->stride * half_filter_size + half_filter_size;
    out->malloc_used = 0;
    return;
  }

  // Part of the area is outside the frame: build a padded copy.
  out->buffer = MALLOC(kvz_pixel, (width + filter_size) * (height + filter_size));
  if (!out->buffer) {
    fprintf(stderr, "Memory allocation failed!\n");
    assert(0);

    // Reached only when assert is compiled out. Leave out in a well-defined
    // empty state rather than copying into a NULL buffer.
    out->stride = 0;
    out->orig_topleft = NULL;
    out->malloc_used = 0;
    return;
  }
  out->stride = width + filter_size;
  out->orig_topleft = out->buffer + out->stride * half_filter_size + half_filter_size;
  out->malloc_used = 1;

  int dst_y = 0;
  for (int y = ypos - half_filter_size; y < ((ypos + height)) + half_filter_size; dst_y++, y++) {

    // Offset of the (clamped) source row in ref.
    int coord_y = y + off_y + mv_y;
    coord_y = CLIP(0, (ref_height)-1, coord_y);
    coord_y *= ref_width;

    if (!out_of_bounds_x) {
      // Only y is out of bounds: the whole row can be copied at once.
      memcpy(&out->buffer[dst_y * out->stride + 0], &ref[coord_y + min_x], out->stride * sizeof(kvz_pixel));
    } else {
      int dst_x = 0;
      for (int x = (xpos)-half_filter_size; x < ((xpos + width)) + half_filter_size; dst_x++, x++) {

        int coord_x = x + off_x + mv_x;
        coord_x = CLIP(0, (ref_width)-1, coord_x);

        // Store source block data (with extended borders)
        out->buffer[dst_y * out->stride + dst_x] = ref[coord_y + coord_x];
      }
    }
  }
}
|
|
|
|
|
2015-08-26 08:50:27 +00:00
|
|
|
int kvz_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth)
|
2014-11-20 16:38:54 +00:00
|
|
|
{
|
|
|
|
bool success = true;
|
|
|
|
|
2018-11-04 19:04:17 +00:00
|
|
|
success &= kvz_strategyselector_register(opaque, "filter_hpel_blocks_hor_ver_luma", "generic", 0, &kvz_filter_hpel_blocks_hor_ver_luma_generic);
|
|
|
|
success &= kvz_strategyselector_register(opaque, "filter_hpel_blocks_diag_luma", "generic", 0, &kvz_filter_hpel_blocks_diag_luma_generic);
|
|
|
|
success &= kvz_strategyselector_register(opaque, "filter_qpel_blocks_hor_ver_luma", "generic", 0, &kvz_filter_qpel_blocks_hor_ver_luma_generic);
|
|
|
|
success &= kvz_strategyselector_register(opaque, "filter_qpel_blocks_diag_luma", "generic", 0, &kvz_filter_qpel_blocks_diag_luma_generic);
|
2016-04-06 16:55:42 +00:00
|
|
|
success &= kvz_strategyselector_register(opaque, "sample_quarterpel_luma", "generic", 0, &kvz_sample_quarterpel_luma_generic);
|
|
|
|
success &= kvz_strategyselector_register(opaque, "sample_octpel_chroma", "generic", 0, &kvz_sample_octpel_chroma_generic);
|
|
|
|
success &= kvz_strategyselector_register(opaque, "sample_14bit_quarterpel_luma", "generic", 0, &kvz_sample_14bit_quarterpel_luma_generic);
|
|
|
|
success &= kvz_strategyselector_register(opaque, "sample_14bit_octpel_chroma", "generic", 0, &kvz_sample_14bit_octpel_chroma_generic);
|
2015-08-26 08:50:27 +00:00
|
|
|
success &= kvz_strategyselector_register(opaque, "get_extended_block", "generic", 0, &kvz_get_extended_block_generic);
|
2014-11-20 16:38:54 +00:00
|
|
|
|
|
|
|
return success;
|
|
|
|
}
|