mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Add strategy for kvz_filter_frac_blocks_luma
This commit is contained in:
parent
e9c3074d32
commit
fccfbd2f28
|
@ -413,6 +413,219 @@ void kvz_filter_inter_octpel_chroma_generic(const encoder_control_t * const enco
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * \brief Produce the HPEL_POS_HOR and HPEL_POS_VER luma blocks for fractional search.
 *
 * Two passes are made. The first runs the 8-tap filters kvz_g_luma_filter[0]
 * and kvz_g_luma_filter[2] along every source row and stores the results
 * transposed, so that the second pass can reuse a "horizontal" 8-tap routine
 * to filter down the columns. The second pass rounds, shifts and clips each
 * sample and writes (width + 1) x (height + 1) samples per output block.
 *
 * \param encoder     Encoder control; not referenced by this function.
 * \param src         Top-left sample of the block. Reading starts FILTER_OFFSET
 *                    rows above and FILTER_OFFSET columns left of this position,
 *                    so the caller has to supply a padded source.
 * \param src_stride  Distance between consecutive rows of src, in samples.
 * \param width       Block width; width + 1 columns are written.
 * \param height      Block height; height + 1 rows are written.
 * \param filtered    Output blocks; each one is written with a row stride of
 *                    LCU_WIDTH + 1 samples.
 */
void kvz_filter_hpel_blocks_hor_ver_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, frac_search_block *filtered)
{
  // shift1 follows the row pass; shift2 and shift3 follow the column pass.
  const int16_t shift1 = KVZ_BIT_DEPTH - 8;
  const int32_t shift2 = 6;
  const int32_t shift3 = 14 - KVZ_BIT_DEPTH;
  // Rounding term: half of the combined column-pass divisor.
  const int32_t offset23 = 1 << (shift2 + shift3 - 1);

  int8_t *taps_0 = kvz_g_luma_filter[0];
  int8_t *taps_2 = kvz_g_luma_filter[2];

  // Row-filtered samples, stored transposed: (col, row) is at col * temp_stride + row.
  int16_t rows_f0[(LCU_WIDTH + 1) * (KVZ_EXT_BLOCK_W + 1)];
  int16_t rows_f2[(LCU_WIDTH + 1) * (KVZ_EXT_BLOCK_W + 1)];

  const int16_t temp_stride = height + KVZ_EXT_PADDING + 1;
  const int16_t dst_stride = (LCU_WIDTH + 1);

  const int out_cols = width + 1;
  const int out_rows = height + 1;
  const int padded_rows = height + KVZ_EXT_PADDING + 1;

  // Pass 1: filter along the source rows, writing the result column-major.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < padded_rows; ++row) {
      kvz_pixel *window = &src[src_stride * (row - FILTER_OFFSET) + (col - FILTER_OFFSET)];
      const int t = col * temp_stride + row;

      rows_f0[t] = kvz_eight_tap_filter_hor_generic(taps_0, window) >> shift1;
      rows_f2[t] = kvz_eight_tap_filter_hor_generic(taps_2, window) >> shift1;
    }
  }

  // Pass 2: filter down each transposed column and store in normal row-major order.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < out_rows; ++row) {
      const int t = col * temp_stride + row;
      const int d = row * dst_stride + col;

      filtered[HPEL_POS_HOR][d] = kvz_fast_clip_32bit_to_pixel(
        ((kvz_eight_tap_filter_hor_16bit_generic(taps_0, &rows_f2[t]) + offset23) >> shift2) >> shift3);
      filtered[HPEL_POS_VER][d] = kvz_fast_clip_32bit_to_pixel(
        ((kvz_eight_tap_filter_hor_16bit_generic(taps_2, &rows_f0[t]) + offset23) >> shift2) >> shift3);
    }
  }
}
|
||||
|
||||
/**
 * \brief Produce the HPEL_POS_HOR, HPEL_POS_VER and HPEL_POS_DIA luma blocks.
 *
 * The source rows are first filtered with kvz_g_luma_filter[0] and
 * kvz_g_luma_filter[2] into two transposed temporary buffers. Each output
 * sample is then obtained by filtering down a column of one of those buffers,
 * adding a rounding offset, shifting and clipping to the pixel range.
 * (width + 1) x (height + 1) samples are written to each output block.
 *
 * \param encoder     Encoder control; not referenced by this function.
 * \param src         Top-left sample of the block. Reading starts FILTER_OFFSET
 *                    rows above and FILTER_OFFSET columns left of this position,
 *                    so the caller has to supply a padded source.
 * \param src_stride  Distance between consecutive rows of src, in samples.
 * \param width       Block width; width + 1 columns are written.
 * \param height      Block height; height + 1 rows are written.
 * \param filtered    Output blocks; each one is written with a row stride of
 *                    LCU_WIDTH + 1 samples.
 */
void kvz_filter_hpel_blocks_full_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, frac_search_block *filtered)
{
  // shift1 follows the row pass; shift2 and shift3 follow the column pass.
  const int16_t shift1 = KVZ_BIT_DEPTH - 8;
  const int32_t shift2 = 6;
  const int32_t shift3 = 14 - KVZ_BIT_DEPTH;
  // Rounding term: half of the combined column-pass divisor.
  const int32_t offset23 = 1 << (shift2 + shift3 - 1);

  int8_t *taps_0 = kvz_g_luma_filter[0];
  int8_t *taps_2 = kvz_g_luma_filter[2];

  // Row-filtered samples, stored transposed: (col, row) is at col * temp_stride + row.
  int16_t rows_f0[(LCU_WIDTH + 1) * (KVZ_EXT_BLOCK_W + 1)];
  int16_t rows_f2[(LCU_WIDTH + 1) * (KVZ_EXT_BLOCK_W + 1)];

  const int16_t temp_stride = height + KVZ_EXT_PADDING + 1;
  const int16_t dst_stride = (LCU_WIDTH + 1);

  const int out_cols = width + 1;
  const int out_rows = height + 1;
  const int padded_rows = height + KVZ_EXT_PADDING + 1;

  // Pass 1: filter along the source rows, writing the result column-major.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < padded_rows; ++row) {
      kvz_pixel *window = &src[src_stride * (row - FILTER_OFFSET) + (col - FILTER_OFFSET)];
      const int t = col * temp_stride + row;

      rows_f0[t] = kvz_eight_tap_filter_hor_generic(taps_0, window) >> shift1;
      rows_f2[t] = kvz_eight_tap_filter_hor_generic(taps_2, window) >> shift1;
    }
  }

  // Pass 2: filter down each transposed column and store in normal row-major order.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < out_rows; ++row) {
      const int t = col * temp_stride + row;
      const int d = row * dst_stride + col;

      filtered[HPEL_POS_HOR][d] = kvz_fast_clip_32bit_to_pixel(
        ((kvz_eight_tap_filter_hor_16bit_generic(taps_0, &rows_f2[t]) + offset23) >> shift2) >> shift3);
      filtered[HPEL_POS_VER][d] = kvz_fast_clip_32bit_to_pixel(
        ((kvz_eight_tap_filter_hor_16bit_generic(taps_2, &rows_f0[t]) + offset23) >> shift2) >> shift3);
      filtered[HPEL_POS_DIA][d] = kvz_fast_clip_32bit_to_pixel(
        ((kvz_eight_tap_filter_hor_16bit_generic(taps_2, &rows_f2[t]) + offset23) >> shift2) >> shift3);
    }
  }
}
|
||||
|
||||
/**
 * \brief Produce output blocks 0..10: the three half-pel blocks plus the
 *        "horizontal" and "vertical" quarter-pel blocks.
 *
 * The source rows are filtered with all four kvz_g_luma_filter entries into
 * transposed temporary buffers. Each output block is then formed by running
 * one of the four filters down the columns of one of those buffers, adding a
 * rounding offset, shifting and clipping. The filter/buffer pairing of every
 * output block is listed in the two tables at the top of the function.
 * (width + 1) x (height + 1) samples are written to each output block.
 *
 * \param encoder     Encoder control; not referenced by this function.
 * \param src         Top-left sample of the block. Reading starts FILTER_OFFSET
 *                    rows above and FILTER_OFFSET columns left of this position,
 *                    so the caller has to supply a padded source.
 * \param src_stride  Distance between consecutive rows of src, in samples.
 * \param width       Block width; width + 1 columns are written.
 * \param height      Block height; height + 1 rows are written.
 * \param filtered    Output blocks; entries 0..10 are written with a row
 *                    stride of LCU_WIDTH + 1 samples.
 */
void kvz_filter_qpel_blocks_hor_ver_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, frac_search_block *filtered)
{
  // For output block i: col_filter[i] is the kvz_g_luma_filter index applied
  // down the columns, row_filter[i] is the index that was applied along the rows.
  static const int8_t col_filter[11] = {
    0, 2, 2,      // HPEL
    0, 0, 2, 2,   // QPEL, horizontal
    1, 1, 3, 3,   // QPEL, vertical
  };
  static const int8_t row_filter[11] = {
    2, 0, 2,      // HPEL
    1, 3, 1, 3,   // QPEL, horizontal
    0, 2, 0, 2,   // QPEL, vertical
  };
  const int num_blocks = (int)(sizeof col_filter / sizeof col_filter[0]);

  // shift1 follows the row pass; shift2 and shift3 follow the column pass.
  const int16_t shift1 = KVZ_BIT_DEPTH - 8;
  const int32_t shift2 = 6;
  const int32_t shift3 = 14 - KVZ_BIT_DEPTH;
  // Rounding term: half of the combined column-pass divisor.
  const int32_t offset23 = 1 << (shift2 + shift3 - 1);

  int8_t *taps[4] = {
    kvz_g_luma_filter[0],
    kvz_g_luma_filter[1],
    kvz_g_luma_filter[2],
    kvz_g_luma_filter[3],
  };

  // rows[f] holds the source filtered along its rows with taps[f], stored
  // transposed: (col, row) is at col * temp_stride + row.
  int16_t rows[4][(LCU_WIDTH + 1) * (KVZ_EXT_BLOCK_W + 1)];

  const int16_t temp_stride = height + KVZ_EXT_PADDING + 1;
  const int16_t dst_stride = (LCU_WIDTH + 1);

  const int out_cols = width + 1;
  const int out_rows = height + 1;
  const int padded_rows = height + KVZ_EXT_PADDING + 1;

  // Pass 1: filter along the source rows, writing the result column-major.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < padded_rows; ++row) {
      kvz_pixel *window = &src[src_stride * (row - FILTER_OFFSET) + (col - FILTER_OFFSET)];
      const int t = col * temp_stride + row;

      for (int f = 0; f < 4; ++f) {
        rows[f][t] = kvz_eight_tap_filter_hor_generic(taps[f], window) >> shift1;
      }
    }
  }

  // Pass 2: filter down each transposed column and store in normal row-major order.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < out_rows; ++row) {
      const int t = col * temp_stride + row;
      const int d = row * dst_stride + col;

      for (int i = 0; i < num_blocks; ++i) {
        int16_t *column = &rows[row_filter[i]][t];
        filtered[i][d] = kvz_fast_clip_32bit_to_pixel(
          ((kvz_eight_tap_filter_hor_16bit_generic(taps[col_filter[i]], column) + offset23) >> shift2) >> shift3);
      }
    }
  }
}
|
||||
|
||||
/**
 * \brief Produce all fifteen fractional luma blocks (output indices 0..14).
 *
 * The source rows are filtered with all four kvz_g_luma_filter entries into
 * transposed temporary buffers. Each output block is then formed by running
 * one of the four filters down the columns of one of those buffers, adding a
 * rounding offset, shifting and clipping. The filter/buffer pairing of every
 * output block is listed in the two tables at the top of the function.
 * (width + 1) x (height + 1) samples are written to each output block.
 *
 * \param encoder     Encoder control; not referenced by this function.
 * \param src         Top-left sample of the block. Reading starts FILTER_OFFSET
 *                    rows above and FILTER_OFFSET columns left of this position,
 *                    so the caller has to supply a padded source.
 * \param src_stride  Distance between consecutive rows of src, in samples.
 * \param width       Block width; width + 1 columns are written.
 * \param height      Block height; height + 1 rows are written.
 * \param filtered    Output blocks; entries 0..14 are written with a row
 *                    stride of LCU_WIDTH + 1 samples.
 */
void kvz_filter_qpel_blocks_full_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, frac_search_block *filtered)
{
  // For output block i: col_filter[i] is the kvz_g_luma_filter index applied
  // down the columns, row_filter[i] is the index that was applied along the rows.
  static const int8_t col_filter[15] = {
    0, 2, 2,      // HPEL
    0, 0, 2, 2,   // QPEL, horizontal
    1, 1, 3, 3,   // QPEL, vertical
    1, 1, 3, 3,   // QPEL, diagonal
  };
  static const int8_t row_filter[15] = {
    2, 0, 2,      // HPEL
    1, 3, 1, 3,   // QPEL, horizontal
    0, 2, 0, 2,   // QPEL, vertical
    1, 3, 1, 3,   // QPEL, diagonal
  };
  const int num_blocks = (int)(sizeof col_filter / sizeof col_filter[0]);

  // shift1 follows the row pass; shift2 and shift3 follow the column pass.
  const int16_t shift1 = KVZ_BIT_DEPTH - 8;
  const int32_t shift2 = 6;
  const int32_t shift3 = 14 - KVZ_BIT_DEPTH;
  // Rounding term: half of the combined column-pass divisor.
  const int32_t offset23 = 1 << (shift2 + shift3 - 1);

  int8_t *taps[4] = {
    kvz_g_luma_filter[0],
    kvz_g_luma_filter[1],
    kvz_g_luma_filter[2],
    kvz_g_luma_filter[3],
  };

  // rows[f] holds the source filtered along its rows with taps[f], stored
  // transposed: (col, row) is at col * temp_stride + row.
  int16_t rows[4][(LCU_WIDTH + 1) * (KVZ_EXT_BLOCK_W + 1)];

  const int16_t temp_stride = height + KVZ_EXT_PADDING + 1;
  const int16_t dst_stride = (LCU_WIDTH + 1);

  const int out_cols = width + 1;
  const int out_rows = height + 1;
  const int padded_rows = height + KVZ_EXT_PADDING + 1;

  // Pass 1: filter along the source rows, writing the result column-major.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < padded_rows; ++row) {
      kvz_pixel *window = &src[src_stride * (row - FILTER_OFFSET) + (col - FILTER_OFFSET)];
      const int t = col * temp_stride + row;

      for (int f = 0; f < 4; ++f) {
        rows[f][t] = kvz_eight_tap_filter_hor_generic(taps[f], window) >> shift1;
      }
    }
  }

  // Pass 2: filter down each transposed column and store in normal row-major order.
  for (int col = 0; col < out_cols; ++col) {
    for (int row = 0; row < out_rows; ++row) {
      const int t = col * temp_stride + row;
      const int d = row * dst_stride + col;

      for (int i = 0; i < num_blocks; ++i) {
        int16_t *column = &rows[row_filter[i]][t];
        filtered[i][d] = kvz_fast_clip_32bit_to_pixel(
          ((kvz_eight_tap_filter_hor_16bit_generic(taps[col_filter[i]], column) + offset23) >> shift2) >> shift3);
      }
    }
  }
}
|
||||
|
||||
/**
 * \brief Fill the fractional-pel luma search blocks selected by fme_level.
 *
 * fme_level 1 -> kvz_filter_hpel_blocks_hor_ver_luma_generic
 * fme_level 2 -> kvz_filter_hpel_blocks_full_luma_generic
 * fme_level 3 -> kvz_filter_qpel_blocks_hor_ver_luma_generic
 * any other   -> kvz_filter_qpel_blocks_full_luma_generic
 *
 * All remaining arguments are forwarded unchanged to the selected routine.
 *
 * NOTE(review): this function is declared void, while the
 * ipol_frac_blocks_func type it is registered under is declared as
 * returning unsigned -- confirm that this is intentional.
 *
 * \param encoder     Encoder control, forwarded as is.
 * \param src         Source samples, forwarded as is.
 * \param src_stride  Row stride of src, forwarded as is.
 * \param width       Block width, forwarded as is.
 * \param height      Block height, forwarded as is.
 * \param filtered    Array of output blocks, forwarded as is.
 * \param fme_level   Selects which filtering routine is run.
 */
void kvz_filter_frac_blocks_luma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height, frac_search_block filtered[15], int8_t fme_level)
{
  if (fme_level == 1) {
    kvz_filter_hpel_blocks_hor_ver_luma_generic(encoder, src, src_stride, width, height, filtered);
  } else if (fme_level == 2) {
    kvz_filter_hpel_blocks_full_luma_generic(encoder, src, src_stride, width, height, filtered);
  } else if (fme_level == 3) {
    kvz_filter_qpel_blocks_hor_ver_luma_generic(encoder, src, src_stride, width, height, filtered);
  } else {
    kvz_filter_qpel_blocks_full_luma_generic(encoder, src, src_stride, width, height, filtered);
  }
}
|
||||
|
||||
void kvz_sample_octpel_chroma_generic(const encoder_control_t * const encoder, kvz_pixel *src, int16_t src_stride, int width, int height,kvz_pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag, const int16_t mv[2])
|
||||
{
|
||||
//TODO: horizontal and vertical only filtering
|
||||
|
@ -544,6 +757,7 @@ int kvz_strategy_register_ipol_generic(void* opaque, uint8_t bitdepth)
|
|||
success &= kvz_strategyselector_register(opaque, "filter_inter_quarterpel_luma", "generic", 0, &kvz_filter_inter_quarterpel_luma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "filter_inter_halfpel_chroma", "generic", 0, &kvz_filter_inter_halfpel_chroma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "filter_inter_octpel_chroma", "generic", 0, &kvz_filter_inter_octpel_chroma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "filter_frac_blocks_luma", "generic", 0, &kvz_filter_frac_blocks_luma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_quarterpel_luma", "generic", 0, &kvz_sample_quarterpel_luma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_octpel_chroma", "generic", 0, &kvz_sample_octpel_chroma_generic);
|
||||
success &= kvz_strategyselector_register(opaque, "sample_14bit_quarterpel_luma", "generic", 0, &kvz_sample_14bit_quarterpel_luma_generic);
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
ipol_func *kvz_filter_inter_quarterpel_luma;
|
||||
ipol_func *kvz_filter_inter_halfpel_chroma;
|
||||
ipol_func *kvz_filter_inter_octpel_chroma;
|
||||
ipol_frac_blocks_func *kvz_filter_frac_blocks_luma;
|
||||
epol_func *kvz_get_extended_block;
|
||||
kvz_sample_quarterpel_luma_func * kvz_sample_quarterpel_luma;
|
||||
kvz_sample_octpel_chroma_func * kvz_sample_octpel_chroma;
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "encoder.h"
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
#include "kvazaar.h"
|
||||
#include "search_inter.h"
|
||||
|
||||
|
||||
typedef struct { kvz_pixel *buffer; kvz_pixel *orig_topleft; unsigned stride; unsigned malloc_used; } kvz_extended_block;
|
||||
|
@ -36,6 +37,9 @@ typedef struct { kvz_pixel *buffer; kvz_pixel *orig_topleft; unsigned stride; un
|
|||
typedef unsigned(ipol_func)(const encoder_control_t * encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst,
|
||||
int16_t dst_stride, int8_t hor_flag, int8_t ver_flag);
|
||||
|
||||
// Signature of a strategy that fills the fractional-pel luma search blocks.
// filtered_out is an array of 15 output blocks and fme_level selects which
// filtering routine is run.
// NOTE(review): the type is declared as returning unsigned, but the generic
// implementation registered under "filter_frac_blocks_luma" is declared void
// -- confirm that this is intentional.
typedef unsigned(ipol_frac_blocks_func)(const encoder_control_t * encoder, kvz_pixel *src, int16_t src_stride, int width, int height,
|
||||
frac_search_block filtered_out[15], int8_t fme_level);
|
||||
|
||||
typedef unsigned(epol_func)(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
|
||||
int filter_size, int width, int height, kvz_extended_block *out);
|
||||
|
||||
|
@ -49,6 +53,7 @@ typedef void(kvz_sample_14bit_octpel_chroma_func)(const encoder_control_t * cons
|
|||
extern ipol_func * kvz_filter_inter_quarterpel_luma;
|
||||
extern ipol_func * kvz_filter_inter_halfpel_chroma;
|
||||
extern ipol_func * kvz_filter_inter_octpel_chroma;
|
||||
extern ipol_frac_blocks_func *kvz_filter_frac_blocks_luma;
|
||||
extern epol_func * kvz_get_extended_block;
|
||||
extern kvz_sample_quarterpel_luma_func * kvz_sample_quarterpel_luma;
|
||||
extern kvz_sample_octpel_chroma_func * kvz_sample_octpel_chroma;
|
||||
|
@ -63,6 +68,7 @@ int kvz_strategy_register_ipol(void* opaque, uint8_t bitdepth);
|
|||
{"filter_inter_quarterpel_luma", (void**) &kvz_filter_inter_quarterpel_luma}, \
|
||||
{"filter_inter_halfpel_chroma", (void**) &kvz_filter_inter_halfpel_chroma}, \
|
||||
{"filter_inter_octpel_chroma", (void**) &kvz_filter_inter_octpel_chroma}, \
|
||||
{"filter_frac_blocks_luma", (void**) &kvz_filter_frac_blocks_luma}, \
|
||||
{"sample_quarterpel_luma", (void**) &kvz_sample_quarterpel_luma}, \
|
||||
{"sample_octpel_chroma", (void**) &kvz_sample_octpel_chroma}, \
|
||||
{"sample_14bit_quarterpel_luma", (void**) &kvz_sample_14bit_quarterpel_luma}, \
|
||||
|
|
Loading…
Reference in a new issue