mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Changes to extend border function.
Now outputs a pointer to a block with guaranteed padding for filtering. Only generate extra pixels if samples are needed out of bounds. Use memcpy otherwise.
This commit is contained in:
parent
4dcc0d876d
commit
d82582c37c
48
src/inter.c
48
src/inter.c
|
@ -72,14 +72,15 @@ void inter_recon_frac_luma(const encoder_state_t * const state, const kvz_pictur
|
|||
#define FILTER_SIZE_Y 8 //Luma filter size
|
||||
|
||||
// Fractional luma 1/4-pel
|
||||
kvz_pixel qpel_src_y[(LCU_WIDTH + FILTER_SIZE_Y) * (LCU_WIDTH + FILTER_SIZE_Y)];
|
||||
kvz_pixel* qpel_src_off_y = &qpel_src_y[(block_width + FILTER_SIZE_Y)*(FILTER_SIZE_Y >> 1) + (FILTER_SIZE_Y >> 1)];
|
||||
extended_block src = {0, 0, 0};
|
||||
|
||||
// Fractional luma
|
||||
extend_borders(xpos, ypos, mv_param[0] >> 2, mv_param[1] >> 2, state->tile->lcu_offset_x * LCU_WIDTH, state->tile->lcu_offset_y * LCU_WIDTH,
|
||||
ref->y, ref->width, ref->height, FILTER_SIZE_Y, block_width, block_width, qpel_src_y);
|
||||
sample_quarterpel_luma_generic(state->encoder_control, qpel_src_off_y, block_width + FILTER_SIZE_Y, block_width,
|
||||
ref->y, ref->width, ref->height, FILTER_SIZE_Y, block_width, block_width, &src);
|
||||
sample_quarterpel_luma_generic(state->encoder_control, src.orig_topleft, src.stride, block_width,
|
||||
block_width, lcu->rec.y + (ypos%LCU_WIDTH)*LCU_WIDTH + (xpos%LCU_WIDTH), LCU_WIDTH, mv_frac_x, mv_frac_y, mv_param);
|
||||
|
||||
if (src.malloc_used) free(src.buffer);
|
||||
}
|
||||
|
||||
void inter_recon_14bit_frac_luma(const encoder_state_t * const state, const kvz_picture * const ref, int32_t xpos, int32_t ypos, int32_t block_width, const int16_t mv_param[2], hi_prec_buf_t *hi_prec_out)
|
||||
|
@ -90,14 +91,15 @@ void inter_recon_14bit_frac_luma(const encoder_state_t * const state, const kvz_
|
|||
#define FILTER_SIZE_Y 8 //Luma filter size
|
||||
|
||||
// Fractional luma 1/4-pel
|
||||
kvz_pixel qpel_src_y[(LCU_WIDTH + FILTER_SIZE_Y) * (LCU_WIDTH + FILTER_SIZE_Y)];
|
||||
kvz_pixel* qpel_src_off_y = &qpel_src_y[(block_width + FILTER_SIZE_Y)*(FILTER_SIZE_Y >> 1) + (FILTER_SIZE_Y >> 1)];
|
||||
extended_block src = {0, 0, 0};
|
||||
|
||||
// Fractional luma
|
||||
extend_borders(xpos, ypos, mv_param[0] >> 2, mv_param[1] >> 2, state->tile->lcu_offset_x * LCU_WIDTH, state->tile->lcu_offset_y * LCU_WIDTH,
|
||||
ref->y, ref->width, ref->height, FILTER_SIZE_Y, block_width, block_width, qpel_src_y);
|
||||
sample_14bit_quarterpel_luma_generic(state->encoder_control, qpel_src_off_y, block_width + FILTER_SIZE_Y, block_width,
|
||||
ref->y, ref->width, ref->height, FILTER_SIZE_Y, block_width, block_width, &src);
|
||||
sample_14bit_quarterpel_luma_generic(state->encoder_control, src.orig_topleft, src.stride, block_width,
|
||||
block_width, hi_prec_out->y + (ypos%LCU_WIDTH)*LCU_WIDTH + (xpos%LCU_WIDTH), LCU_WIDTH, mv_frac_x, mv_frac_y, mv_param);
|
||||
|
||||
if (src.malloc_used) free(src.buffer);
|
||||
}
|
||||
|
||||
void inter_recon_frac_chroma(const encoder_state_t * const state, const kvz_picture * const ref, int32_t xpos, int32_t ypos, int32_t block_width, const int16_t mv_param[2], lcu_t *lcu)
|
||||
|
@ -113,20 +115,23 @@ void inter_recon_frac_chroma(const encoder_state_t * const state, const kvz_pict
|
|||
#define FILTER_SIZE_C 4 //Chroma filter size
|
||||
|
||||
// Fractional chroma 1/8-pel
|
||||
kvz_pixel octpel_src[((LCU_WIDTH_C) + FILTER_SIZE_C) * ((LCU_WIDTH_C) + FILTER_SIZE_C)];
|
||||
kvz_pixel* octpel_src_off = &octpel_src[(block_width + FILTER_SIZE_C)*(FILTER_SIZE_C >> 1) + (FILTER_SIZE_C >> 1)];
|
||||
extended_block src_u = { 0, 0, 0 };
|
||||
extended_block src_v = { 0, 0, 0 };
|
||||
|
||||
//Fractional chroma U
|
||||
extend_borders(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C,
|
||||
ref->u, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, octpel_src);
|
||||
sample_octpel_chroma_generic(state->encoder_control, octpel_src_off, block_width + FILTER_SIZE_C, block_width,
|
||||
ref->u, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, &src_u);
|
||||
sample_octpel_chroma_generic(state->encoder_control, src_u.orig_topleft, src_u.stride, block_width,
|
||||
block_width, lcu->rec.u + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);
|
||||
|
||||
//Fractional chroma V
|
||||
extend_borders(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C,
|
||||
ref->v, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, octpel_src);
|
||||
sample_octpel_chroma_generic(state->encoder_control, octpel_src_off, block_width + FILTER_SIZE_C, block_width,
|
||||
ref->v, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, &src_v);
|
||||
// The V plane must be sampled with its own extended block's stride; src_u.stride
// only matches when both planes happened to take the same in-bounds/out-of-bounds path.
sample_octpel_chroma_generic(state->encoder_control, src_v.orig_topleft, src_v.stride, block_width,
block_width, lcu->rec.v + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);
|
||||
|
||||
if (src_u.malloc_used) free(src_u.buffer);
|
||||
if (src_v.malloc_used) free(src_v.buffer);
|
||||
}
|
||||
|
||||
void inter_recon_14bit_frac_chroma(const encoder_state_t * const state, const kvz_picture * const ref, int32_t xpos, int32_t ypos, int32_t block_width, const int16_t mv_param[2], hi_prec_buf_t *hi_prec_out)
|
||||
|
@ -142,20 +147,23 @@ void inter_recon_14bit_frac_chroma(const encoder_state_t * const state, const kv
|
|||
#define FILTER_SIZE_C 4 //Chroma filter size
|
||||
|
||||
// Fractional chroma 1/8-pel
|
||||
kvz_pixel octpel_src[((LCU_WIDTH_C)+FILTER_SIZE_C) * ((LCU_WIDTH_C)+FILTER_SIZE_C)];
|
||||
kvz_pixel* octpel_src_off = &octpel_src[(block_width + FILTER_SIZE_C)*(FILTER_SIZE_C >> 1) + (FILTER_SIZE_C >> 1)];
|
||||
extended_block src_u = {0, 0, 0};
|
||||
extended_block src_v = { 0, 0, 0 };
|
||||
|
||||
//Fractional chroma U
|
||||
extend_borders(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C,
|
||||
ref->u, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, octpel_src);
|
||||
sample_14bit_octpel_chroma_generic(state->encoder_control, octpel_src_off, block_width + FILTER_SIZE_C, block_width,
|
||||
ref->u, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, &src_u);
|
||||
sample_14bit_octpel_chroma_generic(state->encoder_control, src_u.orig_topleft, src_u.stride, block_width,
|
||||
block_width, hi_prec_out->u + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);
|
||||
|
||||
//Fractional chroma V
|
||||
extend_borders(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C,
|
||||
ref->v, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, octpel_src);
|
||||
sample_14bit_octpel_chroma_generic(state->encoder_control, octpel_src_off, block_width + FILTER_SIZE_C, block_width,
|
||||
ref->v, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_width, &src_v);
|
||||
sample_14bit_octpel_chroma_generic(state->encoder_control, src_v.orig_topleft, src_v.stride, block_width,
|
||||
block_width, hi_prec_out->v + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param);
|
||||
|
||||
if (src_u.malloc_used) free(src_u.buffer);
|
||||
if (src_v.malloc_used) free(src_v.buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -815,10 +815,7 @@ static unsigned search_frac(const encoder_state_t * const state,
|
|||
#define FILTER_SIZE 8
|
||||
#define HALF_FILTER (FILTER_SIZE>>1)
|
||||
|
||||
//create buffer for block + extra for filter
|
||||
int src_stride = block_width+FILTER_SIZE+1;
|
||||
kvz_pixel src[(LCU_WIDTH+FILTER_SIZE+1) * (LCU_WIDTH+FILTER_SIZE+1)];
|
||||
kvz_pixel* src_off = &src[HALF_FILTER+HALF_FILTER*(block_width+FILTER_SIZE+1)];
|
||||
extended_block src = {0, 0, 0};
|
||||
|
||||
//destination buffer for interpolation
|
||||
int dst_stride = (block_width+1)*4;
|
||||
|
@ -828,11 +825,12 @@ static unsigned search_frac(const encoder_state_t * const state,
|
|||
extend_borders(orig->x, orig->y, mv.x-1, mv.y-1,
|
||||
state->tile->lcu_offset_x * LCU_WIDTH,
|
||||
state->tile->lcu_offset_y * LCU_WIDTH,
|
||||
ref->y, ref->width, ref->height, FILTER_SIZE, block_width+1, block_width+1, src);
|
||||
ref->y, ref->width, ref->height, FILTER_SIZE, block_width+1, block_width+1, &src);
|
||||
|
||||
filter_inter_quarterpel_luma(state->encoder_control, src_off, src_stride, block_width+1,
|
||||
filter_inter_quarterpel_luma(state->encoder_control, src.orig_topleft, src.stride, block_width+1,
|
||||
block_width+1, dst, dst_stride, 1, 1);
|
||||
|
||||
if (src.malloc_used) free(src.buffer);
|
||||
|
||||
//Set mv to half-pixel precision
|
||||
mv.x <<= 1;
|
||||
|
|
|
@ -482,37 +482,52 @@ void filter_inter_octpel_chroma_avx2(const encoder_control_t * const encoder, kv
|
|||
}
|
||||
|
||||
void extend_borders_avx2(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
|
||||
int filterSize, int width, int height, kvz_pixel *dst) {
|
||||
int filterSize, int width, int height, extended_block *out) {
|
||||
|
||||
int16_t mv[2] = { mv_x, mv_y };
|
||||
int halfFilterSize = filterSize >> 1;
|
||||
|
||||
int dst_y; int y; int dst_x; int x; int coord_x; int coord_y;
|
||||
int8_t overflow_neg_y_temp, overflow_pos_y_temp, overflow_neg_x_temp, overflow_pos_x_temp;
|
||||
out->buffer = ref + (ypos - halfFilterSize + off_y + mv_y) * ref_width + (xpos - halfFilterSize + off_x + mv_x);
|
||||
out->stride = ref_width;
|
||||
out->orig_topleft = out->buffer + out->stride * halfFilterSize + halfFilterSize;
|
||||
out->malloc_used = 0;
|
||||
|
||||
for (dst_y = 0, y = ypos - halfFilterSize; y < ((ypos + height)) + halfFilterSize; dst_y++, y++) {
|
||||
int min_y = ypos - halfFilterSize + off_y + mv_y;
|
||||
int max_y = min_y + height + filterSize;
|
||||
int out_of_bounds_y = (min_y < 0) || (max_y >= ref_height);
|
||||
|
||||
// calculate y-pixel offset
|
||||
coord_y = y + off_y + mv[1];
|
||||
int min_x = xpos - halfFilterSize + off_x + mv_x;
|
||||
int max_x = min_x + width + filterSize;
|
||||
int out_of_bounds_x = (min_x < 0) || (max_x >= ref_width);
|
||||
|
||||
// On y-overflow set coord_y accordingly
|
||||
overflow_neg_y_temp = (coord_y < 0) ? 1 : 0;
|
||||
overflow_pos_y_temp = (coord_y >= ref_height) ? 1 : 0;
|
||||
if (overflow_neg_y_temp) coord_y = 0;
|
||||
else if (overflow_pos_y_temp) coord_y = (ref_height)-1;
|
||||
coord_y *= ref_width;
|
||||
int sample_out_of_bounds = out_of_bounds_y || out_of_bounds_x;
|
||||
|
||||
for (dst_x = 0, x = (xpos)-halfFilterSize; x < ((xpos + width)) + halfFilterSize; dst_x++, x++) {
|
||||
coord_x = x + off_x + mv[0];
|
||||
if (sample_out_of_bounds){
|
||||
// The fill loop writes (height + filterSize) rows of (width + filterSize) pixels,
// so the allocation must use height for the row count, not width.
out->buffer = MALLOC(kvz_pixel, (width + filterSize) * (height + filterSize));
|
||||
out->stride = width + filterSize;
|
||||
out->orig_topleft = out->buffer + out->stride * halfFilterSize + halfFilterSize;
|
||||
out->malloc_used = 1;
|
||||
|
||||
// On x-overflow set coord_x accordingly
|
||||
overflow_neg_x_temp = (coord_x < 0) ? 1 : 0;
|
||||
overflow_pos_x_temp = (coord_x >= ref_width) ? 1 : 0;
|
||||
if (overflow_neg_x_temp) coord_x = 0;
|
||||
else if (overflow_pos_x_temp) coord_x = ref_width - 1;
|
||||
int dst_y; int y; int dst_x; int x; int coord_x; int coord_y;
|
||||
|
||||
// Store source block data (with extended borders)
|
||||
dst[dst_y*(width + filterSize) + dst_x] = ref[coord_y + coord_x];
|
||||
for (dst_y = 0, y = ypos - halfFilterSize; y < ((ypos + height)) + halfFilterSize; dst_y++, y++) {
|
||||
|
||||
// calculate y-pixel offset
|
||||
coord_y = y + off_y + mv_y;
|
||||
coord_y = CLIP(0, (ref_height)-1, coord_y);
|
||||
coord_y *= ref_width;
|
||||
|
||||
if (!out_of_bounds_x){
|
||||
memcpy(&out->buffer[dst_y*(width + filterSize) + 0], &ref[coord_y + min_x], (width + filterSize) * sizeof(kvz_pixel));
|
||||
} else {
|
||||
for (dst_x = 0, x = (xpos)-halfFilterSize; x < ((xpos + width)) + halfFilterSize; dst_x++, x++) {
|
||||
|
||||
coord_x = x + off_x + mv_x;
|
||||
coord_x = CLIP(0, (ref_width)-1, coord_x);
|
||||
|
||||
// Store source block data (with extended borders)
|
||||
out->buffer[dst_y*(width + filterSize) + dst_x] = ref[coord_y + coord_x];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -481,40 +481,56 @@ void sample_14bit_octpel_chroma_generic(const encoder_control_t * const encoder,
|
|||
}
|
||||
}
|
||||
|
||||
void extend_borders_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
|
||||
int filterSize, int width, int height, kvz_pixel *dst) {
|
||||
|
||||
int16_t mv[2] = { mv_x, mv_y };
|
||||
void extend_borders_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
|
||||
int filterSize, int width, int height, extended_block *out) {
|
||||
|
||||
int halfFilterSize = filterSize >> 1;
|
||||
|
||||
int dst_y; int y; int dst_x; int x; int coord_x; int coord_y;
|
||||
int8_t overflow_neg_y_temp, overflow_pos_y_temp, overflow_neg_x_temp, overflow_pos_x_temp;
|
||||
out->buffer = ref + (ypos - halfFilterSize + off_y + mv_y) * ref_width + (xpos - halfFilterSize + off_x + mv_x);
|
||||
out->stride = ref_width;
|
||||
out->orig_topleft = out->buffer + out->stride * halfFilterSize + halfFilterSize;
|
||||
out->malloc_used = 0;
|
||||
|
||||
for (dst_y = 0, y = ypos - halfFilterSize; y < ((ypos + height)) + halfFilterSize; dst_y++, y++) {
|
||||
int min_y = ypos - halfFilterSize + off_y + mv_y;
|
||||
int max_y = min_y + height + filterSize;
|
||||
int out_of_bounds_y = (min_y < 0) || (max_y >= ref_height);
|
||||
|
||||
// calculate y-pixel offset
|
||||
coord_y = y + off_y + mv[1];
|
||||
int min_x = xpos - halfFilterSize + off_x + mv_x;
|
||||
int max_x = min_x + width + filterSize;
|
||||
int out_of_bounds_x = (min_x < 0) || (max_x >= ref_width);
|
||||
|
||||
// On y-overflow set coord_y accordingly
|
||||
overflow_neg_y_temp = (coord_y < 0) ? 1 : 0;
|
||||
overflow_pos_y_temp = (coord_y >= ref_height) ? 1 : 0;
|
||||
if (overflow_neg_y_temp) coord_y = 0;
|
||||
else if (overflow_pos_y_temp) coord_y = (ref_height)-1;
|
||||
coord_y *= ref_width;
|
||||
int sample_out_of_bounds = out_of_bounds_y || out_of_bounds_x;
|
||||
|
||||
for (dst_x = 0, x = (xpos)-halfFilterSize; x < ((xpos + width)) + halfFilterSize; dst_x++, x++) {
|
||||
coord_x = x + off_x + mv[0];
|
||||
if (sample_out_of_bounds){
|
||||
// The fill loop writes (height + filterSize) rows of (width + filterSize) pixels,
// so the allocation must use height for the row count, not width.
out->buffer = MALLOC(kvz_pixel, (width + filterSize) * (height + filterSize));
|
||||
out->stride = width + filterSize;
|
||||
out->orig_topleft = out->buffer + out->stride * halfFilterSize + halfFilterSize;
|
||||
out->malloc_used = 1;
|
||||
|
||||
// On x-overflow set coord_x accordingly
|
||||
overflow_neg_x_temp = (coord_x < 0) ? 1 : 0;
|
||||
overflow_pos_x_temp = (coord_x >= ref_width) ? 1 : 0;
|
||||
if (overflow_neg_x_temp) coord_x = 0;
|
||||
else if (overflow_pos_x_temp) coord_x = ref_width - 1;
|
||||
int dst_y; int y; int dst_x; int x; int coord_x; int coord_y;
|
||||
|
||||
// Store source block data (with extended borders)
|
||||
dst[dst_y*(width + filterSize) + dst_x] = ref[coord_y + coord_x];
|
||||
for (dst_y = 0, y = ypos - halfFilterSize; y < ((ypos + height)) + halfFilterSize; dst_y++, y++) {
|
||||
|
||||
// calculate y-pixel offset
|
||||
coord_y = y + off_y + mv_y;
|
||||
coord_y = CLIP(0, (ref_height)-1, coord_y);
|
||||
coord_y *= ref_width;
|
||||
|
||||
if (!out_of_bounds_x){
|
||||
memcpy(&out->buffer[dst_y*(width + filterSize) + 0], &ref[coord_y + min_x], (width + filterSize) * sizeof(kvz_pixel));
|
||||
} else {
|
||||
for (dst_x = 0, x = (xpos)-halfFilterSize; x < ((xpos + width)) + halfFilterSize; dst_x++, x++) {
|
||||
|
||||
coord_x = x + off_x + mv_x;
|
||||
coord_x = CLIP(0, (ref_width)-1, coord_x);
|
||||
|
||||
// Store source block data (with extended borders)
|
||||
out->buffer[dst_y*(width + filterSize) + dst_x] = ref[coord_y + coord_x];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -24,12 +24,13 @@
|
|||
|
||||
#include "encoder.h"
|
||||
|
||||
/**
 * Result of an extend_borders call: a view of a reference block plus filter padding.
 *
 * buffer       Top-left of the padded area. Either points directly into the
 *              reference plane or to a block obtained with MALLOC.
 * orig_topleft Top-left of the unpadded block, i.e.
 *              buffer + stride * halfFilterSize + halfFilterSize.
 * stride       Row pitch of buffer in pixels: the reference width when buffer
 *              points into the reference plane, width + filterSize when it was allocated.
 * malloc_used  Non-zero when buffer was allocated; the caller must then free(buffer).
 */
typedef struct { kvz_pixel *buffer; kvz_pixel *orig_topleft; unsigned stride; unsigned malloc_used; } extended_block;
|
||||
|
||||
typedef unsigned(ipol_func)(const encoder_control_t * encoder, kvz_pixel *src, int16_t src_stride, int width, int height, kvz_pixel *dst,
|
||||
int16_t dst_stride, int8_t hor_flag, int8_t ver_flag);
|
||||
|
||||
typedef unsigned(epol_func)(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, kvz_pixel *ref, int ref_width, int ref_height,
|
||||
int filterSize, int width, int height, kvz_pixel *dst);
|
||||
int filterSize, int width, int height, extended_block *out);
|
||||
|
||||
|
||||
// Declare function pointers.
|
||||
|
|
Loading…
Reference in a new issue