mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
Fixes and comments for inter_recon and filter_inter_halfpel_chroma
This commit is contained in:
parent
13e058abce
commit
3d228278ef
33
src/filter.c
33
src/filter.c
|
@ -377,9 +377,9 @@ void filter_inter_halfpel_chroma(int16_t *src, int16_t src_stride, int width, in
|
|||
* | B0,0|ae0,0|
|
||||
* |ea0,0|ee0,0|
|
||||
*
|
||||
* ae0,0 = ( -4 * B-1,0 + 36 * B0,0 + 36 * B1,0 - 4 * B2,0 ) >> shift1
|
||||
* ea0,0 = ( -4 * B0,-1 + 36 * B0,0 + 36 * B0,1 - 4 * B0,2 ) >> shift1
|
||||
* ee0,0 = ( -4 * ae0,-1 + 36 * ae0,0 + 36 * ae0,1 - 4 * ae0,2 ) >> shift2
|
||||
* ae0,0 = (-4*B-1,0 + 36*B0,0 + 36*B1,0 - 4*B2,0) >> shift1
|
||||
* ea0,0 = (-4*B0,-1 + 36*B0,0 + 36*B0,1 - 4*B0,2) >> shift1
|
||||
* ee0,0 = (-4*ae0,-1 + 36*ae0,0 + 36*ae0,1 - 4*ae0,2) >> shift2
|
||||
*/
|
||||
|
||||
int32_t x, y;
|
||||
|
@ -398,30 +398,33 @@ void filter_inter_halfpel_chroma(int16_t *src, int16_t src_stride, int width, in
|
|||
int src_pos = src_pos_y+x;
|
||||
|
||||
// Temporary variables..
|
||||
int32_t ae_temp1,ae_temp2,ae_temp3;
|
||||
int32_t ae_temp1, ae_temp2, ae_temp3;
|
||||
|
||||
// Original pixel (not really needed)
|
||||
//dst[dst_pos] = src[src_pos]; //B0,0
|
||||
dst[dst_pos] = src[src_pos]; //B0,0
|
||||
|
||||
// We need this only when hor_flag and for ee0,0
|
||||
// ae0,0 - We need this only when hor_flag and for ee0,0
|
||||
if (hor_flag) {
|
||||
dst[dst_pos + 1] = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; //ae0,0
|
||||
} else if(ver_flag) { // This one only needed if ver_flag and !hor_flag
|
||||
dst[dst_pos + 1*dst_stride] = ((-4*src[src_pos - src_stride] + 36*src[src_pos] + 36*src[src_pos + src_stride] - 4*src[src_pos + 2*src_stride] ) >> shift1) >> shift3; //ea0,0
|
||||
dst[dst_pos + 1] = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; // ae0,0
|
||||
}
|
||||
// ea0,0 - needed only when ver_flag
|
||||
if(ver_flag) {
|
||||
dst[dst_pos + 1*dst_stride] = ((-4*src[src_pos - src_stride] + 36*src[src_pos] + 36*src[src_pos + src_stride]
|
||||
- 4*src[src_pos + 2*src_stride] ) >> shift1) >> shift3; // ea0,0
|
||||
}
|
||||
|
||||
// When both flags, we _only_ use this pixel (but still need ae0,0 for it)
|
||||
if (hor_flag && ver_flag) {
|
||||
// When both flags, we use _only_ this pixel (but still need ae0,0 for it)
|
||||
if (hor_flag && ver_flag) {
|
||||
// Calculate temporary values..
|
||||
//TODO: optimization, store these values
|
||||
src_pos -= src_stride; //0,-1
|
||||
ae_temp1 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; //ae0,-1
|
||||
ae_temp1 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; // ae0,-1
|
||||
src_pos += src_stride; //0,1
|
||||
ae_temp2 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; //ae0,1
|
||||
ae_temp2 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; // ae0,1
|
||||
src_pos += src_stride; //0,2
|
||||
ae_temp2 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; //ae0,2
|
||||
ae_temp3 = ((-4*src[src_pos - 1] + 36*src[src_pos] + 36*src[src_pos + 1] - 4*src[src_pos + 2]) >> shift1) >> shift3; // ae0,2
|
||||
|
||||
dst[dst_pos + 1*dst_stride + 1] = (( -4*ae_temp1 + 36*dst[dst_pos + 1] + 36*ae_temp2 - 4*ae_temp2) >> shift2); //ee0,0
|
||||
dst[dst_pos + 1*dst_stride + 1] = (( -4*ae_temp1 + 36*dst[dst_pos + 1] + 36*ae_temp2 - 4*ae_temp3) >> shift2); // ee0,0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
36
src/inter.c
36
src/inter.c
|
@ -72,18 +72,18 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, int16_t
|
|||
int8_t overflow_neg_y = (ypos + (mv[1]>>2) < 0)?1:0;
|
||||
|
||||
// positive overflow flag
|
||||
int8_t overflow_pos_x = (xpos + (mv[0]>>2) + width >= ref->width )?1:0;
|
||||
int8_t overflow_pos_y = (ypos + (mv[1]>>2) + width >= ref->height)?1:0;
|
||||
int8_t overflow_pos_x = (xpos + (mv[0]>>2) + width > ref->width )?1:0;
|
||||
int8_t overflow_pos_y = (ypos + (mv[1]>>2) + width > ref->height)?1:0;
|
||||
|
||||
// Chroma half-pel
|
||||
#define HALFPEL_CHROMA_WIDTH ((LCU_WIDTH>>1) + 8)
|
||||
int8_t chroma_halfpel = ((mv[0]>>2)&1) | ((mv[1]>>2)&1); //!< third lsb is set -> chroma is half-pel
|
||||
int16_t halfpel_src_u[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH];
|
||||
int16_t halfpel_src_v[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH];
|
||||
int16_t *halfpel_src_off_u = &halfpel_src_u[HALFPEL_CHROMA_WIDTH*4 + 4]; //!< halfpel_src_u with offset (-4,-4)
|
||||
int16_t *halfpel_src_off_v = &halfpel_src_v[HALFPEL_CHROMA_WIDTH*4 + 4]; //!< halfpel_src_v with offset (-4,-4)
|
||||
int16_t halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2X x 2Y block (u)
|
||||
int16_t halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2X x 2Y block (v)
|
||||
int8_t chroma_halfpel = ((mv[0]>>2)&1) | ((mv[1]>>2)&1); //!< (luma integer mv) lsb is set -> chroma is half-pel
|
||||
int16_t halfpel_src_u[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< U source block for interpolation
|
||||
int16_t halfpel_src_v[HALFPEL_CHROMA_WIDTH * HALFPEL_CHROMA_WIDTH]; //!< V source block for interpolation
|
||||
int16_t *halfpel_src_off_u = &halfpel_src_u[HALFPEL_CHROMA_WIDTH*4 + 4]; //!< halfpel_src_u with offset (4,4)
|
||||
int16_t *halfpel_src_off_v = &halfpel_src_v[HALFPEL_CHROMA_WIDTH*4 + 4]; //!< halfpel_src_v with offset (4,4)
|
||||
int16_t halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (u)
|
||||
int16_t halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v)
|
||||
|
||||
// TODO: Fractional pixel support
|
||||
mv[0] = mv[0]>>2;
|
||||
|
@ -92,19 +92,21 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, int16_t
|
|||
// Chroma half-pel
|
||||
// get half-pel interpolated block and push it to output
|
||||
if(chroma_halfpel) {
|
||||
int halfpel_y,halfpel_x;
|
||||
|
||||
int halfpel_y, halfpel_x;
|
||||
int abs_mv_x = mv[0]&1;
|
||||
int abs_mv_y = mv[1]&1;
|
||||
|
||||
// Fill source blocks with data from reference, -4...width+4
|
||||
for (halfpel_y = 0, y = (ypos>>1) - 4; y < ((ypos + width)>>1) + 4; halfpel_y++, y++) {
|
||||
// calculate y-pixel offset
|
||||
coord_y = (y + (mv[1]>>1));
|
||||
coord_y = y + (mv[1]>>1);
|
||||
|
||||
// On y-overflow set coord_y accordingly
|
||||
overflow_neg_y = (coord_y < 0)?1:0;
|
||||
overflow_pos_y = (coord_y >= ref->height>>1)?1:0;
|
||||
if (overflow_neg_y) coord_y = 0;
|
||||
overflow_neg_y = (coord_y < 0) ? 1 : 0;
|
||||
overflow_pos_y = (coord_y >= ref->height>>1) ? 1 : 0;
|
||||
if (overflow_neg_y) coord_y = 0;
|
||||
else if (overflow_pos_y) coord_y = ((ref->height>>1) - 1);
|
||||
|
||||
coord_y *= ref_width_c;
|
||||
|
||||
for (halfpel_x = 0, x = (xpos>>1) - 4; x < ((xpos + width)>>1) + 4; halfpel_x++, x++) {
|
||||
|
@ -113,7 +115,7 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, int16_t
|
|||
// On x-overflow set coord_x accordingly
|
||||
overflow_neg_x = (coord_x < 0) ? 1 : 0;
|
||||
overflow_pos_x = (coord_x >= ref->width>>1) ? 1 : 0;
|
||||
if (overflow_neg_x) coord_x = 0;
|
||||
if (overflow_neg_x) coord_x = 0;
|
||||
else if (overflow_pos_x) coord_x = (ref->width>>1) - 1;
|
||||
|
||||
// Store source block data (with extended borders)
|
||||
|
@ -126,7 +128,7 @@ void inter_recon(picture* ref,int32_t xpos, int32_t ypos,int32_t width, int16_t
|
|||
filter_inter_halfpel_chroma(halfpel_src_off_u, HALFPEL_CHROMA_WIDTH, width>>1, width>>1, halfpel_u, LCU_WIDTH, abs_mv_x, abs_mv_y);
|
||||
filter_inter_halfpel_chroma(halfpel_src_off_v, HALFPEL_CHROMA_WIDTH, width>>1, width>>1, halfpel_v, LCU_WIDTH, abs_mv_x, abs_mv_y);
|
||||
|
||||
// Assign filtered pixels to output
|
||||
// Assign filtered pixels to output, take every second half-pel sample with offset of abs_mv_y/x
|
||||
for (halfpel_y = abs_mv_y, y = ypos>>1; y < (ypos + width)>>1; halfpel_y += 2, y++) {
|
||||
for (halfpel_x = abs_mv_x, x = xpos>>1; x < (xpos + width)>>1; halfpel_x += 2, x++) {
|
||||
dst->u_recdata[y*dst_width_c + x] = (uint8_t)halfpel_u[halfpel_y*LCU_WIDTH + halfpel_x];
|
||||
|
|
Loading…
Reference in a new issue