Apply fast clipping also to chroma filtering.

This commit is contained in:
Ari Lemmetti 2015-01-21 18:19:32 +02:00
parent b4aab06073
commit 041d970ece

View file

@ -176,7 +176,6 @@ void filter_inter_halfpel_chroma_generic(const encoder_control * const encoder,
* ea0,0 = (-4*B0,-1 + 36*B0,0 + 36*B0,1 - 4*B0,2) >> shift1
* ee0,0 = (-4*ae0,-1 + 36*ae0,0 + 36*ae0,1 - 4*ae0,2) >> shift2
*/
int i = 0;
int32_t x, y;
int32_t shift1 = encoder->bitdepth - 8;
int32_t shift2 = 6;
@ -205,8 +204,8 @@ void filter_inter_halfpel_chroma_generic(const encoder_control * const encoder,
}
// ea0,0 - needed only when ver_flag
if (ver_flag) {
dst[dst_pos + 1 * dst_stride] = (((-4 * src[src_pos - src_stride] + 36 * src[src_pos] + 36 * src[src_pos + src_stride]
- 4 * src[src_pos + 2 * src_stride]) >> shift1) + (1 << (shift3 - 1))) >> shift3; // ea0,0
dst[dst_pos + 1 * dst_stride] = fast_clip_32bit_to_pixel((((-4 * src[src_pos - src_stride] + 36 * src[src_pos] + 36 * src[src_pos + src_stride]
- 4 * src[src_pos + 2 * src_stride]) >> shift1) + (1 << (shift3 - 1))) >> shift3); // ea0,0
}
// When both flags, we use _only_ this pixel (but still need ae0,0 for it)
@ -221,19 +220,14 @@ void filter_inter_halfpel_chroma_generic(const encoder_control * const encoder,
src_pos += src_stride; //0,2
ae_temp3 = ((-4 * src[src_pos - 1] + 36 * src[src_pos] + 36 * src[src_pos + 1] - 4 * src[src_pos + 2]) >> shift1); // ae0,2
dst[dst_pos + 1 * dst_stride + 1] = (((-4 * ae_temp1 + 36 * ae_temp + 36 * ae_temp2 - 4 * ae_temp3) + offset23) >> shift2) >> shift3; // ee0,0
dst[dst_pos + 1 * dst_stride + 1] = fast_clip_32bit_to_pixel((((-4 * ae_temp1 + 36 * ae_temp + 36 * ae_temp2 - 4 * ae_temp3) + offset23) >> shift2) >> shift3); // ee0,0
}
if (hor_flag) {
dst[dst_pos + 1] = (ae_temp + offset3) >> shift3;
dst[dst_pos + 1] = fast_clip_32bit_to_pixel((ae_temp + offset3) >> shift3);
}
}
}
//Clamp values to bitdepth
for (i = 0; i < width*height * 4; ++i) {
if (dst[i] >((1 << encoder->bitdepth) - 1)) dst[i] = (pixel)((1 << encoder->bitdepth) - 1);
if (dst[i] < 0) dst[i] = 0;
}
}
void filter_inter_octpel_chroma_generic(const encoder_control * const encoder, pixel *src, int16_t src_stride, int width, int height, pixel *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
@ -316,47 +310,47 @@ void filter_inter_octpel_chroma_generic(const encoder_control * const encoder, p
// Vertical 1/8-values
if (ver_flag) {
dst[dst_pos + 1 * dst_stride] = (((c1[0] * src[src_pos - 1 * src_stride]
dst[dst_pos + 1 * dst_stride] = fast_clip_32bit_to_pixel((((c1[0] * src[src_pos - 1 * src_stride]
+ c1[1] * src[src_pos]
+ c1[2] * src[src_pos + 1 * src_stride]
+ c1[3] * src[src_pos + 2 * src_stride]) >> shift1)
+ (1 << (shift3 - 1))) >> shift3; //
+ (1 << (shift3 - 1))) >> shift3); //
dst[dst_pos + 2 * dst_stride] = (((c2[0] * src[src_pos - 1 * src_stride]
dst[dst_pos + 2 * dst_stride] = fast_clip_32bit_to_pixel((((c2[0] * src[src_pos - 1 * src_stride]
+ c2[1] * src[src_pos]
+ c2[2] * src[src_pos + 1 * src_stride]
+ c2[3] * src[src_pos + 2 * src_stride]) >> shift1)
+ (1 << (shift3 - 1))) >> shift3; //
+ (1 << (shift3 - 1))) >> shift3); //
dst[dst_pos + 3 * dst_stride] = (((c3[0] * src[src_pos - 1 * src_stride]
dst[dst_pos + 3 * dst_stride] = fast_clip_32bit_to_pixel((((c3[0] * src[src_pos - 1 * src_stride]
+ c3[1] * src[src_pos]
+ c3[2] * src[src_pos + 1 * src_stride]
+ c3[3] * src[src_pos + 2 * src_stride]) >> shift1)
+ (1 << (shift3 - 1))) >> shift3; //
+ (1 << (shift3 - 1))) >> shift3); //
dst[dst_pos + 4 * dst_stride] = (((c4[0] * src[src_pos - 1 * src_stride]
dst[dst_pos + 4 * dst_stride] = fast_clip_32bit_to_pixel((((c4[0] * src[src_pos - 1 * src_stride]
+ c4[1] * src[src_pos]
+ c4[2] * src[src_pos + 1 * src_stride]
+ c4[3] * src[src_pos + 2 * src_stride]) >> shift1)
+ (1 << (shift3 - 1))) >> shift3; //
+ (1 << (shift3 - 1))) >> shift3); //
dst[dst_pos + 5 * dst_stride] = (((c5[0] * src[src_pos - 1 * src_stride]
dst[dst_pos + 5 * dst_stride] = fast_clip_32bit_to_pixel((((c5[0] * src[src_pos - 1 * src_stride]
+ c5[1] * src[src_pos]
+ c5[2] * src[src_pos + 1 * src_stride]
+ c5[3] * src[src_pos + 2 * src_stride]) >> shift1)
+ (1 << (shift3 - 1))) >> shift3; //
+ (1 << (shift3 - 1))) >> shift3); //
dst[dst_pos + 6 * dst_stride] = (((c6[0] * src[src_pos - 1 * src_stride]
dst[dst_pos + 6 * dst_stride] = fast_clip_32bit_to_pixel((((c6[0] * src[src_pos - 1 * src_stride]
+ c6[1] * src[src_pos]
+ c6[2] * src[src_pos + 1 * src_stride]
+ c6[3] * src[src_pos + 2 * src_stride]) >> shift1)
+ (1 << (shift3 - 1))) >> shift3; //
+ (1 << (shift3 - 1))) >> shift3); //
dst[dst_pos + 7 * dst_stride] = (((c7[0] * src[src_pos - 1 * src_stride]
dst[dst_pos + 7 * dst_stride] = fast_clip_32bit_to_pixel((((c7[0] * src[src_pos - 1 * src_stride]
+ c7[1] * src[src_pos]
+ c7[2] * src[src_pos + 1 * src_stride]
+ c7[3] * src[src_pos + 2 * src_stride]) >> shift1)
+ (1 << (shift3 - 1))) >> shift3; //
+ (1 << (shift3 - 1))) >> shift3); //
}
// When both flags, interpolate values from temporary horizontal values
@ -408,57 +402,51 @@ void filter_inter_octpel_chroma_generic(const encoder_control * const encoder, p
//Calculate values from temporary horizontal 1/8-values
for (i = 0; i<7; ++i){
dst[dst_pos + 1 * dst_stride + i + 1] = (((c1[0] * temp[0][i] + c1[1] * h_temp[i]
dst[dst_pos + 1 * dst_stride + i + 1] = fast_clip_32bit_to_pixel((((c1[0] * temp[0][i] + c1[1] * h_temp[i]
+ c1[2] * temp[1][i] + c1[3] * temp[2][i])
+ offset23) >> shift2) >> shift3; // ee0,0
+ offset23) >> shift2) >> shift3); // ee0,0
dst[dst_pos + 2 * dst_stride + i + 1] = (((c2[0] * temp[0][i] + c2[1] * h_temp[i]
dst[dst_pos + 2 * dst_stride + i + 1] = fast_clip_32bit_to_pixel((((c2[0] * temp[0][i] + c2[1] * h_temp[i]
+ c2[2] * temp[1][i] + c2[3] * temp[2][i])
+ offset23) >> shift2) >> shift3; // ee0,0
+ offset23) >> shift2) >> shift3); // ee0,0
dst[dst_pos + 3 * dst_stride + i + 1] = (((c3[0] * temp[0][i] + c3[1] * h_temp[i]
dst[dst_pos + 3 * dst_stride + i + 1] = fast_clip_32bit_to_pixel((((c3[0] * temp[0][i] + c3[1] * h_temp[i]
+ c3[2] * temp[1][i] + c3[3] * temp[2][i])
+ offset23) >> shift2) >> shift3; // ee0,0
+ offset23) >> shift2) >> shift3); // ee0,0
dst[dst_pos + 4 * dst_stride + i + 1] = (((c4[0] * temp[0][i] + c4[1] * h_temp[i]
dst[dst_pos + 4 * dst_stride + i + 1] = fast_clip_32bit_to_pixel((((c4[0] * temp[0][i] + c4[1] * h_temp[i]
+ c4[2] * temp[1][i] + c4[3] * temp[2][i])
+ offset23) >> shift2) >> shift3; // ee0,0
+ offset23) >> shift2) >> shift3); // ee0,0
dst[dst_pos + 5 * dst_stride + i + 1] = (((c5[0] * temp[0][i] + c5[1] * h_temp[i]
dst[dst_pos + 5 * dst_stride + i + 1] = fast_clip_32bit_to_pixel((((c5[0] * temp[0][i] + c5[1] * h_temp[i]
+ c5[2] * temp[1][i] + c5[3] * temp[2][i])
+ offset23) >> shift2) >> shift3; // ee0,0
+ offset23) >> shift2) >> shift3); // ee0,0
dst[dst_pos + 6 * dst_stride + i + 1] = (((c6[0] * temp[0][i] + c6[1] * h_temp[i]
dst[dst_pos + 6 * dst_stride + i + 1] = fast_clip_32bit_to_pixel((((c6[0] * temp[0][i] + c6[1] * h_temp[i]
+ c6[2] * temp[1][i] + c6[3] * temp[2][i])
+ offset23) >> shift2) >> shift3; // ee0,0
+ offset23) >> shift2) >> shift3); // ee0,0
dst[dst_pos + 7 * dst_stride + i + 1] = (((c7[0] * temp[0][i] + c7[1] * h_temp[i]
dst[dst_pos + 7 * dst_stride + i + 1] = fast_clip_32bit_to_pixel((((c7[0] * temp[0][i] + c7[1] * h_temp[i]
+ c7[2] * temp[1][i] + c7[3] * temp[2][i])
+ offset23) >> shift2) >> shift3; // ee0,0
+ offset23) >> shift2) >> shift3); // ee0,0
}
}
if (hor_flag) {
dst[dst_pos + 1] = (h_temp[0] + offset3) >> shift3;
dst[dst_pos + 2] = (h_temp[1] + offset3) >> shift3;
dst[dst_pos + 3] = (h_temp[2] + offset3) >> shift3;
dst[dst_pos + 4] = (h_temp[3] + offset3) >> shift3;
dst[dst_pos + 5] = (h_temp[4] + offset3) >> shift3;
dst[dst_pos + 6] = (h_temp[5] + offset3) >> shift3;
dst[dst_pos + 7] = (h_temp[6] + offset3) >> shift3;
dst[dst_pos + 1] = fast_clip_32bit_to_pixel((h_temp[0] + offset3) >> shift3);
dst[dst_pos + 2] = fast_clip_32bit_to_pixel((h_temp[1] + offset3) >> shift3);
dst[dst_pos + 3] = fast_clip_32bit_to_pixel((h_temp[2] + offset3) >> shift3);
dst[dst_pos + 4] = fast_clip_32bit_to_pixel((h_temp[3] + offset3) >> shift3);
dst[dst_pos + 5] = fast_clip_32bit_to_pixel((h_temp[4] + offset3) >> shift3);
dst[dst_pos + 6] = fast_clip_32bit_to_pixel((h_temp[5] + offset3) >> shift3);
dst[dst_pos + 7] = fast_clip_32bit_to_pixel((h_temp[6] + offset3) >> shift3);
}
}
}
//Clamp values to bitdepth
for (i = 0; i < width*height * 64; ++i) {
if (dst[i] >((1 << encoder->bitdepth) - 1)) dst[i] = (pixel)((1 << encoder->bitdepth) - 1);
if (dst[i] < 0) dst[i] = 0;
}
}
void extend_borders_generic(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel *ref, int ref_width, int ref_height,