mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-28 03:34:06 +00:00
Added fractional pixel motion estimation
Added fractional mv support for inter recon. Added 1/8-pel chroma and 1/4-pel luma interpolation.
This commit is contained in:
parent
6c7e4dbeef
commit
efc43c8b3a
408
src/filter.c
408
src/filter.c
|
@ -515,7 +515,7 @@ void filter_inter_halfpel_chroma(const encoder_control * const encoder, int16_t
|
||||||
* ea0,0 = (-4*B0,-1 + 36*B0,0 + 36*B0,1 - 4*B0,2) >> shift1
|
* ea0,0 = (-4*B0,-1 + 36*B0,0 + 36*B0,1 - 4*B0,2) >> shift1
|
||||||
* ee0,0 = (-4*ae0,-1 + 36*ae0,0 + 36*ae0,1 - 4*ae0,2) >> shift2
|
* ee0,0 = (-4*ae0,-1 + 36*ae0,0 + 36*ae0,1 - 4*ae0,2) >> shift2
|
||||||
*/
|
*/
|
||||||
|
int i = 0;
|
||||||
int32_t x, y;
|
int32_t x, y;
|
||||||
int32_t shift1 = encoder->bitdepth-8;
|
int32_t shift1 = encoder->bitdepth-8;
|
||||||
int32_t shift2 = 6;
|
int32_t shift2 = 6;
|
||||||
|
@ -568,4 +568,410 @@ void filter_inter_halfpel_chroma(const encoder_control * const encoder, int16_t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//Clamp values to bitdepth
|
||||||
|
for(i = 0; i < width*height*4; ++i) {
|
||||||
|
if(dst[i] > ((1 << encoder->bitdepth)-1)) dst[i] = (int16_t)((1 << encoder->bitdepth)-1);
|
||||||
|
if(dst[i] < 0) dst[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void filter_inter_octpel_chroma(const encoder_control * const encoder, int16_t *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
|
||||||
|
{
|
||||||
|
|
||||||
|
int32_t x, y;
|
||||||
|
int32_t shift1 = encoder->bitdepth-8;
|
||||||
|
int32_t shift2 = 6;
|
||||||
|
int32_t shift3 = 14-encoder->bitdepth;
|
||||||
|
int32_t offset3 = 1 << (shift3 - 1);
|
||||||
|
int32_t offset23 = 1 << (shift2 + shift3 - 1);
|
||||||
|
|
||||||
|
//coefficients for 1/8, 2/8, 3/8, 4/8, 5/8, 6/8 and 7/8 positions
|
||||||
|
int16_t c1[4], c2[4], c3[4], c4[4], c5[4], c6[4], c7[4];
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < 4; ++i ) {
|
||||||
|
c1[i] = g_chroma_filter[1][i];
|
||||||
|
c2[i] = g_chroma_filter[2][i];
|
||||||
|
c3[i] = g_chroma_filter[3][i];
|
||||||
|
c4[i] = g_chroma_filter[4][i];
|
||||||
|
c5[i] = g_chroma_filter[5][i];
|
||||||
|
c6[i] = g_chroma_filter[6][i];
|
||||||
|
c7[i] = g_chroma_filter[7][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop source pixels and generate 64 filtered 1/8-pel pixels on each round
|
||||||
|
for (y = 0; y < height; y++) {
|
||||||
|
int dst_pos_y = (y<<3)*dst_stride;
|
||||||
|
int src_pos_y = y*src_stride;
|
||||||
|
for (x = 0; x < width; x++) {
|
||||||
|
// Calculate current dst and src pixel positions
|
||||||
|
int dst_pos = dst_pos_y+(x<<3);
|
||||||
|
int src_pos = src_pos_y+x;
|
||||||
|
|
||||||
|
// Temporary horizontally interpolated postions
|
||||||
|
int32_t h_temp[7] = {0,0,0,0,0,0,0};
|
||||||
|
|
||||||
|
// Original pixel
|
||||||
|
dst[dst_pos] = src[src_pos];
|
||||||
|
|
||||||
|
// Horizontal 1/8-values
|
||||||
|
if (hor_flag) {
|
||||||
|
|
||||||
|
h_temp[0] = ((c1[0]*src[src_pos - 1]
|
||||||
|
+ c1[1]*src[src_pos]
|
||||||
|
+ c1[2]*src[src_pos + 1]
|
||||||
|
+ c1[3]*src[src_pos + 2]) >> shift1); // ae0,0 h0
|
||||||
|
|
||||||
|
h_temp[1] = ((c2[0]*src[src_pos - 1]
|
||||||
|
+ c2[1]*src[src_pos]
|
||||||
|
+ c2[2]*src[src_pos + 1]
|
||||||
|
+ c2[3]*src[src_pos + 2]) >> shift1); // ae0,0 h1
|
||||||
|
|
||||||
|
h_temp[2] = ((c3[0]*src[src_pos - 1]
|
||||||
|
+ c3[1]*src[src_pos]
|
||||||
|
+ c3[2]*src[src_pos + 1]
|
||||||
|
+ c3[3]*src[src_pos + 2]) >> shift1); // ae0,0 h2
|
||||||
|
|
||||||
|
h_temp[3] = ((c4[0]*src[src_pos - 1]
|
||||||
|
+ c4[1]*src[src_pos]
|
||||||
|
+ c4[2]*src[src_pos + 1]
|
||||||
|
+ c4[3]*src[src_pos + 2]) >> shift1); // ae0,0 h2
|
||||||
|
|
||||||
|
h_temp[4] = ((c5[0]*src[src_pos - 1]
|
||||||
|
+ c5[1]*src[src_pos]
|
||||||
|
+ c5[2]*src[src_pos + 1]
|
||||||
|
+ c5[3]*src[src_pos + 2]) >> shift1); // ae0,0 h2
|
||||||
|
|
||||||
|
h_temp[5] = ((c6[0]*src[src_pos - 1]
|
||||||
|
+ c6[1]*src[src_pos]
|
||||||
|
+ c6[2]*src[src_pos + 1]
|
||||||
|
+ c6[3]*src[src_pos + 2]) >> shift1); // ae0,0 h2
|
||||||
|
|
||||||
|
h_temp[6] = ((c7[0]*src[src_pos - 1]
|
||||||
|
+ c7[1]*src[src_pos]
|
||||||
|
+ c7[2]*src[src_pos + 1]
|
||||||
|
+ c7[3]*src[src_pos + 2]) >> shift1); // ae0,0 h2
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vertical 1/8-values
|
||||||
|
if(ver_flag) {
|
||||||
|
dst[dst_pos + 1*dst_stride] = (((c1[0]*src[src_pos - 1*src_stride]
|
||||||
|
+ c1[1]*src[src_pos]
|
||||||
|
+ c1[2]*src[src_pos + 1*src_stride]
|
||||||
|
+ c1[3]*src[src_pos + 2*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3; //
|
||||||
|
|
||||||
|
dst[dst_pos + 2*dst_stride] = (((c2[0]*src[src_pos - 1*src_stride]
|
||||||
|
+ c2[1]*src[src_pos]
|
||||||
|
+ c2[2]*src[src_pos + 1*src_stride]
|
||||||
|
+ c2[3]*src[src_pos + 2*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3; //
|
||||||
|
|
||||||
|
dst[dst_pos + 3*dst_stride] = (((c3[0]*src[src_pos - 1*src_stride]
|
||||||
|
+ c3[1]*src[src_pos]
|
||||||
|
+ c3[2]*src[src_pos + 1*src_stride]
|
||||||
|
+ c3[3]*src[src_pos + 2*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3; //
|
||||||
|
|
||||||
|
dst[dst_pos + 4*dst_stride] = (((c4[0]*src[src_pos - 1*src_stride]
|
||||||
|
+ c4[1]*src[src_pos]
|
||||||
|
+ c4[2]*src[src_pos + 1*src_stride]
|
||||||
|
+ c4[3]*src[src_pos + 2*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3; //
|
||||||
|
|
||||||
|
dst[dst_pos + 5*dst_stride] = (((c5[0]*src[src_pos - 1*src_stride]
|
||||||
|
+ c5[1]*src[src_pos]
|
||||||
|
+ c5[2]*src[src_pos + 1*src_stride]
|
||||||
|
+ c5[3]*src[src_pos + 2*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3; //
|
||||||
|
|
||||||
|
dst[dst_pos + 6*dst_stride] = (((c6[0]*src[src_pos - 1*src_stride]
|
||||||
|
+ c6[1]*src[src_pos]
|
||||||
|
+ c6[2]*src[src_pos + 1*src_stride]
|
||||||
|
+ c6[3]*src[src_pos + 2*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3; //
|
||||||
|
|
||||||
|
dst[dst_pos + 7*dst_stride] = (((c7[0]*src[src_pos - 1*src_stride]
|
||||||
|
+ c7[1]*src[src_pos]
|
||||||
|
+ c7[2]*src[src_pos + 1*src_stride]
|
||||||
|
+ c7[3]*src[src_pos + 2*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3; //
|
||||||
|
}
|
||||||
|
|
||||||
|
// When both flags, interpolate values from temporary horizontal values
|
||||||
|
if (hor_flag && ver_flag) {
|
||||||
|
|
||||||
|
int32_t temp[3][7]; // Temporary horizontal values calculated from integer pixels
|
||||||
|
|
||||||
|
// Calculate temporary values
|
||||||
|
src_pos -= 1*src_stride; //0,-3
|
||||||
|
for(i = 0; i < 3; ++i) {
|
||||||
|
|
||||||
|
temp[i][0] = ((c1[0]*src[src_pos - 1] + c1[1]*src[src_pos]
|
||||||
|
+ c1[2]*src[src_pos + 1] + c1[3]*src[src_pos + 2])
|
||||||
|
>> shift1); // h0(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][1] = ((c2[0]*src[src_pos - 1] + c2[1]*src[src_pos]
|
||||||
|
+ c2[2]*src[src_pos + 1] + c2[3]*src[src_pos + 2])
|
||||||
|
>> shift1); // h1(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][2] = ((c3[0]*src[src_pos - 1] + c3[1]*src[src_pos]
|
||||||
|
+ c3[2]*src[src_pos + 1] + c3[3]*src[src_pos + 2])
|
||||||
|
>> shift1); // h2(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][3] = ((c4[0]*src[src_pos - 1] + c4[1]*src[src_pos]
|
||||||
|
+ c4[2]*src[src_pos + 1] + c4[3]*src[src_pos + 2])
|
||||||
|
>> shift1); // h2(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][4] = ((c5[0]*src[src_pos - 1] + c5[1]*src[src_pos]
|
||||||
|
+ c5[2]*src[src_pos + 1] + c5[3]*src[src_pos + 2])
|
||||||
|
>> shift1); // h2(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][5] = ((c6[0]*src[src_pos - 1] + c6[1]*src[src_pos]
|
||||||
|
+ c6[2]*src[src_pos + 1] + c6[3]*src[src_pos + 2])
|
||||||
|
>> shift1); // h2(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][6] = ((c7[0]*src[src_pos - 1] + c7[1]*src[src_pos]
|
||||||
|
+ c7[2]*src[src_pos + 1] + c7[3]*src[src_pos + 2])
|
||||||
|
>> shift1); // h2(0,-3+i)
|
||||||
|
|
||||||
|
if(i == 0) {
|
||||||
|
//Skip calculating h_temp again
|
||||||
|
src_pos += 2*src_stride;
|
||||||
|
} else {
|
||||||
|
src_pos += src_stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Calculate values from temporary horizontal 1/8-values
|
||||||
|
for(i=0;i<7;++i){
|
||||||
|
dst[dst_pos + 1*dst_stride + i+1] = (((c1[0]*temp[0][i] + c1[1]*h_temp[i]
|
||||||
|
+ c1[2]*temp[1][i] + c1[3]*temp[2][i])
|
||||||
|
+ offset23) >> shift2) >> shift3; // ee0,0
|
||||||
|
|
||||||
|
dst[dst_pos + 2*dst_stride + i+1] = (((c2[0]*temp[0][i] + c2[1]*h_temp[i]
|
||||||
|
+ c2[2]*temp[1][i] + c2[3]*temp[2][i])
|
||||||
|
+ offset23) >> shift2) >> shift3; // ee0,0
|
||||||
|
|
||||||
|
dst[dst_pos + 3*dst_stride + i+1] = (((c3[0]*temp[0][i] + c3[1]*h_temp[i]
|
||||||
|
+ c3[2]*temp[1][i] + c3[3]*temp[2][i])
|
||||||
|
+ offset23) >> shift2) >> shift3; // ee0,0
|
||||||
|
|
||||||
|
dst[dst_pos + 4*dst_stride + i+1] = (((c4[0]*temp[0][i] + c4[1]*h_temp[i]
|
||||||
|
+ c4[2]*temp[1][i] + c4[3]*temp[2][i])
|
||||||
|
+ offset23) >> shift2) >> shift3; // ee0,0
|
||||||
|
|
||||||
|
dst[dst_pos + 5*dst_stride + i+1] = (((c5[0]*temp[0][i] + c5[1]*h_temp[i]
|
||||||
|
+ c5[2]*temp[1][i] + c5[3]*temp[2][i])
|
||||||
|
+ offset23) >> shift2) >> shift3; // ee0,0
|
||||||
|
|
||||||
|
dst[dst_pos + 6*dst_stride + i+1] = (((c6[0]*temp[0][i] + c6[1]*h_temp[i]
|
||||||
|
+ c6[2]*temp[1][i] + c6[3]*temp[2][i])
|
||||||
|
+ offset23) >> shift2) >> shift3; // ee0,0
|
||||||
|
|
||||||
|
dst[dst_pos + 7*dst_stride + i+1] = (((c7[0]*temp[0][i] + c7[1]*h_temp[i]
|
||||||
|
+ c7[2]*temp[1][i] + c7[3]*temp[2][i])
|
||||||
|
+ offset23) >> shift2) >> shift3; // ee0,0
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if(hor_flag) {
|
||||||
|
dst[dst_pos + 1] = (h_temp[0] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 2] = (h_temp[1] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 3] = (h_temp[2] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 4] = (h_temp[3] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 5] = (h_temp[4] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 6] = (h_temp[5] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 7] = (h_temp[6] + offset3) >> shift3;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Clamp values to bitdepth
|
||||||
|
for(i = 0; i < width*height*64; ++i) {
|
||||||
|
if(dst[i] > ((1 << encoder->bitdepth)-1)) dst[i] = (int16_t)((1 << encoder->bitdepth)-1);
|
||||||
|
if(dst[i] < 0) dst[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void filter_inter_quarterpel_luma(const encoder_control * const encoder, int16_t *src, int16_t src_stride, int width, int height, int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag)
|
||||||
|
{
|
||||||
|
|
||||||
|
int32_t x, y;
|
||||||
|
int32_t shift1 = encoder->bitdepth-8;
|
||||||
|
int32_t shift2 = 6;
|
||||||
|
int32_t shift3 = 14-encoder->bitdepth;
|
||||||
|
int32_t offset3 = 1 << (shift3 - 1);
|
||||||
|
int32_t offset23 = 1 << (shift2 + shift3 - 1);
|
||||||
|
|
||||||
|
//coefficients for 1/4, 2/4 and 3/4 positions
|
||||||
|
int16_t c1[8], c2[8], c3[8];
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for(i = 0; i < 8; ++i ) {
|
||||||
|
c1[i] = g_luma_filter[1][i];
|
||||||
|
c2[i] = g_luma_filter[2][i];
|
||||||
|
c3[i] = g_luma_filter[3][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop source pixels and generate sixteen filtered quarter-pel pixels on each round
|
||||||
|
for (y = 0; y < height; y++) {
|
||||||
|
int dst_pos_y = (y<<2)*dst_stride;
|
||||||
|
int src_pos_y = y*src_stride;
|
||||||
|
for (x = 0; x < width; x++) {
|
||||||
|
// Calculate current dst and src pixel positions
|
||||||
|
int dst_pos = dst_pos_y+(x<<2);
|
||||||
|
int src_pos = src_pos_y+x;
|
||||||
|
|
||||||
|
// Temporary variables..
|
||||||
|
int32_t h_temp[3] = {0,0,0};
|
||||||
|
|
||||||
|
// Original pixel
|
||||||
|
dst[dst_pos] = src[src_pos];
|
||||||
|
|
||||||
|
//
|
||||||
|
if (hor_flag) {
|
||||||
|
|
||||||
|
h_temp[0] = ((c1[0]*src[src_pos - 3]
|
||||||
|
+ c1[1]*src[src_pos - 2]
|
||||||
|
+ c1[2]*src[src_pos - 1]
|
||||||
|
+ c1[3]*src[src_pos]
|
||||||
|
+ c1[4]*src[src_pos + 1]
|
||||||
|
+ c1[5]*src[src_pos + 2]
|
||||||
|
+ c1[6]*src[src_pos + 3]
|
||||||
|
+ c1[7]*src[src_pos + 4]) >> shift1);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
h_temp[1] = ((c2[0]*src[src_pos - 3]
|
||||||
|
+ c2[1]*src[src_pos - 2]
|
||||||
|
+ c2[2]*src[src_pos - 1]
|
||||||
|
+ c2[3]*src[src_pos]
|
||||||
|
+ c2[4]*src[src_pos + 1]
|
||||||
|
+ c2[5]*src[src_pos + 2]
|
||||||
|
+ c2[6]*src[src_pos + 3]
|
||||||
|
+ c2[7]*src[src_pos + 4]) >> shift1);
|
||||||
|
|
||||||
|
h_temp[2] = ((c3[0]*src[src_pos - 3]
|
||||||
|
+ c3[1]*src[src_pos - 2]
|
||||||
|
+ c3[2]*src[src_pos - 1]
|
||||||
|
+ c3[3]*src[src_pos]
|
||||||
|
+ c3[4]*src[src_pos + 1]
|
||||||
|
+ c3[5]*src[src_pos + 2]
|
||||||
|
+ c3[6]*src[src_pos + 3]
|
||||||
|
+ c3[7]*src[src_pos + 4]) >> shift1);
|
||||||
|
}
|
||||||
|
// ea0,0 - needed only when ver_flag
|
||||||
|
if(ver_flag) {
|
||||||
|
dst[dst_pos + 1*dst_stride] = (((c1[0]*src[src_pos - 3*src_stride]
|
||||||
|
+ c1[1]*src[src_pos - 2*src_stride]
|
||||||
|
+ c1[2]*src[src_pos - 1*src_stride]
|
||||||
|
+ c1[3]*src[src_pos]
|
||||||
|
+ c1[4]*src[src_pos + 1*src_stride]
|
||||||
|
+ c1[5]*src[src_pos + 2*src_stride]
|
||||||
|
+ c1[6]*src[src_pos + 3*src_stride]
|
||||||
|
+ c1[7]*src[src_pos + 4*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3;
|
||||||
|
|
||||||
|
dst[dst_pos + 2*dst_stride] = (((c2[0]*src[src_pos - 3*src_stride]
|
||||||
|
+ c2[1]*src[src_pos - 2*src_stride]
|
||||||
|
+ c2[2]*src[src_pos - 1*src_stride]
|
||||||
|
+ c2[3]*src[src_pos]
|
||||||
|
+ c2[4]*src[src_pos + 1*src_stride]
|
||||||
|
+ c2[5]*src[src_pos + 2*src_stride]
|
||||||
|
+ c2[6]*src[src_pos + 3*src_stride]
|
||||||
|
+ c2[7]*src[src_pos + 4*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3;
|
||||||
|
|
||||||
|
dst[dst_pos + 3*dst_stride] = (((c3[0]*src[src_pos - 3*src_stride]
|
||||||
|
+ c3[1]*src[src_pos - 2*src_stride]
|
||||||
|
+ c3[2]*src[src_pos - 1*src_stride]
|
||||||
|
+ c3[3]*src[src_pos]
|
||||||
|
+ c3[4]*src[src_pos + 1*src_stride]
|
||||||
|
+ c3[5]*src[src_pos + 2*src_stride]
|
||||||
|
+ c3[6]*src[src_pos + 3*src_stride]
|
||||||
|
+ c3[7]*src[src_pos + 4*src_stride]) >> shift1)
|
||||||
|
+ (1<<(shift3-1))) >> shift3;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When both flags, we use _only_ this pixel (but still need ae0,0 for it)
|
||||||
|
if (hor_flag && ver_flag) {
|
||||||
|
|
||||||
|
int32_t temp[7][3];
|
||||||
|
|
||||||
|
// Calculate temporary values..
|
||||||
|
src_pos -= 3*src_stride; //0,-3
|
||||||
|
for(i = 0; i < 7; ++i) {
|
||||||
|
|
||||||
|
temp[i][0] = ((c1[0]*src[src_pos - 3] + c1[1]*src[src_pos - 2]
|
||||||
|
+ c1[2]*src[src_pos - 1] + c1[3]*src[src_pos]
|
||||||
|
+ c1[4]*src[src_pos + 1] + c1[5]*src[src_pos + 2]
|
||||||
|
+ c1[6]*src[src_pos + 3] + c1[7]*src[src_pos + 4])
|
||||||
|
>> shift1); // h0(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][1] = ((c2[0]*src[src_pos - 3] + c2[1]*src[src_pos - 2]
|
||||||
|
+ c2[2]*src[src_pos - 1] + c2[3]*src[src_pos]
|
||||||
|
+ c2[4]*src[src_pos + 1] + c2[5]*src[src_pos + 2]
|
||||||
|
+ c2[6]*src[src_pos + 3] + c2[7]*src[src_pos + 4])
|
||||||
|
>> shift1); // h1(0,-3+i)
|
||||||
|
|
||||||
|
temp[i][2] = ((c3[0]*src[src_pos - 3] + c3[1]*src[src_pos - 2]
|
||||||
|
+ c3[2]*src[src_pos - 1] + c3[3]*src[src_pos]
|
||||||
|
+ c3[4]*src[src_pos + 1] + c3[5]*src[src_pos + 2]
|
||||||
|
+ c3[6]*src[src_pos + 3] + c3[7]*src[src_pos + 4])
|
||||||
|
>> shift1); // h2(0,-3+i)
|
||||||
|
|
||||||
|
if(i == 2) {
|
||||||
|
//Skip calculating h_temp again
|
||||||
|
src_pos += 2*src_stride;
|
||||||
|
} else {
|
||||||
|
src_pos += src_stride;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
for(i=0;i<3;++i){
|
||||||
|
dst[dst_pos + 1*dst_stride + i+1] = (((c1[0]*temp[0][i] + c1[1]*temp[1][i]
|
||||||
|
+ c1[2]*temp[2][i] + c1[3]*h_temp[i]
|
||||||
|
+ c1[4]*temp[3][i] + c1[5]*temp[4][i]
|
||||||
|
+ c1[6]*temp[5][i] + c1[7]*temp[6][i])
|
||||||
|
+ offset23) >> shift2) >> shift3;
|
||||||
|
|
||||||
|
dst[dst_pos + 2*dst_stride + i+1] = (((c2[0]*temp[0][i] + c2[1]*temp[1][i]
|
||||||
|
+ c2[2]*temp[2][i] + c2[3]*h_temp[i]
|
||||||
|
+ c2[4]*temp[3][i] + c2[5]*temp[4][i]
|
||||||
|
+ c2[6]*temp[5][i] + c2[7]*temp[6][i])
|
||||||
|
+ offset23) >> shift2) >> shift3;
|
||||||
|
|
||||||
|
dst[dst_pos + 3*dst_stride + i+1] = (((c3[0]*temp[0][i] + c3[1]*temp[1][i]
|
||||||
|
+ c3[2]*temp[2][i] + c3[3]*h_temp[i]
|
||||||
|
+ c3[4]*temp[3][i] + c3[5]*temp[4][i]
|
||||||
|
+ c3[6]*temp[5][i] + c3[7]*temp[6][i])
|
||||||
|
+ offset23) >> shift2) >> shift3;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if(hor_flag) {
|
||||||
|
dst[dst_pos + 1] = (h_temp[0] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 2] = (h_temp[1] + offset3) >> shift3;
|
||||||
|
dst[dst_pos + 3] = (h_temp[2] + offset3) >> shift3;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Clamp values to bitdepth
|
||||||
|
for(i = 0; i < width*height*16; ++i) {
|
||||||
|
if(dst[i] > ((1 << encoder->bitdepth)-1)) dst[i] = (int16_t)((1 << encoder->bitdepth)-1);
|
||||||
|
if(dst[i] < 0) dst[i] = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,6 +54,12 @@ void filter_inter_halfpel_chroma(const encoder_control * encoder,
|
||||||
int16_t *src, int16_t src_stride, int width, int height,
|
int16_t *src, int16_t src_stride, int width, int height,
|
||||||
int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag);
|
int16_t *dst, int16_t dst_stride, int8_t hor_flag, int8_t ver_flag);
|
||||||
|
|
||||||
|
void filter_inter_octpel_chroma(const encoder_control * encoder, int16_t *src, int16_t src_stride, int width, int height, int16_t *dst,
|
||||||
|
int16_t dst_stride, int8_t hor_flag, int8_t ver_flag);
|
||||||
|
|
||||||
|
void filter_inter_quarterpel_luma(const encoder_control * encoder, int16_t *src, int16_t src_stride, int width, int height, int16_t *dst,
|
||||||
|
int16_t dst_stride, int8_t hor_flag, int8_t ver_flag);
|
||||||
|
|
||||||
// SAO
|
// SAO
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
121
src/inter.c
121
src/inter.c
|
@ -64,6 +64,45 @@ void inter_set_block(picture* pic, uint32_t x_cu, uint32_t y_cu, uint8_t depth,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * \brief Copy a block and its filter margin from a reference plane.
 *
 * Fills dst with the width x height block at (xpos, ypos), displaced by the
 * motion vector and the given offsets, plus filterSize/2 extra samples on
 * every side. Coordinates that fall outside the reference plane are clamped
 * to the nearest valid row/column, so the edge samples are repeated.
 *
 * \param xpos        block x position
 * \param ypos        block y position
 * \param mv_x        horizontal displacement in whole samples (narrowed to 16 bits)
 * \param mv_y        vertical displacement in whole samples (narrowed to 16 bits)
 * \param off_x       additional x offset added to every coordinate
 * \param off_y       additional y offset added to every coordinate
 * \param ref         reference plane, ref_width samples per row
 * \param ref_width   reference plane width
 * \param ref_height  reference plane height
 * \param filterSize  interpolation filter length; half of it is the margin
 * \param width       block width
 * \param height      block height
 * \param dst         output, rows of (width + filterSize) samples
 */
void extend_borders(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel *ref, int ref_width, int ref_height,
                    int filterSize, int width, int height, int16_t *dst) {

  const int16_t mv_hor = (int16_t)mv_x;
  const int16_t mv_ver = (int16_t)mv_y;
  const int margin = filterSize >> 1;
  const int dst_stride = width + filterSize;
  const int end_x = xpos + width + margin;
  const int end_y = ypos + height + margin;

  int out_row = 0;
  int src_y;

  for (src_y = ypos - margin; src_y < end_y; ++src_y, ++out_row) {
    int out_col = 0;
    int src_x;
    int row_base;

    // Row in the reference plane, clamped to [0, ref_height - 1]
    int ref_y = src_y + off_y + mv_ver;
    if (ref_y < 0) {
      ref_y = 0;
    } else if (ref_y >= ref_height) {
      ref_y = ref_height - 1;
    }
    row_base = ref_y * ref_width;

    for (src_x = xpos - margin; src_x < end_x; ++src_x, ++out_col) {
      // Column in the reference plane, clamped to [0, ref_width - 1]
      int ref_x = src_x + off_x + mv_hor;
      if (ref_x < 0) {
        ref_x = 0;
      } else if (ref_x >= ref_width) {
        ref_x = ref_width - 1;
      }

      // Store source block data (with extended borders)
      dst[out_row * dst_stride + out_col] = ref[row_base + ref_x];
    }
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Reconstruct inter block
|
* \brief Reconstruct inter block
|
||||||
* \param ref picture to copy the data from
|
* \param ref picture to copy the data from
|
||||||
|
@ -100,12 +139,91 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
int16_t halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (u)
|
int16_t halfpel_u[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (u)
|
||||||
int16_t halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v)
|
int16_t halfpel_v[LCU_WIDTH * LCU_WIDTH]; //!< interpolated 2W x 2H block (v)
|
||||||
|
|
||||||
// TODO: Fractional pixel support
|
// Luma quarter-pel
|
||||||
|
int8_t fractional_mv = (mv[0]&1) || (mv[1]&1) || (mv[0]&2) || (mv[1]&2); // 2 lowest bits of mv set -> mv is fractional
|
||||||
|
|
||||||
|
if(fractional_mv) {
|
||||||
|
int y_off_x = (mv[0]&3);
|
||||||
|
int y_off_y = (mv[1]&3);
|
||||||
|
|
||||||
|
int c_off_x = (mv[0]&7);
|
||||||
|
int c_off_y = (mv[1]&7);
|
||||||
|
|
||||||
|
int y,x;
|
||||||
|
|
||||||
|
#define FILTER_SIZE_Y 8
|
||||||
|
#define FILTER_SIZE_C 4
|
||||||
|
|
||||||
|
//vector2d orig = {xpos, ypos};
|
||||||
|
//vector2d orig_c = {xpos>>1, ypos>>1};
|
||||||
|
|
||||||
|
// Fractional luma 1/4-pel
|
||||||
|
int16_t qpel_src_y[(LCU_WIDTH+FILTER_SIZE_Y) * (LCU_WIDTH+FILTER_SIZE_Y)];
|
||||||
|
int16_t* qpel_src_off_y = &qpel_src_y[(width+FILTER_SIZE_Y)*(FILTER_SIZE_Y>>1)+(FILTER_SIZE_Y>>1)];
|
||||||
|
int16_t qpel_dst_y[LCU_WIDTH*LCU_WIDTH*16];
|
||||||
|
|
||||||
|
// Fractional chroma 1/8-pel
|
||||||
|
int width_c = width>>1;
|
||||||
|
int16_t octpel_src_u[((LCU_WIDTH>>1)+FILTER_SIZE_C) * ((LCU_WIDTH>>1)+FILTER_SIZE_C)];
|
||||||
|
int16_t* octpel_src_off_u = &octpel_src_u[(width_c+FILTER_SIZE_C)*(FILTER_SIZE_C>>1)+(FILTER_SIZE_C>>1)];
|
||||||
|
int16_t octpel_dst_u[(LCU_WIDTH>>1)*(LCU_WIDTH>>1)*64];
|
||||||
|
|
||||||
|
int16_t octpel_src_v[((LCU_WIDTH>>1)+FILTER_SIZE_C) * ((LCU_WIDTH>>1)+FILTER_SIZE_C)];
|
||||||
|
int16_t* octpel_src_off_v = &octpel_src_v[(width_c+FILTER_SIZE_C)*(FILTER_SIZE_C>>1)+(FILTER_SIZE_C>>1)];
|
||||||
|
int16_t octpel_dst_v[(LCU_WIDTH>>1)*(LCU_WIDTH>>1)*64];
|
||||||
|
|
||||||
|
// Fractional luma
|
||||||
|
extend_borders(xpos, ypos, mv[0]>>2, mv[1]>>2, encoder_state->tile->lcu_offset_x * LCU_WIDTH, encoder_state->tile->lcu_offset_y * LCU_WIDTH,
|
||||||
|
ref->y_recdata, ref->width, ref->height, FILTER_SIZE_Y, width, width, qpel_src_y);
|
||||||
|
|
||||||
|
filter_inter_quarterpel_luma(encoder_state->encoder_control, qpel_src_off_y, width+FILTER_SIZE_Y, width,
|
||||||
|
width, qpel_dst_y, width*4, y_off_x, y_off_y);
|
||||||
|
|
||||||
|
//Fractional chroma U
|
||||||
|
extend_borders(xpos>>1, ypos>>1, (mv[0]>>2)>>1, (mv[1]>>2)>>1, encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1), encoder_state->tile->lcu_offset_y * (LCU_WIDTH>>1),
|
||||||
|
ref->u_recdata, ref->width>>1, ref->height>>1, FILTER_SIZE_C, width_c, width_c, octpel_src_u);
|
||||||
|
|
||||||
|
filter_inter_octpel_chroma(encoder_state->encoder_control, octpel_src_off_u, width_c+FILTER_SIZE_C, width_c,
|
||||||
|
width_c, octpel_dst_u, width_c*8, c_off_x, c_off_y);
|
||||||
|
|
||||||
|
//Fractional chroma V
|
||||||
|
extend_borders(xpos>>1, ypos>>1, (mv[0]>>2)>>1, (mv[1]>>2)>>1, encoder_state->tile->lcu_offset_x * (LCU_WIDTH>>1), encoder_state->tile->lcu_offset_y * (LCU_WIDTH>>1),
|
||||||
|
ref->v_recdata, ref->width>>1, ref->height>>1, FILTER_SIZE_C, width_c, width_c, octpel_src_v);
|
||||||
|
|
||||||
|
filter_inter_octpel_chroma(encoder_state->encoder_control, octpel_src_off_v, width_c+FILTER_SIZE_C, width_c,
|
||||||
|
width_c, octpel_dst_v, width_c*8, c_off_x, c_off_y);
|
||||||
|
|
||||||
|
//Luma
|
||||||
|
for(y = 0; y < width; ++y) {
|
||||||
|
int y_in_lcu = ((y+ypos) & ((LCU_WIDTH)-1));
|
||||||
|
int qpel_y = y*4+y_off_y;
|
||||||
|
for(x = 0; x < width; ++x) {
|
||||||
|
int x_in_lcu = ((x+xpos) & ((LCU_WIDTH)-1));
|
||||||
|
int qpel_x = x*4+y_off_x;
|
||||||
|
//printf("x: %d, y: %d\n", off_x, off_y);
|
||||||
|
lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (uint8_t)qpel_dst_y[qpel_y*(width*4)+qpel_x];
|
||||||
|
//printf("i: %d", qpel_y*(width*4)+qpel_x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Chroma
|
||||||
|
for(y = 0; y < width_c; ++y) {
|
||||||
|
int y_in_lcu = ((y+(ypos>>1)) & ((LCU_WIDTH>>1)-1));
|
||||||
|
int qpel_y = y*8+c_off_y;
|
||||||
|
for(x = 0; x < width_c; ++x) {
|
||||||
|
int x_in_lcu = ((x+(xpos>>1)) & ((LCU_WIDTH>>1)-1));
|
||||||
|
int qpel_x = x*8+c_off_x;
|
||||||
|
lcu->rec.u[y_in_lcu * dst_width_c + x_in_lcu] = (uint8_t)octpel_dst_u[qpel_y*(width_c*8)+qpel_x];
|
||||||
|
lcu->rec.v[y_in_lcu * dst_width_c + x_in_lcu] = (uint8_t)octpel_dst_v[qpel_y*(width_c*8)+qpel_x];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mv[0] >>= 2;
|
mv[0] >>= 2;
|
||||||
mv[1] >>= 2;
|
mv[1] >>= 2;
|
||||||
|
|
||||||
// Chroma half-pel
|
// Chroma half-pel
|
||||||
// get half-pel interpolated block and push it to output
|
// get half-pel interpolated block and push it to output
|
||||||
|
if(!fractional_mv) {
|
||||||
if(chroma_halfpel) {
|
if(chroma_halfpel) {
|
||||||
int halfpel_y, halfpel_x;
|
int halfpel_y, halfpel_x;
|
||||||
int abs_mv_x = mv[0]&1;
|
int abs_mv_x = mv[0]&1;
|
||||||
|
@ -251,6 +369,7 @@ void inter_recon_lcu(const encoder_state * const encoder_state, const picture *
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -37,4 +37,6 @@ void inter_get_spatial_merge_candidates(int32_t x, int32_t y, int8_t depth, cu_i
|
||||||
cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu);
|
cu_info **b2,cu_info **a0,cu_info **a1, lcu_t *lcu);
|
||||||
void inter_get_mv_cand(const encoder_state *encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
|
void inter_get_mv_cand(const encoder_state *encoder_state, int32_t x, int32_t y, int8_t depth, int16_t mv_cand[2][2], cu_info* cur_cu, lcu_t *lcu);
|
||||||
uint8_t inter_get_merge_cand(int32_t x, int32_t y, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][3], lcu_t *lcu);
|
uint8_t inter_get_merge_cand(int32_t x, int32_t y, int8_t depth, int16_t mv_cand[MRG_MAX_NUM_CANDS][3], lcu_t *lcu);
|
||||||
|
void extend_borders(int xpos, int ypos, int mv_x, int mv_y, int off_x, int off_y, pixel *ref, int ref_width, int ref_height,
|
||||||
|
int filterSize, int width, int height, int16_t *dst);
|
||||||
#endif
|
#endif
|
||||||
|
|
161
src/search.c
161
src/search.c
|
@ -72,6 +72,16 @@ const vector2d small_hexbs[5] = {
|
||||||
{ -1, -1 }, { -1, 0 }, { 1, 0 }, { 1, 1 }
|
{ -1, -1 }, { -1, 0 }, { 1, 0 }, { 1, 1 }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 6 7 8
|
||||||
|
* 3 4 5
|
||||||
|
* 0 1 2
|
||||||
|
*/
|
||||||
|
const vector2d square[9] = {
|
||||||
|
{ -1, 1 },
|
||||||
|
{ 0, 1 }, { 1, 1 }, { -1, 0 }, { 0, 0 }, { 1, 0 }, { -1, -1 },
|
||||||
|
{ 0, -1 }, { 1, -1 }
|
||||||
|
};
|
||||||
|
|
||||||
static uint32_t get_ep_ex_golomb_bitcost(uint32_t symbol, uint32_t count)
|
static uint32_t get_ep_ex_golomb_bitcost(uint32_t symbol, uint32_t count)
|
||||||
{
|
{
|
||||||
|
@ -118,7 +128,7 @@ static uint32_t get_mvd_coding_cost(vector2d *mvd)
|
||||||
return bitcost;
|
return bitcost;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y,
|
static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y, int mv_shift,
|
||||||
int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
|
int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
|
||||||
int16_t num_cand,int32_t ref_idx, uint32_t *bitcost)
|
int16_t num_cand,int32_t ref_idx, uint32_t *bitcost)
|
||||||
{
|
{
|
||||||
|
@ -129,8 +139,8 @@ static int calc_mvd_cost(const encoder_state * const encoder_state, int x, int y
|
||||||
int8_t merged = 0;
|
int8_t merged = 0;
|
||||||
int8_t cur_mv_cand = 0;
|
int8_t cur_mv_cand = 0;
|
||||||
|
|
||||||
x <<= 2;
|
x <<= mv_shift;
|
||||||
y <<= 2;
|
y <<= mv_shift;
|
||||||
|
|
||||||
// Check every candidate to find a match
|
// Check every candidate to find a match
|
||||||
for(merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
|
for(merge_idx = 0; merge_idx < (uint32_t)num_cand; merge_idx++) {
|
||||||
|
@ -205,7 +215,7 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + pattern->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + pattern->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, 2, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
best_cost = cost;
|
best_cost = cost;
|
||||||
|
@ -220,7 +230,7 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, 0, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, 0, 0, 2,mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
// If the 0,0 is better, redo the hexagon around that point.
|
// If the 0,0 is better, redo the hexagon around that point.
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
|
@ -236,7 +246,7 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + pattern->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + pattern->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, pattern->x, pattern->y, 2,mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
best_cost = cost;
|
best_cost = cost;
|
||||||
|
@ -271,7 +281,7 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, 2,mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
best_cost = cost;
|
best_cost = cost;
|
||||||
|
@ -294,7 +304,7 @@ static unsigned hexagon_search(const encoder_state * const encoder_state, unsign
|
||||||
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
(encoder_state->tile->lcu_offset_x * LCU_WIDTH) + orig->x + mv.x + offset->x,
|
||||||
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
(encoder_state->tile->lcu_offset_y * LCU_WIDTH) + orig->y + mv.y + offset->y,
|
||||||
block_width, block_width);
|
block_width, block_width);
|
||||||
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
cost += calc_mvd_cost(encoder_state, mv.x + offset->x, mv.y + offset->y, 2,mv_cand,merge_cand,num_cand,ref_idx, &bitcost);
|
||||||
|
|
||||||
if (cost > 0 && cost < best_cost) {
|
if (cost > 0 && cost < best_cost) {
|
||||||
best_cost = cost;
|
best_cost = cost;
|
||||||
|
@ -369,6 +379,139 @@ static unsigned search_mv_full(unsigned depth,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/**
 * \brief Refine an integer-pel motion vector to half- and quarter-pel precision.
 *
 * The reference area around the integer position of *mv_in_out is fetched
 * with extend_borders() and interpolated once with
 * filter_inter_quarterpel_luma(). The nine offsets in `square` are then
 * evaluated at half-pel spacing, and again at quarter-pel spacing around the
 * half-pel winner. The cost of a candidate is SATD/2 against the original
 * block plus the value returned by calc_mvd_cost().
 *
 * \param encoder_state  Encoder state; supplies the encoder control and the
 *                       tile LCU offsets.
 * \param depth          CU depth; block width is CU_WIDTH_FROM_DEPTH(depth).
 * \param pic            Picture being coded (y_data is read).
 * \param ref            Reference picture (y_data is read).
 * \param orig           Top-left corner of the block, used to index pic.
 * \param mv_in_out      In: vector whose two lowest bits are discarded to get
 *                       the integer-pel start point. Out: refined vector in
 *                       quarter-pel units.
 * \param mv_cand        Passed through to calc_mvd_cost().
 * \param merge_cand     Passed through to calc_mvd_cost().
 * \param num_cand       Passed through to calc_mvd_cost().
 * \param ref_idx        Passed through to calc_mvd_cost().
 * \param bitcost_out    Out: bitcost reported by calc_mvd_cost() for the
 *                       winning candidate.
 * \return Cost of the best candidate.
 */
static unsigned search_frac(const encoder_state * const encoder_state,
                            unsigned depth,
                            const picture *pic, const picture *ref,
                            const vector2d *orig, vector2d *mv_in_out,
                            int16_t mv_cand[2][2], int16_t merge_cand[MRG_MAX_NUM_CANDS][3],
                            int16_t num_cand, int32_t ref_idx, uint32_t *bitcost_out) {

  // Discard the two fractional bits: mv is in integer-pel precision from here.
  vector2d mv = { mv_in_out->x >> 2, mv_in_out->y >> 2 };
  int block_width = CU_WIDTH_FROM_DEPTH(depth);
  unsigned best_cost = UINT32_MAX;
  uint32_t best_bitcost = 0, bitcost;
  unsigned i;
  int x, y;

  // Winning offset of the stage currently being searched. Tracking the offset
  // itself (instead of an index into `square`) keeps the result correct even
  // when no candidate of a stage improves on best_cost.
  vector2d best_offset = { 0, 0 };

  unsigned cost = 0;

  cost_16bit_nxn_func satd = get_satd_16bit_nxn_func(block_width);

  vector2d halfpel_offset;

  #define FILTER_SIZE 8
  #define HALF_FILTER (FILTER_SIZE>>1)

  // Source buffer: block plus one extra row/column plus room for the filter.
  int src_stride = block_width+FILTER_SIZE+1;
  int16_t src[(LCU_WIDTH+FILTER_SIZE+1) * (LCU_WIDTH+FILTER_SIZE+1)];
  int16_t* src_off = &src[HALF_FILTER+HALF_FILTER*(block_width+FILTER_SIZE+1)];

  // Destination buffer for the interpolated samples (4x4 per source sample).
  int dst_stride = (block_width+1)*4;
  int16_t dst[(LCU_WIDTH+1) * (LCU_WIDTH+1) * 16];
  // Skip the first interpolated row/column group so that negative candidate
  // offsets stay inside dst.
  int16_t* dst_off = &dst[dst_stride*4+4];

  pixel tmp_filtered[LCU_WIDTH*LCU_WIDTH];
  pixel tmp_pic[LCU_WIDTH*LCU_WIDTH];

  // Fetch the reference area starting one sample up/left of the integer mv.
  extend_borders(orig->x, orig->y, mv.x-1, mv.y-1,
                 encoder_state->tile->lcu_offset_x * LCU_WIDTH,
                 encoder_state->tile->lcu_offset_y * LCU_WIDTH,
                 ref->y_data, ref->width, ref->height, FILTER_SIZE, block_width+1, block_width+1, src);

  filter_inter_quarterpel_luma(encoder_state->encoder_control, src_off, src_stride, block_width+1,
                               block_width+1, dst, dst_stride, 1, 1);

  // The original block is the same for every candidate: copy it only once.
  for (y = 0; y < block_width; ++y) {
    for (x = 0; x < block_width; ++x) {
      tmp_pic[y*block_width+x] = (pixel)pic->y_data[orig->x+x + (orig->y+y)*pic->width];
    }
  }

  // Set mv to half-pel precision. Multiplication instead of <<, because
  // left-shifting a negative value is undefined behaviour.
  mv.x *= 2;
  mv.y *= 2;

  // Search half-pel positions around the best integer mv.
  for (i = 0; i < 9; ++i) {
    const vector2d *pattern = &square[i];

    for (y = 0; y < block_width; ++y) {
      int dst_y = y*4+pattern->y*2;
      for (x = 0; x < block_width; ++x) {
        int dst_x = x*4+pattern->x*2;
        tmp_filtered[y*block_width+x] = (pixel)dst_off[dst_y*dst_stride+dst_x];
      }
    }

    cost = satd(tmp_pic,tmp_filtered);
    cost = cost>>1;

    // NOTE(review): the 4th argument looks like the shift from the mv's
    // precision to quarter-pel (2 in the integer search, 1 here, 0 below) —
    // confirm against calc_mvd_cost().
    cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, 1, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);

    if (cost < best_cost) {
      best_cost    = cost;
      best_offset  = *pattern;
      best_bitcost = bitcost;
    }
  }

  // Move mv to the best half-pel match.
  mv.x += best_offset.x;
  mv.y += best_offset.y;

  // Same offset expressed in quarter-pel units, used to index dst below.
  halfpel_offset.x = best_offset.x*2;
  halfpel_offset.y = best_offset.y*2;

  // Set mv to quarter-pel precision.
  mv.x *= 2;
  mv.y *= 2;

  // Start the quarter-pel stage from "no offset". best_cost still holds the
  // half-pel winner's cost, so if no quarter-pel candidate is strictly
  // cheaper the half-pel position must be kept unchanged rather than the
  // previous stage's offset being applied a second time.
  best_offset.x = 0;
  best_offset.y = 0;

  // Search quarter-pel positions around the best half-pel mv.
  for (i = 0; i < 9; ++i) {
    const vector2d *pattern = &square[i];

    for (y = 0; y < block_width; ++y) {
      int dst_y = y*4+halfpel_offset.y+pattern->y;
      for (x = 0; x < block_width; ++x) {
        int dst_x = x*4+halfpel_offset.x+pattern->x;
        tmp_filtered[y*block_width+x] = (pixel)dst_off[dst_y*dst_stride+dst_x];
      }
    }

    cost = satd(tmp_pic,tmp_filtered);
    cost = cost>>1;

    cost += calc_mvd_cost(encoder_state, mv.x + pattern->x, mv.y + pattern->y, 0, mv_cand,merge_cand,num_cand,ref_idx, &bitcost);

    if (cost < best_cost) {
      best_cost    = cost;
      best_offset  = *pattern;
      best_bitcost = bitcost;
    }
  }

  // Move mv to the final best match.
  mv.x += best_offset.x;
  mv.y += best_offset.y;

  mv_in_out->x = mv.x;
  mv_in_out->y = mv.y;

  *bitcost_out = best_bitcost;

  return best_cost;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update lcu to have best modes at this depth.
|
* Update lcu to have best modes at this depth.
|
||||||
* \return Cost of best mode.
|
* \return Cost of best mode.
|
||||||
|
@ -425,6 +568,8 @@ static int search_cu_inter(const encoder_state * const encoder_state, int x, int
|
||||||
temp_cost += hexagon_search(encoder_state, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
temp_cost += hexagon_search(encoder_state, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
temp_cost = search_frac(encoder_state, depth, cur_pic, ref_pic, &orig, &mv, mv_cand, merge_cand, num_cand, ref_idx, &temp_bitcost);
|
||||||
|
|
||||||
merged = 0;
|
merged = 0;
|
||||||
// Check every candidate to find a match
|
// Check every candidate to find a match
|
||||||
for(merge_idx = 0; merge_idx < num_cand; merge_idx++) {
|
for(merge_idx = 0; merge_idx < num_cand; merge_idx++) {
|
||||||
|
|
Loading…
Reference in a new issue