Consistent naming part 1

Ari Lemmetti 2021-11-21 00:32:01 +02:00
parent 8f0e96162a
commit a68d73674b
6 changed files with 108 additions and 108 deletions
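
In short, this commit renames the intermediate-precision pixel type and its containers: kvz_pixel_ip becomes kvz_pixel_im, yuv_ip_t becomes yuv_im_t, and the bipred averaging helpers trade their _ip_ infix for _im_ ("im" for intermediate precision, per the new typedef comment), while local ip_* buffer names are still untouched in this part. Collected from the hunks below, the renamed declarations read:

typedef int16_t kvz_pixel_im; // For intermediate precision (interpolation/bipred).

typedef struct {
  int size;
  kvz_pixel_im *y;
  kvz_pixel_im *u;
  kvz_pixel_im *v;
} yuv_im_t;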

(changed file 1 of 6)

@@ -60,10 +60,10 @@ typedef struct {
typedef struct {
int size;
kvz_pixel_ip *y;
kvz_pixel_ip *u;
kvz_pixel_ip *v;
} yuv_ip_t;
kvz_pixel_im *y;
kvz_pixel_im *u;
kvz_pixel_im *v;
} yuv_im_t;
kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height);
kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height);

(changed file 2 of 6)

@@ -115,7 +115,7 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state,
int32_t block_width,
int32_t block_height,
const int16_t mv_param[2],
yuv_ip_t *out,
yuv_im_t *out,
const unsigned out_stride)
{
int mv_frac_x = (mv_param[0] & 3);
@@ -248,7 +248,7 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state,
int32_t pu_w,
int32_t pu_h,
const int16_t mv_param[2],
yuv_ip_t *out,
yuv_im_t *out,
const unsigned out_stride)
{
int mv_frac_x = (mv_param[0] & 7);
@@ -367,7 +367,7 @@ static void inter_cp_with_ext_border(const kvz_pixel *ref_buf, int ref_stride,
* \param height PU height
* \param mv_param motion vector
* \param lcu_px destination lcu
* \param lcu_ip destination of high precision output, or NULL if not needed
* \param lcu_im destination of high precision output, or NULL if not needed
* \param predict_luma Enable or disable luma prediction for this call.
* \param predict_chroma Enable or disable chroma prediction for this call.
*/
@@ -380,7 +380,7 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
int32_t out_stride_luma,
const int16_t mv_param[2],
yuv_t *yuv_px,
yuv_ip_t *yuv_ip,
yuv_im_t *yuv_im,
bool predict_luma,
bool predict_chroma)
{
@@ -403,11 +403,11 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
if (predict_luma) {
if (fractional_luma) {
// With a fractional MV, do interpolation.
if (state->encoder_control->cfg.bipred && yuv_ip) {
if (state->encoder_control->cfg.bipred && yuv_im) {
inter_recon_frac_luma_hi(state, ref,
pu_x, pu_y,
pu_w, pu_h,
mv_param, yuv_ip, out_stride_luma);
mv_param, yuv_im, out_stride_luma);
}
else {
inter_recon_frac_luma(state, ref,
@@ -444,11 +444,11 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
// Generate prediction for chroma.
if (fractional_luma || fractional_chroma) {
// With a fractional MV, do interpolation.
if (state->encoder_control->cfg.bipred && yuv_ip) {
if (state->encoder_control->cfg.bipred && yuv_im) {
inter_recon_frac_chroma_hi(state, ref,
pu_x, pu_y,
pu_w, pu_h,
mv_param, yuv_ip, out_stride_c);
mv_param, yuv_im, out_stride_c);
} else {
inter_recon_frac_chroma(state, ref,
pu_x, pu_y,
@@ -516,8 +516,8 @@ void kvz_inter_recon_bipred(const encoder_state_t *const state,
// Allocate maximum size arrays for interpolated and copied samples
ALIGNED(64) kvz_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel px_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_ip ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_ip ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_im ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
ALIGNED(64) kvz_pixel_im ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
yuv_t px_L0;
px_L0.size = pu_w * pu_h;
@@ -531,13 +531,13 @@ void kvz_inter_recon_bipred(const encoder_state_t *const state,
px_L1.u = &px_buf_L1[LCU_LUMA_SIZE];
px_L1.v = &px_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
yuv_ip_t ip_L0;
yuv_im_t ip_L0;
ip_L0.size = pu_w * pu_h;
ip_L0.y = &ip_buf_L0[0];
ip_L0.u = &ip_buf_L0[LCU_LUMA_SIZE];
ip_L0.v = &ip_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
yuv_ip_t ip_L1;
yuv_im_t ip_L1;
ip_L1.size = pu_w * pu_h;
ip_L1.y = &ip_buf_L1[0];
ip_L1.u = &ip_buf_L1[LCU_LUMA_SIZE];

(changed file 3 of 6)

@@ -97,7 +97,7 @@ typedef uint8_t kvz_pixel;
typedef uint16_t kvz_pixel;
#endif
typedef int16_t kvz_pixel_ip;
typedef int16_t kvz_pixel_im; // For intermediate precision (interpolation/bipred).
/**
* \brief Opaque data structure representing one instance of the encoder.

(changed file 4 of 6)

@@ -947,9 +947,9 @@ static INLINE void bipred_average_px_px_avx2(kvz_pixel *dst,
}
}
static INLINE void bipred_average_ip_ip_template_avx2(kvz_pixel *dst,
kvz_pixel_ip *ip_L0,
kvz_pixel_ip *ip_L1,
static INLINE void bipred_average_im_im_template_avx2(kvz_pixel *dst,
kvz_pixel_im *ip_L0,
kvz_pixel_im *ip_L1,
unsigned pu_w,
unsigned pu_h,
unsigned dst_stride)
@@ -1135,9 +1135,9 @@ static INLINE void bipred_average_ip_ip_template_avx2(kvz_pixel *dst,
}
}
static void bipred_average_ip_ip_avx2(kvz_pixel *dst,
kvz_pixel_ip *ip_L0,
kvz_pixel_ip *ip_L1,
static void bipred_average_im_im_avx2(kvz_pixel *dst,
kvz_pixel_im *ip_L0,
kvz_pixel_im *ip_L1,
unsigned pu_w,
unsigned pu_h,
unsigned dst_stride)
@@ -1145,16 +1145,16 @@ static void bipred_average_ip_ip_avx2(kvz_pixel *dst,
// Use scalar code for yet unoptimized block sizes (4x4, 2x8)
if (!(pu_w == 4 && pu_h == 4) && pu_w > 2) {
switch (pu_w) {
case 4: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 4, pu_h, dst_stride); break;
case 8: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 8, pu_h, dst_stride); break;
case 16: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 16, pu_h, dst_stride); break;
case 32: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 32, pu_h, dst_stride); break;
case 64: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 64, pu_h, dst_stride); break;
case 4: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 4, pu_h, dst_stride); break;
case 8: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 8, pu_h, dst_stride); break;
case 16: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 16, pu_h, dst_stride); break;
case 32: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 32, pu_h, dst_stride); break;
case 64: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 64, pu_h, dst_stride); break;
case 6: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 6, pu_h, dst_stride); break;
case 12: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 12, pu_h, dst_stride); break;
case 24: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 24, pu_h, dst_stride); break;
case 48: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 48, pu_h, dst_stride); break;
case 6: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 6, pu_h, dst_stride); break;
case 12: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 12, pu_h, dst_stride); break;
case 24: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 24, pu_h, dst_stride); break;
case 48: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 48, pu_h, dst_stride); break;
default:
assert(0 && "Unexpected block width.");
break;
@@ -1175,9 +1175,9 @@ static void bipred_average_ip_ip_avx2(kvz_pixel *dst,
}
}
static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
static INLINE void bipred_average_px_im_template_avx2(kvz_pixel *dst,
kvz_pixel *px,
kvz_pixel_ip *ip,
kvz_pixel_im *ip,
unsigned pu_w,
unsigned pu_h,
unsigned dst_stride)
@@ -1201,19 +1201,19 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
__m256i sample_px_23_16bit = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i*)&px[i + 16]));
sample_px_01_16bit = _mm256_slli_epi16(sample_px_01_16bit, 14 - KVZ_BIT_DEPTH);
sample_px_23_16bit = _mm256_slli_epi16(sample_px_23_16bit, 14 - KVZ_BIT_DEPTH);
__m256i sample_ip_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
__m256i sample_ip_23_16bit = _mm256_loadu_si256((__m256i*)&ip[i + 16]);
__m256i sample_im_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
__m256i sample_im_23_16bit = _mm256_loadu_si256((__m256i*)&ip[i + 16]);
__m256i sample_px_ip_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_ip_01_16bit);
__m256i sample_px_ip_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_ip_01_16bit);
__m256i sample_px_ip_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_ip_23_16bit);
__m256i sample_px_ip_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_ip_23_16bit);
__m256i sample_px_im_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_im_01_16bit);
__m256i sample_px_im_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_im_01_16bit);
__m256i sample_px_im_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_im_23_16bit);
__m256i sample_px_im_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_im_23_16bit);
__m256i all_ones = _mm256_set1_epi16(1);
__m256i avg_01_lo = _mm256_madd_epi16(sample_px_ip_01_lo, all_ones);
__m256i avg_01_hi = _mm256_madd_epi16(sample_px_ip_01_hi, all_ones);
__m256i avg_23_lo = _mm256_madd_epi16(sample_px_ip_23_lo, all_ones);
__m256i avg_23_hi = _mm256_madd_epi16(sample_px_ip_23_hi, all_ones);
__m256i avg_01_lo = _mm256_madd_epi16(sample_px_im_01_lo, all_ones);
__m256i avg_01_hi = _mm256_madd_epi16(sample_px_im_01_hi, all_ones);
__m256i avg_23_lo = _mm256_madd_epi16(sample_px_im_23_lo, all_ones);
__m256i avg_23_hi = _mm256_madd_epi16(sample_px_im_23_hi, all_ones);
avg_01_lo = _mm256_add_epi32(avg_01_lo, offset);
avg_01_hi = _mm256_add_epi32(avg_01_hi, offset);
@@ -1255,19 +1255,19 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
__m256i sample_px_23_16bit = _mm256_cvtepu8_epi16(sample_px_23_8bit);
sample_px_01_16bit = _mm256_slli_epi16(sample_px_01_16bit, 14 - KVZ_BIT_DEPTH);
sample_px_23_16bit = _mm256_slli_epi16(sample_px_23_16bit, 14 - KVZ_BIT_DEPTH);
__m256i sample_ip_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
__m256i sample_ip_23_16bit = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&ip[i + 16]));
__m256i sample_im_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
__m256i sample_im_23_16bit = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&ip[i + 16]));
__m256i sample_px_ip_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_ip_01_16bit);
__m256i sample_px_ip_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_ip_01_16bit);
__m256i sample_px_ip_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_ip_23_16bit);
__m256i sample_px_ip_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_ip_23_16bit);
__m256i sample_px_im_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_im_01_16bit);
__m256i sample_px_im_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_im_01_16bit);
__m256i sample_px_im_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_im_23_16bit);
__m256i sample_px_im_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_im_23_16bit);
__m256i all_ones = _mm256_set1_epi16(1);
__m256i avg_01_lo = _mm256_madd_epi16(sample_px_ip_01_lo, all_ones);
__m256i avg_01_hi = _mm256_madd_epi16(sample_px_ip_01_hi, all_ones);
__m256i avg_23_lo = _mm256_madd_epi16(sample_px_ip_23_lo, all_ones);
__m256i avg_23_hi = _mm256_madd_epi16(sample_px_ip_23_hi, all_ones);
__m256i avg_01_lo = _mm256_madd_epi16(sample_px_im_01_lo, all_ones);
__m256i avg_01_hi = _mm256_madd_epi16(sample_px_im_01_hi, all_ones);
__m256i avg_23_lo = _mm256_madd_epi16(sample_px_im_23_lo, all_ones);
__m256i avg_23_hi = _mm256_madd_epi16(sample_px_im_23_hi, all_ones);
avg_01_lo = _mm256_add_epi32(avg_01_lo, offset);
avg_01_hi = _mm256_add_epi32(avg_01_hi, offset);
@@ -1304,14 +1304,14 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
__m128i sample_px_8bit = _mm_loadu_si128((__m128i*)&px[i]);
__m256i sample_px_16bit = _mm256_cvtepu8_epi16(sample_px_8bit);
sample_px_16bit = _mm256_slli_epi16(sample_px_16bit, 14 - KVZ_BIT_DEPTH);
__m256i sample_ip_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
__m256i sample_im_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
__m256i sample_px_ip_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_ip_16bit);
__m256i sample_px_ip_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_ip_16bit);
__m256i sample_px_im_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_im_16bit);
__m256i sample_px_im_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_im_16bit);
__m256i all_ones = _mm256_set1_epi16(1);
__m256i avg_lo = _mm256_madd_epi16(sample_px_ip_lo, all_ones);
__m256i avg_hi = _mm256_madd_epi16(sample_px_ip_hi, all_ones);
__m256i avg_lo = _mm256_madd_epi16(sample_px_im_lo, all_ones);
__m256i avg_hi = _mm256_madd_epi16(sample_px_im_hi, all_ones);
avg_lo = _mm256_add_epi32(avg_lo, offset);
avg_hi = _mm256_add_epi32(avg_hi, offset);
@@ -1339,14 +1339,14 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
__m256i mask = _mm256_setr_epi64x(-1, -1, -1, 0);
__m256i sample_px_16bit = _mm256_cvtepu8_epi16(sample_px_8bit);
sample_px_16bit = _mm256_slli_epi16(sample_px_16bit, 14 - KVZ_BIT_DEPTH);
__m256i sample_ip_16bit = _mm256_maskload_epi64((const long long*)(&ip[i]), mask);
__m256i sample_im_16bit = _mm256_maskload_epi64((const long long*)(&ip[i]), mask);
__m256i sample_px_ip_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_ip_16bit);
__m256i sample_px_ip_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_ip_16bit);
__m256i sample_px_im_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_im_16bit);
__m256i sample_px_im_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_im_16bit);
__m256i all_ones = _mm256_set1_epi16(1);
__m256i avg_lo = _mm256_madd_epi16(sample_px_ip_lo, all_ones);
__m256i avg_hi = _mm256_madd_epi16(sample_px_ip_hi, all_ones);
__m256i avg_lo = _mm256_madd_epi16(sample_px_im_lo, all_ones);
__m256i avg_hi = _mm256_madd_epi16(sample_px_im_hi, all_ones);
avg_lo = _mm256_add_epi32(avg_lo, offset);
avg_hi = _mm256_add_epi32(avg_hi, offset);
@@ -1376,9 +1376,9 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
}
}
static void bipred_average_px_ip_avx2(kvz_pixel *dst,
static void bipred_average_px_im_avx2(kvz_pixel *dst,
kvz_pixel *px,
kvz_pixel_ip *ip,
kvz_pixel_im *ip,
unsigned pu_w,
unsigned pu_h,
unsigned dst_stride)
@@ -1386,16 +1386,16 @@ static void bipred_average_px_ip_avx2(kvz_pixel *dst,
// Use scalar code for yet unoptimized block sizes (4x4, 2x8)
if (!(pu_w == 4 && pu_h == 4) && pu_w > 2) {
switch (pu_w) {
case 4: bipred_average_px_ip_template_avx2(dst, px, ip, 4, pu_h, dst_stride); break;
case 8: bipred_average_px_ip_template_avx2(dst, px, ip, 8, pu_h, dst_stride); break;
case 16: bipred_average_px_ip_template_avx2(dst, px, ip, 16, pu_h, dst_stride); break;
case 32: bipred_average_px_ip_template_avx2(dst, px, ip, 32, pu_h, dst_stride); break;
case 64: bipred_average_px_ip_template_avx2(dst, px, ip, 64, pu_h, dst_stride); break;
case 4: bipred_average_px_im_template_avx2(dst, px, ip, 4, pu_h, dst_stride); break;
case 8: bipred_average_px_im_template_avx2(dst, px, ip, 8, pu_h, dst_stride); break;
case 16: bipred_average_px_im_template_avx2(dst, px, ip, 16, pu_h, dst_stride); break;
case 32: bipred_average_px_im_template_avx2(dst, px, ip, 32, pu_h, dst_stride); break;
case 64: bipred_average_px_im_template_avx2(dst, px, ip, 64, pu_h, dst_stride); break;
case 6: bipred_average_px_ip_template_avx2(dst, px, ip, 6, pu_h, dst_stride); break;
case 12: bipred_average_px_ip_template_avx2(dst, px, ip, 12, pu_h, dst_stride); break;
case 24: bipred_average_px_ip_template_avx2(dst, px, ip, 24, pu_h, dst_stride); break;
case 48: bipred_average_px_ip_template_avx2(dst, px, ip, 48, pu_h, dst_stride); break;
case 6: bipred_average_px_im_template_avx2(dst, px, ip, 6, pu_h, dst_stride); break;
case 12: bipred_average_px_im_template_avx2(dst, px, ip, 12, pu_h, dst_stride); break;
case 24: bipred_average_px_im_template_avx2(dst, px, ip, 24, pu_h, dst_stride); break;
case 48: bipred_average_px_im_template_avx2(dst, px, ip, 48, pu_h, dst_stride); break;
default:
assert(0 && "Unexpected block width.");
break;
@@ -1409,8 +1409,8 @@ static void bipred_average_px_ip_avx2(kvz_pixel *dst,
int y = i / pu_w;
int x = i % pu_w;
int16_t sample_px = px[i] << (14 - KVZ_BIT_DEPTH);
int16_t sample_ip = ip[i];
int32_t rounded = (sample_px + sample_ip + offset) >> shift;
int16_t sample_im = ip[i];
int32_t rounded = (sample_px + sample_im + offset) >> shift;
dst[y * dst_stride + x] = kvz_fast_clip_32bit_to_pixel(rounded);
}
}
@@ -1419,8 +1419,8 @@ static void bipred_average_px_ip_avx2(kvz_pixel *dst,
static void bipred_average_avx2(lcu_t *const lcu,
const yuv_t *const px_L0,
const yuv_t *const px_L1,
const yuv_ip_t *const ip_L0,
const yuv_ip_t *const ip_L1,
const yuv_im_t *const ip_L0,
const yuv_im_t *const ip_L1,
const unsigned pu_x,
const unsigned pu_y,
const unsigned pu_w,
@@ -1438,12 +1438,12 @@ static void bipred_average_avx2(lcu_t *const lcu,
bipred_average_px_px_avx2(lcu->rec.y + pb_offset, px_L0->y, px_L1->y, pu_w, pu_h, LCU_WIDTH);
} else if ((ip_flags_L0 & 1) && (ip_flags_L1 & 1)) {
bipred_average_ip_ip_avx2(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
bipred_average_im_im_avx2(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
} else {
kvz_pixel *src_px = (ip_flags_L0 & 1) ? px_L1->y : px_L0->y;
kvz_pixel_ip *src_ip = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
bipred_average_px_ip_avx2(lcu->rec.y + pb_offset, src_px, src_ip, pu_w, pu_h, LCU_WIDTH);
kvz_pixel_im *src_im = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
bipred_average_px_im_avx2(lcu->rec.y + pb_offset, src_px, src_im, pu_w, pu_h, LCU_WIDTH);
}
}
if (predict_chroma) {
@@ -1456,16 +1456,16 @@ static void bipred_average_avx2(lcu_t *const lcu,
bipred_average_px_px_avx2(lcu->rec.v + pb_offset, px_L0->v, px_L1->v, pb_w, pb_h, LCU_WIDTH_C);
} else if ((ip_flags_L0 & 2) && (ip_flags_L1 & 2)) {
bipred_average_ip_ip_avx2(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_ip_ip_avx2(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_im_im_avx2(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_im_im_avx2(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
} else {
kvz_pixel *src_px_u = (ip_flags_L0 & 2) ? px_L1->u : px_L0->u;
kvz_pixel_ip *src_ip_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
kvz_pixel_im *src_im_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
kvz_pixel *src_px_v = (ip_flags_L0 & 2) ? px_L1->v : px_L0->v;
kvz_pixel_ip *src_ip_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
bipred_average_px_ip_avx2(lcu->rec.u + pb_offset, src_px_u, src_ip_u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_px_ip_avx2(lcu->rec.v + pb_offset, src_px_v, src_ip_v, pb_w, pb_h, LCU_WIDTH_C);
kvz_pixel_im *src_im_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
bipred_average_px_im_avx2(lcu->rec.u + pb_offset, src_px_u, src_im_u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_px_im_avx2(lcu->rec.v + pb_offset, src_px_v, src_im_v, pb_w, pb_h, LCU_WIDTH_C);
}
}
}

(changed file 5 of 6)

@@ -568,9 +568,9 @@ static void bipred_average_px_px(kvz_pixel *dst,
}
}
static void bipred_average_ip_ip(kvz_pixel *dst,
kvz_pixel_ip *ip_L0,
kvz_pixel_ip *ip_L1,
static void bipred_average_im_im(kvz_pixel *dst,
kvz_pixel_im *ip_L0,
kvz_pixel_im *ip_L1,
unsigned pu_w,
unsigned pu_h,
unsigned dst_stride)
@@ -589,9 +589,9 @@ static void bipred_average_ip_ip(kvz_pixel *dst,
}
}
static void bipred_average_px_ip(kvz_pixel *dst,
static void bipred_average_px_im(kvz_pixel *dst,
kvz_pixel *px,
kvz_pixel_ip *ip,
kvz_pixel_im *ip,
unsigned pu_w,
unsigned pu_h,
unsigned dst_stride)
@@ -604,8 +604,8 @@ static void bipred_average_px_ip(kvz_pixel *dst,
int y = i / pu_w;
int x = i % pu_w;
int16_t sample_px = px[i] << (14 - KVZ_BIT_DEPTH);
int16_t sample_ip = ip[i];
int32_t rounded = (sample_px + sample_ip + offset) >> shift;
int16_t sample_im = ip[i];
int32_t rounded = (sample_px + sample_im + offset) >> shift;
dst[y * dst_stride + x] = kvz_fast_clip_32bit_to_pixel(rounded);
}
}
@@ -613,8 +613,8 @@ static void bipred_average_px_ip(kvz_pixel *dst,
static void bipred_average_generic(lcu_t *const lcu,
const yuv_t *const px_L0,
const yuv_t *const px_L1,
const yuv_ip_t *const ip_L0,
const yuv_ip_t *const ip_L1,
const yuv_im_t *const ip_L0,
const yuv_im_t *const ip_L1,
const unsigned pu_x,
const unsigned pu_y,
const unsigned pu_w,
@@ -632,12 +632,12 @@ static void bipred_average_generic(lcu_t *const lcu,
bipred_average_px_px(lcu->rec.y + pb_offset, px_L0->y, px_L1->y, pu_w, pu_h, LCU_WIDTH);
} else if ((ip_flags_L0 & 1) && (ip_flags_L1 & 1)) {
bipred_average_ip_ip(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
bipred_average_im_im(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
} else {
kvz_pixel *src_px = (ip_flags_L0 & 1) ? px_L1->y : px_L0->y;
kvz_pixel_ip *src_ip = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
bipred_average_px_ip(lcu->rec.y + pb_offset, src_px, src_ip, pu_w, pu_h, LCU_WIDTH);
kvz_pixel_im *src_im = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
bipred_average_px_im(lcu->rec.y + pb_offset, src_px, src_im, pu_w, pu_h, LCU_WIDTH);
}
}
if (predict_chroma) {
@@ -650,16 +650,16 @@ static void bipred_average_generic(lcu_t *const lcu,
bipred_average_px_px(lcu->rec.v + pb_offset, px_L0->v, px_L1->v, pb_w, pb_h, LCU_WIDTH_C);
} else if ((ip_flags_L0 & 2) && (ip_flags_L1 & 2)) {
bipred_average_ip_ip(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_ip_ip(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_im_im(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_im_im(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
} else {
kvz_pixel *src_px_u = (ip_flags_L0 & 2) ? px_L1->u : px_L0->u;
kvz_pixel_ip *src_ip_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
kvz_pixel_im *src_im_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
kvz_pixel *src_px_v = (ip_flags_L0 & 2) ? px_L1->v : px_L0->v;
kvz_pixel_ip *src_ip_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
bipred_average_px_ip(lcu->rec.u + pb_offset, src_px_u, src_ip_u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_px_ip(lcu->rec.v + pb_offset, src_px_v, src_ip_v, pb_w, pb_h, LCU_WIDTH_C);
kvz_pixel_im *src_im_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
bipred_average_px_im(lcu->rec.u + pb_offset, src_px_u, src_im_u, pb_w, pb_h, LCU_WIDTH_C);
bipred_average_px_im(lcu->rec.v + pb_offset, src_px_v, src_im_v, pb_w, pb_h, LCU_WIDTH_C);
}
}
}

(changed file 6 of 6)

@@ -136,8 +136,8 @@ typedef uint32_t (hor_sad_func)(const kvz_pixel *pic_data, const kvz_pixel *ref_
typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
const yuv_t *const px_L0,
const yuv_t *const px_L1,
const yuv_ip_t *const ip_L0,
const yuv_ip_t *const ip_L1,
const yuv_im_t *const ip_L0,
const yuv_im_t *const ip_L1,
const unsigned pu_x,
const unsigned pu_y,
const unsigned pu_w,