Consistent naming part 1

Ari Lemmetti 2021-11-21 00:32:01 +02:00
parent 8f0e96162a
commit a68d73674b
6 changed files with 108 additions and 108 deletions

View file

@@ -60,10 +60,10 @@ typedef struct {
 typedef struct {
   int size;
-  kvz_pixel_ip *y;
-  kvz_pixel_ip *u;
-  kvz_pixel_ip *v;
-} yuv_ip_t;
+  kvz_pixel_im *y;
+  kvz_pixel_im *u;
+  kvz_pixel_im *v;
+} yuv_im_t;
 
 kvz_picture *kvz_image_alloc_420(const int32_t width, const int32_t height);
 kvz_picture *kvz_image_alloc(enum kvz_chroma_format chroma_format, const int32_t width, const int32_t height);

View file

@@ -115,7 +115,7 @@ static void inter_recon_frac_luma_hi(const encoder_state_t *const state,
   int32_t block_width,
   int32_t block_height,
   const int16_t mv_param[2],
-  yuv_ip_t *out,
+  yuv_im_t *out,
   const unsigned out_stride)
 {
   int mv_frac_x = (mv_param[0] & 3);
@@ -248,7 +248,7 @@ static void inter_recon_frac_chroma_hi(const encoder_state_t *const state,
   int32_t pu_w,
   int32_t pu_h,
   const int16_t mv_param[2],
-  yuv_ip_t *out,
+  yuv_im_t *out,
   const unsigned out_stride)
 {
   int mv_frac_x = (mv_param[0] & 7);
@@ -367,7 +367,7 @@ static void inter_cp_with_ext_border(const kvz_pixel *ref_buf, int ref_stride,
  * \param height          PU height
  * \param mv_param        motion vector
  * \param lcu_px          destination lcu
- * \param lcu_ip          destination of high precision output, or NULL if not needed
+ * \param lcu_im          destination of high precision output, or NULL if not needed
  * \param predict_luma    Enable or disable luma prediction for this call.
  * \param predict_chroma  Enable or disable chroma prediction for this call.
  */
@@ -380,7 +380,7 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
   int32_t out_stride_luma,
   const int16_t mv_param[2],
   yuv_t *yuv_px,
-  yuv_ip_t *yuv_ip,
+  yuv_im_t *yuv_im,
   bool predict_luma,
   bool predict_chroma)
 {
@@ -403,11 +403,11 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
   if (predict_luma) {
     if (fractional_luma) {
       // With a fractional MV, do interpolation.
-      if (state->encoder_control->cfg.bipred && yuv_ip) {
+      if (state->encoder_control->cfg.bipred && yuv_im) {
         inter_recon_frac_luma_hi(state, ref,
                                  pu_x, pu_y,
                                  pu_w, pu_h,
-                                 mv_param, yuv_ip, out_stride_luma);
+                                 mv_param, yuv_im, out_stride_luma);
       }
       else {
         inter_recon_frac_luma(state, ref,
@@ -444,11 +444,11 @@ static unsigned inter_recon_unipred(const encoder_state_t * const state,
     // Generate prediction for chroma.
     if (fractional_luma || fractional_chroma) {
       // With a fractional MV, do interpolation.
-      if (state->encoder_control->cfg.bipred && yuv_ip) {
+      if (state->encoder_control->cfg.bipred && yuv_im) {
         inter_recon_frac_chroma_hi(state, ref,
                                    pu_x, pu_y,
                                    pu_w, pu_h,
-                                   mv_param, yuv_ip, out_stride_c);
+                                   mv_param, yuv_im, out_stride_c);
       } else {
         inter_recon_frac_chroma(state, ref,
                                 pu_x, pu_y,
@@ -516,8 +516,8 @@ void kvz_inter_recon_bipred(const encoder_state_t *const state,
   // Allocate maximum size arrays for interpolated and copied samples
   ALIGNED(64) kvz_pixel px_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
   ALIGNED(64) kvz_pixel px_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
-  ALIGNED(64) kvz_pixel_ip ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
-  ALIGNED(64) kvz_pixel_ip ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
+  ALIGNED(64) kvz_pixel_im ip_buf_L0[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
+  ALIGNED(64) kvz_pixel_im ip_buf_L1[LCU_LUMA_SIZE + 2 * LCU_CHROMA_SIZE];
 
   yuv_t px_L0;
   px_L0.size = pu_w * pu_h;
@@ -531,13 +531,13 @@ void kvz_inter_recon_bipred(const encoder_state_t *const state,
   px_L1.u = &px_buf_L1[LCU_LUMA_SIZE];
   px_L1.v = &px_buf_L1[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
 
-  yuv_ip_t ip_L0;
+  yuv_im_t ip_L0;
   ip_L0.size = pu_w * pu_h;
   ip_L0.y = &ip_buf_L0[0];
   ip_L0.u = &ip_buf_L0[LCU_LUMA_SIZE];
   ip_L0.v = &ip_buf_L0[LCU_LUMA_SIZE + LCU_CHROMA_SIZE];
 
-  yuv_ip_t ip_L1;
+  yuv_im_t ip_L1;
   ip_L1.size = pu_w * pu_h;
   ip_L1.y = &ip_buf_L1[0];
   ip_L1.u = &ip_buf_L1[LCU_LUMA_SIZE];

View file

@@ -97,7 +97,7 @@ typedef uint8_t kvz_pixel;
 typedef uint16_t kvz_pixel;
 #endif
 
-typedef int16_t kvz_pixel_ip;
+typedef int16_t kvz_pixel_im; // For intermediate precision (interpolation/bipred).
 
 /**
  * \brief Opaque data structure representing one instance of the encoder.
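A minimal sketch of how the renamed intermediate-precision type relates to plain kvz_pixel samples, assuming an 8-bit build where kvz_pixel is uint8_t; the 14-bit lifting shift mirrors the px[i] << (14 - KVZ_BIT_DEPTH) lines elsewhere in this commit, and everything else (BIT_DEPTH macro, sample value) is illustrative. The type is signed because interpolation can produce intermediate values outside the unsigned pixel range.

#include <stdint.h>
#include <stdio.h>

typedef uint8_t kvz_pixel;     /* final reconstructed samples (8-bit build assumed) */
typedef int16_t kvz_pixel_im;  /* intermediate precision (interpolation/bipred) */

#define BIT_DEPTH 8            /* stands in for KVZ_BIT_DEPTH */

int main(void)
{
  kvz_pixel px = 200;
  /* Lift an 8-bit sample to the 14-bit intermediate range, as done before
   * averaging it against an interpolated kvz_pixel_im sample. */
  kvz_pixel_im im = (kvz_pixel_im)(px << (14 - BIT_DEPTH));
  printf("%u lifted to intermediate precision: %d\n", px, im); /* 200 -> 12800 */
  return 0;
}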

View file

@@ -947,9 +947,9 @@ static INLINE void bipred_average_px_px_avx2(kvz_pixel *dst,
   }
 }
 
-static INLINE void bipred_average_ip_ip_template_avx2(kvz_pixel *dst,
-  kvz_pixel_ip *ip_L0,
-  kvz_pixel_ip *ip_L1,
+static INLINE void bipred_average_im_im_template_avx2(kvz_pixel *dst,
+  kvz_pixel_im *ip_L0,
+  kvz_pixel_im *ip_L1,
   unsigned pu_w,
   unsigned pu_h,
   unsigned dst_stride)
@@ -1135,9 +1135,9 @@ static INLINE void bipred_average_ip_ip_template_avx2(kvz_pixel *dst,
   }
 }
 
-static void bipred_average_ip_ip_avx2(kvz_pixel *dst,
-  kvz_pixel_ip *ip_L0,
-  kvz_pixel_ip *ip_L1,
+static void bipred_average_im_im_avx2(kvz_pixel *dst,
+  kvz_pixel_im *ip_L0,
+  kvz_pixel_im *ip_L1,
   unsigned pu_w,
   unsigned pu_h,
   unsigned dst_stride)
@@ -1145,16 +1145,16 @@ static void bipred_average_ip_ip_avx2(kvz_pixel *dst,
   // Use scalar code for yet unoptimized block sizes (4x4, 2x8)
   if (!(pu_w == 4 && pu_h == 4) && pu_w > 2) {
     switch (pu_w) {
-      case 4: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 4, pu_h, dst_stride); break;
-      case 8: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 8, pu_h, dst_stride); break;
-      case 16: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 16, pu_h, dst_stride); break;
-      case 32: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 32, pu_h, dst_stride); break;
-      case 64: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 64, pu_h, dst_stride); break;
-      case 6: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 6, pu_h, dst_stride); break;
-      case 12: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 12, pu_h, dst_stride); break;
-      case 24: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 24, pu_h, dst_stride); break;
-      case 48: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 48, pu_h, dst_stride); break;
+      case 4: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 4, pu_h, dst_stride); break;
+      case 8: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 8, pu_h, dst_stride); break;
+      case 16: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 16, pu_h, dst_stride); break;
+      case 32: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 32, pu_h, dst_stride); break;
+      case 64: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 64, pu_h, dst_stride); break;
+      case 6: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 6, pu_h, dst_stride); break;
+      case 12: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 12, pu_h, dst_stride); break;
+      case 24: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 24, pu_h, dst_stride); break;
+      case 48: bipred_average_im_im_template_avx2(dst, ip_L0, ip_L1, 48, pu_h, dst_stride); break;
       default:
         assert(0 && "Unexpected block width.");
         break;
@@ -1175,9 +1175,9 @@ static void bipred_average_ip_ip_avx2(kvz_pixel *dst,
   }
 }
 
-static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
+static INLINE void bipred_average_px_im_template_avx2(kvz_pixel *dst,
   kvz_pixel *px,
-  kvz_pixel_ip *ip,
+  kvz_pixel_im *ip,
   unsigned pu_w,
   unsigned pu_h,
   unsigned dst_stride)
@@ -1201,19 +1201,19 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
       __m256i sample_px_23_16bit = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i*)&px[i + 16]));
       sample_px_01_16bit = _mm256_slli_epi16(sample_px_01_16bit, 14 - KVZ_BIT_DEPTH);
       sample_px_23_16bit = _mm256_slli_epi16(sample_px_23_16bit, 14 - KVZ_BIT_DEPTH);
-      __m256i sample_ip_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
-      __m256i sample_ip_23_16bit = _mm256_loadu_si256((__m256i*)&ip[i + 16]);
+      __m256i sample_im_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
+      __m256i sample_im_23_16bit = _mm256_loadu_si256((__m256i*)&ip[i + 16]);
 
-      __m256i sample_px_ip_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_ip_01_16bit);
-      __m256i sample_px_ip_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_ip_01_16bit);
-      __m256i sample_px_ip_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_ip_23_16bit);
-      __m256i sample_px_ip_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_ip_23_16bit);
+      __m256i sample_px_im_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_im_01_16bit);
+      __m256i sample_px_im_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_im_01_16bit);
+      __m256i sample_px_im_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_im_23_16bit);
+      __m256i sample_px_im_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_im_23_16bit);
 
       __m256i all_ones = _mm256_set1_epi16(1);
-      __m256i avg_01_lo = _mm256_madd_epi16(sample_px_ip_01_lo, all_ones);
-      __m256i avg_01_hi = _mm256_madd_epi16(sample_px_ip_01_hi, all_ones);
-      __m256i avg_23_lo = _mm256_madd_epi16(sample_px_ip_23_lo, all_ones);
-      __m256i avg_23_hi = _mm256_madd_epi16(sample_px_ip_23_hi, all_ones);
+      __m256i avg_01_lo = _mm256_madd_epi16(sample_px_im_01_lo, all_ones);
+      __m256i avg_01_hi = _mm256_madd_epi16(sample_px_im_01_hi, all_ones);
+      __m256i avg_23_lo = _mm256_madd_epi16(sample_px_im_23_lo, all_ones);
+      __m256i avg_23_hi = _mm256_madd_epi16(sample_px_im_23_hi, all_ones);
 
       avg_01_lo = _mm256_add_epi32(avg_01_lo, offset);
       avg_01_hi = _mm256_add_epi32(avg_01_hi, offset);
@@ -1255,19 +1255,19 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
       __m256i sample_px_23_16bit = _mm256_cvtepu8_epi16(sample_px_23_8bit);
       sample_px_01_16bit = _mm256_slli_epi16(sample_px_01_16bit, 14 - KVZ_BIT_DEPTH);
       sample_px_23_16bit = _mm256_slli_epi16(sample_px_23_16bit, 14 - KVZ_BIT_DEPTH);
-      __m256i sample_ip_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
-      __m256i sample_ip_23_16bit = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&ip[i + 16]));
+      __m256i sample_im_01_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
+      __m256i sample_im_23_16bit = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&ip[i + 16]));
 
-      __m256i sample_px_ip_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_ip_01_16bit);
-      __m256i sample_px_ip_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_ip_01_16bit);
-      __m256i sample_px_ip_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_ip_23_16bit);
-      __m256i sample_px_ip_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_ip_23_16bit);
+      __m256i sample_px_im_01_lo = _mm256_unpacklo_epi16(sample_px_01_16bit, sample_im_01_16bit);
+      __m256i sample_px_im_01_hi = _mm256_unpackhi_epi16(sample_px_01_16bit, sample_im_01_16bit);
+      __m256i sample_px_im_23_lo = _mm256_unpacklo_epi16(sample_px_23_16bit, sample_im_23_16bit);
+      __m256i sample_px_im_23_hi = _mm256_unpackhi_epi16(sample_px_23_16bit, sample_im_23_16bit);
 
       __m256i all_ones = _mm256_set1_epi16(1);
-      __m256i avg_01_lo = _mm256_madd_epi16(sample_px_ip_01_lo, all_ones);
-      __m256i avg_01_hi = _mm256_madd_epi16(sample_px_ip_01_hi, all_ones);
-      __m256i avg_23_lo = _mm256_madd_epi16(sample_px_ip_23_lo, all_ones);
-      __m256i avg_23_hi = _mm256_madd_epi16(sample_px_ip_23_hi, all_ones);
+      __m256i avg_01_lo = _mm256_madd_epi16(sample_px_im_01_lo, all_ones);
+      __m256i avg_01_hi = _mm256_madd_epi16(sample_px_im_01_hi, all_ones);
+      __m256i avg_23_lo = _mm256_madd_epi16(sample_px_im_23_lo, all_ones);
+      __m256i avg_23_hi = _mm256_madd_epi16(sample_px_im_23_hi, all_ones);
 
       avg_01_lo = _mm256_add_epi32(avg_01_lo, offset);
       avg_01_hi = _mm256_add_epi32(avg_01_hi, offset);
@@ -1304,14 +1304,14 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
       __m128i sample_px_8bit = _mm_loadu_si128((__m128i*)&px[i]);
       __m256i sample_px_16bit = _mm256_cvtepu8_epi16(sample_px_8bit);
       sample_px_16bit = _mm256_slli_epi16(sample_px_16bit, 14 - KVZ_BIT_DEPTH);
-      __m256i sample_ip_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
+      __m256i sample_im_16bit = _mm256_loadu_si256((__m256i*)&ip[i]);
 
-      __m256i sample_px_ip_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_ip_16bit);
-      __m256i sample_px_ip_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_ip_16bit);
+      __m256i sample_px_im_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_im_16bit);
+      __m256i sample_px_im_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_im_16bit);
 
       __m256i all_ones = _mm256_set1_epi16(1);
-      __m256i avg_lo = _mm256_madd_epi16(sample_px_ip_lo, all_ones);
-      __m256i avg_hi = _mm256_madd_epi16(sample_px_ip_hi, all_ones);
+      __m256i avg_lo = _mm256_madd_epi16(sample_px_im_lo, all_ones);
+      __m256i avg_hi = _mm256_madd_epi16(sample_px_im_hi, all_ones);
 
       avg_lo = _mm256_add_epi32(avg_lo, offset);
       avg_hi = _mm256_add_epi32(avg_hi, offset);
@@ -1339,14 +1339,14 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
       __m256i mask = _mm256_setr_epi64x(-1, -1, -1, 0);
       __m256i sample_px_16bit = _mm256_cvtepu8_epi16(sample_px_8bit);
       sample_px_16bit = _mm256_slli_epi16(sample_px_16bit, 14 - KVZ_BIT_DEPTH);
-      __m256i sample_ip_16bit = _mm256_maskload_epi64((const long long*)(&ip[i]), mask);
+      __m256i sample_im_16bit = _mm256_maskload_epi64((const long long*)(&ip[i]), mask);
 
-      __m256i sample_px_ip_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_ip_16bit);
-      __m256i sample_px_ip_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_ip_16bit);
+      __m256i sample_px_im_lo = _mm256_unpacklo_epi16(sample_px_16bit, sample_im_16bit);
+      __m256i sample_px_im_hi = _mm256_unpackhi_epi16(sample_px_16bit, sample_im_16bit);
 
       __m256i all_ones = _mm256_set1_epi16(1);
-      __m256i avg_lo = _mm256_madd_epi16(sample_px_ip_lo, all_ones);
-      __m256i avg_hi = _mm256_madd_epi16(sample_px_ip_hi, all_ones);
+      __m256i avg_lo = _mm256_madd_epi16(sample_px_im_lo, all_ones);
+      __m256i avg_hi = _mm256_madd_epi16(sample_px_im_hi, all_ones);
 
       avg_lo = _mm256_add_epi32(avg_lo, offset);
       avg_hi = _mm256_add_epi32(avg_hi, offset);
@@ -1376,9 +1376,9 @@ static INLINE void bipred_average_px_ip_template_avx2(kvz_pixel *dst,
   }
 }
 
-static void bipred_average_px_ip_avx2(kvz_pixel *dst,
+static void bipred_average_px_im_avx2(kvz_pixel *dst,
   kvz_pixel *px,
-  kvz_pixel_ip *ip,
+  kvz_pixel_im *ip,
   unsigned pu_w,
   unsigned pu_h,
   unsigned dst_stride)
@@ -1386,16 +1386,16 @@ static void bipred_average_px_ip_avx2(kvz_pixel *dst,
   // Use scalar code for yet unoptimized block sizes (4x4, 2x8)
   if (!(pu_w == 4 && pu_h == 4) && pu_w > 2) {
     switch (pu_w) {
-      case 4: bipred_average_px_ip_template_avx2(dst, px, ip, 4, pu_h, dst_stride); break;
-      case 8: bipred_average_px_ip_template_avx2(dst, px, ip, 8, pu_h, dst_stride); break;
-      case 16: bipred_average_px_ip_template_avx2(dst, px, ip, 16, pu_h, dst_stride); break;
-      case 32: bipred_average_px_ip_template_avx2(dst, px, ip, 32, pu_h, dst_stride); break;
-      case 64: bipred_average_px_ip_template_avx2(dst, px, ip, 64, pu_h, dst_stride); break;
-      case 6: bipred_average_px_ip_template_avx2(dst, px, ip, 6, pu_h, dst_stride); break;
-      case 12: bipred_average_px_ip_template_avx2(dst, px, ip, 12, pu_h, dst_stride); break;
-      case 24: bipred_average_px_ip_template_avx2(dst, px, ip, 24, pu_h, dst_stride); break;
-      case 48: bipred_average_px_ip_template_avx2(dst, px, ip, 48, pu_h, dst_stride); break;
+      case 4: bipred_average_px_im_template_avx2(dst, px, ip, 4, pu_h, dst_stride); break;
+      case 8: bipred_average_px_im_template_avx2(dst, px, ip, 8, pu_h, dst_stride); break;
+      case 16: bipred_average_px_im_template_avx2(dst, px, ip, 16, pu_h, dst_stride); break;
+      case 32: bipred_average_px_im_template_avx2(dst, px, ip, 32, pu_h, dst_stride); break;
+      case 64: bipred_average_px_im_template_avx2(dst, px, ip, 64, pu_h, dst_stride); break;
+      case 6: bipred_average_px_im_template_avx2(dst, px, ip, 6, pu_h, dst_stride); break;
+      case 12: bipred_average_px_im_template_avx2(dst, px, ip, 12, pu_h, dst_stride); break;
+      case 24: bipred_average_px_im_template_avx2(dst, px, ip, 24, pu_h, dst_stride); break;
+      case 48: bipred_average_px_im_template_avx2(dst, px, ip, 48, pu_h, dst_stride); break;
       default:
         assert(0 && "Unexpected block width.");
         break;
@@ -1409,8 +1409,8 @@ static void bipred_average_px_ip_avx2(kvz_pixel *dst,
       int y = i / pu_w;
       int x = i % pu_w;
       int16_t sample_px = px[i] << (14 - KVZ_BIT_DEPTH);
-      int16_t sample_ip = ip[i];
-      int32_t rounded = (sample_px + sample_ip + offset) >> shift;
+      int16_t sample_im = ip[i];
+      int32_t rounded = (sample_px + sample_im + offset) >> shift;
       dst[y * dst_stride + x] = kvz_fast_clip_32bit_to_pixel(rounded);
     }
   }
@@ -1419,8 +1419,8 @@ static void bipred_average_px_ip_avx2(kvz_pixel *dst,
 
 static void bipred_average_avx2(lcu_t *const lcu,
   const yuv_t *const px_L0,
   const yuv_t *const px_L1,
-  const yuv_ip_t *const ip_L0,
-  const yuv_ip_t *const ip_L1,
+  const yuv_im_t *const ip_L0,
+  const yuv_im_t *const ip_L1,
   const unsigned pu_x,
   const unsigned pu_y,
   const unsigned pu_w,
@@ -1438,12 +1438,12 @@ static void bipred_average_avx2(lcu_t *const lcu,
       bipred_average_px_px_avx2(lcu->rec.y + pb_offset, px_L0->y, px_L1->y, pu_w, pu_h, LCU_WIDTH);
     } else if ((ip_flags_L0 & 1) && (ip_flags_L1 & 1)) {
-      bipred_average_ip_ip_avx2(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
+      bipred_average_im_im_avx2(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
     } else {
       kvz_pixel *src_px = (ip_flags_L0 & 1) ? px_L1->y : px_L0->y;
-      kvz_pixel_ip *src_ip = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
-      bipred_average_px_ip_avx2(lcu->rec.y + pb_offset, src_px, src_ip, pu_w, pu_h, LCU_WIDTH);
+      kvz_pixel_im *src_im = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
+      bipred_average_px_im_avx2(lcu->rec.y + pb_offset, src_px, src_im, pu_w, pu_h, LCU_WIDTH);
     }
   }
 
   if (predict_chroma) {
@@ -1456,16 +1456,16 @@ static void bipred_average_avx2(lcu_t *const lcu,
       bipred_average_px_px_avx2(lcu->rec.v + pb_offset, px_L0->v, px_L1->v, pb_w, pb_h, LCU_WIDTH_C);
     } else if ((ip_flags_L0 & 2) && (ip_flags_L1 & 2)) {
-      bipred_average_ip_ip_avx2(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
-      bipred_average_ip_ip_avx2(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
+      bipred_average_im_im_avx2(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
+      bipred_average_im_im_avx2(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
     } else {
       kvz_pixel *src_px_u = (ip_flags_L0 & 2) ? px_L1->u : px_L0->u;
-      kvz_pixel_ip *src_ip_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
+      kvz_pixel_im *src_im_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
       kvz_pixel *src_px_v = (ip_flags_L0 & 2) ? px_L1->v : px_L0->v;
-      kvz_pixel_ip *src_ip_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
-      bipred_average_px_ip_avx2(lcu->rec.u + pb_offset, src_px_u, src_ip_u, pb_w, pb_h, LCU_WIDTH_C);
-      bipred_average_px_ip_avx2(lcu->rec.v + pb_offset, src_px_v, src_ip_v, pb_w, pb_h, LCU_WIDTH_C);
+      kvz_pixel_im *src_im_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
+      bipred_average_px_im_avx2(lcu->rec.u + pb_offset, src_px_u, src_im_u, pb_w, pb_h, LCU_WIDTH_C);
+      bipred_average_px_im_avx2(lcu->rec.v + pb_offset, src_px_v, src_im_v, pb_w, pb_h, LCU_WIDTH_C);
     }
   }
 }

View file

@@ -568,9 +568,9 @@ static void bipred_average_px_px(kvz_pixel *dst,
   }
 }
 
-static void bipred_average_ip_ip(kvz_pixel *dst,
-  kvz_pixel_ip *ip_L0,
-  kvz_pixel_ip *ip_L1,
+static void bipred_average_im_im(kvz_pixel *dst,
+  kvz_pixel_im *ip_L0,
+  kvz_pixel_im *ip_L1,
   unsigned pu_w,
   unsigned pu_h,
   unsigned dst_stride)
@@ -589,9 +589,9 @@ static void bipred_average_ip_ip(kvz_pixel *dst,
   }
 }
 
-static void bipred_average_px_ip(kvz_pixel *dst,
+static void bipred_average_px_im(kvz_pixel *dst,
   kvz_pixel *px,
-  kvz_pixel_ip *ip,
+  kvz_pixel_im *ip,
   unsigned pu_w,
   unsigned pu_h,
   unsigned dst_stride)
@@ -604,8 +604,8 @@ static void bipred_average_px_ip(kvz_pixel *dst,
     int y = i / pu_w;
     int x = i % pu_w;
     int16_t sample_px = px[i] << (14 - KVZ_BIT_DEPTH);
-    int16_t sample_ip = ip[i];
-    int32_t rounded = (sample_px + sample_ip + offset) >> shift;
+    int16_t sample_im = ip[i];
+    int32_t rounded = (sample_px + sample_im + offset) >> shift;
     dst[y * dst_stride + x] = kvz_fast_clip_32bit_to_pixel(rounded);
   }
 }
@@ -613,8 +613,8 @@ static void bipred_average_px_ip(kvz_pixel *dst,
 
 static void bipred_average_generic(lcu_t *const lcu,
   const yuv_t *const px_L0,
   const yuv_t *const px_L1,
-  const yuv_ip_t *const ip_L0,
-  const yuv_ip_t *const ip_L1,
+  const yuv_im_t *const ip_L0,
+  const yuv_im_t *const ip_L1,
   const unsigned pu_x,
   const unsigned pu_y,
   const unsigned pu_w,
@@ -632,12 +632,12 @@ static void bipred_average_generic(lcu_t *const lcu,
       bipred_average_px_px(lcu->rec.y + pb_offset, px_L0->y, px_L1->y, pu_w, pu_h, LCU_WIDTH);
     } else if ((ip_flags_L0 & 1) && (ip_flags_L1 & 1)) {
-      bipred_average_ip_ip(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
+      bipred_average_im_im(lcu->rec.y + pb_offset, ip_L0->y, ip_L1->y, pu_w, pu_h, LCU_WIDTH);
     } else {
       kvz_pixel *src_px = (ip_flags_L0 & 1) ? px_L1->y : px_L0->y;
-      kvz_pixel_ip *src_ip = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
-      bipred_average_px_ip(lcu->rec.y + pb_offset, src_px, src_ip, pu_w, pu_h, LCU_WIDTH);
+      kvz_pixel_im *src_im = (ip_flags_L0 & 1) ? ip_L0->y : ip_L1->y;
+      bipred_average_px_im(lcu->rec.y + pb_offset, src_px, src_im, pu_w, pu_h, LCU_WIDTH);
     }
   }
 
   if (predict_chroma) {
@@ -650,16 +650,16 @@ static void bipred_average_generic(lcu_t *const lcu,
      bipred_average_px_px(lcu->rec.v + pb_offset, px_L0->v, px_L1->v, pb_w, pb_h, LCU_WIDTH_C);
    } else if ((ip_flags_L0 & 2) && (ip_flags_L1 & 2)) {
-      bipred_average_ip_ip(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
-      bipred_average_ip_ip(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
+      bipred_average_im_im(lcu->rec.u + pb_offset, ip_L0->u, ip_L1->u, pb_w, pb_h, LCU_WIDTH_C);
+      bipred_average_im_im(lcu->rec.v + pb_offset, ip_L0->v, ip_L1->v, pb_w, pb_h, LCU_WIDTH_C);
    } else {
      kvz_pixel *src_px_u = (ip_flags_L0 & 2) ? px_L1->u : px_L0->u;
-      kvz_pixel_ip *src_ip_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
+      kvz_pixel_im *src_im_u = (ip_flags_L0 & 2) ? ip_L0->u : ip_L1->u;
      kvz_pixel *src_px_v = (ip_flags_L0 & 2) ? px_L1->v : px_L0->v;
-      kvz_pixel_ip *src_ip_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
-      bipred_average_px_ip(lcu->rec.u + pb_offset, src_px_u, src_ip_u, pb_w, pb_h, LCU_WIDTH_C);
-      bipred_average_px_ip(lcu->rec.v + pb_offset, src_px_v, src_ip_v, pb_w, pb_h, LCU_WIDTH_C);
+      kvz_pixel_im *src_im_v = (ip_flags_L0 & 2) ? ip_L0->v : ip_L1->v;
+      bipred_average_px_im(lcu->rec.u + pb_offset, src_px_u, src_im_u, pb_w, pb_h, LCU_WIDTH_C);
+      bipred_average_px_im(lcu->rec.v + pb_offset, src_px_v, src_im_v, pb_w, pb_h, LCU_WIDTH_C);
    }
  }
 }
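For reference, a standalone sketch of the px/im averaging arithmetic used by the scalar bipred_average_px_im above. The shift and offset constants are not visible in this diff, so the HEVC-style choices shift = 15 - bit depth and offset = 1 << (shift - 1) are assumptions here, as are the clipping helper and the sample values.

#include <stdint.h>
#include <stdio.h>

#define BIT_DEPTH 8  /* stands in for KVZ_BIT_DEPTH */

/* Assumed stand-in for kvz_fast_clip_32bit_to_pixel in an 8-bit build. */
static uint8_t clip_to_pixel(int32_t v)
{
  if (v < 0)   return 0;
  if (v > 255) return 255;
  return (uint8_t)v;
}

int main(void)
{
  const int shift  = 15 - BIT_DEPTH;   /* assumed: 7 for 8-bit content */
  const int offset = 1 << (shift - 1); /* assumed: 64, rounds to nearest */

  uint8_t px = 200;     /* plain reconstructed sample from one direction */
  int16_t im = 13000;   /* 14-bit intermediate sample from the other direction */

  /* Same arithmetic as the scalar loop: lift px to intermediate precision,
   * add the two predictions, round, shift back down and clip. */
  int16_t px_lifted = (int16_t)(px << (14 - BIT_DEPTH));
  int32_t rounded   = (px_lifted + im + offset) >> shift;

  printf("bi-prediction average = %u\n", clip_to_pixel(rounded)); /* prints 202 */
  return 0;
}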

View file

@@ -136,8 +136,8 @@ typedef uint32_t (hor_sad_func)(const kvz_pixel *pic_data, const kvz_pixel *ref_
 typedef void (inter_recon_bipred_func)(lcu_t * const lcu,
   const yuv_t *const px_L0,
   const yuv_t *const px_L1,
-  const yuv_ip_t *const ip_L0,
-  const yuv_ip_t *const ip_L1,
+  const yuv_im_t *const ip_L0,
+  const yuv_im_t *const ip_L1,
   const unsigned pu_x,
   const unsigned pu_y,
   const unsigned pu_w,