diff --git a/src/strategies/avx2/picture-avx2.c b/src/strategies/avx2/picture-avx2.c index d3b6ca29..2c47f02d 100644 --- a/src/strategies/avx2/picture-avx2.c +++ b/src/strategies/avx2/picture-avx2.c @@ -916,28 +916,28 @@ static INLINE void bipred_average_px_px_avx2(kvz_pixel *dst, { // Use scalar code for yet unoptimized block sizes (4x4, 2x8) if (!(pu_w == 4 && pu_h == 4) && pu_w > 2) { - switch (pu_w) { - case 4: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 4, pu_h, dst_stride); break; - case 8: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 8, pu_h, dst_stride); break; - case 16: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 16, pu_h, dst_stride); break; - case 32: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 32, pu_h, dst_stride); break; - case 64: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 64, pu_h, dst_stride); break; - - case 6: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 6, pu_h, dst_stride); break; - case 12: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 12, pu_h, dst_stride); break; - case 24: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 24, pu_h, dst_stride); break; - case 48: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 48, pu_h, dst_stride); break; + switch (pu_w) { + case 4: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 4, pu_h, dst_stride); break; + case 8: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 8, pu_h, dst_stride); break; + case 16: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 16, pu_h, dst_stride); break; + case 32: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 32, pu_h, dst_stride); break; + case 64: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 64, pu_h, dst_stride); break; + + case 6: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 6, pu_h, dst_stride); break; + case 12: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 12, pu_h, dst_stride); break; + case 24: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 24, pu_h, dst_stride); break; + case 48: bipred_average_px_px_template_avx2(dst, px_L0, px_L1, 48, pu_h, dst_stride); break; default: assert(0 && "Unexpected block width."); - break; + break; } } else { int32_t shift = 15 - KVZ_BIT_DEPTH; // TODO: defines int32_t offset = 1 << (shift - 1); - for (int i = 0; i < pu_w * pu_h; ++i) - { - int y = i / pu_w; + for (int i = 0; i < pu_w * pu_h; ++i) + { + int y = i / pu_w; int x = i % pu_w; int16_t sample_L0 = px_L0[i] << (14 - KVZ_BIT_DEPTH); int16_t sample_L1 = px_L1[i] << (14 - KVZ_BIT_DEPTH); @@ -1144,29 +1144,29 @@ static void bipred_average_ip_ip_avx2(kvz_pixel *dst, { // Use scalar code for yet unoptimized block sizes (4x4, 2x8) if (!(pu_w == 4 && pu_h == 4) && pu_w > 2) { - switch (pu_w) { - case 4: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 4, pu_h, dst_stride); break; - case 8: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 8, pu_h, dst_stride); break; - case 16: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 16, pu_h, dst_stride); break; - case 32: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 32, pu_h, dst_stride); break; - case 64: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 64, pu_h, dst_stride); break; - - case 6: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 6, pu_h, dst_stride); break; - case 12: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 12, pu_h, dst_stride); break; - case 24: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 24, pu_h, dst_stride); break; - case 48: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 48, pu_h, dst_stride); break; + switch (pu_w) { + case 4: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 4, pu_h, dst_stride); break; + case 8: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 8, pu_h, dst_stride); break; + case 16: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 16, pu_h, dst_stride); break; + case 32: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 32, pu_h, dst_stride); break; + case 64: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 64, pu_h, dst_stride); break; + + case 6: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 6, pu_h, dst_stride); break; + case 12: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 12, pu_h, dst_stride); break; + case 24: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 24, pu_h, dst_stride); break; + case 48: bipred_average_ip_ip_template_avx2(dst, ip_L0, ip_L1, 48, pu_h, dst_stride); break; default: printf("%dx%d\n", pu_w, pu_h); assert(0 && "Unexpected block width."); - break; + break; } } else { int32_t shift = 15 - KVZ_BIT_DEPTH; // TODO: defines int32_t offset = 1 << (shift - 1); - for (int i = 0; i < pu_w * pu_h; ++i) - { - int y = i / pu_w; + for (int i = 0; i < pu_w * pu_h; ++i) + { + int y = i / pu_w; int x = i % pu_w; int16_t sample_L0 = ip_L0[i]; int16_t sample_L1 = ip_L1[i]; @@ -1385,28 +1385,28 @@ static void bipred_average_px_ip_avx2(kvz_pixel *dst, { // Use scalar code for yet unoptimized block sizes (4x4, 2x8) if (!(pu_w == 4 && pu_h == 4) && pu_w > 2) { - switch (pu_w) { - case 4: bipred_average_px_ip_template_avx2(dst, px, ip, 4, pu_h, dst_stride); break; - case 8: bipred_average_px_ip_template_avx2(dst, px, ip, 8, pu_h, dst_stride); break; - case 16: bipred_average_px_ip_template_avx2(dst, px, ip, 16, pu_h, dst_stride); break; - case 32: bipred_average_px_ip_template_avx2(dst, px, ip, 32, pu_h, dst_stride); break; - case 64: bipred_average_px_ip_template_avx2(dst, px, ip, 64, pu_h, dst_stride); break; - - case 6: bipred_average_px_ip_template_avx2(dst, px, ip, 6, pu_h, dst_stride); break; - case 12: bipred_average_px_ip_template_avx2(dst, px, ip, 12, pu_h, dst_stride); break; - case 24: bipred_average_px_ip_template_avx2(dst, px, ip, 24, pu_h, dst_stride); break; - case 48: bipred_average_px_ip_template_avx2(dst, px, ip, 48, pu_h, dst_stride); break; + switch (pu_w) { + case 4: bipred_average_px_ip_template_avx2(dst, px, ip, 4, pu_h, dst_stride); break; + case 8: bipred_average_px_ip_template_avx2(dst, px, ip, 8, pu_h, dst_stride); break; + case 16: bipred_average_px_ip_template_avx2(dst, px, ip, 16, pu_h, dst_stride); break; + case 32: bipred_average_px_ip_template_avx2(dst, px, ip, 32, pu_h, dst_stride); break; + case 64: bipred_average_px_ip_template_avx2(dst, px, ip, 64, pu_h, dst_stride); break; + + case 6: bipred_average_px_ip_template_avx2(dst, px, ip, 6, pu_h, dst_stride); break; + case 12: bipred_average_px_ip_template_avx2(dst, px, ip, 12, pu_h, dst_stride); break; + case 24: bipred_average_px_ip_template_avx2(dst, px, ip, 24, pu_h, dst_stride); break; + case 48: bipred_average_px_ip_template_avx2(dst, px, ip, 48, pu_h, dst_stride); break; default: assert(0 && "Unexpected block width."); - break; + break; } } else { int32_t shift = 15 - KVZ_BIT_DEPTH; // TODO: defines int32_t offset = 1 << (shift - 1); - for (int i = 0; i < pu_w * pu_h; ++i) - { - int y = i / pu_w; + for (int i = 0; i < pu_w * pu_h; ++i) + { + int y = i / pu_w; int x = i % pu_w; int16_t sample_px = px[i] << (14 - KVZ_BIT_DEPTH); int16_t sample_ip = ip[i];