mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
Disable all AVX2 optimizations that cannot be used with MTT/ISP
This commit is contained in:
parent
1c293b8253
commit
19829da152
|
@ -1656,22 +1656,22 @@ int uvg_strategy_register_dct_avx2(void* opaque, uint8_t bitdepth)
|
|||
#if COMPILE_INTEL_AVX2
|
||||
#if UVG_BIT_DEPTH == 8
|
||||
if (bitdepth == 8){
|
||||
success &= uvg_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "fast_forward_dst_4x4", "avx2", 40, &matrix_dst_4x4_avx2);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "dct_4x4", "avx2", 40, &matrix_dct_4x4_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "dct_8x8", "avx2", 40, &matrix_dct_8x8_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "dct_16x16", "avx2", 40, &matrix_dct_16x16_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "dct_32x32", "avx2", 40, &matrix_dct_32x32_avx2);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "fast_inverse_dst_4x4", "avx2", 40, &matrix_idst_4x4_avx2);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "idct_4x4", "avx2", 40, &matrix_idct_4x4_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "idct_8x8", "avx2", 40, &matrix_idct_8x8_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "idct_16x16", "avx2", 40, &matrix_idct_16x16_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "idct_32x32", "avx2", 40, &matrix_idct_32x32_avx2);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "mts_dct", "avx2", 40, &mts_dct_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "mts_idct", "avx2", 40, &mts_idct_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "mts_dct", "avx2", 40, &mts_dct_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "mts_idct", "avx2", 40, &mts_idct_avx2);
|
||||
|
||||
}
|
||||
#endif // UVG_BIT_DEPTH == 8
|
||||
|
|
|
@ -1075,10 +1075,10 @@ int uvg_strategy_register_intra_avx2(void* opaque, uint8_t bitdepth)
|
|||
#if COMPILE_INTEL_AVX2 && defined X86_64
|
||||
#if UVG_BIT_DEPTH == 8
|
||||
if (bitdepth == 8) {
|
||||
success &= uvg_strategyselector_register(opaque, "angular_pred", "avx2", 40, &uvg_angular_pred_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "intra_pred_planar", "avx2", 40, &uvg_intra_pred_planar_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "intra_pred_filtered_dc", "avx2", 40, &uvg_intra_pred_filtered_dc_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "pdpc_planar_dc", "avx2", 40, &uvg_pdpc_planar_dc_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "angular_pred", "avx2", 40, &uvg_angular_pred_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "intra_pred_planar", "avx2", 40, &uvg_intra_pred_planar_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "intra_pred_filtered_dc", "avx2", 40, &uvg_intra_pred_filtered_dc_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "pdpc_planar_dc", "avx2", 40, &uvg_pdpc_planar_dc_avx2);
|
||||
}
|
||||
#endif //UVG_BIT_DEPTH == 8
|
||||
#endif //COMPILE_INTEL_AVX2 && defined X86_64
|
||||
|
|
|
@ -1749,35 +1749,27 @@ static void generate_residual_avx2(const uint8_t* ref_in, const uint8_t* pred_in
|
|||
__m128i diff = _mm_setzero_si128();
|
||||
switch (width) {
|
||||
case 4:
|
||||
diff = get_residual_4x1_avx2(ref_in + 0 * ref_stride, pred_in + 0 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[0]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 1 * ref_stride, pred_in + 1 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[4]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 2 * ref_stride, pred_in + 2 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[8]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + 3 * ref_stride, pred_in + 3 * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[12]), diff);
|
||||
for (int y = 0; y < height; y+=4) {
|
||||
diff = get_residual_4x1_avx2(ref_in + y * ref_stride, pred_in + y * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + (y + 1) * ref_stride, pred_in + (y + 1) * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4 + 4]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + (y + 2) * ref_stride, pred_in + (y + 2) * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4 + 8]), diff);
|
||||
diff = get_residual_4x1_avx2(ref_in + (y + 3) * ref_stride, pred_in + (y + 3) * pred_stride);
|
||||
_mm_storel_epi64((__m128i*) & (residual[y * 4 + 12]), diff);
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
diff = get_residual_8x1_avx2(&ref_in[0 * ref_stride], &pred_in[0 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[0]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[1 * ref_stride], &pred_in[1 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[8]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[2 * ref_stride], &pred_in[2 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[16]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[3 * ref_stride], &pred_in[3 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[24]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[4 * ref_stride], &pred_in[4 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[32]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[5 * ref_stride], &pred_in[5 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[40]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[6 * ref_stride], &pred_in[6 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[48]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[7 * ref_stride], &pred_in[7 * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[56]), diff);
|
||||
for (int y = 0; y < height; y += 2) {
|
||||
diff = get_residual_8x1_avx2(&ref_in[y * ref_stride], &pred_in[y * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[y * 8]), diff);
|
||||
diff = get_residual_8x1_avx2(&ref_in[(y + 1) * ref_stride], &pred_in[(y + 1) * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & (residual[y*8 + 8]), diff);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
for (int y = 0; y < width; ++y) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; x += 16) {
|
||||
diff = get_residual_8x1_avx2(&ref_in[x + y * ref_stride], &pred_in[x + y * pred_stride]);
|
||||
_mm_storeu_si128((__m128i*) & residual[x + y * width], diff);
|
||||
|
@ -1816,15 +1808,15 @@ int uvg_strategy_register_picture_avx2(void* opaque, uint8_t bitdepth)
|
|||
success &= uvg_strategyselector_register(opaque, "satd_32x32", "avx2", 40, &satd_32x32_8bit_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_64x64", "avx2", 40, &satd_64x64_8bit_avx2);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "satd_4x4_dual", "avx2", 40, &satd_8bit_4x4_dual_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_8x8_dual", "avx2", 40, &satd_8bit_8x8_dual_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_16x16_dual", "avx2", 40, &satd_8bit_16x16_dual_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_32x32_dual", "avx2", 40, &satd_8bit_32x32_dual_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_64x64_dual", "avx2", 40, &satd_8bit_64x64_dual_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_any_size", "avx2", 40, &satd_any_size_8bit_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "satd_any_size_quad", "avx2", 40, &satd_any_size_quad_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "satd_4x4_dual", "avx2", 40, &satd_8bit_4x4_dual_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "satd_8x8_dual", "avx2", 40, &satd_8bit_8x8_dual_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "satd_16x16_dual", "avx2", 40, &satd_8bit_16x16_dual_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "satd_32x32_dual", "avx2", 40, &satd_8bit_32x32_dual_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "satd_64x64_dual", "avx2", 40, &satd_8bit_64x64_dual_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "satd_any_size", "avx2", 40, &satd_any_size_8bit_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "satd_any_size_quad", "avx2", 40, &satd_any_size_quad_avx2);
|
||||
|
||||
success &= uvg_strategyselector_register(opaque, "pixels_calc_ssd", "avx2", 40, &pixels_calc_ssd_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "pixels_calc_ssd", "avx2", 40, &pixels_calc_ssd_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "bipred_average", "avx2", 40, &bipred_average_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "get_optimized_sad", "avx2", 40, &get_optimized_sad_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "ver_sad", "avx2", 40, &ver_sad_avx2);
|
||||
|
|
|
@ -960,7 +960,7 @@ int uvg_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth)
|
|||
#if COMPILE_INTEL_AVX2 && defined X86_64
|
||||
#if UVG_BIT_DEPTH == 8
|
||||
if (bitdepth == 8) {
|
||||
success &= uvg_strategyselector_register(opaque, "quantize_residual", "avx2", 40, &uvg_quantize_residual_avx2);
|
||||
//success &= uvg_strategyselector_register(opaque, "quantize_residual", "avx2", 40, &uvg_quantize_residual_avx2);
|
||||
success &= uvg_strategyselector_register(opaque, "dequant", "avx2", 40, &uvg_dequant_avx2);
|
||||
}
|
||||
#endif // UVG_BIT_DEPTH == 8
|
||||
|
|
Loading…
Reference in a new issue