Use unsigned min() to correctly clip -32768

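If a coeff happens to be -32768 (0x8000), its 16-bit abs() is also 0x8000, which a signed min() still interprets as -32768 and therefore returns unchanged instead of clipping it. The value should ultimately be clipped to 3, so interpret the absolute values as unsigned instead to make that happen.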
2024-11-27 19:24:06 +00:00 · 2020-02-04 20:28:14 +02:00 · 2020-02-04 20:28:14 +02:00 · 2abd733199
parent b93b90c0d7
commit 2abd733199
1 changed files with 2 additions and 2 deletions
--- a/src/strategies/avx2/quant-avx2.c
+++ b/src/strategies/avx2/quant-avx2.c
@@ -817,14 +817,14 @@ static uint32_t fast_coeff_cost_avx2(const coeff_t *coeff, int32_t width, int32_
  for (int i = 0; i < width * width; i += 32) {
    __m256i curr_lo      = _mm256_loadu_si256 ((const __m256i *)(coeff + i));
    __m256i curr_abs_lo  = _mm256_abs_epi16   (curr_lo);
-    __m256i curr_max3_lo = _mm256_min_epi16   (curr_abs_lo, threes);
+    __m256i curr_max3_lo = _mm256_min_epu16   (curr_abs_lo, threes);

    // 4x4 blocks only have 16 coeffs, so handle them separately
    __m256i curr_max3_hi;
    if (width >= 8) {
      __m256i curr_hi      = _mm256_loadu_si256 ((const __m256i *)(coeff + i + 16));
      __m256i curr_abs_hi  = _mm256_abs_epi16   (curr_hi);
-              curr_max3_hi = _mm256_min_epi16   (curr_abs_hi, threes);
+              curr_max3_hi = _mm256_min_epu16   (curr_abs_hi, threes);
              curr_max3_hi = _mm256_slli_epi16  (curr_max3_hi, 8);
    } else {
      // Set MSBs for high bytes if they're meaningless, so shuffles will