mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Fine-tune pack_16x16b_to_16x2b
Avoid a separate _mm256_set1_epi16 operation when the constant can instead be derived from an existing one with a single bit-shift. Thanks, Intel, for the 3-operand instruction encoding!
This commit is contained in:
parent
314f5b0e1f
commit
3f7340c932
|
@ -71,7 +71,7 @@ static INLINE uint32_t pack_16x16b_to_16x2b(__m256i src)
|
|||
* XXXX XXXX YYYY YYYY Comparison result, for movemask
|
||||
*/
|
||||
const __m256i threes = _mm256_set1_epi16 (3);
|
||||
const __m256i cmpmask = _mm256_set1_epi16 (0x0180);
|
||||
const __m256i cmpmask = _mm256_slli_epi16 (threes, 7); // 0x0180 (avoid set1)
|
||||
|
||||
__m256i clipped = _mm256_min_epu16 (src, threes);
|
||||
__m256i shifted = _mm256_slli_epi16 (clipped, 7);
|
||||
|
|
Loading…
Reference in a new issue