From aa94bcedbc332943b07010bbcb948b1cca8e3ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reima=20Hyv=C3=B6nen?= Date: Fri, 10 Aug 2018 16:38:49 +0300 Subject: [PATCH] Stream is now pointer --- src/strategies/avx2/picture-avx2.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/strategies/avx2/picture-avx2.c b/src/strategies/avx2/picture-avx2.c index 82c229ea..1d6d5c3b 100644 --- a/src/strategies/avx2/picture-avx2.c +++ b/src/strategies/avx2/picture-avx2.c @@ -750,8 +750,8 @@ static void inter_recon_bipred_no_mov_avx2( { case 4: - - _mm_stream_si32((int*)&(lcu->rec.y[(y_in_lcu)* LCU_WIDTH + x_in_lcu]), _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_y_epi8))); + int32_t * pointer = (int32_t*)&(lcu->rec.y[(y_in_lcu)* LCU_WIDTH + x_in_lcu]); + *pointer = _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_y_epi8)); break; @@ -809,8 +809,11 @@ static void inter_recon_bipred_no_mov_avx2( break; case 8: - _mm_stream_si32((int*)&(lcu->rec.u[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_u_epi8))); - _mm_stream_si32((int*)&(lcu->rec.v[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_v_epi8))); + int32_t * pointer = (int32_t*)&(lcu->rec.u[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]); + *pointer = _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_u_epi8)); + + pointer = (int32_t*)&(lcu->rec.v[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]); + *pointer = _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_v_epi8)); break; @@ -915,9 +918,10 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0, { case 4: - temp_epi8 = _mm256_packus_epi16(temp_y_epi16, temp_y_epi16); - _mm_stream_si32((int*)&(lcu->rec.y[(y_in_lcu)* LCU_WIDTH + x_in_lcu]), _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_epi8))); + + int32_t * pointer = (int32_t*)&(lcu->rec.y[(y_in_lcu)* LCU_WIDTH + x_in_lcu]); + *pointer = _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_epi8)); break; case 8: @@ -1003,11 +1007,14 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0, case 8: temp_epi8 = _mm256_packus_epi16(temp_u_epi16, temp_u_epi16); - _mm_stream_si32((int*)&(lcu->rec.u[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_epi8))); - + + int32_t * pointer = (int32_t*)&(lcu->rec.u[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]); + *pointer = _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_epi8)); + temp_epi8 = _mm256_packus_epi16(temp_v_epi16, temp_v_epi16); - _mm_stream_si32((int*)&(lcu->rec.v[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_epi8))); - + + pointer = (int32_t*)&(lcu->rec.v[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]); + *pointer = _mm_cvtsi128_si32(_mm256_castsi256_si128(temp_epi8)); break;