Found an incorrect address load in calc_sao_edge_dir. It should now work like the generic implementation.

This commit is contained in:
Reima Hyvönen 2019-03-28 12:46:41 +02:00 committed by Pauli Oikkonen
parent 50e0a47b7a
commit 8b253209a8

View file

@ -252,13 +252,13 @@ static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data,
// Check which values are right for a specific cat amount.
// It's done for every single value that cat could get {1, 2, 0, 3, 4}
//--------------------------------------------------------------------------
// v_cat == 0
__m256i mask_epi32 = _mm256_cmpeq_epi32(zeros_epi32, v_cat_epi32);
int temp_cnt = _mm_popcnt_u32(_mm256_movemask_epi8(mask_epi32)) / 4;
cat_sum_cnt[1][0] += temp_cnt;
temp_mem_epi32 = _mm256_load_si256((__m256i*)&orig_data[y * block_width + x] - c_data[0]);
temp_mem_epi32 = _mm256_sub_epi32(_mm256_load_si256((__m256i*)&orig_data[y * block_width + x]), _mm256_cvtepu8_epi32(vector_c_epi8));
temp_epi32 = _mm256_and_si256(mask_epi32, temp_mem_epi32);
tmp_zero_values_epi32 = _mm256_add_epi32(tmp_zero_values_epi32, temp_epi32);
//--------------------------------------------------------------------------
@ -336,9 +336,11 @@ static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data,
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(&vector_a_epi8, &vector_b_epi8, &vector_c_epi8);
__m256i temp_mem_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c_data[0]));
__m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c_data[0]);
__m256i temp_mem_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x]));
__m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4]);
_mm256_inserti128_si256(temp_mem_epi32, temp_mem_upper_epi32, 1);
temp_mem_epi32 = _mm256_sub_epi32(temp_mem_epi32, _mm256_cvtepu8_epi32(vector_c_epi8));
// Check which values are right for a specific cat amount.
// It's done for every single value that cat could get {1, 2, 0, 3, 4}