mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-24 02:24:07 +00:00
Found an incorrect address load in calc_sao_edge_dir. It should now work like the generic implementation.
This commit is contained in:
parent
50e0a47b7a
commit
8b253209a8
|
@ -252,13 +252,13 @@ static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data,
|
|||
|
||||
// Check which values are right for specific cat amount.
|
||||
// It's done for every single value that cat could get {1, 2, 0, 3, 4}
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// v_cat == 0
|
||||
__m256i mask_epi32 = _mm256_cmpeq_epi32(zeros_epi32, v_cat_epi32);
|
||||
int temp_cnt = _mm_popcnt_u32(_mm256_movemask_epi8(mask_epi32)) / 4;
|
||||
cat_sum_cnt[1][0] += temp_cnt;
|
||||
temp_mem_epi32 = _mm256_load_si256((__m256i*)&orig_data[y * block_width + x] - c_data[0]);
|
||||
temp_mem_epi32 = _mm256_sub_epi32(_mm256_load_si256((__m256i*)&orig_data[y * block_width + x]), _mm256_cvtepu8_epi32(vector_c_epi8));
|
||||
temp_epi32 = _mm256_and_si256(mask_epi32, temp_mem_epi32);
|
||||
tmp_zero_values_epi32 = _mm256_add_epi32(tmp_zero_values_epi32, temp_epi32);
|
||||
//--------------------------------------------------------------------------
|
||||
|
@ -336,9 +336,11 @@ static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data,
|
|||
|
||||
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(&vector_a_epi8, &vector_b_epi8, &vector_c_epi8);
|
||||
|
||||
__m256i temp_mem_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c_data[0]));
|
||||
__m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c_data[0]);
|
||||
__m256i temp_mem_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x]));
|
||||
__m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4]);
|
||||
_mm256_inserti128_si256(temp_mem_epi32, temp_mem_upper_epi32, 1);
|
||||
|
||||
temp_mem_epi32 = _mm256_sub_epi32(temp_mem_epi32, _mm256_cvtepu8_epi32(vector_c_epi8));
|
||||
|
||||
// Check which values are right for specific cat amount.
|
||||
// It's done for every single value that cat could get {1, 2, 0, 3, 4}
|
||||
|
|
Loading…
Reference in a new issue