Defined some AVX functions

This commit is contained in:
Reima Hyvönen 2019-03-20 16:03:38 +02:00 committed by Pauli Oikkonen
parent c884c738b1
commit 203580047d

View file

@@ -138,11 +138,11 @@ static int sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8); __m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8);
__m128i diff_lower_epi32 = _mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c); tmp_diff_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c));
__m128i diff_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c); __m128i diff_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c);
_mm256_insertf128_si256(tmp_diff_epi32, diff_upper_epi32, 0x1);
tmp_diff_epi32 = _mm256_set_m128i(diff_upper_epi32, diff_lower_epi32);
tmp_offset_epi32 = _mm256_permutevar8x32_epi32(offsets_epi32, v_cat_epi32); tmp_offset_epi32 = _mm256_permutevar8x32_epi32(offsets_epi32, v_cat_epi32);
offset_zeros_epi32 = _mm256_cmpeq_epi32(zeros_epi32, tmp_offset_epi32); offset_zeros_epi32 = _mm256_cmpeq_epi32(zeros_epi32, tmp_offset_epi32);
@@ -306,10 +306,9 @@ static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data,
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8); __m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8);
__m128i temp_mem_lower_epi32 = _mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c); __m256i temp_mem_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c));
__m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c); __m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c);
_mm256_insertf128_si256(temp_mem_epi32, temp_mem_upper_epi32, 0x1);
__m256i temp_mem_epi32 = _mm256_set_m128i(temp_mem_upper_epi32, temp_mem_lower_epi32);
// Check wich values are right for specific cat amount. // Check wich values are right for specific cat amount.
// It's done for every single value that cat could get {1, 2, 0, 3, 4} // It's done for every single value that cat could get {1, 2, 0, 3, 4}