mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 19:24:06 +00:00
Defined some AVX functions
This commit is contained in:
parent
c884c738b1
commit
203580047d
|
@ -138,11 +138,11 @@ static int sao_edge_ddistortion_avx2(const kvz_pixel *orig_data,
|
||||||
|
|
||||||
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8);
|
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8);
|
||||||
|
|
||||||
__m128i diff_lower_epi32 = _mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c);
|
tmp_diff_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c));
|
||||||
|
|
||||||
__m128i diff_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c);
|
__m128i diff_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c);
|
||||||
|
_mm256_insertf128_si256(tmp_diff_epi32, diff_upper_epi32, 0x1);
|
||||||
tmp_diff_epi32 = _mm256_set_m128i(diff_upper_epi32, diff_lower_epi32);
|
|
||||||
tmp_offset_epi32 = _mm256_permutevar8x32_epi32(offsets_epi32, v_cat_epi32);
|
tmp_offset_epi32 = _mm256_permutevar8x32_epi32(offsets_epi32, v_cat_epi32);
|
||||||
|
|
||||||
offset_zeros_epi32 = _mm256_cmpeq_epi32(zeros_epi32, tmp_offset_epi32);
|
offset_zeros_epi32 = _mm256_cmpeq_epi32(zeros_epi32, tmp_offset_epi32);
|
||||||
|
@ -306,10 +306,9 @@ static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data,
|
||||||
|
|
||||||
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8);
|
__m256i v_cat_epi32 = sao_calc_eo_cat_avx2(vector_a_epi8, vector_b_epi8, vector_c_epi8);
|
||||||
|
|
||||||
__m128i temp_mem_lower_epi32 = _mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c);
|
__m256i temp_mem_epi32 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i*)&orig_data[y * block_width + x] - c));
|
||||||
__m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c);
|
__m128i temp_mem_upper_epi32 = _mm_loadl_epi64((__m128i*)&orig_data[y * block_width + x + 4] - c);
|
||||||
|
_mm256_insertf128_si256(temp_mem_epi32, temp_mem_upper_epi32, 0x1);
|
||||||
__m256i temp_mem_epi32 = _mm256_set_m128i(temp_mem_upper_epi32, temp_mem_lower_epi32);
|
|
||||||
|
|
||||||
// Check which values are right for specific cat amount.
|
// Check which values are right for specific cat amount.
|
||||||
// It's done for every single value that cat could get {1, 2, 0, 3, 4}
|
// It's done for every single value that cat could get {1, 2, 0, 3, 4}
|
||||||
|
|
Loading…
Reference in a new issue