mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-12-04 13:54:05 +00:00
Add missing packus to the end of planar calculation.
This commit is contained in:
parent
4ae234ef24
commit
0eb0f110c2
|
@ -797,7 +797,7 @@ void uvg_intra_pred_planar_avx2(const cu_loc_t* const cu_loc,
|
||||||
color_t color,
|
color_t color,
|
||||||
const uint8_t* const ref_top,
|
const uint8_t* const ref_top,
|
||||||
const uint8_t* const ref_left,
|
const uint8_t* const ref_left,
|
||||||
uint8_t* const dst)
|
uint8_t* dst)
|
||||||
{
|
{
|
||||||
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
const int width = color == COLOR_Y ? cu_loc->width : cu_loc->chroma_width;
|
||||||
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
const int height = color == COLOR_Y ? cu_loc->height : cu_loc->chroma_height;
|
||||||
|
@ -831,14 +831,20 @@ void uvg_intra_pred_planar_avx2(const cu_loc_t* const cu_loc,
|
||||||
// debug
|
// debug
|
||||||
int16_t* res = (int16_t*)v_res;
|
int16_t* res = (int16_t*)v_res;
|
||||||
|
|
||||||
/*if (samples == 16) {
|
if (samples == 16) {
|
||||||
|
__m256i v_tmp = _mm256_packus_epi16(v_res[0], v_res[0]);
|
||||||
|
v_tmp = _mm256_permute4x64_epi64(v_tmp, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
__m128i v_tmp2 = _mm256_castsi256_si128(v_tmp);
|
||||||
|
_mm_store_si128((__m128i*)dst, v_tmp2);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for (int i = 0, s = 0; i < samples; i += 16, s += 2) {
|
for (int i = 0, s = 0; i < samples; i += 32, s += 2) {
|
||||||
_mm256_store_si256((__m256i*)dst[i], _mm256_packus_epi16(v_res[s + 0], v_res[s + 1]));
|
__m256i v_tmp = _mm256_packus_epi16(v_res[s + 0], v_res[s + 1]);
|
||||||
|
v_tmp = _mm256_permute4x64_epi64(v_tmp, _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
|
||||||
|
_mm256_store_si256((__m256i*)&dst[i], v_tmp);
|
||||||
}
|
}
|
||||||
}*/
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue