mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
some more optimization for bipred
This commit is contained in:
parent
9a339eef89
commit
cc064da143
|
@ -84,11 +84,7 @@
|
|||
</Lib>
|
||||
<YASM>
|
||||
<Defines>ARCH_X86_64=1;%(Defines)</Defines>
|
||||
<<<<<<< HEAD
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
|
||||
=======
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
|
||||
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
|
||||
</YASM>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)..\..\pthreads.2\include;$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories);$(SolutionDir)..\src\strategies;</AdditionalIncludeDirectories>
|
||||
|
@ -97,11 +93,7 @@
|
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<YASM>
|
||||
<Defines>ARCH_X86_64=0;PREFIX</Defines>
|
||||
<<<<<<< HEAD
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
|
||||
=======
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
|
||||
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
|
||||
</YASM>
|
||||
<Lib>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)..\..\pthreads.2\lib\x86</AdditionalLibraryDirectories>
|
||||
|
@ -116,11 +108,7 @@
|
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<YASM>
|
||||
<Defines>ARCH_X86_64=0;PREFIX</Defines>
|
||||
<<<<<<< HEAD
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
|
||||
=======
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
|
||||
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
|
||||
</YASM>
|
||||
<Lib>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)..\..\pthreads.2\lib\x86</AdditionalLibraryDirectories>
|
||||
|
@ -135,11 +123,7 @@
|
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<YASM>
|
||||
<Defines>ARCH_X86_64=1;%(Defines)</Defines>
|
||||
<<<<<<< HEAD
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86_asm;</IncludePaths>
|
||||
=======
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths);$(SolutionDir)..\src\strategies\x86-asm;</IncludePaths>
|
||||
>>>>>>> cbb5b20449e091471e8608616b30f3b199b29bfd
|
||||
</YASM>
|
||||
<Lib>
|
||||
<AdditionalLibraryDirectories>$(SolutionDir)..\..\pthreads.2\lib\x64</AdditionalLibraryDirectories>
|
||||
|
|
|
@ -939,11 +939,12 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
|
|||
|
||||
case 16:
|
||||
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_y_epi16, temp_y_epi16), 0b10011100);
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_y_epi16, temp_y_epi16), 0b11011000);
|
||||
_mm_storeu_si128((__m128i*)&(lcu->rec.y[(y_in_lcu)* LCU_WIDTH + x_in_lcu]), _mm256_castsi256_si128(temp_epi8));
|
||||
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
if (temp == 0) {
|
||||
|
||||
|
@ -954,7 +955,7 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
|
|||
}
|
||||
|
||||
else {
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16, temp_y_epi16), 0b10011100);
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16, temp_y_epi16), 0b11011000);
|
||||
|
||||
// Store 256-bits of integer data into memory
|
||||
_mm256_storeu_si256((__m256i*)&(lcu->rec.y[start_point]), temp_epi8);
|
||||
|
@ -1036,10 +1037,10 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
|
|||
|
||||
case 32:
|
||||
|
||||
temp_epi8_u = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_u_epi16, temp_u_epi16), 0b10011100);
|
||||
temp_epi8_u = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_u_epi16, temp_u_epi16), 0b11011000);
|
||||
_mm_storeu_si128((__m128i*)&(lcu->rec.u[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm256_castsi256_si128(temp_epi8_u));
|
||||
|
||||
temp_epi8_v = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_v_epi16, temp_v_epi16), 0b10011100);
|
||||
temp_epi8_v = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_v_epi16, temp_v_epi16), 0b11011000);
|
||||
_mm_storeu_si128((__m128i*)&(lcu->rec.v[(y_in_lcu)* LCU_WIDTH_C + x_in_lcu]), _mm256_castsi256_si128(temp_epi8_v));
|
||||
|
||||
break;
|
||||
|
@ -1058,11 +1059,11 @@ static void inter_recon_bipred_avx2(const int hi_prec_luma_rec0,
|
|||
}
|
||||
|
||||
else {
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_u, temp_u_epi16), 0b10011100);
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_u, temp_u_epi16), 0b11011000);
|
||||
|
||||
_mm256_storeu_si256((__m256i*)&(lcu->rec.u[start_point_uv]), temp_epi8);
|
||||
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_v, temp_v_epi16), 0b10011100);
|
||||
temp_epi8 = _mm256_permute4x64_epi64(_mm256_packus_epi16(temp_epi16_v, temp_v_epi16), 0b11011000);
|
||||
|
||||
_mm256_storeu_si256((__m256i*)&(lcu->rec.v[start_point_uv]), temp_epi8);
|
||||
|
||||
|
|
|
@ -33,8 +33,8 @@ static lcu_t lcu1;
|
|||
int temp1, temp2, temp3, temp4;
|
||||
|
||||
int16_t mv_param[2][2] = { { 3,3 },{ 3,3 } };
|
||||
int width = 64;
|
||||
int height = 64;
|
||||
int width = 32;
|
||||
int height = 32;
|
||||
int xpos = 0;
|
||||
int ypos = 0;
|
||||
|
||||
|
|
Loading…
Reference in a new issue