mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-27 11:24:05 +00:00
[build] Remove support for the yasm asm build
This commit is contained in:
parent
f8375f9bc6
commit
227556a13e
|
@ -1,33 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<ImportGroup Label="PropertySheets" />
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup>
|
|
||||||
<IntDir>$(Platform)-$(Configuration)\</IntDir>
|
|
||||||
<OutDir>$(SolutionDir)..\bin\$(Platform)-$(Configuration)\</OutDir>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<CompileAs>CompileAsC</CompileAs>
|
|
||||||
<WarningLevel>Level4</WarningLevel>
|
|
||||||
<AssemblerOutput>AssemblyAndSourceCode</AssemblerOutput>
|
|
||||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
|
||||||
<PreprocessorDefinitions>KVZ_DLL_EXPORTS;KVZ_COMPILE_ASM;WIN32_LEAN_AND_MEAN;WIN32;WIN64;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
|
||||||
<AdditionalIncludeDirectories>$(SolutionDir)..\src\threadwrapper\include;$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
|
||||||
<DisableSpecificWarnings>4244;4204;4206;4028;4152;4996;4018;4456;4389;4100;4131;4459;4706;4214;4127;4201</DisableSpecificWarnings>
|
|
||||||
<OpenMPSupport>false</OpenMPSupport>
|
|
||||||
<TreatSpecificWarningsAsErrors>4013;4029;4047;4716;4700;4020;4021;4133</TreatSpecificWarningsAsErrors>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
|
||||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
|
||||||
<SubSystem>Console</SubSystem>
|
|
||||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
|
||||||
</Link>
|
|
||||||
<YASM>
|
|
||||||
<Defines>HAVE_ALIGNED_STACK=1</Defines>
|
|
||||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths)</IncludePaths>
|
|
||||||
</YASM>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup />
|
|
||||||
</Project>
|
|
|
@ -1,26 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<ImportGroup Label="PropertySheets" />
|
|
||||||
<PropertyGroup Label="UserMacros" />
|
|
||||||
<PropertyGroup />
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<ClCompile>
|
|
||||||
<FloatingPointModel>Fast</FloatingPointModel>
|
|
||||||
<Optimization>Full</Optimization>
|
|
||||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
|
||||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
|
||||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
|
||||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
|
||||||
<OmitFramePointers>
|
|
||||||
</OmitFramePointers>
|
|
||||||
<EnableFiberSafeOptimizations>
|
|
||||||
</EnableFiberSafeOptimizations>
|
|
||||||
</ClCompile>
|
|
||||||
<Link>
|
|
||||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
|
||||||
</Link>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
<ItemGroup />
|
|
||||||
</Project>
|
|
|
@ -1,31 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<PropertyGroup
|
|
||||||
Condition="'$(YASMBeforeTargets)' == '' and '$(YASMAfterTargets)' == '' and '$(ConfigurationType)' != 'Makefile'">
|
|
||||||
<YASMBeforeTargets>Midl</YASMBeforeTargets>
|
|
||||||
<YASMAfterTargets>CustomBuild</YASMAfterTargets>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup>
|
|
||||||
<YASMDependsOn
|
|
||||||
Condition="'$(ConfigurationType)' != 'Makefile'">_SelectedFiles;$(YASMDependsOn)</YASMDependsOn>
|
|
||||||
</PropertyGroup>
|
|
||||||
<!-- Object format name for vsyasm must be in lower case. -->
|
|
||||||
<PropertyGroup Condition="'$(Platform)' == 'Win32'">
|
|
||||||
<YASMFormat>win32</YASMFormat>
|
|
||||||
</PropertyGroup>
|
|
||||||
<PropertyGroup Condition="'$(Platform)' == 'x64'">
|
|
||||||
<YASMFormat>win64</YASMFormat>
|
|
||||||
</PropertyGroup>
|
|
||||||
<ItemDefinitionGroup>
|
|
||||||
<YASM>
|
|
||||||
<Debug>False</Debug>
|
|
||||||
<ObjectFile>$(IntDir)</ObjectFile>
|
|
||||||
<PreProc>0</PreProc>
|
|
||||||
<Parser>0</Parser>
|
|
||||||
<CommandLineTemplate>vsyasm.exe -Xvc -f $(YASMFormat) [AllOptions] [AdditionalOptions] [Inputs]</CommandLineTemplate>
|
|
||||||
<Outputs>%(ObjectFile)</Outputs>
|
|
||||||
<ExecutionDescription>Assembling %(Filename)%(Extension)</ExecutionDescription>
|
|
||||||
<ShowOnlyRuleProperties>false</ShowOnlyRuleProperties>
|
|
||||||
</YASM>
|
|
||||||
</ItemDefinitionGroup>
|
|
||||||
</Project>
|
|
|
@ -1,109 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
|
||||||
<ItemGroup>
|
|
||||||
<PropertyPageSchema
|
|
||||||
Include="$(MSBuildThisFileDirectory)$(MSBuildThisFileName).xml" />
|
|
||||||
<AvailableItemName
|
|
||||||
Include="YASM">
|
|
||||||
<Targets>_YASM</Targets>
|
|
||||||
</AvailableItemName>
|
|
||||||
</ItemGroup>
|
|
||||||
<UsingTask
|
|
||||||
TaskName="YASM"
|
|
||||||
TaskFactory="XamlTaskFactory"
|
|
||||||
AssemblyName="Microsoft.Build.Tasks.v4.0">
|
|
||||||
<Task>$(MSBuildThisFileDirectory)$(MSBuildThisFileName).xml</Task>
|
|
||||||
</UsingTask>
|
|
||||||
<Target
|
|
||||||
Name="_YASM"
|
|
||||||
BeforeTargets="$(YASMBeforeTargets)"
|
|
||||||
AfterTargets="$(YASMAfterTargets)"
|
|
||||||
Condition="'@(YASM)' != ''"
|
|
||||||
DependsOnTargets="$(YASMDependsOn);ComputeYASMOutput"
|
|
||||||
Outputs="@(YASM->'$(ProjectDir)$(IntDir)%(Filename).obj')"
|
|
||||||
Inputs="@(YASM);%(YASM.AdditionalDependencies);$(MSBuildProjectFile)">
|
|
||||||
<ItemGroup
|
|
||||||
Condition="'@(SelectedFiles)' != ''">
|
|
||||||
<YASM
|
|
||||||
Remove="@(YASM)"
|
|
||||||
Condition="'%(Identity)' != '@(SelectedFiles)'" />
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<YASM_tlog
|
|
||||||
Include="%(YASM.ObjectFile)"
|
|
||||||
Condition="'%(YASM.ObjectFile)' != '' and '%(YASM.ExcludedFromBuild)' != 'true'">
|
|
||||||
<Source>@(YASM->'%(FullPath)', '|')</Source>
|
|
||||||
</YASM_tlog>
|
|
||||||
</ItemGroup>
|
|
||||||
<Message
|
|
||||||
Importance="High"
|
|
||||||
Text="%(YASM.ExecutionDescription)" />
|
|
||||||
<WriteLinesToFile
|
|
||||||
Condition="'@(YASM_tlog)' != '' and '%(YASM_tlog.ExcludedFromBuild)' != 'true'"
|
|
||||||
File="$(TLogLocation)$(ProjectName).write.1.tlog"
|
|
||||||
Lines="^%(YASM_tlog.Source);@(YASM->'$(ProjectDir)$(IntDir)%(Filename).obj')"
|
|
||||||
Encoding="Unicode" />
|
|
||||||
<YASM
|
|
||||||
Condition="'@(YASM)' != '' and '%(YASM.ExcludedFromBuild)' != 'true'"
|
|
||||||
CommandLineTemplate="%(YASM.CommandLineTemplate)"
|
|
||||||
Debug="%(YASM.Debug)"
|
|
||||||
PreIncludeFile="%(YASM.PreIncludeFile)"
|
|
||||||
IncludePaths="%(YASM.IncludePaths)"
|
|
||||||
Defines="%(YASM.Defines)"
|
|
||||||
UnDefines="%(YASM.UnDefines)"
|
|
||||||
ObjectFile="%(YASM.ObjectFile)"
|
|
||||||
ListFile="%(YASM.ListFile)"
|
|
||||||
MapFile="%(YASM.MapFile)"
|
|
||||||
ErrorFile="%(YASM.ErrorFile)"
|
|
||||||
SymbolPrefix="%(YASM.SymbolPrefix)"
|
|
||||||
SymbolSuffix="%(YASM.SymbolSuffix)"
|
|
||||||
PreProc="%(YASM.PreProc)"
|
|
||||||
Parser="%(YASM.Parser)"
|
|
||||||
AdditionalOptions="%(YASM.AdditionalOptions)"
|
|
||||||
Inputs="@(YASM)" />
|
|
||||||
</Target>
|
|
||||||
<PropertyGroup>
|
|
||||||
<ComputeLinkInputsTargets>
|
|
||||||
$(ComputeLinkInputsTargets);
|
|
||||||
ComputeYASMOutput;
|
|
||||||
</ComputeLinkInputsTargets>
|
|
||||||
<ComputeLibInputsTargets>
|
|
||||||
$(ComputeLibInputsTargets);
|
|
||||||
ComputeYASMOutput;
|
|
||||||
</ComputeLibInputsTargets>
|
|
||||||
</PropertyGroup>
|
|
||||||
<Target
|
|
||||||
Name="ComputeYASMOutput"
|
|
||||||
Condition="'@(YASM)' != ''">
|
|
||||||
<ItemGroup>
|
|
||||||
<YASMDirsToMake
|
|
||||||
Condition="'@(YASM)' != '' and '%(YASM.ExcludedFromBuild)' != 'true' and !HasTrailingSlash('%(YASM.ObjectFile)')"
|
|
||||||
Include="%(YASM.ObjectFile)" />
|
|
||||||
<Link
|
|
||||||
Include="%(YASMDirsToMake.Identity)"
|
|
||||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
|
||||||
<Lib
|
|
||||||
Include="%(YASMDirsToMake.Identity)"
|
|
||||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
|
||||||
<ImpLib
|
|
||||||
Include="%(YASMDirsToMake.Identity)"
|
|
||||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<YASMDirsToMake
|
|
||||||
Condition="'@(YASM)' != '' and '%(YASM.ExcludedFromBuild)' != 'true' and HasTrailingSlash('%(YASM.ObjectFile)')"
|
|
||||||
Include="@(YASM->'%(ObjectFile)%(Filename).obj')" />
|
|
||||||
<Link
|
|
||||||
Include="%(YASMDirsToMake.Identity)"
|
|
||||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
|
||||||
<Lib
|
|
||||||
Include="%(YASMDirsToMake.Identity)"
|
|
||||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
|
||||||
<ImpLib
|
|
||||||
Include="%(YASMDirsToMake.Identity)"
|
|
||||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
|
||||||
</ItemGroup>
|
|
||||||
<MakeDir
|
|
||||||
Directories="@(YASMDirsToMake->'%(RootDir)%(Directory)')" />
|
|
||||||
</Target>
|
|
||||||
</Project>
|
|
|
@ -1,283 +0,0 @@
|
||||||
<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<ProjectSchemaDefinitions xmlns="clr-namespace:Microsoft.Build.Framework.XamlTypes;assembly=Microsoft.Build.Framework" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:sys="clr-namespace:System;assembly=mscorlib" xmlns:transformCallback="Microsoft.Cpp.Dev10.ConvertPropertyCallback">
|
|
||||||
<Rule
|
|
||||||
Name="YASM"
|
|
||||||
PageTemplate="tool"
|
|
||||||
DisplayName="Yasm Assembler"
|
|
||||||
Order="200">
|
|
||||||
|
|
||||||
<Rule.DataSource>
|
|
||||||
<DataSource
|
|
||||||
Persistence="ProjectFile"
|
|
||||||
ItemType="YASM" />
|
|
||||||
</Rule.DataSource>
|
|
||||||
|
|
||||||
<Rule.Categories>
|
|
||||||
|
|
||||||
<Category
|
|
||||||
Name="General">
|
|
||||||
<Category.DisplayName>
|
|
||||||
<sys:String>General</sys:String>
|
|
||||||
</Category.DisplayName>
|
|
||||||
</Category>
|
|
||||||
|
|
||||||
<Category
|
|
||||||
Name="Symbols">
|
|
||||||
<Category.DisplayName>
|
|
||||||
<sys:String>Symbols</sys:String>
|
|
||||||
</Category.DisplayName>
|
|
||||||
</Category>
|
|
||||||
|
|
||||||
<Category
|
|
||||||
Name="Files">
|
|
||||||
<Category.DisplayName>
|
|
||||||
<sys:String>Files</sys:String>
|
|
||||||
</Category.DisplayName>
|
|
||||||
</Category>
|
|
||||||
|
|
||||||
<Category
|
|
||||||
Name="Command Line"
|
|
||||||
Subtype="CommandLine">
|
|
||||||
<Category.DisplayName>
|
|
||||||
<sys:String>Command Line</sys:String>
|
|
||||||
</Category.DisplayName>
|
|
||||||
</Category>
|
|
||||||
|
|
||||||
</Rule.Categories>
|
|
||||||
|
|
||||||
<StringListProperty
|
|
||||||
Name="Inputs"
|
|
||||||
Category="Command Line"
|
|
||||||
IsRequired="true"
|
|
||||||
Switch=" ">
|
|
||||||
<StringListProperty.DataSource>
|
|
||||||
<DataSource
|
|
||||||
Persistence="ProjectFile"
|
|
||||||
ItemType="YASM"
|
|
||||||
SourceType="Item" />
|
|
||||||
</StringListProperty.DataSource>
|
|
||||||
</StringListProperty>
|
|
||||||
|
|
||||||
<BoolProperty
|
|
||||||
Name="Debug"
|
|
||||||
Subcategory="Configuration"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Debug Information"
|
|
||||||
Description="Generate debugging information"
|
|
||||||
Switch="-g cv8" />
|
|
||||||
|
|
||||||
<StringListProperty
|
|
||||||
Name="IncludePaths"
|
|
||||||
Subcategory="Configuration"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Include Paths"
|
|
||||||
Description="Set the paths for any additional include files"
|
|
||||||
Switch="-i &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringListProperty
|
|
||||||
Name="Defines"
|
|
||||||
Category="Symbols"
|
|
||||||
Subcategory="Pre-Defined Symbols"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Defined Symbols"
|
|
||||||
Description="Specify pre-defined symbols ('symbol' or 'symbol = value') "
|
|
||||||
Switch="-d &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringListProperty
|
|
||||||
Name="UnDefines"
|
|
||||||
Category="Symbols"
|
|
||||||
Subcategory="Pre-Defined Symbols"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Remove Symbols"
|
|
||||||
Description="Remove pre-defined symbols "
|
|
||||||
Switch="-u &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="ObjectFile"
|
|
||||||
Subcategory="Output"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Object File Name"
|
|
||||||
Description="Select the output file name"
|
|
||||||
Switch="-o &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="ListFile"
|
|
||||||
Category="Files"
|
|
||||||
Subcategory="Output"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="List File Name"
|
|
||||||
Description="Select an output listing by setting its file name"
|
|
||||||
Switch="-l &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="PreIncludeFile"
|
|
||||||
Category="Files"
|
|
||||||
Subcategory="Configuration"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Pre Include File"
|
|
||||||
Description="Select a pre-included file by setting its name"
|
|
||||||
Switch="-P &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="MapFile"
|
|
||||||
Category="Files"
|
|
||||||
Subcategory="Output"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Map File Name"
|
|
||||||
Description="Select a map output by setting its file name"
|
|
||||||
Switch="--mapdir= &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="ErrorFile"
|
|
||||||
Category="Files"
|
|
||||||
Subcategory="Output"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Error File Name"
|
|
||||||
Description="Send error/warning messages to a file by setting its name"
|
|
||||||
Switch="-E &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="SymbolPrefix"
|
|
||||||
Category="Symbols"
|
|
||||||
Subcategory="Symbols"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="External Symbol Prefix"
|
|
||||||
Description="Prepend symbol to all external symbols"
|
|
||||||
Switch="--prefix &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="SymbolSuffix"
|
|
||||||
Category="Symbols"
|
|
||||||
Subcategory="Symbols"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="External Symbol Suffix"
|
|
||||||
Description="Append symbol to all external symbols"
|
|
||||||
Switch="--suffix &quot;[value]&quot;" />
|
|
||||||
|
|
||||||
<EnumProperty
|
|
||||||
Name="PreProc"
|
|
||||||
Subcategory="Configuration"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Pre-Processor"
|
|
||||||
Description="Select the pre-processor ('nasm' or 'raw')">
|
|
||||||
<EnumValue
|
|
||||||
Name="0"
|
|
||||||
DisplayName="Nasm "
|
|
||||||
Switch="-rnasm" />
|
|
||||||
<EnumValue
|
|
||||||
Name="1"
|
|
||||||
DisplayName="Raw"
|
|
||||||
Switch="-rraw" />
|
|
||||||
</EnumProperty>
|
|
||||||
|
|
||||||
<EnumProperty
|
|
||||||
Name="Parser"
|
|
||||||
Subcategory="Configuration"
|
|
||||||
HelpContext="0"
|
|
||||||
DisplayName="Parser"
|
|
||||||
Description="Select the parser for Intel ('nasm') or AT&amp;T ('gas') syntax">
|
|
||||||
<EnumValue
|
|
||||||
Name="0"
|
|
||||||
DisplayName="Nasm"
|
|
||||||
Switch="-pnasm" />
|
|
||||||
<EnumValue
|
|
||||||
Name="1"
|
|
||||||
DisplayName="Gas"
|
|
||||||
Switch="-pgas" />
|
|
||||||
</EnumProperty>
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="CommandLineTemplate"
|
|
||||||
DisplayName="Command Line"
|
|
||||||
Visible="False"
|
|
||||||
IncludeInCommandLine="False" />
|
|
||||||
|
|
||||||
<DynamicEnumProperty
|
|
||||||
Name="YASMBeforeTargets"
|
|
||||||
Category="General"
|
|
||||||
EnumProvider="Targets"
|
|
||||||
IncludeInCommandLine="False">
|
|
||||||
<DynamicEnumProperty.DisplayName>
|
|
||||||
<sys:String>Execute Before</sys:String>
|
|
||||||
</DynamicEnumProperty.DisplayName>
|
|
||||||
<DynamicEnumProperty.Description>
|
|
||||||
<sys:String>Specifies the targets for the build customization to run before.</sys:String>
|
|
||||||
</DynamicEnumProperty.Description>
|
|
||||||
<DynamicEnumProperty.ProviderSettings>
|
|
||||||
<NameValuePair
|
|
||||||
Name="Exclude"
|
|
||||||
Value="^YASMBeforeTargets|^Compute" />
|
|
||||||
</DynamicEnumProperty.ProviderSettings>
|
|
||||||
<DynamicEnumProperty.DataSource>
|
|
||||||
<DataSource
|
|
||||||
Persistence="ProjectFile"
|
|
||||||
HasConfigurationCondition="true" />
|
|
||||||
</DynamicEnumProperty.DataSource>
|
|
||||||
</DynamicEnumProperty>
|
|
||||||
|
|
||||||
<DynamicEnumProperty
|
|
||||||
Name="YASMAfterTargets"
|
|
||||||
Category="General"
|
|
||||||
EnumProvider="Targets"
|
|
||||||
IncludeInCommandLine="False">
|
|
||||||
<DynamicEnumProperty.DisplayName>
|
|
||||||
<sys:String>Execute After</sys:String>
|
|
||||||
</DynamicEnumProperty.DisplayName>
|
|
||||||
<DynamicEnumProperty.Description>
|
|
||||||
<sys:String>Specifies the targets for the build customization to run after.</sys:String>
|
|
||||||
</DynamicEnumProperty.Description>
|
|
||||||
<DynamicEnumProperty.ProviderSettings>
|
|
||||||
<NameValuePair
|
|
||||||
Name="Exclude"
|
|
||||||
Value="^YASMAfterTargets|^Compute" />
|
|
||||||
</DynamicEnumProperty.ProviderSettings>
|
|
||||||
<DynamicEnumProperty.DataSource>
|
|
||||||
<DataSource
|
|
||||||
Persistence="ProjectFile"
|
|
||||||
ItemType=""
|
|
||||||
HasConfigurationCondition="true" />
|
|
||||||
</DynamicEnumProperty.DataSource>
|
|
||||||
</DynamicEnumProperty>
|
|
||||||
|
|
||||||
<StringListProperty
|
|
||||||
Name="Outputs"
|
|
||||||
DisplayName="Outputs"
|
|
||||||
Visible="False"
|
|
||||||
IncludeInCommandLine="False" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Name="ExecutionDescription"
|
|
||||||
DisplayName="Execution Description"
|
|
||||||
Visible="False"
|
|
||||||
IncludeInCommandLine="False" />
|
|
||||||
|
|
||||||
<StringListProperty
|
|
||||||
Name="AdditionalDependencies"
|
|
||||||
DisplayName="Additional Dependencies"
|
|
||||||
IncludeInCommandLine="False"
|
|
||||||
Visible="true" />
|
|
||||||
|
|
||||||
<StringProperty
|
|
||||||
Subtype="AdditionalOptions"
|
|
||||||
Name="AdditionalOptions"
|
|
||||||
Category="Command Line">
|
|
||||||
<StringProperty.DisplayName>
|
|
||||||
<sys:String>Additional Options</sys:String>
|
|
||||||
</StringProperty.DisplayName>
|
|
||||||
<StringProperty.Description>
|
|
||||||
<sys:String>Additional Options</sys:String>
|
|
||||||
</StringProperty.Description>
|
|
||||||
</StringProperty>
|
|
||||||
</Rule>
|
|
||||||
|
|
||||||
<ItemType
|
|
||||||
Name="YASM"
|
|
||||||
DisplayName="Yasm Assembler" />
|
|
||||||
<FileExtension
|
|
||||||
Name="*.asm"
|
|
||||||
ContentType="YASM" />
|
|
||||||
<ContentType
|
|
||||||
Name="YASM"
|
|
||||||
DisplayName="Yasm Assembler"
|
|
||||||
ItemType="YASM" />
|
|
||||||
</ProjectSchemaDefinitions>
|
|
|
@ -37,7 +37,6 @@
|
||||||
#include "strategies/generic/picture-generic.h"
|
#include "strategies/generic/picture-generic.h"
|
||||||
#include "strategies/sse2/picture-sse2.h"
|
#include "strategies/sse2/picture-sse2.h"
|
||||||
#include "strategies/sse41/picture-sse41.h"
|
#include "strategies/sse41/picture-sse41.h"
|
||||||
#include "strategies/x86_asm/picture-x86-asm.h"
|
|
||||||
#include "strategyselector.h"
|
#include "strategyselector.h"
|
||||||
|
|
||||||
|
|
||||||
|
@ -93,9 +92,6 @@ int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
||||||
if (kvz_g_hardware_flags.intel_flags.sse41) {
|
if (kvz_g_hardware_flags.intel_flags.sse41) {
|
||||||
success &= kvz_strategy_register_picture_sse41(opaque, bitdepth);
|
success &= kvz_strategy_register_picture_sse41(opaque, bitdepth);
|
||||||
}
|
}
|
||||||
if (kvz_g_hardware_flags.intel_flags.avx) {
|
|
||||||
success &= kvz_strategy_register_picture_x86_asm_avx(opaque, bitdepth);
|
|
||||||
}
|
|
||||||
if (kvz_g_hardware_flags.intel_flags.avx2) {
|
if (kvz_g_hardware_flags.intel_flags.avx2) {
|
||||||
success &= kvz_strategy_register_picture_avx2(opaque, bitdepth);
|
success &= kvz_strategy_register_picture_avx2(opaque, bitdepth);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,385 +0,0 @@
|
||||||
;/*****************************************************************************
|
|
||||||
; * This file is part of Kvazaar HEVC encoder.
|
|
||||||
; *
|
|
||||||
; * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
||||||
; * All rights reserved.
|
|
||||||
; *
|
|
||||||
; * Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
; * are permitted provided that the following conditions are met:
|
|
||||||
; *
|
|
||||||
; * * Redistributions of source code must retain the above copyright notice, this
|
|
||||||
; * list of conditions and the following disclaimer.
|
|
||||||
; *
|
|
||||||
; * * Redistributions in binary form must reproduce the above copyright notice, this
|
|
||||||
; * list of conditions and the following disclaimer in the documentation and/or
|
|
||||||
; * other materials provided with the distribution.
|
|
||||||
; *
|
|
||||||
; * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
||||||
; * contributors may be used to endorse or promote products derived from
|
|
||||||
; * this software without specific prior written permission.
|
|
||||||
; *
|
|
||||||
; * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
; * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
; * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
; * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
||||||
; * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
; * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
||||||
; * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
; * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
; * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
; ****************************************************************************/
|
|
||||||
|
|
||||||
%include "x86inc.asm"
|
|
||||||
|
|
||||||
;The cglobal and RET macros come from x86inc.asm;
|
|
||||||
;they push and pop the necessary registers to
|
|
||||||
;stack depending on the operating system
|
|
||||||
|
|
||||||
;Usage: cglobal name, %1, %2, %3
|
|
||||||
;%1: Number of arguments
|
|
||||||
;%2: Number of registers used
|
|
||||||
;%3: Number of xmm registers used.
|
|
||||||
;More info in x86inc.asm
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
;Set x86inc.asm macros to use avx and xmm registers
|
|
||||||
INIT_XMM avx
|
|
||||||
|
|
||||||
;KVZ_SAD_4X4
|
|
||||||
;Calculates SAD of the 16 consecutive bytes in memory
|
|
||||||
;r0 address of the first value(current frame)
|
|
||||||
;r1 address of the first value(reference frame)
|
|
||||||
|
|
||||||
cglobal sad_4x4, 2, 2, 2
|
|
||||||
|
|
||||||
;Load 16 bytes of both frames
|
|
||||||
vmovdqu m0, [r0]
|
|
||||||
vmovdqu m1, [r1]
|
|
||||||
|
|
||||||
;Calculate SAD. The results are written in
|
|
||||||
;m0[15:0] and m0[79:64]
|
|
||||||
vpsadbw m0, m1
|
|
||||||
|
|
||||||
;Sum the results
|
|
||||||
vmovhlps m1, m0
|
|
||||||
vpaddw m0, m1
|
|
||||||
|
|
||||||
;Write the result to eax
|
|
||||||
vmovd eax, m0
|
|
||||||
|
|
||||||
RET
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SAD_4X4_STRIDE
|
|
||||||
;Calculates SAD of a 4x4 block inside a frame with stride
|
|
||||||
;r0 address of the first value(current)
|
|
||||||
;r1 address of the first value(reference)
|
|
||||||
;r2 stride
|
|
||||||
|
|
||||||
cglobal sad_4x4_stride, 3, 3, 2
|
|
||||||
|
|
||||||
;Load 4 times 4 bytes of both frames
|
|
||||||
vpinsrd m0, [r0], 0
|
|
||||||
add r0, r2
|
|
||||||
vpinsrd m0, [r0], 1
|
|
||||||
vpinsrd m0, [r0+r2], 2
|
|
||||||
vpinsrd m0, [r0+r2*2], 3
|
|
||||||
|
|
||||||
vpinsrd m1, [r1], 0
|
|
||||||
add r1, r2
|
|
||||||
vpinsrd m1, [r1], 1
|
|
||||||
vpinsrd m1, [r1+r2], 2
|
|
||||||
vpinsrd m1, [r1+r2*2], 3
|
|
||||||
|
|
||||||
vpsadbw m0, m1
|
|
||||||
|
|
||||||
vmovhlps m1, m0
|
|
||||||
vpaddw m0, m1
|
|
||||||
|
|
||||||
vmovd eax, m0
|
|
||||||
|
|
||||||
RET
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SAD_8X8
|
|
||||||
;Calculates SAD of the 64 consecutive bytes in memory
|
|
||||||
;r0 address of the first value(current)
|
|
||||||
;r1 address of the first value(reference)
|
|
||||||
|
|
||||||
cglobal sad_8x8, 2, 2, 5
|
|
||||||
|
|
||||||
;Load the first half of both frames
|
|
||||||
vmovdqu m0, [r0]
|
|
||||||
vmovdqu m2, [r0+16]
|
|
||||||
|
|
||||||
vmovdqu m1, [r1]
|
|
||||||
vmovdqu m3, [r1+16]
|
|
||||||
|
|
||||||
;Calculate SADs for both
|
|
||||||
vpsadbw m0, m1
|
|
||||||
vpsadbw m2, m3
|
|
||||||
|
|
||||||
;Sum
|
|
||||||
vpaddw m0, m2
|
|
||||||
|
|
||||||
;Repeat for the latter half
|
|
||||||
vmovdqu m1, [r0+16*2]
|
|
||||||
vmovdqu m3, [r0+16*3]
|
|
||||||
|
|
||||||
vmovdqu m2, [r1+16*2]
|
|
||||||
vmovdqu m4, [r1+16*3]
|
|
||||||
|
|
||||||
vpsadbw m1, m2
|
|
||||||
vpsadbw m3, m4
|
|
||||||
|
|
||||||
vpaddw m1, m3
|
|
||||||
|
|
||||||
;Sum all the SADs
|
|
||||||
vpaddw m0, m1
|
|
||||||
|
|
||||||
vmovhlps m1, m0
|
|
||||||
vpaddw m0, m1
|
|
||||||
|
|
||||||
vmovd eax, m0
|
|
||||||
|
|
||||||
RET
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SAD_8X8_STRIDE
|
|
||||||
;Calculates SAD of a 8x8 block inside a frame with stride
|
|
||||||
;r0 address of the first value(current)
|
|
||||||
;r1 address of the first value(reference)
|
|
||||||
;r2 stride
|
|
||||||
|
|
||||||
cglobal sad_8x8_stride, 3, 3, 5
|
|
||||||
|
|
||||||
;Zero m0 register
|
|
||||||
vpxor m0, m0
|
|
||||||
|
|
||||||
;Load the first half to m1 and m3 registers(cur)
|
|
||||||
;Current frame
|
|
||||||
;Load to the high 64 bits of xmm
|
|
||||||
vmovhpd m1, [r0]
|
|
||||||
add r0, r2
|
|
||||||
;Load to the low 64 bits
|
|
||||||
vmovlpd m1, [r0]
|
|
||||||
|
|
||||||
vmovhpd m3, [r0+r2]
|
|
||||||
vmovlpd m3, [r0+r2*2]
|
|
||||||
;lea calculates the address to r0,
|
|
||||||
;but doesn't load anything from
|
|
||||||
;the memory. Equivalent to
|
|
||||||
;two add r0, r2 instructions.
|
|
||||||
lea r0, [r0+r2*2]
|
|
||||||
add r0, r2
|
|
||||||
|
|
||||||
;Reference frame
|
|
||||||
vmovhpd m2, [r1]
|
|
||||||
add r1, r2
|
|
||||||
vmovlpd m2, [r1]
|
|
||||||
|
|
||||||
vmovhpd m4, [r1+r2]
|
|
||||||
vmovlpd m4, [r1+r2*2]
|
|
||||||
lea r1, [r1+r2*2]
|
|
||||||
add r1, r2
|
|
||||||
|
|
||||||
vpsadbw m1, m2
|
|
||||||
vpsadbw m3, m4
|
|
||||||
|
|
||||||
vpaddw m0, m1
|
|
||||||
vpaddw m0, m3
|
|
||||||
|
|
||||||
;Repeat for the other half
|
|
||||||
vmovhpd m1, [r0]
|
|
||||||
add r0, r2
|
|
||||||
vmovlpd m1, [r0]
|
|
||||||
|
|
||||||
vmovhpd m3, [r0+r2]
|
|
||||||
vmovlpd m3, [r0+r2*2]
|
|
||||||
lea r0, [r0+r2*2]
|
|
||||||
add r0, r2
|
|
||||||
|
|
||||||
vmovhpd m2, [r1]
|
|
||||||
add r1, r2
|
|
||||||
vmovlpd m2, [r1]
|
|
||||||
|
|
||||||
vmovhpd m4, [r1+r2]
|
|
||||||
vmovlpd m4, [r1+r2*2]
|
|
||||||
lea r1, [r1+r2*2]
|
|
||||||
add r1, r2
|
|
||||||
|
|
||||||
vpsadbw m1, m2
|
|
||||||
vpsadbw m3, m4
|
|
||||||
|
|
||||||
vpaddw m0, m1
|
|
||||||
vpaddw m0, m3
|
|
||||||
|
|
||||||
vmovhlps m1, m0
|
|
||||||
vpaddw m0, m1
|
|
||||||
|
|
||||||
vmovd eax, m0
|
|
||||||
|
|
||||||
RET
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SAD_16X16
|
|
||||||
;Calculates SAD of the 256 consecutive bytes in memory
|
|
||||||
;r0 address of the first value(current)
|
|
||||||
;r1 address of the first value(reference)
|
|
||||||
|
|
||||||
cglobal sad_16x16, 2, 2, 5
|
|
||||||
|
|
||||||
;Zero m4
|
|
||||||
vpxor m4, m4
|
|
||||||
|
|
||||||
%assign i 0
|
|
||||||
|
|
||||||
;Repeat 8 times.
|
|
||||||
%rep 8
|
|
||||||
|
|
||||||
;Load the next two rows of the current frame
|
|
||||||
vmovdqu m0, [r0 + 16 * i]
|
|
||||||
vmovdqu m2, [r0 + 16 * (i + 1)]
|
|
||||||
|
|
||||||
;Load the next two rows of the reference frame
|
|
||||||
vmovdqu m1, [r1 + 16 * i]
|
|
||||||
vmovdqu m3, [r1 + 16 * (i + 1)]
|
|
||||||
|
|
||||||
vpsadbw m0, m1
|
|
||||||
vpsadbw m2, m3
|
|
||||||
|
|
||||||
;Accumulate SADs to m4
|
|
||||||
vpaddw m4, m0
|
|
||||||
vpaddw m4, m2
|
|
||||||
|
|
||||||
%assign i i+2
|
|
||||||
|
|
||||||
%endrep
|
|
||||||
|
|
||||||
;Calculate the final sum
|
|
||||||
vmovhlps m0, m4
|
|
||||||
vpaddw m4, m0
|
|
||||||
|
|
||||||
vmovd eax, m4
|
|
||||||
|
|
||||||
RET
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SAD_16X16_STRIDE
|
|
||||||
;Calculates SAD of a 16x16 block inside a frame with stride
|
|
||||||
;r0 address of the first value(current)
|
|
||||||
;r1 address of the first value(reference)
|
|
||||||
;r2 stride
|
|
||||||
|
|
||||||
cglobal sad_16x16_stride, 3, 3, 5
|
|
||||||
|
|
||||||
vpxor m4, m4
|
|
||||||
|
|
||||||
%rep 8
|
|
||||||
|
|
||||||
; Load the next 2 rows from rec_buf to m0 and m2
|
|
||||||
vmovdqu m0, [r0]
|
|
||||||
vmovdqu m2, [r0 + r2]
|
|
||||||
lea r0, [r0 + r2*2]
|
|
||||||
|
|
||||||
; Load the next 2 rows from ref_buf to m1 and m3
|
|
||||||
vmovdqu m1, [r1]
|
|
||||||
vmovdqu m3, [r1 + r2]
|
|
||||||
lea r1, [r1 + r2*2]
|
|
||||||
|
|
||||||
vpsadbw m0, m1
|
|
||||||
vpsadbw m2, m3
|
|
||||||
|
|
||||||
vpaddw m4, m0
|
|
||||||
vpaddw m4, m2
|
|
||||||
|
|
||||||
%endrep
|
|
||||||
|
|
||||||
vmovhlps m0, m4
|
|
||||||
vpaddw m4, m0
|
|
||||||
|
|
||||||
vmovd eax, m4
|
|
||||||
|
|
||||||
RET
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SAD_32x32_STRIDE
|
|
||||||
;Calculates SAD of a 32x32 block inside a frame with stride
|
|
||||||
;r0 address of the first value(current)
|
|
||||||
;r1 address of the first value(reference)
|
|
||||||
;r2 stride
|
|
||||||
cglobal sad_32x32_stride, 3, 3, 5
|
|
||||||
vpxor m4, m4
|
|
||||||
|
|
||||||
; Handle 2 lines per iteration
|
|
||||||
%rep 16
|
|
||||||
vmovdqu m0, [r0]
|
|
||||||
vmovdqu m1, [r0 + 16]
|
|
||||||
vmovdqu m2, [r0 + r2]
|
|
||||||
vmovdqu m3, [r0 + r2 + 16]
|
|
||||||
lea r0, [r0 + 2 * r2]
|
|
||||||
|
|
||||||
vpsadbw m0, [r1]
|
|
||||||
vpsadbw m1, [r1 + 16]
|
|
||||||
vpsadbw m2, [r1 + r2]
|
|
||||||
vpsadbw m3, [r1 + r2 + 16]
|
|
||||||
lea r1, [r1 + 2 * r2]
|
|
||||||
|
|
||||||
vpaddd m4, m0
|
|
||||||
vpaddd m4, m1
|
|
||||||
vpaddd m4, m2
|
|
||||||
vpaddd m4, m3
|
|
||||||
%endrep
|
|
||||||
|
|
||||||
vmovhlps m0, m4
|
|
||||||
vpaddd m4, m0
|
|
||||||
|
|
||||||
vmovd eax, m4
|
|
||||||
|
|
||||||
RET
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SAD_64x64_STRIDE
|
|
||||||
;Calculates SAD of a 64x64 block inside a frame with stride
|
|
||||||
;r0 address of the first value(current)
|
|
||||||
;r1 address of the first value(reference)
|
|
||||||
;r2 stride
|
|
||||||
cglobal sad_64x64_stride, 3, 4, 5
|
|
||||||
vpxor m4, m4 ; sum accumulation register
|
|
||||||
mov r3, 4 ; number of iterations in the loop
|
|
||||||
|
|
||||||
Process16Lines:
|
|
||||||
; Intel optimization manual says to not unroll beyond 500 instructions.
|
|
||||||
; Didn't seem to have much of an effect on Ivy Bridge or Haswell, but
|
|
||||||
; smaller is better, when speed is the same, right?
|
|
||||||
%rep 16
|
|
||||||
vmovdqu m0, [r0]
|
|
||||||
vmovdqu m1, [r0 + 1*16]
|
|
||||||
vmovdqu m2, [r0 + 2*16]
|
|
||||||
vmovdqu m3, [r0 + 3*16]
|
|
||||||
|
|
||||||
vpsadbw m0, [r1]
|
|
||||||
vpsadbw m1, [r1 + 1*16]
|
|
||||||
vpsadbw m2, [r1 + 2*16]
|
|
||||||
vpsadbw m3, [r1 + 3*16]
|
|
||||||
|
|
||||||
lea r0, [r0 + r2]
|
|
||||||
lea r1, [r1 + r2]
|
|
||||||
|
|
||||||
vpaddd m4, m0
|
|
||||||
vpaddd m4, m1
|
|
||||||
vpaddd m4, m2
|
|
||||||
vpaddd m4, m3
|
|
||||||
%endrep
|
|
||||||
|
|
||||||
dec r3
|
|
||||||
jnz Process16Lines
|
|
||||||
|
|
||||||
vmovhlps m0, m4
|
|
||||||
vpaddd m4, m0
|
|
||||||
|
|
||||||
vmovd eax, m4
|
|
||||||
|
|
||||||
RET
|
|
|
@ -1,56 +0,0 @@
|
||||||
#ifndef _PICTURE_X86_ASM_SAD_H_
|
|
||||||
#define _PICTURE_X86_ASM_SAD_H_
|
|
||||||
/*****************************************************************************
|
|
||||||
* This file is part of uvg266 VVC encoder.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
* are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer in the documentation and/or
|
|
||||||
* other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived from
|
|
||||||
* this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
||||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
|
||||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \ingroup Optimization
|
|
||||||
* \file
|
|
||||||
* Optimizations for AVX, utilizing ASM implementations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "global.h" // IWYU pragma: keep
|
|
||||||
#include "kvazaar.h"
|
|
||||||
|
|
||||||
#if KVZ_BIT_DEPTH == 8
|
|
||||||
unsigned kvz_sad_4x4_avx(const uint8_t*, const uint8_t*);
|
|
||||||
unsigned kvz_sad_8x8_avx(const uint8_t*, const uint8_t*);
|
|
||||||
unsigned kvz_sad_16x16_avx(const uint8_t*, const uint8_t*);
|
|
||||||
|
|
||||||
unsigned kvz_sad_4x4_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
|
||||||
unsigned kvz_sad_8x8_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
|
||||||
unsigned kvz_sad_16x16_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
|
||||||
unsigned kvz_sad_32x32_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
|
||||||
unsigned kvz_sad_64x64_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
|
||||||
#endif // KVZ_BIT_DEPTH == 8
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,575 +0,0 @@
|
||||||
;/*****************************************************************************
|
|
||||||
; * This file is part of Kvazaar HEVC encoder.
|
|
||||||
; *
|
|
||||||
; * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
||||||
; * All rights reserved.
|
|
||||||
; *
|
|
||||||
; * Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
; * are permitted provided that the following conditions are met:
|
|
||||||
; *
|
|
||||||
; * * Redistributions of source code must retain the above copyright notice, this
|
|
||||||
; * list of conditions and the following disclaimer.
|
|
||||||
; *
|
|
||||||
; * * Redistributions in binary form must reproduce the above copyright notice, this
|
|
||||||
; * list of conditions and the following disclaimer in the documentation and/or
|
|
||||||
; * other materials provided with the distribution.
|
|
||||||
; *
|
|
||||||
; * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
||||||
; * contributors may be used to endorse or promote products derived from
|
|
||||||
; * this software without specific prior written permission.
|
|
||||||
; *
|
|
||||||
; * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
; * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
; * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
; * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
||||||
; * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
; * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
||||||
; * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
; * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
; * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
; ****************************************************************************/
|
|
||||||
|
|
||||||
%include "x86inc.asm"
|
|
||||||
|
|
||||||
;cglobal and RET macros are from x86inc.asm
|
|
||||||
;they push and pop the necessary registers to
|
|
||||||
;stack depending on the operating system
|
|
||||||
|
|
||||||
;Usage: cglobal name, %1, %2, %3
|
|
||||||
;%1: Number of arguments
|
|
||||||
;%2: Number of registers used
|
|
||||||
;%3: Number of xmm registers used.
|
|
||||||
;More info in x86inc.asm
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
;Set x86inc.asm macros to use avx and xmm registers
|
|
||||||
INIT_XMM avx
|
|
||||||
|
|
||||||
;KVZ_ZERO_EXTEND_WD
;Widens the eight packed 16-bit words of one xmm register into eight
;32-bit dwords, split over two xmm registers.
;%1 source register (eight words)
;%2 destination for the four low words, as dwords
;%3 destination for the four high words, as dwords
;
;%2 may be the same register as %1: the high half is moved out to %3
;before %2 is written. %3 must differ from %1.

%macro KVZ_ZERO_EXTEND_WD 3

    ; High 64 bits of the source: move them down into %3, then widen.
    vmovhlps %3, %1
    vpmovzxwd %3, %3
    ; Low 64 bits of the source: widen directly into %2.
    vpmovzxwd %2, %1

%endmacro ; KVZ_ZERO_EXTEND_WD
|
|
||||||
|
|
||||||
; SATD_HORIZONTAL_SUB_AND_ADD
; Runs three rounds of paired horizontal add / horizontal subtract over two
; registers, using the non-destructive three-operand forms so that the sum
; and the difference of each round come from the same inputs.
; TODO: It would probably be possible to do this with 3 registers (destructive vphsubw).
; args:
;   %1, %2: input registers (overwritten by the second round)
;   %3, %4: output registers (results of the third round)

%macro SATD_HORIZONTAL_SUB_AND_ADD 4

    ; Round 1: inputs -> outputs
    vphaddw %3, %1, %2
    vphsubw %4, %1, %2

    ; Round 2: outputs -> inputs (the original input values are lost here)
    vphaddw %1, %3, %4
    vphsubw %2, %3, %4

    ; Round 3: inputs -> outputs (final result)
    vphaddw %3, %1, %2
    vphsubw %4, %1, %2

%endmacro ; SATD_HORIZONTAL_SUB_AND_ADD
|
|
||||||
|
|
||||||
;KVZ_SATD_8X8_STRIDE
;Calculates SATD of two 8x8 blocks of 8-bit samples, each with its own stride.
;r0 address of the first value of the first block
;r1 stride of the first block, in bytes
;r2 address of the first value of the second block
;r3 stride of the second block, in bytes
;
;The result is written in the register r4.
;
;Side effects:
; - r0 and r2 are left advanced by 6 rows (three steps of 2 rows each).
; - m0-m7 are overwritten; on x86-64 m8-m15 are overwritten as well.
; - On 32-bit builds 4 x 16 bytes of stack are reserved for the duration
;   of the macro and released again at the end.

%macro KVZ_SATD_8X8_STRIDE 0

    ;Calculate differences of the 8 rows into
    ;registers m0-m7 (first block minus second block).
    ;m7 doubles as scratch for the second block's row until the last row.
    vpmovzxbw m0, [r0]
    vpmovzxbw m7, [r2]
    vpsubw m0, m7

    vpmovzxbw m1, [r0+r1]
    vpmovzxbw m7, [r2+r3]
    vpsubw m1, m7

    ;Set r0 and r2 2 rows forward
    lea r0, [r0+r1*2]
    lea r2, [r2+r3*2]

    vpmovzxbw m2, [r0]
    vpmovzxbw m7, [r2]
    vpsubw m2, m7

    vpmovzxbw m3, [r0+r1]
    vpmovzxbw m7, [r2+r3]
    vpsubw m3, m7

    lea r0, [r0+r1*2]
    lea r2, [r2+r3*2]

    vpmovzxbw m4, [r0]
    vpmovzxbw m7, [r2]
    vpsubw m4, m7

    vpmovzxbw m5, [r0+r1]
    vpmovzxbw m7, [r2+r3]
    vpsubw m5, m7

    lea r0, [r0+r1*2]
    lea r2, [r2+r3*2]

    vpmovzxbw m6, [r0]
    vpmovzxbw m7, [r2]
    vpsubw m6, m7

    ;32-bit AVX doesn't have registers
    ;xmm8-xmm15, use stack instead

    %if ARCH_X86_64
    vpmovzxbw m7, [r0+r1]
    vpmovzxbw m8, [r2+r3]
    vpsubw m7, m8
    %else
    %define temp0 esp+16*3
    %define temp1 esp+16*2
    %define temp2 esp+16*1
    %define temp3 esp+16*0

    ;Reserve memory for 4 x 128 bits.
    sub esp, 16*4

    ;No spare register for the last row of the second block:
    ;park it in temp0 and subtract from memory.
    vpmovzxbw m7, [r2+r3]
    vmovdqu [temp0], m7
    vpmovzxbw m7, [r0+r1]
    vpsubw m7, [temp0]

    ;Put rows 5-8 to stack
    vmovdqu [temp0], m4
    vmovdqu [temp1], m5
    vmovdqu [temp2], m6
    vmovdqu [temp3], m7
    %endif

    ;Hadamard transform (FWHT algorithm)
    ;Horizontal transform

    %if ARCH_X86_64
    ;Calculate horizontal transform for each row.
    ;Transforms of two rows are interleaved in register pairs.
    ;(m8 and m9, m10 and m11,...)

    SATD_HORIZONTAL_SUB_AND_ADD m0, m1, m8, m9
    SATD_HORIZONTAL_SUB_AND_ADD m2, m3, m10, m11
    SATD_HORIZONTAL_SUB_AND_ADD m4, m5, m12, m13
    SATD_HORIZONTAL_SUB_AND_ADD m6, m7, m14, m15

    %else
    ;Calculate horizontal transforms for the first four rows.
    ;Then load the other four into the registers and store
    ;ready transforms in the stack.
    ;Input registers are m0-m3, results are written in
    ;registers m4-m7 (and memory).

    SATD_HORIZONTAL_SUB_AND_ADD m0, m1, m4, m5
    SATD_HORIZONTAL_SUB_AND_ADD m2, m3, m6, m7

    ;Swap: rows 5-8 come back from the stack into m0-m3 ...
    vmovdqu m3, [temp3]
    vmovdqu m2, [temp2]
    vmovdqu m1, [temp1]
    vmovdqu m0, [temp0]

    ;... and the transformed rows 1-4 take their place on the stack.
    vmovdqu [temp3], m7
    vmovdqu [temp2], m6
    vmovdqu [temp1], m5
    vmovdqu [temp0], m4

    SATD_HORIZONTAL_SUB_AND_ADD m0, m1, m4, m5
    SATD_HORIZONTAL_SUB_AND_ADD m2, m3, m6, m7
    %endif


    ;Vertical transform
    ;Transform columns of the 8x8 block.
    ;First sum the interleaved horizontally
    ;transformed values with one horizontal add
    ;for each pair of rows. Then calculate
    ;with regular packed additions and
    ;subtractions.

    %if ARCH_X86_64
    ;Horizontally transformed values are in registers m8-m15
    ;Results are written in m0-m7

    ;First stage
    vphaddw m0, m8, m9
    vphsubw m1, m8, m9

    vphaddw m2, m10, m11
    vphsubw m3, m10, m11

    vphaddw m4, m12, m13
    vphsubw m5, m12, m13

    vphaddw m6, m14, m15
    vphsubw m7, m14, m15

    ;Second stage
    vpaddw m8, m0, m2
    vpaddw m9, m1, m3
    vpsubw m10, m0, m2
    vpsubw m11, m1, m3

    vpaddw m12, m4, m6
    vpaddw m13, m5, m7
    vpsubw m14, m4, m6
    vpsubw m15, m5, m7

    ;Third stage
    vpaddw m0, m8, m12
    vpaddw m1, m9, m13
    vpaddw m2, m10, m14
    vpaddw m3, m11, m15

    vpsubw m4, m8, m12
    vpsubw m5, m9, m13
    vpsubw m6, m10, m14
    vpsubw m7, m11, m15

    %else
    ;Transformed values are in registers m4-m7
    ;and in memory(temp0-temp3). Transformed values
    ;are written in m4-m7. Also calculate absolute
    ;values for them and accumulate into m0.

    ;First stage
    vphaddw m0, m4, m5
    vphsubw m1, m4, m5

    vphaddw m2, m6, m7
    vphsubw m3, m6, m7

    ;Second stage
    vpaddw m4, m0, m2
    vpaddw m5, m1, m3
    vpsubw m6, m0, m2
    vpsubw m7, m1, m3

    ;Swap the two halves between registers and stack again.
    vmovdqu m3, [temp3]
    vmovdqu m2, [temp2]
    vmovdqu m1, [temp1]
    vmovdqu m0, [temp0]

    vmovdqu [temp3], m7
    vmovdqu [temp2], m6
    vmovdqu [temp1], m5
    vmovdqu [temp0], m4

    ;First stage (second half)
    vphaddw m4, m0, m1
    vphsubw m5, m0, m1

    vphaddw m6, m2, m3
    vphsubw m7, m2, m3

    ;Second stage (second half)
    vpaddw m0, m4, m6
    vpaddw m1, m5, m7
    vpsubw m2, m4, m6
    vpsubw m3, m5, m7

    ;Third stage
    vpaddw m4, m0, [temp0]
    vpaddw m5, m1, [temp1]
    vpsubw m6, m0, [temp0]
    vpsubw m7, m1, [temp1]

    ;Calculate the absolute values and
    ;zero extend 16-bit values to 32-bit
    ;values. Then sum the values.
    ;m1 is free to use as scratch from here on; m2 and m3 are still
    ;needed for the second half below.

    vpabsw m4, m4
    KVZ_ZERO_EXTEND_WD m4, m4, m1
    vpaddd m4, m1

    vpabsw m5, m5
    KVZ_ZERO_EXTEND_WD m5, m5, m1
    vpaddd m5, m1

    vpabsw m6, m6
    KVZ_ZERO_EXTEND_WD m6, m6, m1
    vpaddd m6, m1

    vpabsw m7, m7
    KVZ_ZERO_EXTEND_WD m7, m7, m1
    vpaddd m7, m1

    vpaddd m0, m4, m5
    vpaddd m0, m6
    vpaddd m0, m7

    ;Repeat for the rest
    vpaddw m4, m2, [temp2]
    vpaddw m5, m3, [temp3]
    vpsubw m6, m2, [temp2]
    vpsubw m7, m3, [temp3]

    vpabsw m4, m4
    KVZ_ZERO_EXTEND_WD m4, m4, m1
    vpaddd m4, m1

    vpabsw m5, m5
    KVZ_ZERO_EXTEND_WD m5, m5, m1
    vpaddd m5, m1

    vpabsw m6, m6
    KVZ_ZERO_EXTEND_WD m6, m6, m1
    vpaddd m6, m1

    vpabsw m7, m7
    KVZ_ZERO_EXTEND_WD m7, m7, m1
    vpaddd m7, m1

    ;Sum the other half of the packed results to m4
    vpaddd m4, m5
    vpaddd m4, m6
    vpaddd m4, m7

    ;Sum all packed results to m0
    vpaddd m0, m4

    %endif

    %if ARCH_X86_64

    ;Calculate the absolute values and
    ;zero extend 16-bit values to 32-bit
    ;values: the low four words are widened in place,
    ;the high four go to m8 and are added back, so each
    ;register ends up holding four dword partial sums.

    vpabsw m0, m0
    KVZ_ZERO_EXTEND_WD m0, m0, m8
    vpaddd m0, m8

    vpabsw m1, m1
    KVZ_ZERO_EXTEND_WD m1, m1, m8
    vpaddd m1, m8

    vpabsw m2, m2
    KVZ_ZERO_EXTEND_WD m2, m2, m8
    ;Fold m2's high half back into m2, like every other register here
    ;(it used to be added to m1; the total is the same either way).
    vpaddd m2, m8

    vpabsw m3, m3
    KVZ_ZERO_EXTEND_WD m3, m3, m8
    vpaddd m3, m8

    vpabsw m4, m4
    KVZ_ZERO_EXTEND_WD m4, m4, m8
    vpaddd m4, m8

    vpabsw m5, m5
    KVZ_ZERO_EXTEND_WD m5, m5, m8
    vpaddd m5, m8

    vpabsw m6, m6
    KVZ_ZERO_EXTEND_WD m6, m6, m8
    vpaddd m6, m8

    vpabsw m7, m7
    KVZ_ZERO_EXTEND_WD m7, m7, m8
    vpaddd m7, m8

    ;Calculate packed sum of transformed values to m0
    vpaddd m0, m1
    vpaddd m0, m2
    vpaddd m0, m3
    vpaddd m0, m4
    vpaddd m0, m5
    vpaddd m0, m6
    vpaddd m0, m7
    %endif

    ;Sum the packed values to m0[31:0]
    vphaddd m0, m0
    vphaddd m0, m0

    ;The result is in the lowest 32 bits in m0
    vmovd r4d, m0

    ;8x8 Hadamard transform requires
    ;adding 2 and dividing by 4
    add r4, 2
    shr r4, 2

    ;Zero high 128 bits of ymm registers to
    ;prevent AVX-SSE transition penalty.
    vzeroupper

    ;Release the stack scratch area reserved on 32-bit builds.
    %if ARCH_X86_64 == 0
    add esp, 16*4
    %endif

%endmacro ; KVZ_SATD_8X8_STRIDE
|
|
||||||
|
|
||||||
;KVZ_SATD_4X4
;Calculates SATD of two 4x4 blocks, each stored as 16 consecutive bytes.
;r0 address of the first value(current)
;r1 address of the first value(reference)
;The result is returned in eax.

cglobal satd_4x4, 2, 2, 6

    ;Fetch both 8-byte halves of each block, widening every byte
    ;to a 16-bit word.
    vpmovzxbw m0, [r0]
    vpmovzxbw m1, [r0+8]
    vpmovzxbw m2, [r1]
    vpmovzxbw m3, [r1+8]

    ;Differences: current minus reference.
    vpsubw m0, m0, m2
    vpsubw m1, m1, m3

    ;Hadamard transform
    ;Horizontal phase, stage 1
    vphaddw m4, m0, m1
    vphsubw m5, m0, m1
    ;Horizontal phase, stage 2
    vphaddw m0, m4, m5
    vphsubw m1, m4, m5

    ;Vertical phase, stage 1
    vphaddw m4, m0, m1
    vphsubw m5, m0, m1
    ;Vertical phase, stage 2
    vphaddw m0, m4, m5
    vphsubw m1, m4, m5

    ;Absolute values of all 16 transformed coefficients
    vpabsw m0, m0
    vpabsw m1, m1

    ;Add the two registers together, then reduce the eight words to a
    ;single total with three horizontal adds.
    vpaddw m0, m0, m1

    vphaddw m0, m0, m0
    vphaddw m0, m0, m0
    vphaddw m0, m0, m0

    ;The total sits in the lowest word of m0; move it to eax.
    vpextrw eax, m0, 0

    ;Normalisation for the 4x4 transform: (sum + 1) / 2
    add eax, 1
    shr eax, 1

    RET
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;KVZ_SATD_8X8
;Calculates SATD of two 8x8 blocks. There is no stride argument:
;both blocks are read with a fixed row stride of 8 bytes.
;r0 address of the first value of the first block
;r1 address of the first value of the second block
;The result is returned in rax.

;Only two arguments are passed in (see the C prototype); r2-r4 are plain
;scratch registers, so they are counted in the register total only.
%if ARCH_X86_64
cglobal satd_8x8, 2, 5, 16
%else
cglobal satd_8x8, 2, 5, 8
%endif

    ;Set arguments for KVZ_SATD_8X8_STRIDE:
    ;r0/r1 = first block and its stride, r2/r3 = second block and its stride.
    mov r2, r1
    mov r1, 8
    mov r3, 8

    ;Calculate 8x8 SATD. Result is written
    ;in the register r4.
    KVZ_SATD_8X8_STRIDE
    mov rax, r4
    RET
|
|
||||||
|
|
||||||
;KVZ_SATD_NXN
;Defines satd_NxN: the SATD of two NxN blocks, computed as the sum of the
;SATDs of their 8x8 sub-blocks. Both blocks are read with a row stride of
;N bytes.
;%1 block size N
;r0 address of the first value of the first block
;r1 address of the first value of the second block
;The result is returned in rax.

%macro KVZ_SATD_NXN 1

%if ARCH_X86_64
cglobal satd_%1x%1, 2, 7, 16
%else
cglobal satd_%1x%1, 2, 7, 8
%endif

    ;Arguments for KVZ_SATD_8X8_STRIDE:
    ;r0/r1 = first block and its stride, r2/r3 = second block and its stride.
    mov r2, r1
    mov r1, %1
    mov r3, %1

    ;r6 accumulates the total, r5 counts sub-block rows.
    xor r6, r6
    xor r5, r5

    ;Walk the 8x8 sub-blocks row by row. r4 is the column counter and r5
    ;the row counter; both step by 8 up to N, so each loop runs N/8 times.

.subblock_row:

    ;Restart the column counter.
    xor r4, r4

.subblock_col:
    ;KVZ_SATD_8X8_STRIDE returns its result in r4, so the column
    ;counter is kept on the stack across it.
    push r4

    KVZ_SATD_8X8_STRIDE
    add r6, r4

    ;The macro left r0 and r2 six rows down. Step back up those rows
    ;and 8 bytes to the right: the next sub-block on the same row.
    sub r0, 6*%1-8
    sub r2, 6*%1-8

    pop r4
    add r4, 8
    cmp r4, %1
    jne .subblock_col

    ;The columns moved the pointers N bytes (one row) in total; adding
    ;seven more rows reaches the first sub-block of the next row of
    ;8x8 sub-blocks.
    add r0, 7*%1
    add r2, 7*%1

    add r5, 8
    cmp r5, %1
    jne .subblock_row

    mov rax, r6
    RET

%endmacro ; KVZ_SATD_NXN
|
|
||||||
|
|
||||||
KVZ_SATD_NXN 16
|
|
||||||
KVZ_SATD_NXN 32
|
|
||||||
KVZ_SATD_NXN 64
|
|
|
@ -1,50 +0,0 @@
|
||||||
#ifndef _PICTURE_X86_ASM_SATD_H_
|
|
||||||
#define _PICTURE_X86_ASM_SATD_H_
|
|
||||||
/*****************************************************************************
|
|
||||||
* This file is part of uvg266 VVC encoder.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
* are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer in the documentation and/or
|
|
||||||
* other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived from
|
|
||||||
* this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
||||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
|
||||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \ingroup Optimization
|
|
||||||
* \file
|
|
||||||
* Optimizations for AVX, utilizing ASM implementations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "global.h" // IWYU pragma: keep
|
|
||||||
|
|
||||||
|
|
||||||
unsigned kvz_satd_4x4_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
|
||||||
unsigned kvz_satd_8x8_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
|
||||||
unsigned kvz_satd_16x16_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
|
||||||
unsigned kvz_satd_32x32_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
|
||||||
unsigned kvz_satd_64x64_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,132 +0,0 @@
|
||||||
/*****************************************************************************
|
|
||||||
* This file is part of uvg266 VVC encoder.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
* are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer in the documentation and/or
|
|
||||||
* other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived from
|
|
||||||
* this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
||||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
|
||||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
#include "strategies/x86_asm/picture-x86-asm.h"
|
|
||||||
|
|
||||||
#if defined(KVZ_COMPILE_ASM)
|
|
||||||
#include "kvazaar.h"
|
|
||||||
#if KVZ_BIT_DEPTH == 8
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#include "strategies/x86_asm/picture-x86-asm-sad.h"
|
|
||||||
#include "strategies/x86_asm/picture-x86-asm-satd.h"
|
|
||||||
#include "strategies/sse41/picture-sse41.h"
|
|
||||||
#include "strategyselector.h"
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * \brief SAD of a 32x32 block, assembled from four 16x16 kernel calls.
 *
 * The kernel is invoked at byte offsets 0, 256, 512 and 768 (steps of
 * 8 * 32) into both buffers and the four results are added together.
 * NOTE(review): presumably each kvz_sad_16x16_avx call consumes 256
 * consecutive bytes, so the four calls cover 1024 contiguous bytes.
 *
 * \param data1  first block
 * \param data2  second block
 * \return sum of the four partial SADs
 */
static unsigned kvz_sad_32x32_avx(const uint8_t *data1, const uint8_t *data2)
{
  enum { PART_COUNT = 4, PART_OFFSET = 8 * 32 };

  unsigned total = 0;
  for (int part = 0; part < PART_COUNT; ++part) {
    const int offset = part * PART_OFFSET;
    total += kvz_sad_16x16_avx(data1 + offset, data2 + offset);
  }
  return total;
}
|
|
||||||
|
|
||||||
/**
 * \brief SAD of a 64x64 block, assembled from four 32x32 SAD calls.
 *
 * kvz_sad_32x32_avx is invoked at byte offsets 0, 1024, 2048 and 3072
 * (steps of 16 * 64) into both buffers and the four results are added
 * together.
 *
 * \param data1  first block
 * \param data2  second block
 * \return sum of the four partial SADs
 */
static unsigned kvz_sad_64x64_avx(const uint8_t *data1, const uint8_t *data2)
{
  enum { PART_COUNT = 4, PART_OFFSET = 16 * 64 };

  unsigned total = 0;
  for (int part = 0; part < PART_COUNT; ++part) {
    const int offset = part * PART_OFFSET;
    total += kvz_sad_32x32_avx(data1 + offset, data2 + offset);
  }
  return total;
}
|
|
||||||
|
|
||||||
/**
 * \brief Scalar SAD for blocks that have no dedicated kernel.
 *
 * Both blocks are read with the same row stride.
 *
 * \param data1   first block
 * \param data2   second block
 * \param width   block width in samples; no columns are read if <= 0
 * \param height  block height in rows; no rows are read if <= 0
 * \param stride  distance in bytes between rows, for both blocks
 * \return sum of absolute differences over the width x height area
 */
static unsigned kvz_sad_other_avx(const uint8_t *data1, const uint8_t *data2,
                                  int width, int height,
                                  unsigned stride)
{
  unsigned sad = 0;

  for (int y = 0; y < height; ++y) {
    // Compute the row offset in size_t so that y * stride is not
    // evaluated (and possibly wrapped) in 32-bit unsigned arithmetic.
    const size_t row_offset = (size_t)y * stride;
    const uint8_t *row1 = data1 + row_offset;
    const uint8_t *row2 = data2 + row_offset;

    for (int x = 0; x < width; ++x) {
      sad += abs(row1[x] - row2[x]);
    }
  }

  return sad;
}
|
|
||||||
|
|
||||||
/**
 * \brief General SAD entry point of the x86 asm strategy.
 *
 * Dispatches to a fixed-size asm kernel for square 8/16/32/64 blocks,
 * to the SSE4.1 implementation for other blocks of at least 16 samples,
 * and to scalar code for anything smaller.
 *
 * The asm kernels and kvz_sad_other_avx take a single stride that is
 * applied to both buffers, so they are only used when stride1 == stride2.
 *
 * \param data1    first block
 * \param data2    second block
 * \param width    block width in samples
 * \param height   block height in rows
 * \param stride1  row stride of data1 in bytes
 * \param stride2  row stride of data2 in bytes
 * \return sum of absolute differences over the width x height area
 */
static unsigned reg_sad_x86_asm(const uint8_t *data1, const uint8_t * data2,
                                const int width, const int height,
                                const unsigned stride1, const unsigned stride2)
{
  if (width == height && stride1 == stride2) {
    switch (width) {
      case 8:
        return kvz_sad_8x8_stride_avx(data1, data2, stride1);
      case 16:
        return kvz_sad_16x16_stride_avx(data1, data2, stride1);
      case 32:
        return kvz_sad_32x32_stride_avx(data1, data2, stride1);
      case 64:
        return kvz_sad_64x64_stride_avx(data1, data2, stride1);
      default:
        break;
    }
  }

  if (width * height >= 16) {
    // Call the vectorized general SAD SSE41 function when the block
    // is big enough to make it worth it.
    return kvz_reg_sad_sse41(data1, data2, width, height, stride1, stride2);
  }

  if (stride1 == stride2) {
    return kvz_sad_other_avx(data1, data2, width, height, stride1);
  }

  // Tiny block with different strides: walk each buffer with its own stride.
  unsigned sad = 0;
  for (int y = 0; y < height; ++y) {
    const uint8_t *row1 = data1 + (size_t)y * stride1;
    const uint8_t *row2 = data2 + (size_t)y * stride2;
    for (int x = 0; x < width; ++x) {
      sad += abs(row1[x] - row2[x]);
    }
  }
  return sad;
}
|
|
||||||
|
|
||||||
#endif // KVZ_BIT_DEPTH == 8
|
|
||||||
#endif //defined(KVZ_COMPILE_ASM)
|
|
||||||
|
|
||||||
/**
 * \brief Register the x86 asm AVX picture strategies with the selector.
 *
 * The SAD and SATD functions are registered under the strategy name
 * "x86_asm_avx" with priority 30, and only when the build defines
 * KVZ_COMPILE_ASM, KVZ_BIT_DEPTH is 8 and \p bitdepth is 8. In every
 * other case nothing is registered and the call still reports success.
 *
 * \param opaque    handle passed through to kvz_strategyselector_register
 * \param bitdepth  bit depth the strategies are requested for
 * \return non-zero if every attempted registration succeeded, 0 otherwise
 */
int kvz_strategy_register_picture_x86_asm_avx(void* opaque, uint8_t bitdepth)
{
  bool success = true;
#if defined(KVZ_COMPILE_ASM)
#if KVZ_BIT_DEPTH == 8
  if (bitdepth == 8){
    success &= kvz_strategyselector_register(opaque, "reg_sad", "x86_asm_avx", 30, &reg_sad_x86_asm);

    success &= kvz_strategyselector_register(opaque, "sad_4x4", "x86_asm_avx", 30, &kvz_sad_4x4_avx);
    success &= kvz_strategyselector_register(opaque, "sad_8x8", "x86_asm_avx", 30, &kvz_sad_8x8_avx);
    success &= kvz_strategyselector_register(opaque, "sad_16x16", "x86_asm_avx", 30, &kvz_sad_16x16_avx);
    success &= kvz_strategyselector_register(opaque, "sad_32x32", "x86_asm_avx", 30, &kvz_sad_32x32_avx);
    success &= kvz_strategyselector_register(opaque, "sad_64x64", "x86_asm_avx", 30, &kvz_sad_64x64_avx);

    success &= kvz_strategyselector_register(opaque, "satd_4x4", "x86_asm_avx", 30, &kvz_satd_4x4_avx);
    success &= kvz_strategyselector_register(opaque, "satd_8x8", "x86_asm_avx", 30, &kvz_satd_8x8_avx);
    success &= kvz_strategyselector_register(opaque, "satd_16x16", "x86_asm_avx", 30, &kvz_satd_16x16_avx);
    success &= kvz_strategyselector_register(opaque, "satd_32x32", "x86_asm_avx", 30, &kvz_satd_32x32_avx);
    success &= kvz_strategyselector_register(opaque, "satd_64x64", "x86_asm_avx", 30, &kvz_satd_64x64_avx);
  }
#endif // KVZ_BIT_DEPTH == 8
#endif // defined(KVZ_COMPILE_ASM)
  return success;
}
|
|
|
@ -1,46 +0,0 @@
|
||||||
#ifndef STRATEGIES_PICTURE_X86_ASM_H_
|
|
||||||
#define STRATEGIES_PICTURE_X86_ASM_H_
|
|
||||||
/*****************************************************************************
|
|
||||||
* This file is part of uvg266 VVC encoder.
|
|
||||||
*
|
|
||||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
|
||||||
* All rights reserved.
|
|
||||||
*
|
|
||||||
* Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
* are permitted provided that the following conditions are met:
|
|
||||||
*
|
|
||||||
* * Redistributions of source code must retain the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer.
|
|
||||||
*
|
|
||||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
|
||||||
* list of conditions and the following disclaimer in the documentation and/or
|
|
||||||
* other materials provided with the distribution.
|
|
||||||
*
|
|
||||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
|
||||||
* contributors may be used to endorse or promote products derived from
|
|
||||||
* this software without specific prior written permission.
|
|
||||||
*
|
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
||||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
||||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
||||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
|
||||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
||||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
|
||||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
||||||
****************************************************************************/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \ingroup Optimization
|
|
||||||
* \file
|
|
||||||
* Optimizations for AVX, utilizing ASM implementations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "global.h" // IWYU pragma: keep
|
|
||||||
|
|
||||||
|
|
||||||
int kvz_strategy_register_picture_x86_asm_avx(void* opaque, uint8_t bitdepth);
|
|
||||||
|
|
||||||
#endif //STRATEGIES_PICTURE_X86_ASM_H_
|
|
File diff suppressed because it is too large
Load diff
|
@ -258,7 +258,6 @@ int kvz_strategyselector_register(void * const opaque, const char * const type,
|
||||||
|
|
||||||
//Check what strategies are available when they are registered
|
//Check what strategies are available when they are registered
|
||||||
if (strcmp(strategy_name, "avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
|
if (strcmp(strategy_name, "avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
|
||||||
if (strcmp(strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
|
|
||||||
if (strcmp(strategy_name, "avx2") == 0) kvz_g_strategies_available.intel_flags.avx2++;
|
if (strcmp(strategy_name, "avx2") == 0) kvz_g_strategies_available.intel_flags.avx2++;
|
||||||
if (strcmp(strategy_name, "mmx") == 0) kvz_g_strategies_available.intel_flags.mmx++;
|
if (strcmp(strategy_name, "mmx") == 0) kvz_g_strategies_available.intel_flags.mmx++;
|
||||||
if (strcmp(strategy_name, "sse") == 0) kvz_g_strategies_available.intel_flags.sse++;
|
if (strcmp(strategy_name, "sse") == 0) kvz_g_strategies_available.intel_flags.sse++;
|
||||||
|
@ -330,7 +329,6 @@ static void* strategyselector_choose_for(const strategy_list_t * const strategie
|
||||||
|
|
||||||
//Check what strategy we are going to use
|
//Check what strategy we are going to use
|
||||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
|
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
|
||||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
|
|
||||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx2") == 0) kvz_g_strategies_in_use.intel_flags.avx2++;
|
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx2") == 0) kvz_g_strategies_in_use.intel_flags.avx2++;
|
||||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "mmx") == 0) kvz_g_strategies_in_use.intel_flags.mmx++;
|
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "mmx") == 0) kvz_g_strategies_in_use.intel_flags.mmx++;
|
||||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse") == 0) kvz_g_strategies_in_use.intel_flags.sse++;
|
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse") == 0) kvz_g_strategies_in_use.intel_flags.sse++;
|
||||||
|
|
Loading…
Reference in a new issue