mirror of
https://github.com/ultravideo/uvg266.git
synced 2024-11-23 18:14:06 +00:00
[build] Remove support for the yasm asm build
This commit is contained in:
parent
f8375f9bc6
commit
227556a13e
|
@ -1,33 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ImportGroup Label="PropertySheets" />
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<IntDir>$(Platform)-$(Configuration)\</IntDir>
|
||||
<OutDir>$(SolutionDir)..\bin\$(Platform)-$(Configuration)\</OutDir>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<CompileAs>CompileAsC</CompileAs>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<AssemblerOutput>AssemblyAndSourceCode</AssemblerOutput>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<PreprocessorDefinitions>KVZ_DLL_EXPORTS;KVZ_COMPILE_ASM;WIN32_LEAN_AND_MEAN;WIN32;WIN64;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)..\src\threadwrapper\include;$(SolutionDir)..\src;$(SolutionDir)..\src\extras;$(SolutionDir)..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4244;4204;4206;4028;4152;4996;4018;4456;4389;4100;4131;4459;4706;4214;4127;4201</DisableSpecificWarnings>
|
||||
<OpenMPSupport>false</OpenMPSupport>
|
||||
<TreatSpecificWarningsAsErrors>4013;4029;4047;4716;4700;4020;4021;4133</TreatSpecificWarningsAsErrors>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
</Link>
|
||||
<YASM>
|
||||
<Defines>HAVE_ALIGNED_STACK=1</Defines>
|
||||
<IncludePaths>$(SolutionDir)..\src\extras;%(IncludePaths)</IncludePaths>
|
||||
</YASM>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup />
|
||||
</Project>
|
|
@ -1,26 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ImportGroup Label="PropertySheets" />
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
<Optimization>Full</Optimization>
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<OmitFramePointers>
|
||||
</OmitFramePointers>
|
||||
<EnableFiberSafeOptimizations>
|
||||
</EnableFiberSafeOptimizations>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup />
|
||||
</Project>
|
|
@ -1,31 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup
|
||||
Condition="'$(YASMBeforeTargets)' == '' and '$(YASMAfterTargets)' == '' and '$(ConfigurationType)' != 'Makefile'">
|
||||
<YASMBeforeTargets>Midl</YASMBeforeTargets>
|
||||
<YASMAfterTargets>CustomBuild</YASMAfterTargets>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<YASMDependsOn
|
||||
Condition="'$(ConfigurationType)' != 'Makefile'">_SelectedFiles;$(YASMDependsOn)</YASMDependsOn>
|
||||
</PropertyGroup>
|
||||
<!-- Object format name for vsyasm must be in lower case. -->
|
||||
<PropertyGroup Condition="'$(Platform)' == 'Win32'">
|
||||
<YASMFormat>win32</YASMFormat>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Platform)' == 'x64'">
|
||||
<YASMFormat>win64</YASMFormat>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<YASM>
|
||||
<Debug>False</Debug>
|
||||
<ObjectFile>$(IntDir)</ObjectFile>
|
||||
<PreProc>0</PreProc>
|
||||
<Parser>0</Parser>
|
||||
<CommandLineTemplate>vsyasm.exe -Xvc -f $(YASMFormat) [AllOptions] [AdditionalOptions] [Inputs]</CommandLineTemplate>
|
||||
<Outputs>%(ObjectFile)</Outputs>
|
||||
<ExecutionDescription>Assembling %(Filename)%(Extension)</ExecutionDescription>
|
||||
<ShowOnlyRuleProperties>false</ShowOnlyRuleProperties>
|
||||
</YASM>
|
||||
</ItemDefinitionGroup>
|
||||
</Project>
|
|
@ -1,109 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<PropertyPageSchema
|
||||
Include="$(MSBuildThisFileDirectory)$(MSBuildThisFileName).xml" />
|
||||
<AvailableItemName
|
||||
Include="YASM">
|
||||
<Targets>_YASM</Targets>
|
||||
</AvailableItemName>
|
||||
</ItemGroup>
|
||||
<UsingTask
|
||||
TaskName="YASM"
|
||||
TaskFactory="XamlTaskFactory"
|
||||
AssemblyName="Microsoft.Build.Tasks.v4.0">
|
||||
<Task>$(MSBuildThisFileDirectory)$(MSBuildThisFileName).xml</Task>
|
||||
</UsingTask>
|
||||
<Target
|
||||
Name="_YASM"
|
||||
BeforeTargets="$(YASMBeforeTargets)"
|
||||
AfterTargets="$(YASMAfterTargets)"
|
||||
Condition="'@(YASM)' != ''"
|
||||
DependsOnTargets="$(YASMDependsOn);ComputeYASMOutput"
|
||||
Outputs="@(YASM->'$(ProjectDir)$(IntDir)%(Filename).obj')"
|
||||
Inputs="@(YASM);%(YASM.AdditionalDependencies);$(MSBuildProjectFile)">
|
||||
<ItemGroup
|
||||
Condition="'@(SelectedFiles)' != ''">
|
||||
<YASM
|
||||
Remove="@(YASM)"
|
||||
Condition="'%(Identity)' != '@(SelectedFiles)'" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<YASM_tlog
|
||||
Include="%(YASM.ObjectFile)"
|
||||
Condition="'%(YASM.ObjectFile)' != '' and '%(YASM.ExcludedFromBuild)' != 'true'">
|
||||
<Source>@(YASM->'%(FullPath)', '|')</Source>
|
||||
</YASM_tlog>
|
||||
</ItemGroup>
|
||||
<Message
|
||||
Importance="High"
|
||||
Text="%(YASM.ExecutionDescription)" />
|
||||
<WriteLinesToFile
|
||||
Condition="'@(YASM_tlog)' != '' and '%(YASM_tlog.ExcludedFromBuild)' != 'true'"
|
||||
File="$(TLogLocation)$(ProjectName).write.1.tlog"
|
||||
Lines="^%(YASM_tlog.Source);@(YASM->'$(ProjectDir)$(IntDir)%(Filename).obj')"
|
||||
Encoding="Unicode" />
|
||||
<YASM
|
||||
Condition="'@(YASM)' != '' and '%(YASM.ExcludedFromBuild)' != 'true'"
|
||||
CommandLineTemplate="%(YASM.CommandLineTemplate)"
|
||||
Debug="%(YASM.Debug)"
|
||||
PreIncludeFile="%(YASM.PreIncludeFile)"
|
||||
IncludePaths="%(YASM.IncludePaths)"
|
||||
Defines="%(YASM.Defines)"
|
||||
UnDefines="%(YASM.UnDefines)"
|
||||
ObjectFile="%(YASM.ObjectFile)"
|
||||
ListFile="%(YASM.ListFile)"
|
||||
MapFile="%(YASM.MapFile)"
|
||||
ErrorFile="%(YASM.ErrorFile)"
|
||||
SymbolPrefix="%(YASM.SymbolPrefix)"
|
||||
SymbolSuffix="%(YASM.SymbolSuffix)"
|
||||
PreProc="%(YASM.PreProc)"
|
||||
Parser="%(YASM.Parser)"
|
||||
AdditionalOptions="%(YASM.AdditionalOptions)"
|
||||
Inputs="@(YASM)" />
|
||||
</Target>
|
||||
<PropertyGroup>
|
||||
<ComputeLinkInputsTargets>
|
||||
$(ComputeLinkInputsTargets);
|
||||
ComputeYASMOutput;
|
||||
</ComputeLinkInputsTargets>
|
||||
<ComputeLibInputsTargets>
|
||||
$(ComputeLibInputsTargets);
|
||||
ComputeYASMOutput;
|
||||
</ComputeLibInputsTargets>
|
||||
</PropertyGroup>
|
||||
<Target
|
||||
Name="ComputeYASMOutput"
|
||||
Condition="'@(YASM)' != ''">
|
||||
<ItemGroup>
|
||||
<YASMDirsToMake
|
||||
Condition="'@(YASM)' != '' and '%(YASM.ExcludedFromBuild)' != 'true' and !HasTrailingSlash('%(YASM.ObjectFile)')"
|
||||
Include="%(YASM.ObjectFile)" />
|
||||
<Link
|
||||
Include="%(YASMDirsToMake.Identity)"
|
||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
||||
<Lib
|
||||
Include="%(YASMDirsToMake.Identity)"
|
||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
||||
<ImpLib
|
||||
Include="%(YASMDirsToMake.Identity)"
|
||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<YASMDirsToMake
|
||||
Condition="'@(YASM)' != '' and '%(YASM.ExcludedFromBuild)' != 'true' and HasTrailingSlash('%(YASM.ObjectFile)')"
|
||||
Include="@(YASM->'%(ObjectFile)%(Filename).obj')" />
|
||||
<Link
|
||||
Include="%(YASMDirsToMake.Identity)"
|
||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
||||
<Lib
|
||||
Include="%(YASMDirsToMake.Identity)"
|
||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
||||
<ImpLib
|
||||
Include="%(YASMDirsToMake.Identity)"
|
||||
Condition="'%(Extension)'=='.obj' or '%(Extension)'=='.res' or '%(Extension)'=='.rsc' or '%(Extension)'=='.lib'" />
|
||||
</ItemGroup>
|
||||
<MakeDir
|
||||
Directories="@(YASMDirsToMake->'%(RootDir)%(Directory)')" />
|
||||
</Target>
|
||||
</Project>
|
|
@ -1,283 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<ProjectSchemaDefinitions xmlns="clr-namespace:Microsoft.Build.Framework.XamlTypes;assembly=Microsoft.Build.Framework" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:sys="clr-namespace:System;assembly=mscorlib" xmlns:transformCallback="Microsoft.Cpp.Dev10.ConvertPropertyCallback">
|
||||
<Rule
|
||||
Name="YASM"
|
||||
PageTemplate="tool"
|
||||
DisplayName="Yasm Assembler"
|
||||
Order="200">
|
||||
|
||||
<Rule.DataSource>
|
||||
<DataSource
|
||||
Persistence="ProjectFile"
|
||||
ItemType="YASM" />
|
||||
</Rule.DataSource>
|
||||
|
||||
<Rule.Categories>
|
||||
|
||||
<Category
|
||||
Name="General">
|
||||
<Category.DisplayName>
|
||||
<sys:String>General</sys:String>
|
||||
</Category.DisplayName>
|
||||
</Category>
|
||||
|
||||
<Category
|
||||
Name="Symbols">
|
||||
<Category.DisplayName>
|
||||
<sys:String>Symbols</sys:String>
|
||||
</Category.DisplayName>
|
||||
</Category>
|
||||
|
||||
<Category
|
||||
Name="Files">
|
||||
<Category.DisplayName>
|
||||
<sys:String>Files</sys:String>
|
||||
</Category.DisplayName>
|
||||
</Category>
|
||||
|
||||
<Category
|
||||
Name="Command Line"
|
||||
Subtype="CommandLine">
|
||||
<Category.DisplayName>
|
||||
<sys:String>Command Line</sys:String>
|
||||
</Category.DisplayName>
|
||||
</Category>
|
||||
|
||||
</Rule.Categories>
|
||||
|
||||
<StringListProperty
|
||||
Name="Inputs"
|
||||
Category="Command Line"
|
||||
IsRequired="true"
|
||||
Switch=" ">
|
||||
<StringListProperty.DataSource>
|
||||
<DataSource
|
||||
Persistence="ProjectFile"
|
||||
ItemType="YASM"
|
||||
SourceType="Item" />
|
||||
</StringListProperty.DataSource>
|
||||
</StringListProperty>
|
||||
|
||||
<BoolProperty
|
||||
Name="Debug"
|
||||
Subcategory="Configuration"
|
||||
HelpContext="0"
|
||||
DisplayName="Debug Information"
|
||||
Description="Generate debugging information"
|
||||
Switch="-g cv8" />
|
||||
|
||||
<StringListProperty
|
||||
Name="IncludePaths"
|
||||
Subcategory="Configuration"
|
||||
HelpContext="0"
|
||||
DisplayName="Include Paths"
|
||||
Description="Set the paths for any additional include files"
|
||||
Switch="-i &quot;[value]&quot;" />
|
||||
|
||||
<StringListProperty
|
||||
Name="Defines"
|
||||
Category="Symbols"
|
||||
Subcategory="Pre-Defined Symbols"
|
||||
HelpContext="0"
|
||||
DisplayName="Defined Symbols"
|
||||
Description="Specify pre-defined symbols ('symbol' or 'symbol = value') "
|
||||
Switch="-d &quot;[value]&quot;" />
|
||||
|
||||
<StringListProperty
|
||||
Name="UnDefines"
|
||||
Category="Symbols"
|
||||
Subcategory="Pre-Defined Symbols"
|
||||
HelpContext="0"
|
||||
DisplayName="Remove Symbols"
|
||||
Description="Remove pre-defined symbols "
|
||||
Switch="-u &quot;[value]&quot;" />
|
||||
|
||||
<StringProperty
|
||||
Name="ObjectFile"
|
||||
Subcategory="Output"
|
||||
HelpContext="0"
|
||||
DisplayName="Object File Name"
|
||||
Description="Select the output file name"
|
||||
Switch="-o &quot;[value]&quot;" />
|
||||
|
||||
<StringProperty
|
||||
Name="ListFile"
|
||||
Category="Files"
|
||||
Subcategory="Output"
|
||||
HelpContext="0"
|
||||
DisplayName="List File Name"
|
||||
Description="Select an output listing by setting its file name"
|
||||
Switch="-l &quot;[value]&quot;" />
|
||||
|
||||
<StringProperty
|
||||
Name="PreIncludeFile"
|
||||
Category="Files"
|
||||
Subcategory="Configuration"
|
||||
HelpContext="0"
|
||||
DisplayName="Pre Include File"
|
||||
Description="Select a pre-included file by setting its name"
|
||||
Switch="-P &quot;[value]&quot;" />
|
||||
|
||||
<StringProperty
|
||||
Name="MapFile"
|
||||
Category="Files"
|
||||
Subcategory="Output"
|
||||
HelpContext="0"
|
||||
DisplayName="Map File Name"
|
||||
Description="Select a map output by setting its file name"
|
||||
Switch="--mapdir= &quot;[value]&quot;" />
|
||||
|
||||
<StringProperty
|
||||
Name="ErrorFile"
|
||||
Category="Files"
|
||||
Subcategory="Output"
|
||||
HelpContext="0"
|
||||
DisplayName="Error File Name"
|
||||
Description="Send error/warning messages to a file by setting its name"
|
||||
Switch="-E &quot;[value]&quot;" />
|
||||
|
||||
<StringProperty
|
||||
Name="SymbolPrefix"
|
||||
Category="Symbols"
|
||||
Subcategory="Symbols"
|
||||
HelpContext="0"
|
||||
DisplayName="External Symbol Prefix"
|
||||
Description="Prepend symbol to all external symbols"
|
||||
Switch="--prefix &quot;[value]&quot;" />
|
||||
|
||||
<StringProperty
|
||||
Name="SymbolSuffix"
|
||||
Category="Symbols"
|
||||
Subcategory="Symbols"
|
||||
HelpContext="0"
|
||||
DisplayName="External Symbol Suffix"
|
||||
Description="Append symbol to all external symbols"
|
||||
Switch="--suffix &quot;[value]&quot;" />
|
||||
|
||||
<EnumProperty
|
||||
Name="PreProc"
|
||||
Subcategory="Configuration"
|
||||
HelpContext="0"
|
||||
DisplayName="Pre-Processor"
|
||||
Description="Select the pre-processor ('nasm' or 'raw')">
|
||||
<EnumValue
|
||||
Name="0"
|
||||
DisplayName="Nasm "
|
||||
Switch="-rnasm" />
|
||||
<EnumValue
|
||||
Name="1"
|
||||
DisplayName="Raw"
|
||||
Switch="-rraw" />
|
||||
</EnumProperty>
|
||||
|
||||
<EnumProperty
|
||||
Name="Parser"
|
||||
Subcategory="Configuration"
|
||||
HelpContext="0"
|
||||
DisplayName="Parser"
|
||||
Description="Select the parser for Intel ('nasm') or AT&amp;T ( 'gas') syntax">
|
||||
<EnumValue
|
||||
Name="0"
|
||||
DisplayName="Nasm"
|
||||
Switch="-pnasm" />
|
||||
<EnumValue
|
||||
Name="1"
|
||||
DisplayName="Gas"
|
||||
Switch="-pgas" />
|
||||
</EnumProperty>
|
||||
|
||||
<StringProperty
|
||||
Name="CommandLineTemplate"
|
||||
DisplayName="Command Line"
|
||||
Visible="False"
|
||||
IncludeInCommandLine="False" />
|
||||
|
||||
<DynamicEnumProperty
|
||||
Name="YASMBeforeTargets"
|
||||
Category="General"
|
||||
EnumProvider="Targets"
|
||||
IncludeInCommandLine="False">
|
||||
<DynamicEnumProperty.DisplayName>
|
||||
<sys:String>Execute Before</sys:String>
|
||||
</DynamicEnumProperty.DisplayName>
|
||||
<DynamicEnumProperty.Description>
|
||||
<sys:String>Specifies the targets for the build customization to run before.</sys:String>
|
||||
</DynamicEnumProperty.Description>
|
||||
<DynamicEnumProperty.ProviderSettings>
|
||||
<NameValuePair
|
||||
Name="Exclude"
|
||||
Value="^YASMBeforeTargets|^Compute" />
|
||||
</DynamicEnumProperty.ProviderSettings>
|
||||
<DynamicEnumProperty.DataSource>
|
||||
<DataSource
|
||||
Persistence="ProjectFile"
|
||||
HasConfigurationCondition="true" />
|
||||
</DynamicEnumProperty.DataSource>
|
||||
</DynamicEnumProperty>
|
||||
|
||||
<DynamicEnumProperty
|
||||
Name="YASMAfterTargets"
|
||||
Category="General"
|
||||
EnumProvider="Targets"
|
||||
IncludeInCommandLine="False">
|
||||
<DynamicEnumProperty.DisplayName>
|
||||
<sys:String>Execute After</sys:String>
|
||||
</DynamicEnumProperty.DisplayName>
|
||||
<DynamicEnumProperty.Description>
|
||||
<sys:String>Specifies the targets for the build customization to run after.</sys:String>
|
||||
</DynamicEnumProperty.Description>
|
||||
<DynamicEnumProperty.ProviderSettings>
|
||||
<NameValuePair
|
||||
Name="Exclude"
|
||||
Value="^YASMAfterTargets|^Compute" />
|
||||
</DynamicEnumProperty.ProviderSettings>
|
||||
<DynamicEnumProperty.DataSource>
|
||||
<DataSource
|
||||
Persistence="ProjectFile"
|
||||
ItemType=""
|
||||
HasConfigurationCondition="true" />
|
||||
</DynamicEnumProperty.DataSource>
|
||||
</DynamicEnumProperty>
|
||||
|
||||
<StringListProperty
|
||||
Name="Outputs"
|
||||
DisplayName="Outputs"
|
||||
Visible="False"
|
||||
IncludeInCommandLine="False" />
|
||||
|
||||
<StringProperty
|
||||
Name="ExecutionDescription"
|
||||
DisplayName="Execution Description"
|
||||
Visible="False"
|
||||
IncludeInCommandLine="False" />
|
||||
|
||||
<StringListProperty
|
||||
Name="AdditionalDependencies"
|
||||
DisplayName="Additional Dependencies"
|
||||
IncludeInCommandLine="False"
|
||||
Visible="true" />
|
||||
|
||||
<StringProperty
|
||||
Subtype="AdditionalOptions"
|
||||
Name="AdditionalOptions"
|
||||
Category="Command Line">
|
||||
<StringProperty.DisplayName>
|
||||
<sys:String>Additional Options</sys:String>
|
||||
</StringProperty.DisplayName>
|
||||
<StringProperty.Description>
|
||||
<sys:String>Additional Options</sys:String>
|
||||
</StringProperty.Description>
|
||||
</StringProperty>
|
||||
</Rule>
|
||||
|
||||
<ItemType
|
||||
Name="YASM"
|
||||
DisplayName="Yasm Assembler" />
|
||||
<FileExtension
|
||||
Name="*.asm"
|
||||
ContentType="YASM" />
|
||||
<ContentType
|
||||
Name="YASM"
|
||||
DisplayName="Yasm Assembler"
|
||||
ItemType="YASM" />
|
||||
</ProjectSchemaDefinitions>
|
|
@ -37,7 +37,6 @@
|
|||
#include "strategies/generic/picture-generic.h"
|
||||
#include "strategies/sse2/picture-sse2.h"
|
||||
#include "strategies/sse41/picture-sse41.h"
|
||||
#include "strategies/x86_asm/picture-x86-asm.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
|
||||
|
@ -93,9 +92,6 @@ int kvz_strategy_register_picture(void* opaque, uint8_t bitdepth) {
|
|||
if (kvz_g_hardware_flags.intel_flags.sse41) {
|
||||
success &= kvz_strategy_register_picture_sse41(opaque, bitdepth);
|
||||
}
|
||||
if (kvz_g_hardware_flags.intel_flags.avx) {
|
||||
success &= kvz_strategy_register_picture_x86_asm_avx(opaque, bitdepth);
|
||||
}
|
||||
if (kvz_g_hardware_flags.intel_flags.avx2) {
|
||||
success &= kvz_strategy_register_picture_avx2(opaque, bitdepth);
|
||||
}
|
||||
|
|
|
@ -1,385 +0,0 @@
|
|||
;/*****************************************************************************
|
||||
; * This file is part of Kvazaar HEVC encoder.
|
||||
; *
|
||||
; * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
; * All rights reserved.
|
||||
; *
|
||||
; * Redistribution and use in source and binary forms, with or without modification,
|
||||
; * are permitted provided that the following conditions are met:
|
||||
; *
|
||||
; * * Redistributions of source code must retain the above copyright notice, this
|
||||
; * list of conditions and the following disclaimer.
|
||||
; *
|
||||
; * * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
; * list of conditions and the following disclaimer in the documentation and/or
|
||||
; * other materials provided with the distribution.
|
||||
; *
|
||||
; * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
; * contributors may be used to endorse or promote products derived from
|
||||
; * this software without specific prior written permission.
|
||||
; *
|
||||
; * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
; * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
; * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
; * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
; * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
; * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
; * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
; * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; ****************************************************************************/
|
||||
|
||||
%include "x86inc.asm"
|
||||
|
||||
;The cglobal and RET macros come from x86inc.asm;
|
||||
;they push and pop the necessary registers to
|
||||
;stack depending on the operating system
|
||||
|
||||
;Usage: cglobal name, %1, %2, %3
|
||||
;%1: Number of arguments
|
||||
;%2: Number of registers used
|
||||
;%3: Number of xmm registers used.
|
||||
;More info in x86inc.asm
|
||||
|
||||
SECTION .text
|
||||
|
||||
;Set x86inc.asm macros to use avx and xmm registers
|
||||
INIT_XMM avx
|
||||
|
||||
;KVZ_SAD_4X4
|
||||
;Calculates the SAD (sum of absolute differences) of 16 consecutive bytes in memory
|
||||
;r0 address of the first value(current frame)
|
||||
;r1 address of the first value(reference frame)
;The result is left in eax.
;cglobal operands below: 2 arguments, 2 registers, 2 xmm registers.
|
||||
|
||||
cglobal sad_4x4, 2, 2, 2
|
||||
|
||||
;Load 16 bytes of both frames (unaligned loads)
|
||||
vmovdqu m0, [r0]
|
||||
vmovdqu m1, [r1]
|
||||
|
||||
;Calculate SAD. psadbw produces one partial sum per 64-bit half;
|
||||
;the results are written in m0[15:0] and m0[79:64]
|
||||
vpsadbw m0, m1
|
||||
|
||||
;Sum the two partial results: bring the upper half down, then add it to the lower half
|
||||
vmovhlps m1, m0
|
||||
vpaddw m0, m1
|
||||
|
||||
;Write the result (low 32 bits of m0) to eax
|
||||
vmovd eax, m0
|
||||
|
||||
RET
|
||||
|
||||
|
||||
;KVZ_SAD_4X4_STRIDE
|
||||
;Calculates SAD of a 4x4 block inside a frame with stride
|
||||
;r0 address of the first value(current)
|
||||
;r1 address of the first value(reference)
|
||||
;r2 stride (added to byte addresses; the same stride is applied to both blocks)
;The result is left in eax.
;NOTE(review): r2 is used directly in address arithmetic; confirm the caller
;passes the stride extended to full register width.
|
||||
|
||||
cglobal sad_4x4_stride, 3, 3, 2
|
||||
|
||||
;Load 4 times 4 bytes of both frames: each row becomes one 32-bit element.
;r0/r1 are advanced by one stride after row 0 so that rows 2 and 3 can be
;addressed as base+stride and base+stride*2.
|
||||
vpinsrd m0, [r0], 0
|
||||
add r0, r2
|
||||
vpinsrd m0, [r0], 1
|
||||
vpinsrd m0, [r0+r2], 2
|
||||
vpinsrd m0, [r0+r2*2], 3
|
||||
|
||||
vpinsrd m1, [r1], 0
|
||||
add r1, r2
|
||||
vpinsrd m1, [r1], 1
|
||||
vpinsrd m1, [r1+r2], 2
|
||||
vpinsrd m1, [r1+r2*2], 3
|
||||
|
||||
vpsadbw m0, m1
|
||||
|
||||
vmovhlps m1, m0
|
||||
vpaddw m0, m1
|
||||
|
||||
vmovd eax, m0
|
||||
|
||||
RET
|
||||
|
||||
|
||||
;KVZ_SAD_8X8
|
||||
;Calculates the SAD (sum of absolute differences) of 64 consecutive bytes in memory
|
||||
;r0 address of the first value(current)
|
||||
;r1 address of the first value(reference)
;The result is left in eax.
|
||||
|
||||
cglobal sad_8x8, 2, 2, 5
|
||||
|
||||
;Load the first half (bytes 0..31) of both frames
|
||||
vmovdqu m0, [r0]
|
||||
vmovdqu m2, [r0+16]
|
||||
|
||||
vmovdqu m1, [r1]
|
||||
vmovdqu m3, [r1+16]
|
||||
|
||||
;Calculate SADs for both 16-byte pairs
|
||||
vpsadbw m0, m1
|
||||
vpsadbw m2, m3
|
||||
|
||||
;Sum the partial SADs of the first half into m0
|
||||
vpaddw m0, m2
|
||||
|
||||
;Repeat for the latter half (bytes 32..63)
|
||||
vmovdqu m1, [r0+16*2]
|
||||
vmovdqu m3, [r0+16*3]
|
||||
|
||||
vmovdqu m2, [r1+16*2]
|
||||
vmovdqu m4, [r1+16*3]
|
||||
|
||||
vpsadbw m1, m2
|
||||
vpsadbw m3, m4
|
||||
|
||||
vpaddw m1, m3
|
||||
|
||||
;Sum all the SADs, then fold the upper 64-bit half onto the lower half
|
||||
vpaddw m0, m1
|
||||
|
||||
vmovhlps m1, m0
|
||||
vpaddw m0, m1
|
||||
|
||||
vmovd eax, m0
|
||||
|
||||
RET
|
||||
|
||||
|
||||
;KVZ_SAD_8X8_STRIDE
|
||||
;Calculates SAD of an 8x8 block inside a frame with stride
|
||||
;r0 address of the first value(current)
|
||||
;r1 address of the first value(reference)
|
||||
;r2 stride (added to byte addresses; the same stride is applied to both blocks)
;The result is left in eax.
;NOTE(review): r2 is used directly in address arithmetic; confirm the caller
;passes the stride extended to full register width.
|
||||
|
||||
cglobal sad_8x8_stride, 3, 3, 5
|
||||
|
||||
;Zero m0 register (SAD accumulator)
|
||||
vpxor m0, m0
|
||||
|
||||
;Load the first half (rows 0..3) to m1 and m3 registers(cur)
|
||||
;Current frame
|
||||
;Load row 0 to the high 64 bits of xmm
|
||||
vmovhpd m1, [r0]
|
||||
add r0, r2
|
||||
;Load row 1 to the low 64 bits
|
||||
vmovlpd m1, [r0]
|
||||
|
||||
vmovhpd m3, [r0+r2]
|
||||
vmovlpd m3, [r0+r2*2]
|
||||
;lea calculates the address r0 + 2*r2 into r0,
|
||||
;but doesn't load anything from
|
||||
;the memory. Equivalent to
|
||||
;two add r0, r2 instructions. With the add that follows, r0 ends up four rows past its starting value.
|
||||
lea r0, [r0+r2*2]
|
||||
add r0, r2
|
||||
|
||||
;Reference frame, loaded with the same row-to-half layout as the current frame
|
||||
vmovhpd m2, [r1]
|
||||
add r1, r2
|
||||
vmovlpd m2, [r1]
|
||||
|
||||
vmovhpd m4, [r1+r2]
|
||||
vmovlpd m4, [r1+r2*2]
|
||||
lea r1, [r1+r2*2]
|
||||
add r1, r2
|
||||
|
||||
vpsadbw m1, m2
|
||||
vpsadbw m3, m4
|
||||
|
||||
vpaddw m0, m1
|
||||
vpaddw m0, m3
|
||||
|
||||
;Repeat for the other half (rows 4..7).
;NOTE(review): the pointer advances at the end of this half are not used afterwards.
|
||||
vmovhpd m1, [r0]
|
||||
add r0, r2
|
||||
vmovlpd m1, [r0]
|
||||
|
||||
vmovhpd m3, [r0+r2]
|
||||
vmovlpd m3, [r0+r2*2]
|
||||
lea r0, [r0+r2*2]
|
||||
add r0, r2
|
||||
|
||||
vmovhpd m2, [r1]
|
||||
add r1, r2
|
||||
vmovlpd m2, [r1]
|
||||
|
||||
vmovhpd m4, [r1+r2]
|
||||
vmovlpd m4, [r1+r2*2]
|
||||
lea r1, [r1+r2*2]
|
||||
add r1, r2
|
||||
|
||||
vpsadbw m1, m2
|
||||
vpsadbw m3, m4
|
||||
|
||||
vpaddw m0, m1
|
||||
vpaddw m0, m3
|
||||
|
||||
vmovhlps m1, m0
|
||||
vpaddw m0, m1
|
||||
|
||||
vmovd eax, m0
|
||||
|
||||
RET
|
||||
|
||||
|
||||
;KVZ_SAD_16X16
|
||||
;Calculates the SAD (sum of absolute differences) of 256 consecutive bytes in memory
|
||||
;r0 address of the first value(current)
|
||||
;r1 address of the first value(reference)
;The result is left in eax.
|
||||
|
||||
cglobal sad_16x16, 2, 2, 5
|
||||
|
||||
;Zero m4 (SAD accumulator)
|
||||
vpxor m4, m4
|
||||
|
||||
%assign i 0
|
||||
|
||||
;Repeat 8 times, 2 x 16 bytes per repetition (assembly-time unrolling).
|
||||
%rep 8
|
||||
|
||||
;Load the next two rows of the current frame
|
||||
vmovdqu m0, [r0 + 16 * i]
|
||||
vmovdqu m2, [r0 + 16 * (i + 1)]
|
||||
|
||||
;Load the next two rows of the reference frame
|
||||
vmovdqu m1, [r1 + 16 * i]
|
||||
vmovdqu m3, [r1 + 16 * (i + 1)]
|
||||
|
||||
vpsadbw m0, m1
|
||||
vpsadbw m2, m3
|
||||
|
||||
;Accumulate SADs to m4. Word-sized adds suffice: 256 byte differences of at most 255 sum to at most 65280.
|
||||
vpaddw m4, m0
|
||||
vpaddw m4, m2
|
||||
|
||||
%assign i i+2
|
||||
|
||||
%endrep
|
||||
|
||||
;Calculate the final sum by folding the upper 64-bit half onto the lower half
|
||||
vmovhlps m0, m4
|
||||
vpaddw m4, m0
|
||||
|
||||
vmovd eax, m4
|
||||
|
||||
RET
|
||||
|
||||
|
||||
;KVZ_SAD_16X16_STRIDE
|
||||
;Calculates SAD of a 16x16 block inside a frame with stride
|
||||
;r0 address of the first value(current)
|
||||
;r1 address of the first value(reference)
|
||||
;r2 stride (added to byte addresses; the same stride is applied to both blocks)
;The result is left in eax.
;NOTE(review): r2 is used directly in address arithmetic; confirm the caller
;passes the stride extended to full register width.
|
||||
|
||||
cglobal sad_16x16_stride, 3, 3, 5
|
||||
|
||||
vpxor m4, m4
|
||||
|
||||
%rep 8
|
||||
|
||||
; Load the next 2 rows of the current block (r0) to m0 and m2, then advance r0 by two rows
|
||||
vmovdqu m0, [r0]
|
||||
vmovdqu m2, [r0 + r2]
|
||||
lea r0, [r0 + r2*2]
|
||||
|
||||
; Load the next 2 rows of the reference block (r1) to m1 and m3, then advance r1 by two rows
|
||||
vmovdqu m1, [r1]
|
||||
vmovdqu m3, [r1 + r2]
|
||||
lea r1, [r1 + r2*2]
|
||||
|
||||
vpsadbw m0, m1
|
||||
vpsadbw m2, m3
|
||||
|
||||
vpaddw m4, m0
|
||||
vpaddw m4, m2
|
||||
|
||||
%endrep
|
||||
|
||||
vmovhlps m0, m4
|
||||
vpaddw m4, m0
|
||||
|
||||
vmovd eax, m4
|
||||
|
||||
RET
|
||||
|
||||
|
||||
;KVZ_SAD_32x32_STRIDE
|
||||
;Calculates SAD of a 32x32 block inside a frame with stride
|
||||
;r0 address of the first value(current)
|
||||
|
||||
;r1 address of the first value(reference)
|
||||
;r2 stride (added to byte addresses; the same stride is applied to both blocks)
;The result is left in eax.
;NOTE(review): r2 is used directly in address arithmetic; confirm the caller
;passes the stride extended to full register width.
|
||||
cglobal sad_32x32_stride, 3, 3, 5
|
||||
vpxor m4, m4
|
||||
|
||||
; Handle 2 lines per iteration (16 repetitions = 32 rows, two 16-byte loads per row).
; Dword adds are used for accumulation: 1024 byte differences of at most 255 can exceed 16 bits.
|
||||
%rep 16
|
||||
vmovdqu m0, [r0]
|
||||
vmovdqu m1, [r0 + 16]
|
||||
vmovdqu m2, [r0 + r2]
|
||||
vmovdqu m3, [r0 + r2 + 16]
|
||||
lea r0, [r0 + 2 * r2]
|
||||
|
||||
vpsadbw m0, [r1]
|
||||
vpsadbw m1, [r1 + 16]
|
||||
vpsadbw m2, [r1 + r2]
|
||||
vpsadbw m3, [r1 + r2 + 16]
|
||||
lea r1, [r1 + 2 * r2]
|
||||
|
||||
vpaddd m4, m0
|
||||
vpaddd m4, m1
|
||||
vpaddd m4, m2
|
||||
vpaddd m4, m3
|
||||
%endrep
|
||||
|
||||
vmovhlps m0, m4
|
||||
vpaddd m4, m0
|
||||
|
||||
vmovd eax, m4
|
||||
|
||||
RET
|
||||
|
||||
|
||||
;KVZ_SAD_64x64_STRIDE
|
||||
;Calculates SAD of a 64x64 block inside a frame with stride
|
||||
;r0 address of the first value(current)
|
||||
;r1 address of the first value(reference)
|
||||
;r2 stride (added to byte addresses; the same stride is applied to both blocks)
;The result is left in eax. r3 is used as the loop counter.
;NOTE(review): r2 is used directly in address arithmetic; confirm the caller
;passes the stride extended to full register width.
|
||||
cglobal sad_64x64_stride, 3, 4, 5
|
||||
vpxor m4, m4 ; sum accumulation register
|
||||
mov r3, 4 ; number of iterations in the loop
|
||||
|
||||
Process16Lines:
|
||||
; Intel optimization manual says to not unroll beyond 500 instructions.
|
||||
; Didn't seem to have much of an effect on Ivy Bridge or Haswell, but
|
||||
; smaller is better, when speed is the same, right?
; Hence 16 unrolled rows per pass and 4 passes (64 rows), one 64-byte row per repetition.
|
||||
%rep 16
|
||||
vmovdqu m0, [r0]
|
||||
vmovdqu m1, [r0 + 1*16]
|
||||
vmovdqu m2, [r0 + 2*16]
|
||||
vmovdqu m3, [r0 + 3*16]
|
||||
|
||||
vpsadbw m0, [r1]
|
||||
vpsadbw m1, [r1 + 1*16]
|
||||
vpsadbw m2, [r1 + 2*16]
|
||||
vpsadbw m3, [r1 + 3*16]
|
||||
|
||||
lea r0, [r0 + r2]
|
||||
lea r1, [r1 + r2]
|
||||
|
||||
vpaddd m4, m0
|
||||
vpaddd m4, m1
|
||||
vpaddd m4, m2
|
||||
vpaddd m4, m3
|
||||
%endrep
|
||||
|
||||
dec r3
|
||||
jnz Process16Lines
|
||||
|
||||
vmovhlps m0, m4
|
||||
vpaddd m4, m0
|
||||
|
||||
vmovd eax, m4
|
||||
|
||||
RET
|
|
@ -1,56 +0,0 @@
|
|||
#ifndef _PICTURE_X86_ASM_SAD_H_
|
||||
#define _PICTURE_X86_ASM_SAD_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* INCLUDING NEGLIGENCE OR OTHERWISE ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* \ingroup Optimization
|
||||
* \file
|
||||
* Optimizations for AVX, utilizing ASM implementations.
|
||||
*/
|
||||
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
#include "kvazaar.h"
|
||||
|
||||
// Prototypes for the AVX SAD routines; only declared when KVZ_BIT_DEPTH is 8.
// NOTE(review): presumably implemented by the `cglobal sad_*` routines in the
// accompanying assembly file — confirm the symbol naming against x86inc.asm.
#if KVZ_BIT_DEPTH == 8
|
||||
// Variants taking only two buffer pointers (no stride argument).
unsigned kvz_sad_4x4_avx(const uint8_t*, const uint8_t*);
|
||||
unsigned kvz_sad_8x8_avx(const uint8_t*, const uint8_t*);
|
||||
unsigned kvz_sad_16x16_avx(const uint8_t*, const uint8_t*);
|
||||
|
||||
// Variants taking two buffer pointers and a single stride argument.
unsigned kvz_sad_4x4_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
||||
unsigned kvz_sad_8x8_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
||||
unsigned kvz_sad_16x16_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
||||
unsigned kvz_sad_32x32_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
||||
unsigned kvz_sad_64x64_stride_avx(const uint8_t *data1, const uint8_t *data2, unsigned stride);
|
||||
#endif // KVZ_BIT_DEPTH == 8
|
||||
|
||||
#endif
|
|
@ -1,575 +0,0 @@
|
|||
;/*****************************************************************************
|
||||
; * This file is part of Kvazaar HEVC encoder.
|
||||
; *
|
||||
; * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
; * All rights reserved.
|
||||
; *
|
||||
; * Redistribution and use in source and binary forms, with or without modification,
|
||||
; * are permitted provided that the following conditions are met:
|
||||
; *
|
||||
; * * Redistributions of source code must retain the above copyright notice, this
|
||||
; * list of conditions and the following disclaimer.
|
||||
; *
|
||||
; * * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
; * list of conditions and the following disclaimer in the documentation and/or
|
||||
; * other materials provided with the distribution.
|
||||
; *
|
||||
; * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
; * contributors may be used to endorse or promote products derived from
|
||||
; * this software without specific prior written permission.
|
||||
; *
|
||||
; * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
; * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
; * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
; * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
; * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
; * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
; * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
; * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
; * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
; ****************************************************************************/
|
||||
|
||||
%include "x86inc.asm"
|
||||
|
||||
;cglobal and RET macros are from the x86.inc
|
||||
;they push and pop the necessary registers to
|
||||
;stack depending on the operating system
|
||||
|
||||
;Usage: cglobal name, %1, %2, %3
|
||||
;%1: Number of arguments
|
||||
;%2: Number of registers used
|
||||
;%3: Number of xmm registers used.
|
||||
;More info in x86inc.asm
|
||||
|
||||
SECTION .text
|
||||
|
||||
;Set x86inc.asm macros to use avx and xmm registers
|
||||
INIT_XMM avx
|
||||
|
||||
; KVZ_ZERO_EXTEND_WD src, dst_lo, dst_hi
;
; Widens the packed words of one xmm register to dwords, spread over
; two xmm registers.
;   %1  source register
;   %2  receives the zero-extended words from the low 64 bits of %1
;   %3  receives the zero-extended words from the high 64 bits of %1
;
; %3 is filled before %2 is written, so callers may pass the same
; register for %1 and %2 (every use in this file does).

%macro KVZ_ZERO_EXTEND_WD 3

    ; High half: move the upper 64 bits of the source into the low
    ; 64 bits of %3, then widen them in place.
    vmovhlps  %3, %1
    vpmovzxwd %3, %3

    ; Low half: widen the lower 64 bits of the source.
    vpmovzxwd %2, %1

%endmacro ; KVZ_ZERO_EXTEND_WD
|
||||
|
||||
; SATD_HORIZONTAL_SUB_AND_ADD in_a, in_b, out_a, out_b
;
; Runs three rounds of paired horizontal add / horizontal subtract over
; two registers of packed words. The three-operand (non-destructive)
; forms are used so that each round can produce the sums and the
; differences from the same pair of inputs.
;   %1, %2  inputs; overwritten with the second-round values
;   %3, %4  outputs; hold the third-round sums (%3) and differences (%4)
;
; TODO: this could probably be done with three registers by using the
;       destructive form of vphsubw.

%macro SATD_HORIZONTAL_SUB_AND_ADD 4

    ; Round 1: inputs -> outputs
    vphaddw %3, %1, %2
    vphsubw %4, %1, %2

    ; Round 2: outputs -> inputs
    vphaddw %1, %3, %4
    vphsubw %2, %3, %4

    ; Round 3: inputs -> outputs
    vphaddw %3, %1, %2
    vphsubw %4, %1, %2

%endmacro ; SATD_HORIZONTAL_SUB_AND_ADD
|
||||
|
||||
; KVZ_SATD_8X8_STRIDE
;
; Computes the SATD of two 8x8 blocks of bytes, each addressed with its
; own stride.
;
; Inputs (as used by the code below):
;   r0  address of the first row of the first block
;   r1  stride of the first block, in bytes
;   r2  address of the first row of the second block
;   r3  stride of the second block, in bytes
;
; Output:
;   r4  (sum of absolute transformed differences + 2) >> 2
;
; Side effects:
;   - r0 and r2 are left advanced by six rows (three "lea" steps of two
;     rows each); callers that loop over sub-blocks must compensate.
;   - On x86-64 the macro uses m0-m15; on 32-bit x86 it uses m0-m7 and
;     temporarily reserves 64 bytes of stack.

%macro KVZ_SATD_8X8_STRIDE 0

    ; ---- Row differences ------------------------------------------------
    ; Load eight bytes per row, zero-extend to words and subtract. The
    ; differences of rows 0-6 end up in m0-m6; m7 is scratch until the
    ; last row is handled.

    vpmovzxbw m0, [r0]
    vpmovzxbw m7, [r2]
    vpsubw    m0, m7

    vpmovzxbw m1, [r0+r1]
    vpmovzxbw m7, [r2+r3]
    vpsubw    m1, m7

    ; Advance both block pointers by two rows.
    lea r0, [r0+r1*2]
    lea r2, [r2+r3*2]

    vpmovzxbw m2, [r0]
    vpmovzxbw m7, [r2]
    vpsubw    m2, m7

    vpmovzxbw m3, [r0+r1]
    vpmovzxbw m7, [r2+r3]
    vpsubw    m3, m7

    lea r0, [r0+r1*2]
    lea r2, [r2+r3*2]

    vpmovzxbw m4, [r0]
    vpmovzxbw m7, [r2]
    vpsubw    m4, m7

    vpmovzxbw m5, [r0+r1]
    vpmovzxbw m7, [r2+r3]
    vpsubw    m5, m7

    lea r0, [r0+r1*2]
    lea r2, [r2+r3*2]

    vpmovzxbw m6, [r0]
    vpmovzxbw m7, [r2]
    vpsubw    m6, m7

    ; 32-bit mode has no xmm8-xmm15, so the last row and the
    ; intermediate results go through the stack there.

%if ARCH_X86_64
    vpmovzxbw m7, [r0+r1]
    vpmovzxbw m8, [r2+r3]
    vpsubw    m7, m8
%else
    %define temp0 esp+16*3
    %define temp1 esp+16*2
    %define temp2 esp+16*1
    %define temp3 esp+16*0

    ; Reserve room for 4 x 128 bits.
    sub esp, 16*4

    vpmovzxbw m7, [r2+r3]
    vmovdqu   [temp0], m7
    vpmovzxbw m7, [r0+r1]
    vpsubw    m7, [temp0]

    ; Park the differences of the last four rows (m4-m7) on the stack.
    vmovdqu [temp0], m4
    vmovdqu [temp1], m5
    vmovdqu [temp2], m6
    vmovdqu [temp3], m7
%endif

    ; ---- Hadamard transform, horizontal pass ----------------------------

%if ARCH_X86_64
    ; Each invocation transforms two rows; their results are interleaved
    ; in a register pair (m8/m9, m10/m11, m12/m13, m14/m15).
    SATD_HORIZONTAL_SUB_AND_ADD m0, m1, m8,  m9
    SATD_HORIZONTAL_SUB_AND_ADD m2, m3, m10, m11
    SATD_HORIZONTAL_SUB_AND_ADD m4, m5, m12, m13
    SATD_HORIZONTAL_SUB_AND_ADD m6, m7, m14, m15
%else
    ; Transform the first four rows (m0-m3 -> m4-m7), swap them with the
    ; four rows parked on the stack, then transform those as well.
    SATD_HORIZONTAL_SUB_AND_ADD m0, m1, m4, m5
    SATD_HORIZONTAL_SUB_AND_ADD m2, m3, m6, m7

    vmovdqu m3, [temp3]
    vmovdqu m2, [temp2]
    vmovdqu m1, [temp1]
    vmovdqu m0, [temp0]

    vmovdqu [temp3], m7
    vmovdqu [temp2], m6
    vmovdqu [temp1], m5
    vmovdqu [temp0], m4

    SATD_HORIZONTAL_SUB_AND_ADD m0, m1, m4, m5
    SATD_HORIZONTAL_SUB_AND_ADD m2, m3, m6, m7
%endif

    ; ---- Hadamard transform, vertical pass ------------------------------
    ; One horizontal add/sub per register pair combines the interleaved
    ; row results; the remaining stages are plain packed adds and subs.

%if ARCH_X86_64
    ; Input: m8-m15. Output: m0-m7.

    ; First stage
    vphaddw m0, m8,  m9
    vphsubw m1, m8,  m9

    vphaddw m2, m10, m11
    vphsubw m3, m10, m11

    vphaddw m4, m12, m13
    vphsubw m5, m12, m13

    vphaddw m6, m14, m15
    vphsubw m7, m14, m15

    ; Second stage
    vpaddw m8,  m0, m2
    vpaddw m9,  m1, m3
    vpsubw m10, m0, m2
    vpsubw m11, m1, m3

    vpaddw m12, m4, m6
    vpaddw m13, m5, m7
    vpsubw m14, m4, m6
    vpsubw m15, m5, m7

    ; Third stage
    vpaddw m0, m8,  m12
    vpaddw m1, m9,  m13
    vpaddw m2, m10, m14
    vpaddw m3, m11, m15

    vpsubw m4, m8,  m12
    vpsubw m5, m9,  m13
    vpsubw m6, m10, m14
    vpsubw m7, m11, m15

%else
    ; Input: m4-m7 plus temp0-temp3. The absolute values are taken and
    ; accumulated here as well; the packed dword sums end up in m0.

    ; First stage
    vphaddw m0, m4, m5
    vphsubw m1, m4, m5

    vphaddw m2, m6, m7
    vphsubw m3, m6, m7

    ; Second stage
    vpaddw m4, m0, m2
    vpaddw m5, m1, m3
    vpsubw m6, m0, m2
    vpsubw m7, m1, m3

    ; Swap the second-stage results with the other four rows.
    vmovdqu m3, [temp3]
    vmovdqu m2, [temp2]
    vmovdqu m1, [temp1]
    vmovdqu m0, [temp0]

    vmovdqu [temp3], m7
    vmovdqu [temp2], m6
    vmovdqu [temp1], m5
    vmovdqu [temp0], m4

    ; First stage (second half)
    vphaddw m4, m0, m1
    vphsubw m5, m0, m1

    vphaddw m6, m2, m3
    vphsubw m7, m2, m3

    ; Second stage (second half)
    vpaddw m0, m4, m6
    vpaddw m1, m5, m7
    vpsubw m2, m4, m6
    vpsubw m3, m5, m7

    ; Third stage, first two register pairs
    vpaddw m4, m0, [temp0]
    vpaddw m5, m1, [temp1]
    vpsubw m6, m0, [temp0]
    vpsubw m7, m1, [temp1]

    ; Take absolute values, widen the words to dwords (m1 is scratch
    ; from here on) and fold the high half into the low half.
    vpabsw m4, m4
    KVZ_ZERO_EXTEND_WD m4, m4, m1
    vpaddd m4, m1

    vpabsw m5, m5
    KVZ_ZERO_EXTEND_WD m5, m5, m1
    vpaddd m5, m1

    vpabsw m6, m6
    KVZ_ZERO_EXTEND_WD m6, m6, m1
    vpaddd m6, m1

    vpabsw m7, m7
    KVZ_ZERO_EXTEND_WD m7, m7, m1
    vpaddd m7, m1

    vpaddd m0, m4, m5
    vpaddd m0, m6
    vpaddd m0, m7

    ; Third stage, remaining two register pairs
    vpaddw m4, m2, [temp2]
    vpaddw m5, m3, [temp3]
    vpsubw m6, m2, [temp2]
    vpsubw m7, m3, [temp3]

    vpabsw m4, m4
    KVZ_ZERO_EXTEND_WD m4, m4, m1
    vpaddd m4, m1

    vpabsw m5, m5
    KVZ_ZERO_EXTEND_WD m5, m5, m1
    vpaddd m5, m1

    vpabsw m6, m6
    KVZ_ZERO_EXTEND_WD m6, m6, m1
    vpaddd m6, m1

    vpabsw m7, m7
    KVZ_ZERO_EXTEND_WD m7, m7, m1
    vpaddd m7, m1

    ; Sum this half into m4 ...
    vpaddd m4, m5
    vpaddd m4, m6
    vpaddd m4, m7

    ; ... and add it to the running total in m0.
    vpaddd m0, m4

%endif

%if ARCH_X86_64

    ; Take absolute values, widen the words to dwords (m8 is scratch)
    ; and fold each register's high half into its own low half.

    vpabsw m0, m0
    KVZ_ZERO_EXTEND_WD m0, m0, m8
    vpaddd m0, m8

    vpabsw m1, m1
    KVZ_ZERO_EXTEND_WD m1, m1, m8
    vpaddd m1, m8

    ; The original accumulated this high half into m1 instead of m2.
    ; The grand total was unaffected because m1 and m2 are both added
    ; to m0 below, but m2 is what the surrounding pattern intends.
    vpabsw m2, m2
    KVZ_ZERO_EXTEND_WD m2, m2, m8
    vpaddd m2, m8

    vpabsw m3, m3
    KVZ_ZERO_EXTEND_WD m3, m3, m8
    vpaddd m3, m8

    vpabsw m4, m4
    KVZ_ZERO_EXTEND_WD m4, m4, m8
    vpaddd m4, m8

    vpabsw m5, m5
    KVZ_ZERO_EXTEND_WD m5, m5, m8
    vpaddd m5, m8

    vpabsw m6, m6
    KVZ_ZERO_EXTEND_WD m6, m6, m8
    vpaddd m6, m8

    vpabsw m7, m7
    KVZ_ZERO_EXTEND_WD m7, m7, m8
    vpaddd m7, m8

    ; Packed sum of all transformed values into m0.
    vpaddd m0, m1
    vpaddd m0, m2
    vpaddd m0, m3
    vpaddd m0, m4
    vpaddd m0, m5
    vpaddd m0, m6
    vpaddd m0, m7
%endif

    ; ---- Final reduction ------------------------------------------------
    ; Two horizontal adds collapse the four dwords of m0 into its lowest
    ; dword, which is then moved to r4.
    vphaddd m0, m0
    vphaddd m0, m0

    vmovd r4d, m0

    ; The 8x8 Hadamard transform requires adding 2 and dividing by 4.
    add r4, 2
    shr r4, 2

    ; Zero the high 128 bits of the ymm registers to prevent the
    ; AVX-SSE transition penalty.
    vzeroupper

%if ARCH_X86_64 == 0
    ; Release the stack space reserved above.
    add esp, 16*4
%endif

%endmacro ; KVZ_SATD_8X8_STRIDE
|
||||
|
||||
; satd_4x4
;
; SATD of two 4x4 blocks, each read as 16 consecutive bytes.
;   r0  address of the first block
;   r1  address of the second block
; The result is returned in eax.

cglobal satd_4x4, 2, 2, 6

    ; Load both halves of each block (8 bytes at a time), zero-extended
    ; to words, then form the differences in m0 and m1.
    vpmovzxbw m0, [r0]
    vpmovzxbw m1, [r0+8]
    vpmovzxbw m2, [r1]
    vpmovzxbw m3, [r1+8]
    vpsubw    m0, m2
    vpsubw    m1, m3

    ; Hadamard transform, horizontal phase.
    ; Stage 1
    vphaddw m4, m0, m1
    vphsubw m5, m0, m1
    ; Stage 2
    vphaddw m0, m4, m5
    vphsubw m1, m4, m5

    ; Hadamard transform, vertical phase.
    ; Stage 1
    vphaddw m4, m0, m1
    vphsubw m5, m0, m1
    ; Stage 2
    vphaddw m0, m4, m5
    vphsubw m1, m4, m5

    ; Absolute values of all transformed coefficients.
    vpabsw m0, m0
    vpabsw m1, m1

    ; Add the two registers, then reduce the eight words to one with
    ; three horizontal adds.
    vpaddw  m0, m1
    vphaddw m0, m0
    vphaddw m0, m0
    vphaddw m0, m0

    ; The total is in the lowest word of m0; move it to eax.
    vpextrw eax, m0, 0

    ; The 4x4 Hadamard transform requires adding 1 and dividing by 2.
    add eax, 1
    shr eax, 1

    RET
|
||||
|
||||
|
||||
|
||||
; satd_8x8
;
; SATD of two 8x8 blocks. Both blocks are read with a row pitch of
; 8 bytes (the strides handed to KVZ_SATD_8X8_STRIDE are the constant 8).
;   r0  address of the first block
;   r1  address of the second block
; The result is returned in rax.

%if ARCH_X86_64
cglobal satd_8x8, 4, 5, 16
%else
cglobal satd_8x8, 4, 5, 8
%endif

    ; Arrange the registers the way KVZ_SATD_8X8_STRIDE expects them:
    ; r0/r2 = block addresses, r1/r3 = strides. r1 must be copied to r2
    ; before it is overwritten with the stride.
    mov r2, r1
    mov r3, 8
    mov r1, 8

    ; The macro leaves its result in r4.
    KVZ_SATD_8X8_STRIDE
    mov rax, r4

    RET
|
||||
|
||||
; KVZ_SATD_NXN n
;
; Emits the function satd_<n>x<n>, which computes the SATD of two n x n
; blocks by summing the SATDs of their 8x8 sub-blocks. Both blocks are
; read with a row pitch of n bytes.
;   r0  address of the first block
;   r1  address of the second block
; The result is returned in rax.
;
; Register use inside the function:
;   r1, r3  strides passed to KVZ_SATD_8X8_STRIDE (both n)
;   r2      address within the second block
;   r4      column counter, saved on the stack around the macro because
;           the macro returns its result in r4
;   r5      row counter
;   r6      accumulated SATD

%macro KVZ_SATD_NXN 1

%if ARCH_X86_64
cglobal satd_%1x%1, 2, 7, 16
%else
cglobal satd_%1x%1, 2, 7, 8
%endif

    ; Set up pointers and strides for KVZ_SATD_8X8_STRIDE.
    mov r2, r1
    mov r1, %1
    mov r3, %1

    ; Clear the accumulator and the row counter.
    xor r6, r6
    xor r5, r5

    ; Both loops step their counter by 8 until it reaches n, so each
    ; runs n/8 times.

.yloop:

    ; Restart the column counter for this band of sub-blocks.
    xor r4, r4

.xloop:
    push r4

    ; SATD of the current 8x8 sub-block; the result arrives in r4.
    KVZ_SATD_8X8_STRIDE
    add r6, r4

    ; The macro left r0 and r2 six rows further down. Step back those
    ; six rows and 8 bytes to the right to reach the next sub-block in
    ; the same band.
    sub r0, 6*%1-8
    sub r2, 6*%1-8

    pop r4
    add r4, 8
    cmp r4, %1
    jne .xloop

    ; Move r0 and r2 to the first sub-block of the next band of
    ; 8x8 sub-blocks.
    add r0, 7*%1
    add r2, 7*%1

    add r5, 8
    cmp r5, %1
    jne .yloop

    mov rax, r6
    RET

%endmacro ; KVZ_SATD_NXN

KVZ_SATD_NXN 16
KVZ_SATD_NXN 32
KVZ_SATD_NXN 64
|
|
@ -1,50 +0,0 @@
|
|||
#ifndef _PICTURE_X86_ASM_SATD_H_
|
||||
#define _PICTURE_X86_ASM_SATD_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* \ingroup Optimization
|
||||
* \file
|
||||
* Optimizations for AVX, utilizing ASM implementations.
|
||||
*/
|
||||
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
|
||||
|
||||
/*
 * Prototypes for the fixed-size SATD routines registered under the
 * "x86_asm_avx" strategy name. Each takes two pixel pointers and no
 * stride argument.
 * NOTE(review): presumably these are the cglobal satd_NxN entry points of
 * the accompanying assembly file, which load pixels as bytes -- confirm
 * that callers only use them when kvz_pixel is 8 bits wide.
 */
unsigned kvz_satd_4x4_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
||||
unsigned kvz_satd_8x8_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
||||
unsigned kvz_satd_16x16_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
||||
unsigned kvz_satd_32x32_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
||||
unsigned kvz_satd_64x64_avx(const kvz_pixel *org, const kvz_pixel *cur);
|
||||
|
||||
#endif
|
|
@ -1,132 +0,0 @@
|
|||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
#include "strategies/x86_asm/picture-x86-asm.h"
|
||||
|
||||
#if defined(KVZ_COMPILE_ASM)
|
||||
#include "kvazaar.h"
|
||||
#if KVZ_BIT_DEPTH == 8
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "strategies/x86_asm/picture-x86-asm-sad.h"
|
||||
#include "strategies/x86_asm/picture-x86-asm-satd.h"
|
||||
#include "strategies/sse41/picture-sse41.h"
|
||||
#include "strategyselector.h"
|
||||
|
||||
|
||||
/**
 * \brief SAD of a 32x32 block, assembled from four calls to the 16x16 kernel.
 *
 * Call i is given both pointers advanced by i * (8 * 32) bytes, i.e. the
 * block is consumed as four consecutive 256-byte pieces.
 *
 * \param data1  first block
 * \param data2  second block
 * \return sum of the four partial SADs
 */
static unsigned kvz_sad_32x32_avx(const uint8_t *data1, const uint8_t *data2)
{
  enum { PIECE_BYTES = 8 * 32, NUM_PIECES = 4 };

  unsigned total = 0;
  for (int piece = 0; piece < NUM_PIECES; ++piece) {
    const int offset = piece * PIECE_BYTES;
    total += kvz_sad_16x16_avx(data1 + offset, data2 + offset);
  }
  return total;
}
|
||||
|
||||
/**
 * \brief SAD of a 64x64 block, assembled from four calls to the 32x32 helper.
 *
 * Call i is given both pointers advanced by i * (16 * 64) bytes, i.e. the
 * block is consumed as four consecutive 1024-byte pieces.
 *
 * \param data1  first block
 * \param data2  second block
 * \return sum of the four partial SADs
 */
static unsigned kvz_sad_64x64_avx(const uint8_t *data1, const uint8_t *data2)
{
  enum { PIECE_BYTES = 16 * 64, NUM_PIECES = 4 };

  unsigned total = 0;
  for (int piece = 0; piece < NUM_PIECES; ++piece) {
    const int offset = piece * PIECE_BYTES;
    total += kvz_sad_32x32_avx(data1 + offset, data2 + offset);
  }
  return total;
}
|
||||
|
||||
/**
 * \brief Plain C SAD for a block of arbitrary size.
 *
 * Both inputs are addressed with the same stride.
 *
 * \param data1   first block
 * \param data2   second block
 * \param width   number of pixels compared per row
 * \param height  number of rows compared
 * \param stride  distance in bytes between the starts of consecutive rows
 * \return sum of absolute differences over the width x height area
 */
static unsigned kvz_sad_other_avx(const uint8_t *data1, const uint8_t *data2,
                                  int width, int height,
                                  unsigned stride)
{
  unsigned total = 0;
  int row = 0;

  while (row < height) {
    // Offset of the first pixel of this row, shared by both inputs.
    const unsigned row_start = row * stride;

    for (int col = 0; col < width; ++col) {
      const int diff = data1[row_start + col] - data2[row_start + col];
      total += abs(diff);
    }
    ++row;
  }

  return total;
}
|
||||
|
||||
/**
 * \brief General SAD entry point for the x86 assembly strategy.
 *
 * Dispatches to the fastest routine that can handle the request:
 *  - square 8/16/32/64 blocks go to the assembly kernels, but only when
 *    both inputs share a stride, because those kernels take a single
 *    stride argument;
 *  - any other block of at least 16 pixels goes to the SSE4.1 routine,
 *    which takes both strides;
 *  - smaller blocks are handled in plain C.
 *
 * Earlier versions ignored stride2 on the assembly and plain C paths and
 * read data2 with stride1, which is wrong when the strides differ.
 *
 * \param data1    first block
 * \param data2    second block
 * \param width    block width in pixels
 * \param height   block height in pixels
 * \param stride1  row stride of data1 in bytes
 * \param stride2  row stride of data2 in bytes
 * \return sum of absolute differences
 */
static unsigned reg_sad_x86_asm(const uint8_t *data1, const uint8_t * data2,
                                const int width, const int height,
                                const unsigned stride1, const unsigned stride2)
{
  const int same_stride = (stride1 == stride2);

  if (same_stride && width == height) {
    switch (width) {
      case 8:
        return kvz_sad_8x8_stride_avx(data1, data2, stride1);
      case 16:
        return kvz_sad_16x16_stride_avx(data1, data2, stride1);
      case 32:
        return kvz_sad_32x32_stride_avx(data1, data2, stride1);
      case 64:
        return kvz_sad_64x64_stride_avx(data1, data2, stride1);
      default:
        break;
    }
  }

  if (width * height >= 16) {
    // Call the vectorized general SAD SSE41 function when the block
    // is big enough to make it worth it.
    return kvz_reg_sad_sse41(data1, data2, width, height, stride1, stride2);
  }

  if (same_stride) {
    return kvz_sad_other_avx(data1, data2, width, height, stride1);
  }

  // Tiny block with differing strides: index each input with its own stride.
  unsigned sad = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
    }
  }
  return sad;
}
|
||||
|
||||
#endif // KVZ_BIT_DEPTH == 8
|
||||
#endif //defined(KVZ_COMPILE_ASM)
|
||||
|
||||
/**
 * \brief Register the x86 assembly (AVX) picture strategies.
 *
 * Registers the SAD and SATD routines under the strategy name
 * "x86_asm_avx" with priority 30. Nothing is registered unless the build
 * defines KVZ_COMPILE_ASM, KVZ_BIT_DEPTH is 8 and \p bitdepth is 8; in
 * those cases the function still reports success.
 *
 * \param opaque    passed through to kvz_strategyselector_register
 * \param bitdepth  bit depth the caller is registering strategies for
 * \return nonzero if every attempted registration succeeded
 */
int kvz_strategy_register_picture_x86_asm_avx(void* opaque, uint8_t bitdepth)
{
  bool success = true;
#if defined(KVZ_COMPILE_ASM)
#if KVZ_BIT_DEPTH == 8
  if (bitdepth == 8){
    success &= kvz_strategyselector_register(opaque, "reg_sad", "x86_asm_avx", 30, &reg_sad_x86_asm);

    success &= kvz_strategyselector_register(opaque, "sad_4x4", "x86_asm_avx", 30, &kvz_sad_4x4_avx);
    success &= kvz_strategyselector_register(opaque, "sad_8x8", "x86_asm_avx", 30, &kvz_sad_8x8_avx);
    success &= kvz_strategyselector_register(opaque, "sad_16x16", "x86_asm_avx", 30, &kvz_sad_16x16_avx);
    success &= kvz_strategyselector_register(opaque, "sad_32x32", "x86_asm_avx", 30, &kvz_sad_32x32_avx);
    success &= kvz_strategyselector_register(opaque, "sad_64x64", "x86_asm_avx", 30, &kvz_sad_64x64_avx);

    success &= kvz_strategyselector_register(opaque, "satd_4x4", "x86_asm_avx", 30, &kvz_satd_4x4_avx);
    success &= kvz_strategyselector_register(opaque, "satd_8x8", "x86_asm_avx", 30, &kvz_satd_8x8_avx);
    success &= kvz_strategyselector_register(opaque, "satd_16x16", "x86_asm_avx", 30, &kvz_satd_16x16_avx);
    success &= kvz_strategyselector_register(opaque, "satd_32x32", "x86_asm_avx", 30, &kvz_satd_32x32_avx);
    success &= kvz_strategyselector_register(opaque, "satd_64x64", "x86_asm_avx", 30, &kvz_satd_64x64_avx);
  }
#endif // KVZ_BIT_DEPTH == 8
#endif // defined(KVZ_COMPILE_ASM)
  return success;
}
|
|
@ -1,46 +0,0 @@
|
|||
#ifndef STRATEGIES_PICTURE_X86_ASM_H_
|
||||
#define STRATEGIES_PICTURE_X86_ASM_H_
|
||||
/*****************************************************************************
|
||||
* This file is part of uvg266 VVC encoder.
|
||||
*
|
||||
* Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright notice, this
|
||||
* list of conditions and the following disclaimer in the documentation and/or
|
||||
* other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
****************************************************************************/
|
||||
|
||||
/**
|
||||
* \ingroup Optimization
|
||||
* \file
|
||||
* Optimizations for AVX, utilizing ASM implementations.
|
||||
*/
|
||||
|
||||
#include "global.h" // IWYU pragma: keep
|
||||
|
||||
|
||||
/**
 * \brief Register the x86 assembly (AVX) picture strategies.
 *
 * \param opaque    handle forwarded to the strategy selector
 * \param bitdepth  bit depth strategies are being registered for
 * \return nonzero on success
 */
int kvz_strategy_register_picture_x86_asm_avx(void* opaque, uint8_t bitdepth);
|
||||
|
||||
#endif //STRATEGIES_PICTURE_X86_ASM_H_
|
File diff suppressed because it is too large
Load diff
|
@ -258,7 +258,6 @@ int kvz_strategyselector_register(void * const opaque, const char * const type,
|
|||
|
||||
//Check what strategies are available when they are registered
|
||||
if (strcmp(strategy_name, "avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
|
||||
if (strcmp(strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_available.intel_flags.avx++;
|
||||
if (strcmp(strategy_name, "avx2") == 0) kvz_g_strategies_available.intel_flags.avx2++;
|
||||
if (strcmp(strategy_name, "mmx") == 0) kvz_g_strategies_available.intel_flags.mmx++;
|
||||
if (strcmp(strategy_name, "sse") == 0) kvz_g_strategies_available.intel_flags.sse++;
|
||||
|
@ -330,7 +329,6 @@ static void* strategyselector_choose_for(const strategy_list_t * const strategie
|
|||
|
||||
//Check what strategy we are going to use
|
||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
|
||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "x86_asm_avx") == 0) kvz_g_strategies_in_use.intel_flags.avx++;
|
||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "avx2") == 0) kvz_g_strategies_in_use.intel_flags.avx2++;
|
||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "mmx") == 0) kvz_g_strategies_in_use.intel_flags.mmx++;
|
||||
if (strcmp(strategies->strategies[max_priority_i].strategy_name, "sse") == 0) kvz_g_strategies_in_use.intel_flags.sse++;
|
||||
|
|
Loading…
Reference in a new issue