mirror of
https://gitlab.com/sheaf/metabrush.git
synced 2024-11-05 14:53:37 +00:00
more SIMD experiments
This commit is contained in:
parent
ed8720555f
commit
2bae92dc5e
|
@ -161,7 +161,7 @@ common gtk
|
||||||
, haskell-gi
|
, haskell-gi
|
||||||
>= 0.26.10 && < 0.27
|
>= 0.26.10 && < 0.27
|
||||||
, haskell-gi-base
|
, haskell-gi-base
|
||||||
>= 0.26.10 && < 0.27
|
>= 0.26.6 && < 0.27
|
||||||
|
|
||||||
-- Workaround for https://github.com/haskell/cabal/issues/4237
|
-- Workaround for https://github.com/haskell/cabal/issues/4237
|
||||||
-- See https://github.com/commercialhaskell/stack/issues/2197
|
-- See https://github.com/commercialhaskell/stack/issues/2197
|
||||||
|
|
|
@ -10,11 +10,11 @@ description:
|
||||||
Computing brush strokes using Bézier curves.
|
Computing brush strokes using Bézier curves.
|
||||||
|
|
||||||
extra-source-files:
|
extra-source-files:
|
||||||
cbits/**/*.{cpp, hpp}
|
cbits/**/*.{S,c,h,cpp,hpp}
|
||||||
|
|
||||||
flag use-simd
|
flag use-simd
|
||||||
description: Use SIMD instructions to implement interval arithmetic.
|
description: Use SIMD instructions to implement interval arithmetic.
|
||||||
default: True
|
default: False
|
||||||
manual: True
|
manual: True
|
||||||
|
|
||||||
flag use-fma
|
flag use-fma
|
||||||
|
@ -106,7 +106,7 @@ common common
|
||||||
cpp-options:
|
cpp-options:
|
||||||
-DUSE_SIMD
|
-DUSE_SIMD
|
||||||
ghc-options:
|
ghc-options:
|
||||||
-mavx2
|
-mavx
|
||||||
build-depends:
|
build-depends:
|
||||||
base
|
base
|
||||||
>= 4.20
|
>= 4.20
|
||||||
|
@ -213,15 +213,21 @@ library
|
||||||
, rounded-hw
|
, rounded-hw
|
||||||
^>= 0.4
|
^>= 0.4
|
||||||
, time
|
, time
|
||||||
^>= 1.12.2
|
>= 1.12.2 && < 1.15
|
||||||
, transformers
|
, transformers
|
||||||
>= 0.5.6.2 && < 0.7
|
>= 0.5.6.2 && < 0.7
|
||||||
|
|
||||||
-- Extra C++ code for 2D linear systems of interval equations
|
|
||||||
include-dirs:
|
include-dirs:
|
||||||
cbits
|
cbits
|
||||||
|
c-sources:
|
||||||
|
-- C code for: setting the rounding mode
|
||||||
|
cbits/rounding.c
|
||||||
cxx-sources:
|
cxx-sources:
|
||||||
|
-- Walter's C++ code for 2D linear systems of interval equations
|
||||||
cbits/lp_2d.cpp
|
cbits/lp_2d.cpp
|
||||||
|
asm-sources:
|
||||||
|
-- X86 assembly for interval multiplication using SIMD instructions
|
||||||
|
cbits/mul.S
|
||||||
cxx-options:
|
cxx-options:
|
||||||
-std=c++20
|
-std=c++20
|
||||||
-mavx2
|
-mavx2
|
||||||
|
|
|
@ -1,39 +0,0 @@
|
||||||
packages: .
|
|
||||||
|
|
||||||
constraints:
|
|
||||||
acts -finitary,
|
|
||||||
fp-ieee +fma3,
|
|
||||||
rounded-hw -pure-hs -c99 -avx512 +ghc-prim -x87-long-double
|
|
||||||
|
|
||||||
tests: True
|
|
||||||
|
|
||||||
allow-newer:
|
|
||||||
*:base, *:template-haskell,
|
|
||||||
acts:deepseq,
|
|
||||||
falsify:tasty,
|
|
||||||
indexed-traversable:containers
|
|
||||||
|
|
||||||
--------------
|
|
||||||
-- GHC 9.10 --
|
|
||||||
--------------
|
|
||||||
|
|
||||||
-- eigen
|
|
||||||
source-repository-package
|
|
||||||
type: git
|
|
||||||
location: https://github.com/sheaf/eigen
|
|
||||||
tag: 3bc1d4bc53edb75e2ee3893763361aa0d5c7714a
|
|
||||||
|
|
||||||
--------------
|
|
||||||
-- GHC HEAD --
|
|
||||||
--------------
|
|
||||||
|
|
||||||
repository head.hackage.ghc.haskell.org
|
|
||||||
url: https://ghc.gitlab.haskell.org/head.hackage/
|
|
||||||
secure: True
|
|
||||||
key-threshold: 3
|
|
||||||
root-keys:
|
|
||||||
7541f32a4ccca4f97aea3b22f5e593ba2c0267546016b992dfadcd2fe944e55d
|
|
||||||
26021a13b401500c8eb2761ca95c61f2d625bfef951b939a8124ed12ecf07329
|
|
||||||
f76d08be13e9a61a377a85e2fb63f4c5435d40f8feb3e12eb05905edb8cdea89
|
|
||||||
|
|
||||||
active-repositories: hackage.haskell.org, head.hackage.ghc.haskell.org:override
|
|
30
brush-strokes/cbits/mul.S
Normal file
30
brush-strokes/cbits/mul.S
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
.global in2_mul8
|
||||||
|
in2_mul8:
|
||||||
|
vshufpd $3, %xmm1, %xmm1, %xmm4
|
||||||
|
vshufpd $3, %xmm2, %xmm2, %xmm3
|
||||||
|
vxorpd %xmm1, %xmm4, %xmm5
|
||||||
|
vxorpd %xmm2, %xmm3, %xmm6
|
||||||
|
vtestpd %xmm5, %xmm6
|
||||||
|
je .LBB1_2
|
||||||
|
vxorpd %xmm5, %xmm5, %xmm5
|
||||||
|
vcmplepd %xmm5, %xmm3, %xmm3
|
||||||
|
vcmplepd %xmm5, %xmm4, %xmm4
|
||||||
|
vmovsd .LCPI1_1(%rip), %xmm5
|
||||||
|
vxorpd %xmm5, %xmm1, %xmm1
|
||||||
|
vshufps $78, %xmm1, %xmm1, %xmm6
|
||||||
|
vblendvpd %xmm3, %xmm6, %xmm1, %xmm1
|
||||||
|
vxorpd %xmm5, %xmm2, %xmm2
|
||||||
|
vshufps $78, %xmm2, %xmm2, %xmm3
|
||||||
|
vblendvpd %xmm4, %xmm3, %xmm2, %xmm2
|
||||||
|
vxorpd %xmm5, %xmm2, %xmm2
|
||||||
|
vmulpd %xmm2, %xmm1, %xmm1
|
||||||
|
jmp *(%rbp)
|
||||||
|
.LCPI1_1:
|
||||||
|
.quad -9223372036854775808
|
||||||
|
.LBB1_2:
|
||||||
|
vshufpd $1, %xmm1, %xmm1, %xmm4
|
||||||
|
vmovddup %xmm2, %xmm2
|
||||||
|
vmulpd %xmm4, %xmm2, %xmm2
|
||||||
|
vmulpd %xmm1, %xmm3, %xmm1
|
||||||
|
vmaxpd %xmm1, %xmm2, %xmm1
|
||||||
|
jmp *(%rbp)
|
9
brush-strokes/cbits/rounding.c
Normal file
9
brush-strokes/cbits/rounding.c
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
|
||||||
|
// Function to set the CSR rounding mode for SSE operations
|
||||||
|
void set_sse_rounding_mode(unsigned int rounding_mode) {
|
||||||
|
unsigned int csr = _mm_getcsr();
|
||||||
|
csr &= ~_MM_ROUND_MASK; // Clear existing rounding mode bits
|
||||||
|
csr |= rounding_mode; // Set new rounding mode
|
||||||
|
_mm_setcsr(csr);
|
||||||
|
}
|
|
@ -51,8 +51,15 @@ import Bench.Types
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
pattern ROUND_UP :: Word
|
||||||
|
pattern ROUND_UP = 0x4000
|
||||||
|
|
||||||
|
foreign import ccall "set_sse_rounding_mode"
|
||||||
|
setSSERoundingMode :: Word -> IO ()
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = withCP65001 $ do
|
main = withCP65001 $ do
|
||||||
|
setSSERoundingMode ROUND_UP
|
||||||
putStrLn "Running cusp-finding benchmarks."
|
putStrLn "Running cusp-finding benchmarks."
|
||||||
for_ benchGroups $ \ ( groupName, benchGroupCases ) -> do
|
for_ benchGroups $ \ ( groupName, benchGroupCases ) -> do
|
||||||
putStrLn $ unlines
|
putStrLn $ unlines
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
{-# LANGUAGE CPP #-}
|
{-# LANGUAGE CPP #-}
|
||||||
|
{-# LANGUAGE GHCForeignImportPrim #-}
|
||||||
|
{-# LANGUAGE UnliftedFFITypes #-}
|
||||||
|
|
||||||
module Math.Interval.Internal.SIMD
|
module Math.Interval.Internal.SIMD
|
||||||
( 𝕀(𝕀, inf, sup)
|
( 𝕀(𝕀, inf, sup)
|
||||||
|
@ -12,7 +14,7 @@ import qualified Prelude
|
||||||
import GHC.Exts
|
import GHC.Exts
|
||||||
( isTrue#
|
( isTrue#
|
||||||
, Double(D#), Double#
|
, Double(D#), Double#
|
||||||
, (<=##), (*##), negateDouble#
|
, (<=##), negateDouble#
|
||||||
, DoubleX2#
|
, DoubleX2#
|
||||||
, packDoubleX2#, unpackDoubleX2#
|
, packDoubleX2#, unpackDoubleX2#
|
||||||
, broadcastDoubleX2#
|
, broadcastDoubleX2#
|
||||||
|
@ -40,26 +42,31 @@ import Math.Ring
|
||||||
|
|
||||||
-- | A non-empty interval of real numbers (possibly unbounded).
|
-- | A non-empty interval of real numbers (possibly unbounded).
|
||||||
data 𝕀 = MkI DoubleX2#
|
data 𝕀 = MkI DoubleX2#
|
||||||
-- Interval [lo..hi] represented as (hi, -lo)
|
-- Interval [lo..hi] represented as (-lo, hi)
|
||||||
|
|
||||||
|
foreign import prim "in2_mul8"
|
||||||
|
mulI :: DoubleX2# -> DoubleX2# -> DoubleX2#
|
||||||
|
|
||||||
instance NFData 𝕀 where
|
instance NFData 𝕀 where
|
||||||
rnf ( MkI !_ ) = ()
|
rnf ( MkI !_ ) = ()
|
||||||
|
|
||||||
{-# COMPLETE PackI #-}
|
{-# COMPLETE PackI #-}
|
||||||
|
{-# INLINE PackI #-}
|
||||||
pattern PackI :: Double# -> Double# -> 𝕀
|
pattern PackI :: Double# -> Double# -> 𝕀
|
||||||
pattern PackI hi m_lo <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# hi, m_lo #) )
|
pattern PackI m_lo hi <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# m_lo, hi #) )
|
||||||
where
|
where
|
||||||
PackI hi m_lo = MkI ( packDoubleX2# (# hi, m_lo #) )
|
PackI m_lo hi = MkI ( packDoubleX2# (# m_lo, hi #) )
|
||||||
|
|
||||||
|
|
||||||
{-# COMPLETE 𝕀 #-}
|
{-# COMPLETE 𝕀 #-}
|
||||||
|
{-# INLINE 𝕀 #-}
|
||||||
pattern 𝕀 :: Double -> Double -> 𝕀
|
pattern 𝕀 :: Double -> Double -> 𝕀
|
||||||
pattern 𝕀 { inf, sup } <- ( ( \ ( PackI hi m_lo ) -> (# D# ( negateDouble# m_lo ), D# hi #) )
|
pattern 𝕀 { inf, sup } <- ( ( \ ( PackI m_lo hi ) -> (# D# ( negateDouble# m_lo ), D# hi #) )
|
||||||
-> (# inf, sup #) )
|
-> (# inf, sup #) )
|
||||||
where
|
where
|
||||||
𝕀 (D# lo) (D# hi) =
|
𝕀 (D# lo) (D# hi) =
|
||||||
let m_lo = negateDouble# lo
|
let m_lo = negateDouble# lo
|
||||||
in PackI hi m_lo
|
in PackI m_lo hi
|
||||||
|
|
||||||
deriving via ViaPrelude 𝕀
|
deriving via ViaPrelude 𝕀
|
||||||
instance AbelianGroup 𝕀
|
instance AbelianGroup 𝕀
|
||||||
|
@ -73,56 +80,37 @@ instance Prelude.Num 𝕀 where
|
||||||
fromInteger i =
|
fromInteger i =
|
||||||
let !j = Prelude.fromInteger i
|
let !j = Prelude.fromInteger i
|
||||||
in 𝕀 j j
|
in 𝕀 j j
|
||||||
abs x@(PackI hi m_lo)
|
abs x@(PackI m_lo hi)
|
||||||
| D# m_lo <= 0
|
| D# m_lo <= 0
|
||||||
= PackI hi m_lo
|
= PackI m_lo hi
|
||||||
| D# hi <= 0
|
| D# hi <= 0
|
||||||
= Prelude.negate x
|
= Prelude.negate x
|
||||||
| otherwise
|
| otherwise
|
||||||
= 𝕀 0 (max (D# m_lo) (D# hi))
|
= 𝕀 0 ( D# ( maxDouble# m_lo hi ) )
|
||||||
signum _ = error "No implementation of signum for intervals"
|
signum _ = error "No implementation of signum for intervals"
|
||||||
|
|
||||||
|
|
||||||
instance Ring 𝕀 where
|
instance Ring 𝕀 where
|
||||||
( PackI b ma ) * ( PackI d mc ) =
|
( MkI x ) * ( MkI y ) = error "TODO: Cabal bug #5623 if used in TH" --MkI ( mulI x y )
|
||||||
case ( zeroPos b ma, zeroPos d mc ) of
|
-- This uses the C code from Lockless Inc.
|
||||||
-- From "Fast and Correct SIMD Algorithms for Interval Arithmetic"
|
-- TODO: compare with
|
||||||
-- Frédéric Goualard, 2008
|
-- "Fast and Correct SIMD Algorithms for Interval Arithmetic"
|
||||||
( LT, LT ) -> PackI (ma *## mc) ( ( negateDouble# b ) *## d)
|
-- Frédéric Goualard, 2008
|
||||||
( LT, EQ ) -> PackI (ma *## mc) ( ma *## d )
|
|
||||||
( LT, GT ) -> PackI ( b *## ( negateDouble# mc ) ) ( ma *## d )
|
|
||||||
( EQ, LT ) -> PackI (ma *## mc) ( b *## mc )
|
|
||||||
( EQ, EQ ) -> PackI
|
|
||||||
( maxDouble# ( ma *## d ) ( b *## mc ) )
|
|
||||||
( maxDouble# ( ma *## mc ) ( b *## d ) )
|
|
||||||
( EQ, GT ) -> PackI ( b *## d ) ( ma *## d )
|
|
||||||
( GT, LT ) -> PackI ( ( negateDouble# ma ) *## d ) ( b *## mc )
|
|
||||||
( GT, EQ ) -> PackI ( b *## d ) ( b *## mc )
|
|
||||||
( GT, GT ) -> PackI ( b *## d ) ( ( negateDouble# ma ) *## mc )
|
|
||||||
where
|
|
||||||
zeroPos :: Double# -> Double# -> Ordering
|
|
||||||
zeroPos hi m_lo
|
|
||||||
| isTrue# (hi <=## 0.0##)
|
|
||||||
= LT
|
|
||||||
| isTrue# (m_lo <=## 0.0##)
|
|
||||||
= GT
|
|
||||||
| otherwise
|
|
||||||
= EQ
|
|
||||||
iv ^ 1 = iv
|
iv ^ 1 = iv
|
||||||
PackI hi m_lo ^ n
|
PackI m_lo hi ^ n
|
||||||
| odd n || isTrue# (m_lo <=## 0.0##)
|
| odd n || isTrue# (m_lo <=## 0.0##)
|
||||||
, let !(D# m_lo') = D# m_lo Prelude.^ n
|
, let !(D# m_lo') = D# m_lo Prelude.^ n
|
||||||
!(D# hi' ) = D# hi Prelude.^ n
|
!(D# hi' ) = D# hi Prelude.^ n
|
||||||
= PackI hi' m_lo'
|
= PackI m_lo' hi'
|
||||||
| isTrue# (hi <=## 0.0##)
|
| isTrue# (hi <=## 0.0##)
|
||||||
, let !(D# m_lo') = (D# hi ) Prelude.^ n
|
, let !(D# m_lo') = (D# hi ) Prelude.^ n
|
||||||
!(D# hi') = (D# m_lo) Prelude.^ n
|
!(D# hi') = (D# m_lo) Prelude.^ n
|
||||||
= PackI hi' m_lo'
|
= PackI m_lo' hi'
|
||||||
| otherwise
|
| otherwise
|
||||||
, let !(D# hi1) = (D# m_lo) Prelude.^ n
|
, let !(D# hi1) = (D# m_lo) Prelude.^ n
|
||||||
!(D# hi2) = (D# hi ) Prelude.^ n
|
!(D# hi2) = (D# hi ) Prelude.^ n
|
||||||
hi' = maxDouble# hi1 hi2
|
hi' = maxDouble# hi1 hi2
|
||||||
= PackI hi' 0.0##
|
= PackI 0.0## hi'
|
||||||
|
|
||||||
instance Prelude.Fractional 𝕀 where
|
instance Prelude.Fractional 𝕀 where
|
||||||
fromRational r =
|
fromRational r =
|
||||||
|
|
|
@ -16,18 +16,21 @@ source-repository-package
|
||||||
tag: ec171cd5d185309692b745e2e2f291eab4038fb9
|
tag: ec171cd5d185309692b745e2e2f291eab4038fb9
|
||||||
|
|
||||||
allow-newer:
|
allow-newer:
|
||||||
*:base, *:template-haskell, *:ghc-prim,
|
*:base, *:template-haskell, *:ghc-prim, *:Cabal,
|
||||||
acts:deepseq,
|
acts:deepseq,
|
||||||
digit:lens,
|
digit:lens,
|
||||||
eigen:primitive,
|
eigen:primitive,
|
||||||
eigen:transformers,
|
eigen:transformers,
|
||||||
|
falsify:tasty,
|
||||||
gi-cairo-connector:mtl,
|
gi-cairo-connector:mtl,
|
||||||
hedgehog:containers,
|
hedgehog:containers,
|
||||||
hedgehog:resourcet,
|
hedgehog:resourcet,
|
||||||
|
indexed-traversable:containers,
|
||||||
JuicyPixels:zlib,
|
JuicyPixels:zlib,
|
||||||
natural:lens,
|
natural:lens,
|
||||||
natural:semigroupoids,
|
natural:semigroupoids,
|
||||||
records-sop:deepseq,
|
records-sop:deepseq,
|
||||||
|
th-abstraction:containers,
|
||||||
waargonaut:bifunctors,
|
waargonaut:bifunctors,
|
||||||
waargonaut:bytestring,
|
waargonaut:bytestring,
|
||||||
waargonaut:containers,
|
waargonaut:containers,
|
||||||
|
|
Loading…
Reference in a new issue