diff --git a/brush-strokes/brush-strokes.cabal b/brush-strokes/brush-strokes.cabal index 3fef7fe..734724b 100644 --- a/brush-strokes/brush-strokes.cabal +++ b/brush-strokes/brush-strokes.cabal @@ -14,7 +14,7 @@ extra-source-files: flag use-simd description: Use SIMD instructions to implement interval arithmetic. - default: False + default: True manual: True flag use-fma @@ -36,6 +36,8 @@ common common ^>= 0.2.1 , containers >= 0.6.0.1 && < 0.8 + , deepseq + >= 1.4.4.0 && < 1.6 , tree-view ^>= 0.5 @@ -107,12 +109,6 @@ common common -DUSE_SIMD ghc-options: -mavx - build-depends: - base - >= 4.20 - -- 4.21 - , ghc-prim - >= 0.11 if flag(asserts) cpp-options: @@ -180,6 +176,10 @@ library if flag(use-simd) other-modules: Math.Interval.Internal.SIMD + build-depends: + simd-interval + , ghc-prim + >= 0.11 if flag(use-fma) && !flag(use-simd) other-modules: @@ -194,8 +194,6 @@ library >= 2.18 && < 3.0 , bifunctors >= 5.5.4 && < 5.7 - , deepseq - >= 1.4.4.0 && < 1.6 , directory >= 1.3.7.1 && < 1.4 , eigen @@ -225,9 +223,6 @@ library cxx-sources: -- Walter's C++ code for 2D linear systems of interval equations cbits/lp_2d.cpp - asm-sources: - -- X86 assembly for interval multiplication using SIMD instructions - cbits/mul.S cxx-options: -std=c++20 -mavx2 @@ -246,6 +241,35 @@ library build-depends: system-cxx-std-lib +-- Separate library to implement interval arithmetic using SIMD, +-- to work around TH linking bugs: +-- +-- - https://gitlab.haskell.org/ghc/ghc/-/issues/25240 +-- - https://github.com/haskell/cabal/issues/5623 +library simd-interval + + import: common + + hs-source-dirs: + src/simd-interval + + exposed-modules: + Math.Interval.Internal.SIMD.Internal + + include-dirs: + cbits + asm-sources: + -- X86 assembly for interval multiplication using SIMD instructions + cbits/mul.S + + build-depends: + base + >= 4.20 + -- 4.21 + , deepseq + , ghc-prim + >= 0.11 + --executable convert-metafont -- -- import: diff --git a/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs b/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs index 7d6c89c..0939179 100644 --- a/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs +++ b/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs @@ -2,6 +2,8 @@ {-# LANGUAGE GHCForeignImportPrim #-} {-# LANGUAGE UnliftedFFITypes #-} +{-# OPTIONS_GHC -Wno-orphans #-} + module Math.Interval.Internal.SIMD ( 𝕀(𝕀, inf, sup) , scaleInterval @@ -12,22 +14,13 @@ module Math.Interval.Internal.SIMD import Prelude hiding ( Num(..), Fractional(..), Floating(..) ) import qualified Prelude import GHC.Exts - ( isTrue# - , Double(D#), Double# - , (<=##), negateDouble# - , DoubleX2# - , packDoubleX2#, unpackDoubleX2# - , broadcastDoubleX2# - , plusDoubleX2#, timesDoubleX2# + ( (<=##), isTrue# + , Double(D#) ) -- ghc-prim import GHC.Prim - ( maxDouble#, shuffleDoubleX2# ) - --- deepseq -import Control.DeepSeq - ( NFData(..) ) + ( maxDouble# ) -- rounded-hw import Numeric.Rounded.Hardware @@ -38,64 +31,18 @@ import qualified Numeric.Rounded.Hardware.Interval.NonEmpty as Interval -- brush-strokes import Math.Ring +-- brush-strokes:simd-interval +import Math.Interval.Internal.SIMD.Internal + -------------------------------------------------------------------------------- --- | A non-empty interval of real numbers (possibly unbounded). -data 𝕀 = MkI DoubleX2# - -- Interval [lo..hi] represented as (-lo, hi) - -foreign import prim "in2_mul8" - mulI :: DoubleX2# -> DoubleX2# -> DoubleX2# - -instance NFData 𝕀 where - rnf ( MkI !_ ) = () - -{-# COMPLETE PackI #-} -{-# INLINE PackI #-} -pattern PackI :: Double# -> Double# -> 𝕀 -pattern PackI m_lo hi <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# m_lo, hi #) ) - where - PackI m_lo hi = MkI ( packDoubleX2# (# m_lo, hi #) ) - - -{-# COMPLETE 𝕀 #-} -{-# INLINE 𝕀 #-} -pattern 𝕀 :: Double -> Double -> 𝕀 -pattern 𝕀 { inf, sup } <- ( ( \ ( PackI m_lo hi ) -> (# D# ( negateDouble# m_lo ), D# hi #) ) - -> (# inf, sup #) ) - where - 𝕀 (D# lo) (D# hi) = - let m_lo = negateDouble# lo - in PackI m_lo hi - deriving via ViaPrelude 𝕀 instance AbelianGroup 𝕀 deriving via ViaPrelude 𝕀 instance Field 𝕀 -instance Prelude.Num 𝕀 where - MkI x + MkI y = MkI ( x `plusDoubleX2#` y ) - negate ( MkI x ) = MkI ( shuffleDoubleX2# x x (# 1#, 0# #) ) - (*) = (*) - fromInteger i = - let !j = Prelude.fromInteger i - in 𝕀 j j - abs x@(PackI m_lo hi) - | D# m_lo <= 0 - = PackI m_lo hi - | D# hi <= 0 - = Prelude.negate x - | otherwise - = 𝕀 0 ( D# ( maxDouble# m_lo hi ) ) - signum _ = error "No implementation of signum for intervals" - - instance Ring 𝕀 where - ( MkI x ) * ( MkI y ) = error "TODO: Cabal bug #5623 if used in TH" --MkI ( mulI x y ) - -- This uses the C code from Lockless Inc. - -- TODO: compare with - -- "Fast and Correct SIMD Algorithms for Interval Arithmetic" - -- FrΓ©dΓ©ric Goualard, 2008 + (*) = (Prelude.*) iv ^ 1 = iv PackI m_lo hi ^ n | odd n || isTrue# (m_lo <=## 0.0##) @@ -103,7 +50,7 @@ instance Ring 𝕀 where !(D# hi' ) = D# hi Prelude.^ n = PackI m_lo' hi' | isTrue# (hi <=## 0.0##) - , let !(D# m_lo') = (D# hi ) Prelude.^ n + , let !(D# m_lo') = Prelude.negate $ (D# hi) Prelude.^ n !(D# hi') = (D# m_lo) Prelude.^ n = PackI m_lo' hi' | otherwise @@ -112,25 +59,6 @@ instance Ring 𝕀 where hi' = maxDouble# hi1 hi2 = PackI 0.0## hi' -instance Prelude.Fractional 𝕀 where - fromRational r = - let q = Prelude.fromRational r - in 𝕀 q q - recip ( 𝕀 lo hi ) -#ifdef ASSERTS - | lo == 0 - = 𝕀 ( Prelude.recip hi ) ( 1 Prelude./ 0 ) - | hi == 0 - = 𝕀 ( -1 Prelude./ 0 ) ( Prelude.recip lo ) - | lo > 0 || hi < 0 -#endif - = 𝕀 ( Prelude.recip hi ) ( Prelude.recip lo ) -#ifdef ASSERTS - | otherwise - = error "BAD interval recip; should use extendedRecip instead" -#endif - p / q = p * Prelude.recip q - instance Floating 𝕀 where sqrt = withHW Prelude.sqrt @@ -140,22 +68,19 @@ instance Transcendental 𝕀 where sin = withHW Prelude.sin atan = withHW Prelude.atan +{- +TODO: consider alternatives for sin/cos, such as: + + - https://github.com/JishinMaster/simd_utils/blob/160c50f07e08d2077ae4368f0aed2f10f7173c67/simd_utils_sse_double.h#L530 + - Intel SVML + - metalibm + - sleef + +-} + {-# INLINE withHW #-} -- | Internal function: use @rounded-hw@ to define a function on intervals. withHW :: (Interval.Interval Double -> Interval.Interval Double) -> 𝕀 -> 𝕀 withHW f = \ ( 𝕀 lo hi ) -> case f ( Interval.I ( Rounded lo ) ( Rounded hi ) ) of Interval.I ( Rounded x ) ( Rounded y ) -> 𝕀 x y - -scaleInterval :: Double -> 𝕀 -> 𝕀 -scaleInterval s@(D# s#) i = - case compare s 0 of - LT -> case negate i of { MkI x -> MkI ( broadcastDoubleX2# ( negateDouble# s# ) `timesDoubleX2#` x ) } - EQ -> 𝕀 0 0 - GT -> case i of { MkI x -> MkI ( broadcastDoubleX2# s# `timesDoubleX2#` x ) } - -{- -TODO: consider using https://github.com/JishinMaster/simd_utils/blob/160c50f07e08d2077ae4368f0aed2f10f7173c67/simd_utils_sse_double.h#L530 -for sin/cos? Or Intel SVML? Or metalibm? - --} diff --git a/brush-strokes/src/lib/Math/Linear.hs b/brush-strokes/src/lib/Math/Linear.hs index 95a1d6c..5dd10dc 100644 --- a/brush-strokes/src/lib/Math/Linear.hs +++ b/brush-strokes/src/lib/Math/Linear.hs @@ -1,7 +1,6 @@ {-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE UndecidableInstances #-} module Math.Linear diff --git a/brush-strokes/src/simd-interval/Math/Interval/Internal/SIMD/Internal.hs b/brush-strokes/src/simd-interval/Math/Interval/Internal/SIMD/Internal.hs new file mode 100644 index 0000000..8c5eb27 --- /dev/null +++ b/brush-strokes/src/simd-interval/Math/Interval/Internal/SIMD/Internal.hs @@ -0,0 +1,106 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE GHCForeignImportPrim #-} +{-# LANGUAGE UnliftedFFITypes #-} + +module Math.Interval.Internal.SIMD.Internal + ( 𝕀(𝕀, MkI, PackI, inf, sup) + , scaleInterval + ) + where + +-- base +import Prelude hiding ( Num(..), Fractional(..), Floating(..) ) +import qualified Prelude +import GHC.Exts + ( Double(D#), Double# + , negateDouble# + , DoubleX2# + , packDoubleX2#, unpackDoubleX2# + , broadcastDoubleX2# + , plusDoubleX2#, timesDoubleX2# + ) + +-- ghc-prim +import GHC.Prim + ( maxDouble#, shuffleDoubleX2# ) + +-- deepseq +import Control.DeepSeq + ( NFData(..) ) + +-------------------------------------------------------------------------------- + +-- | A non-empty interval of real numbers (possibly unbounded). +data 𝕀 = MkI DoubleX2# + -- Interval [lo..hi] represented as (-lo, hi) + +instance NFData 𝕀 where + rnf ( MkI !_ ) = () + +foreign import prim "in2_mul8" + mulI :: DoubleX2# -> DoubleX2# -> DoubleX2# + +{-# COMPLETE PackI #-} +{-# INLINE PackI #-} +pattern PackI :: Double# -> Double# -> 𝕀 +pattern PackI m_lo hi <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# m_lo, hi #) ) + where + PackI m_lo hi = MkI ( packDoubleX2# (# m_lo, hi #) ) + + +{-# COMPLETE 𝕀 #-} +{-# INLINE 𝕀 #-} +pattern 𝕀 :: Double -> Double -> 𝕀 +pattern 𝕀 { inf, sup } <- ( ( \ ( PackI m_lo hi ) -> (# D# ( negateDouble# m_lo ), D# hi #) ) + -> (# inf, sup #) ) + where + 𝕀 (D# lo) (D# hi) = + let m_lo = negateDouble# lo + in PackI m_lo hi + +instance Prelude.Num 𝕀 where + MkI x + MkI y = MkI ( x `plusDoubleX2#` y ) + negate ( MkI x ) = MkI ( shuffleDoubleX2# x x (# 1#, 0# #) ) + ( MkI x ) * ( MkI y ) = MkI ( mulI x y ) + -- This uses the C code from Lockless Inc. + -- TODO: compare with + -- "Fast and Correct SIMD Algorithms for Interval Arithmetic" + -- FrΓ©dΓ©ric Goualard, 2008 + fromInteger i = + let !j = Prelude.fromInteger i + in 𝕀 j j + abs x@(PackI m_lo hi) + | D# m_lo <= 0 + = PackI m_lo hi + | D# hi <= 0 + = Prelude.negate x + | otherwise + = 𝕀 0 ( D# ( maxDouble# m_lo hi ) ) + signum _ = error "No implementation of signum for intervals" + +instance Prelude.Fractional 𝕀 where + fromRational r = + let q = Prelude.fromRational r + in 𝕀 q q + recip ( 𝕀 lo hi ) +#ifdef ASSERTS + | lo == 0 + = 𝕀 ( Prelude.recip hi ) ( 1 Prelude./ 0 ) + | hi == 0 + = 𝕀 ( -1 Prelude./ 0 ) ( Prelude.recip lo ) + | lo > 0 || hi < 0 +#endif + = 𝕀 ( Prelude.recip hi ) ( Prelude.recip lo ) +#ifdef ASSERTS + | otherwise + = error "BAD interval recip; should use extendedRecip instead" +#endif + p / q = p Prelude.* Prelude.recip q + +scaleInterval :: Double -> 𝕀 -> 𝕀 +scaleInterval s@(D# s#) i = + case compare s 0 of + LT -> case Prelude.negate i of { MkI x -> MkI ( broadcastDoubleX2# ( negateDouble# s# ) `timesDoubleX2#` x ) } + EQ -> 𝕀 0 0 + GT -> case i of { MkI x -> MkI ( broadcastDoubleX2# s# `timesDoubleX2#` x ) } diff --git a/cabal.project b/cabal.project index d78a798..709450b 100644 --- a/cabal.project +++ b/cabal.project @@ -25,6 +25,7 @@ allow-newer: gi-cairo-connector:mtl, hedgehog:containers, hedgehog:resourcet, + hw-balancedparens:hedgehog, indexed-traversable:containers, JuicyPixels:zlib, natural:lens, @@ -58,13 +59,13 @@ source-repository-package -- GHC HEAD -- -------------- -repository head.hackage.ghc.haskell.org - url: https://ghc.gitlab.haskell.org/head.hackage/ - secure: True - key-threshold: 3 - root-keys: - f76d08be13e9a61a377a85e2fb63f4c5435d40f8feb3e12eb05905edb8cdea89 - 26021a13b401500c8eb2761ca95c61f2d625bfef951b939a8124ed12ecf07329 - 7541f32a4ccca4f97aea3b22f5e593ba2c0267546016b992dfadcd2fe944e55d - -active-repositories: hackage.haskell.org, head.hackage.ghc.haskell.org:override +-- repository head.hackage.ghc.haskell.org +-- url: https://ghc.gitlab.haskell.org/head.hackage/ +-- secure: True +-- key-threshold: 3 +-- root-keys: +-- f76d08be13e9a61a377a85e2fb63f4c5435d40f8feb3e12eb05905edb8cdea89 +-- 7541f32a4ccca4f97aea3b22f5e593ba2c0267546016b992dfadcd2fe944e55d +-- 26021a13b401500c8eb2761ca95c61f2d625bfef951b939a8124ed12ecf07329 +-- +-- active-repositories: hackage.haskell.org, head.hackage.ghc.haskell.org