workaround for TH linking bugs

This commit is contained in:
sheaf 2024-09-06 13:08:40 +02:00
parent 9ff25a25aa
commit bb9b381cb5
5 changed files with 173 additions and 118 deletions

View file

@ -14,7 +14,7 @@ extra-source-files:
flag use-simd flag use-simd
description: Use SIMD instructions to implement interval arithmetic. description: Use SIMD instructions to implement interval arithmetic.
default: False default: True
manual: True manual: True
flag use-fma flag use-fma
@ -36,6 +36,8 @@ common common
^>= 0.2.1 ^>= 0.2.1
, containers , containers
>= 0.6.0.1 && < 0.8 >= 0.6.0.1 && < 0.8
, deepseq
>= 1.4.4.0 && < 1.6
, tree-view , tree-view
^>= 0.5 ^>= 0.5
@ -107,12 +109,6 @@ common common
-DUSE_SIMD -DUSE_SIMD
ghc-options: ghc-options:
-mavx -mavx
build-depends:
base
>= 4.20
-- 4.21
, ghc-prim
>= 0.11
if flag(asserts) if flag(asserts)
cpp-options: cpp-options:
@ -180,6 +176,10 @@ library
if flag(use-simd) if flag(use-simd)
other-modules: other-modules:
Math.Interval.Internal.SIMD Math.Interval.Internal.SIMD
build-depends:
simd-interval
, ghc-prim
>= 0.11
if flag(use-fma) && !flag(use-simd) if flag(use-fma) && !flag(use-simd)
other-modules: other-modules:
@ -194,8 +194,6 @@ library
>= 2.18 && < 3.0 >= 2.18 && < 3.0
, bifunctors , bifunctors
>= 5.5.4 && < 5.7 >= 5.5.4 && < 5.7
, deepseq
>= 1.4.4.0 && < 1.6
, directory , directory
>= 1.3.7.1 && < 1.4 >= 1.3.7.1 && < 1.4
, eigen , eigen
@ -225,9 +223,6 @@ library
cxx-sources: cxx-sources:
-- Walter's C++ code for 2D linear systems of interval equations -- Walter's C++ code for 2D linear systems of interval equations
cbits/lp_2d.cpp cbits/lp_2d.cpp
asm-sources:
-- X86 assembly for interval multiplication using SIMD instructions
cbits/mul.S
cxx-options: cxx-options:
-std=c++20 -std=c++20
-mavx2 -mavx2
@ -246,6 +241,35 @@ library
build-depends: build-depends:
system-cxx-std-lib system-cxx-std-lib
-- Separate library to implement interval arithmetic using SIMD,
-- to work around TH linking bugs:
--
-- - https://gitlab.haskell.org/ghc/ghc/-/issues/25240
-- - https://github.com/haskell/cabal/issues/5623
library simd-interval
import: common
hs-source-dirs:
src/simd-interval
exposed-modules:
Math.Interval.Internal.SIMD.Internal
include-dirs:
cbits
asm-sources:
-- X86 assembly for interval multiplication using SIMD instructions
cbits/mul.S
build-depends:
base
>= 4.20
-- 4.21
, deepseq
, ghc-prim
>= 0.11
--executable convert-metafont --executable convert-metafont
-- --
-- import: -- import:

View file

@ -2,6 +2,8 @@
{-# LANGUAGE GHCForeignImportPrim #-} {-# LANGUAGE GHCForeignImportPrim #-}
{-# LANGUAGE UnliftedFFITypes #-} {-# LANGUAGE UnliftedFFITypes #-}
{-# OPTIONS_GHC -Wno-orphans #-}
module Math.Interval.Internal.SIMD module Math.Interval.Internal.SIMD
( 𝕀(𝕀, inf, sup) ( 𝕀(𝕀, inf, sup)
, scaleInterval , scaleInterval
@ -12,22 +14,13 @@ module Math.Interval.Internal.SIMD
import Prelude hiding ( Num(..), Fractional(..), Floating(..) ) import Prelude hiding ( Num(..), Fractional(..), Floating(..) )
import qualified Prelude import qualified Prelude
import GHC.Exts import GHC.Exts
( isTrue# ( (<=##), isTrue#
, Double(D#), Double# , Double(D#)
, (<=##), negateDouble#
, DoubleX2#
, packDoubleX2#, unpackDoubleX2#
, broadcastDoubleX2#
, plusDoubleX2#, timesDoubleX2#
) )
-- ghc-prim -- ghc-prim
import GHC.Prim import GHC.Prim
( maxDouble#, shuffleDoubleX2# ) ( maxDouble# )
-- deepseq
import Control.DeepSeq
( NFData(..) )
-- rounded-hw -- rounded-hw
import Numeric.Rounded.Hardware import Numeric.Rounded.Hardware
@ -38,64 +31,18 @@ import qualified Numeric.Rounded.Hardware.Interval.NonEmpty as Interval
-- brush-strokes -- brush-strokes
import Math.Ring import Math.Ring
-- brush-strokes:simd-interval
import Math.Interval.Internal.SIMD.Internal
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- | A non-empty interval of real numbers (possibly unbounded).
data 𝕀 = MkI DoubleX2#
-- Interval [lo..hi] represented as (-lo, hi)
foreign import prim "in2_mul8"
mulI :: DoubleX2# -> DoubleX2# -> DoubleX2#
instance NFData 𝕀 where
rnf ( MkI !_ ) = ()
{-# COMPLETE PackI #-}
{-# INLINE PackI #-}
pattern PackI :: Double# -> Double# -> 𝕀
pattern PackI m_lo hi <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# m_lo, hi #) )
where
PackI m_lo hi = MkI ( packDoubleX2# (# m_lo, hi #) )
{-# COMPLETE 𝕀 #-}
{-# INLINE 𝕀 #-}
pattern 𝕀 :: Double -> Double -> 𝕀
pattern 𝕀 { inf, sup } <- ( ( \ ( PackI m_lo hi ) -> (# D# ( negateDouble# m_lo ), D# hi #) )
-> (# inf, sup #) )
where
𝕀 (D# lo) (D# hi) =
let m_lo = negateDouble# lo
in PackI m_lo hi
deriving via ViaPrelude 𝕀 deriving via ViaPrelude 𝕀
instance AbelianGroup 𝕀 instance AbelianGroup 𝕀
deriving via ViaPrelude 𝕀 deriving via ViaPrelude 𝕀
instance Field 𝕀 instance Field 𝕀
instance Prelude.Num 𝕀 where
MkI x + MkI y = MkI ( x `plusDoubleX2#` y )
negate ( MkI x ) = MkI ( shuffleDoubleX2# x x (# 1#, 0# #) )
(*) = (*)
fromInteger i =
let !j = Prelude.fromInteger i
in 𝕀 j j
abs x@(PackI m_lo hi)
| D# m_lo <= 0
= PackI m_lo hi
| D# hi <= 0
= Prelude.negate x
| otherwise
= 𝕀 0 ( D# ( maxDouble# m_lo hi ) )
signum _ = error "No implementation of signum for intervals"
instance Ring 𝕀 where instance Ring 𝕀 where
( MkI x ) * ( MkI y ) = error "TODO: Cabal bug #5623 if used in TH" --MkI ( mulI x y ) (*) = (Prelude.*)
-- This uses the C code from Lockless Inc.
-- TODO: compare with
-- "Fast and Correct SIMD Algorithms for Interval Arithmetic"
-- Frédéric Goualard, 2008
iv ^ 1 = iv iv ^ 1 = iv
PackI m_lo hi ^ n PackI m_lo hi ^ n
| odd n || isTrue# (m_lo <=## 0.0##) | odd n || isTrue# (m_lo <=## 0.0##)
@ -103,7 +50,7 @@ instance Ring 𝕀 where
!(D# hi' ) = D# hi Prelude.^ n !(D# hi' ) = D# hi Prelude.^ n
= PackI m_lo' hi' = PackI m_lo' hi'
| isTrue# (hi <=## 0.0##) | isTrue# (hi <=## 0.0##)
, let !(D# m_lo') = (D# hi ) Prelude.^ n , let !(D# m_lo') = Prelude.negate $ (D# hi) Prelude.^ n
!(D# hi') = (D# m_lo) Prelude.^ n !(D# hi') = (D# m_lo) Prelude.^ n
= PackI m_lo' hi' = PackI m_lo' hi'
| otherwise | otherwise
@ -112,25 +59,6 @@ instance Ring 𝕀 where
hi' = maxDouble# hi1 hi2 hi' = maxDouble# hi1 hi2
= PackI 0.0## hi' = PackI 0.0## hi'
instance Prelude.Fractional 𝕀 where
fromRational r =
let q = Prelude.fromRational r
in 𝕀 q q
recip ( 𝕀 lo hi )
#ifdef ASSERTS
| lo == 0
= 𝕀 ( Prelude.recip hi ) ( 1 Prelude./ 0 )
| hi == 0
= 𝕀 ( -1 Prelude./ 0 ) ( Prelude.recip lo )
| lo > 0 || hi < 0
#endif
= 𝕀 ( Prelude.recip hi ) ( Prelude.recip lo )
#ifdef ASSERTS
| otherwise
= error "BAD interval recip; should use extendedRecip instead"
#endif
p / q = p * Prelude.recip q
instance Floating 𝕀 where instance Floating 𝕀 where
sqrt = withHW Prelude.sqrt sqrt = withHW Prelude.sqrt
@ -140,22 +68,19 @@ instance Transcendental 𝕀 where
sin = withHW Prelude.sin sin = withHW Prelude.sin
atan = withHW Prelude.atan atan = withHW Prelude.atan
{-
TODO: consider alternatives for sin/cos, such as:
- https://github.com/JishinMaster/simd_utils/blob/160c50f07e08d2077ae4368f0aed2f10f7173c67/simd_utils_sse_double.h#L530
- Intel SVML
- metalibm
- sleef
-}
{-# INLINE withHW #-} {-# INLINE withHW #-}
-- | Internal function: use @rounded-hw@ to define a function on intervals. -- | Internal function: use @rounded-hw@ to define a function on intervals.
withHW :: (Interval.Interval Double -> Interval.Interval Double) -> 𝕀 -> 𝕀 withHW :: (Interval.Interval Double -> Interval.Interval Double) -> 𝕀 -> 𝕀
withHW f = \ ( 𝕀 lo hi ) -> withHW f = \ ( 𝕀 lo hi ) ->
case f ( Interval.I ( Rounded lo ) ( Rounded hi ) ) of case f ( Interval.I ( Rounded lo ) ( Rounded hi ) ) of
Interval.I ( Rounded x ) ( Rounded y ) -> 𝕀 x y Interval.I ( Rounded x ) ( Rounded y ) -> 𝕀 x y
scaleInterval :: Double -> 𝕀 -> 𝕀
scaleInterval s@(D# s#) i =
case compare s 0 of
LT -> case negate i of { MkI x -> MkI ( broadcastDoubleX2# ( negateDouble# s# ) `timesDoubleX2#` x ) }
EQ -> 𝕀 0 0
GT -> case i of { MkI x -> MkI ( broadcastDoubleX2# s# `timesDoubleX2#` x ) }
{-
TODO: consider using https://github.com/JishinMaster/simd_utils/blob/160c50f07e08d2077ae4368f0aed2f10f7173c67/simd_utils_sse_double.h#L530
for sin/cos? Or Intel SVML? Or metalibm?
-}

View file

@ -1,7 +1,6 @@
{-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE PolyKinds #-} {-# LANGUAGE PolyKinds #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE UndecidableInstances #-}
module Math.Linear module Math.Linear

View file

@ -0,0 +1,106 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GHCForeignImportPrim #-}
{-# LANGUAGE UnliftedFFITypes #-}
module Math.Interval.Internal.SIMD.Internal
( 𝕀(𝕀, MkI, PackI, inf, sup)
, scaleInterval
)
where
-- base
import Prelude hiding ( Num(..), Fractional(..), Floating(..) )
import qualified Prelude
import GHC.Exts
( Double(D#), Double#
, negateDouble#
, DoubleX2#
, packDoubleX2#, unpackDoubleX2#
, broadcastDoubleX2#
, plusDoubleX2#, timesDoubleX2#
)
-- ghc-prim
import GHC.Prim
( maxDouble#, shuffleDoubleX2# )
-- deepseq
import Control.DeepSeq
( NFData(..) )
--------------------------------------------------------------------------------
-- | A non-empty interval of real numbers (possibly unbounded).
data 𝕀 = MkI DoubleX2#
-- Interval [lo..hi] represented as (-lo, hi)
instance NFData 𝕀 where
rnf ( MkI !_ ) = ()
foreign import prim "in2_mul8"
mulI :: DoubleX2# -> DoubleX2# -> DoubleX2#
{-# COMPLETE PackI #-}
{-# INLINE PackI #-}
pattern PackI :: Double# -> Double# -> 𝕀
pattern PackI m_lo hi <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# m_lo, hi #) )
where
PackI m_lo hi = MkI ( packDoubleX2# (# m_lo, hi #) )
{-# COMPLETE 𝕀 #-}
{-# INLINE 𝕀 #-}
pattern 𝕀 :: Double -> Double -> 𝕀
pattern 𝕀 { inf, sup } <- ( ( \ ( PackI m_lo hi ) -> (# D# ( negateDouble# m_lo ), D# hi #) )
-> (# inf, sup #) )
where
𝕀 (D# lo) (D# hi) =
let m_lo = negateDouble# lo
in PackI m_lo hi
instance Prelude.Num 𝕀 where
MkI x + MkI y = MkI ( x `plusDoubleX2#` y )
negate ( MkI x ) = MkI ( shuffleDoubleX2# x x (# 1#, 0# #) )
( MkI x ) * ( MkI y ) = MkI ( mulI x y )
-- This uses the C code from Lockless Inc.
-- TODO: compare with
-- "Fast and Correct SIMD Algorithms for Interval Arithmetic"
-- Frédéric Goualard, 2008
fromInteger i =
let !j = Prelude.fromInteger i
in 𝕀 j j
abs x@(PackI m_lo hi)
| D# m_lo <= 0
= PackI m_lo hi
| D# hi <= 0
= Prelude.negate x
| otherwise
= 𝕀 0 ( D# ( maxDouble# m_lo hi ) )
signum _ = error "No implementation of signum for intervals"
instance Prelude.Fractional 𝕀 where
fromRational r =
let q = Prelude.fromRational r
in 𝕀 q q
recip ( 𝕀 lo hi )
#ifdef ASSERTS
| lo == 0
= 𝕀 ( Prelude.recip hi ) ( 1 Prelude./ 0 )
| hi == 0
= 𝕀 ( -1 Prelude./ 0 ) ( Prelude.recip lo )
| lo > 0 || hi < 0
#endif
= 𝕀 ( Prelude.recip hi ) ( Prelude.recip lo )
#ifdef ASSERTS
| otherwise
= error "BAD interval recip; should use extendedRecip instead"
#endif
p / q = p Prelude.* Prelude.recip q
scaleInterval :: Double -> 𝕀 -> 𝕀
scaleInterval s@(D# s#) i =
case compare s 0 of
LT -> case Prelude.negate i of { MkI x -> MkI ( broadcastDoubleX2# ( negateDouble# s# ) `timesDoubleX2#` x ) }
EQ -> 𝕀 0 0
GT -> case i of { MkI x -> MkI ( broadcastDoubleX2# s# `timesDoubleX2#` x ) }

View file

@ -25,6 +25,7 @@ allow-newer:
gi-cairo-connector:mtl, gi-cairo-connector:mtl,
hedgehog:containers, hedgehog:containers,
hedgehog:resourcet, hedgehog:resourcet,
hw-balancedparens:hedgehog,
indexed-traversable:containers, indexed-traversable:containers,
JuicyPixels:zlib, JuicyPixels:zlib,
natural:lens, natural:lens,
@ -58,13 +59,13 @@ source-repository-package
-- GHC HEAD -- -- GHC HEAD --
-------------- --------------
repository head.hackage.ghc.haskell.org -- repository head.hackage.ghc.haskell.org
url: https://ghc.gitlab.haskell.org/head.hackage/ -- url: https://ghc.gitlab.haskell.org/head.hackage/
secure: True -- secure: True
key-threshold: 3 -- key-threshold: 3
root-keys: -- root-keys:
f76d08be13e9a61a377a85e2fb63f4c5435d40f8feb3e12eb05905edb8cdea89 -- f76d08be13e9a61a377a85e2fb63f4c5435d40f8feb3e12eb05905edb8cdea89
26021a13b401500c8eb2761ca95c61f2d625bfef951b939a8124ed12ecf07329 -- 7541f32a4ccca4f97aea3b22f5e593ba2c0267546016b992dfadcd2fe944e55d
7541f32a4ccca4f97aea3b22f5e593ba2c0267546016b992dfadcd2fe944e55d -- 26021a13b401500c8eb2761ca95c61f2d625bfef951b939a8124ed12ecf07329
--
active-repositories: hackage.haskell.org, head.hackage.ghc.haskell.org:override -- active-repositories: hackage.haskell.org, head.hackage.ghc.haskell.org