diff --git a/MetaBrush.cabal b/MetaBrush.cabal
index 5893218..69eb3e2 100644
--- a/MetaBrush.cabal
+++ b/MetaBrush.cabal
@@ -161,7 +161,7 @@ common gtk
       , haskell-gi
            >= 0.26.10    && < 0.27
       , haskell-gi-base
-           >= 0.26.10    && < 0.27
+           >= 0.26.6     && < 0.27
 
     -- Workaround for https://github.com/haskell/cabal/issues/4237
     -- See https://github.com/commercialhaskell/stack/issues/2197
diff --git a/brush-strokes/brush-strokes.cabal b/brush-strokes/brush-strokes.cabal
index 5beaaf9..3fef7fe 100644
--- a/brush-strokes/brush-strokes.cabal
+++ b/brush-strokes/brush-strokes.cabal
@@ -10,11 +10,11 @@ description:
   Computing brush strokes using Bézier curves.
 
 extra-source-files:
-  cbits/**/*.{cpp, hpp}
+  cbits/**/*.{S,c,h,cpp,hpp}
 
 flag use-simd
   description: Use SIMD instructions to implement interval arithmetic.
-  default: True
+  default: False
   manual:  True
 
 flag use-fma
@@ -106,7 +106,7 @@ common common
       cpp-options:
           -DUSE_SIMD
       ghc-options:
-          -mavx2
+          -mavx
       build-depends:
           base
             >= 4.20
@@ -213,15 +213,21 @@ library
       , rounded-hw
           ^>= 0.4
       , time
-          ^>= 1.12.2
+           >= 1.12.2 && < 1.15
       , transformers
            >= 0.5.6.2 && < 0.7
 
-    -- Extra C++ code for 2D linear systems of interval equations
     include-dirs:
       cbits
+    c-sources:
+      -- C code for: setting the rounding mode
+      cbits/rounding.c
     cxx-sources:
+      -- Walter's C++ code for 2D linear systems of interval equations
       cbits/lp_2d.cpp
+    asm-sources:
+      -- X86 assembly for interval multiplication using SIMD instructions
+      cbits/mul.S
     cxx-options:
       -std=c++20
       -mavx2
diff --git a/brush-strokes/cabal.project b/brush-strokes/cabal.project
deleted file mode 100644
index d4e54c1..0000000
--- a/brush-strokes/cabal.project
+++ /dev/null
@@ -1,39 +0,0 @@
-packages: .
-
-constraints:
-  acts -finitary,
-  fp-ieee +fma3,
-  rounded-hw -pure-hs -c99 -avx512 +ghc-prim -x87-long-double
-
-tests: True
-
-allow-newer:
-  *:base, *:template-haskell,
-  acts:deepseq,
-  falsify:tasty,
-  indexed-traversable:containers
-
---------------
--- GHC 9.10 --
---------------
-
--- eigen
-source-repository-package
-  type: git
-  location: https://github.com/sheaf/eigen
-  tag: 3bc1d4bc53edb75e2ee3893763361aa0d5c7714a
-
---------------
--- GHC HEAD --
---------------
-
-repository head.hackage.ghc.haskell.org
-   url: https://ghc.gitlab.haskell.org/head.hackage/
-   secure: True
-   key-threshold: 3
-   root-keys:
-       7541f32a4ccca4f97aea3b22f5e593ba2c0267546016b992dfadcd2fe944e55d
-       26021a13b401500c8eb2761ca95c61f2d625bfef951b939a8124ed12ecf07329
-       f76d08be13e9a61a377a85e2fb63f4c5435d40f8feb3e12eb05905edb8cdea89
-
-active-repositories: hackage.haskell.org, head.hackage.ghc.haskell.org:override
diff --git a/brush-strokes/cbits/mul.S b/brush-strokes/cbits/mul.S
new file mode 100644
index 0000000..1abee26
--- /dev/null
+++ b/brush-strokes/cbits/mul.S
@@ -0,0 +1,30 @@
+.global in2_mul8
+in2_mul8:
+        vshufpd $3, %xmm1, %xmm1, %xmm4
+        vshufpd $3, %xmm2, %xmm2, %xmm3
+        vxorpd  %xmm1, %xmm4, %xmm5
+        vxorpd  %xmm2, %xmm3, %xmm6
+        vtestpd %xmm5, %xmm6
+        je      .LBB1_2
+        vxorpd  %xmm5, %xmm5, %xmm5
+        vcmplepd        %xmm5, %xmm3, %xmm3
+        vcmplepd        %xmm5, %xmm4, %xmm4
+        vmovsd  .LCPI1_1(%rip), %xmm5
+        vxorpd  %xmm5, %xmm1, %xmm1
+        vshufps $78, %xmm1, %xmm1, %xmm6
+        vblendvpd       %xmm3, %xmm6, %xmm1, %xmm1
+        vxorpd  %xmm5, %xmm2, %xmm2
+        vshufps $78, %xmm2, %xmm2, %xmm3
+        vblendvpd       %xmm4, %xmm3, %xmm2, %xmm2
+        vxorpd  %xmm5, %xmm2, %xmm2
+        vmulpd  %xmm2, %xmm1, %xmm1
+        jmp *(%rbp)
+.LCPI1_1:
+        .quad   -9223372036854775808
+.LBB1_2:
+        vshufpd $1, %xmm1, %xmm1, %xmm4
+        vmovddup        %xmm2, %xmm2
+        vmulpd  %xmm4, %xmm2, %xmm2
+        vmulpd  %xmm1, %xmm3, %xmm1
+        vmaxpd  %xmm1, %xmm2, %xmm1
+        jmp *(%rbp)
diff --git a/brush-strokes/cbits/rounding.c b/brush-strokes/cbits/rounding.c
new file mode 100644
index 0000000..04f6b21
--- /dev/null
+++ b/brush-strokes/cbits/rounding.c
@@ -0,0 +1,9 @@
+#include <xmmintrin.h>
+
+// Function to set the CSR rounding mode for SSE operations
+void set_sse_rounding_mode(unsigned int rounding_mode) {
+    unsigned int csr = _mm_getcsr();
+    csr &= ~_MM_ROUND_MASK; // Clear existing rounding mode bits
+    csr |= rounding_mode;   // Set new rounding mode
+    _mm_setcsr(csr);
+}
diff --git a/brush-strokes/src/cusps/bench/Main.hs b/brush-strokes/src/cusps/bench/Main.hs
index d9fd97d..5a15b3b 100644
--- a/brush-strokes/src/cusps/bench/Main.hs
+++ b/brush-strokes/src/cusps/bench/Main.hs
@@ -51,8 +51,15 @@ import Bench.Types
 
 --------------------------------------------------------------------------------
 
+pattern ROUND_UP :: Word
+pattern ROUND_UP = 0x4000
+
+foreign import ccall "set_sse_rounding_mode"
+  setSSERoundingMode :: Word -> IO ()
+
 main :: IO ()
 main = withCP65001 $ do
+  setSSERoundingMode ROUND_UP
   putStrLn "Running cusp-finding benchmarks."
   for_ benchGroups $ \ ( groupName, benchGroupCases ) -> do
     putStrLn $ unlines
diff --git a/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs b/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs
index 15f3267..7d6c89c 100644
--- a/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs
+++ b/brush-strokes/src/lib/Math/Interval/Internal/SIMD.hs
@@ -1,4 +1,6 @@
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE GHCForeignImportPrim #-}
+{-# LANGUAGE UnliftedFFITypes #-}
 
 module Math.Interval.Internal.SIMD
   ( 𝕀(𝕀, inf, sup)
@@ -12,7 +14,7 @@ import qualified Prelude
 import GHC.Exts
   ( isTrue#
   , Double(D#), Double#
-  , (<=##), (*##), negateDouble#
+  , (<=##), negateDouble#
   , DoubleX2#
   , packDoubleX2#, unpackDoubleX2#
   , broadcastDoubleX2#
@@ -40,26 +42,31 @@ import Math.Ring
 
 -- | A non-empty interval of real numbers (possibly unbounded).
 data 𝕀 = MkI DoubleX2#
-  -- Interval [lo..hi] represented as (hi, -lo)
+  -- Interval [lo..hi] represented as (-lo, hi)
+
+foreign import prim "in2_mul8"
+  mulI :: DoubleX2# -> DoubleX2# -> DoubleX2#
 
 instance NFData 𝕀 where
   rnf ( MkI !_ ) = ()
 
 {-# COMPLETE PackI #-}
+{-# INLINE PackI #-}
 pattern PackI :: Double# -> Double# -> 𝕀
-pattern PackI hi m_lo <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# hi, m_lo #) )
+pattern PackI m_lo hi <- ( ( \ ( MkI x ) -> unpackDoubleX2# x ) -> (# m_lo, hi #) )
   where
-    PackI hi m_lo = MkI ( packDoubleX2# (# hi, m_lo #) )
+    PackI m_lo hi = MkI ( packDoubleX2# (# m_lo, hi #) )
 
 
 {-# COMPLETE 𝕀 #-}
+{-# INLINE 𝕀 #-}
 pattern 𝕀 :: Double -> Double -> 𝕀
-pattern 𝕀 { inf, sup } <- ( ( \ ( PackI hi m_lo ) -> (# D# ( negateDouble# m_lo ), D# hi #) )
+pattern 𝕀 { inf, sup } <- ( ( \ ( PackI m_lo hi ) -> (# D# ( negateDouble# m_lo ), D# hi #) )
                             -> (# inf, sup #) )
   where
     𝕀 (D# lo) (D# hi) =
       let m_lo = negateDouble# lo
-      in  PackI hi m_lo
+      in  PackI m_lo hi
 
 deriving via ViaPrelude 𝕀
   instance AbelianGroup 𝕀
@@ -73,56 +80,37 @@ instance Prelude.Num 𝕀 where
   fromInteger i =
     let !j = Prelude.fromInteger i
     in 𝕀 j j
-  abs x@(PackI hi m_lo)
+  abs x@(PackI m_lo hi)
     | D# m_lo <= 0
-    = PackI hi m_lo
+    = PackI m_lo hi
     | D# hi <= 0
     = Prelude.negate x
     | otherwise
-    = 𝕀 0 (max (D# m_lo) (D# hi))
+    = 𝕀 0 ( D# ( maxDouble# m_lo hi ) )
   signum _ = error "No implementation of signum for intervals"
 
 
 instance Ring 𝕀 where
-  ( PackI b ma ) * ( PackI d mc ) =
-    case ( zeroPos b ma, zeroPos d mc ) of
-      -- From "Fast and Correct SIMD Algorithms for Interval Arithmetic"
-      --   Frédéric Goualard, 2008
-      ( LT, LT ) -> PackI (ma *## mc) ( ( negateDouble# b ) *## d)
-      ( LT, EQ ) -> PackI (ma *## mc) ( ma *## d )
-      ( LT, GT ) -> PackI ( b *## ( negateDouble# mc ) ) ( ma *## d )
-      ( EQ, LT ) -> PackI (ma *## mc) (  b *## mc )
-      ( EQ, EQ ) -> PackI
-                      ( maxDouble# ( ma *## d  ) ( b *## mc ) )
-                      ( maxDouble# ( ma *## mc ) ( b *## d  ) )
-      ( EQ, GT ) -> PackI ( b *## d ) ( ma *## d )
-      ( GT, LT ) -> PackI ( ( negateDouble# ma ) *## d ) (  b *## mc )
-      ( GT, EQ ) -> PackI ( b *## d ) (  b *## mc )
-      ( GT, GT ) -> PackI ( b *## d ) ( ( negateDouble# ma ) *## mc )
-     where
-      zeroPos :: Double# -> Double# -> Ordering
-      zeroPos hi m_lo
-        | isTrue# (hi <=## 0.0##)
-        = LT
-        | isTrue# (m_lo <=## 0.0##)
-        = GT
-        | otherwise
-        = EQ
+  ( MkI x ) * ( MkI y ) = error "TODO: Cabal bug #5623 if used in TH" --MkI ( mulI x y )
+    -- This uses the C code from Lockless Inc.
+    -- TODO: compare with
+    --   "Fast and Correct SIMD Algorithms for Interval Arithmetic"
+    --   Frédéric Goualard, 2008
   iv ^ 1 = iv
-  PackI hi m_lo ^ n
+  PackI m_lo hi ^ n
     | odd n || isTrue# (m_lo <=## 0.0##)
     , let !(D# m_lo') = D# m_lo Prelude.^ n
           !(D# hi'  ) = D# hi   Prelude.^ n
-    = PackI hi' m_lo'
+    = PackI m_lo' hi'
     | isTrue# (hi <=## 0.0##)
     , let !(D# m_lo') = (D# hi  ) Prelude.^ n
           !(D# hi')   = (D# m_lo) Prelude.^ n
-    = PackI hi' m_lo'
+    = PackI m_lo' hi'
     | otherwise
     , let !(D# hi1) = (D# m_lo) Prelude.^ n
           !(D# hi2) = (D# hi  ) Prelude.^ n
           hi' = maxDouble# hi1 hi2
-    = PackI hi' 0.0##
+    = PackI 0.0## hi'
 
 instance Prelude.Fractional 𝕀 where
   fromRational r =
diff --git a/cabal.project b/cabal.project
index 6cfca0a..d78a798 100644
--- a/cabal.project
+++ b/cabal.project
@@ -16,18 +16,21 @@ source-repository-package
   tag: ec171cd5d185309692b745e2e2f291eab4038fb9
 
 allow-newer:
-  *:base, *:template-haskell, *:ghc-prim,
+  *:base, *:template-haskell, *:ghc-prim, *:Cabal,
   acts:deepseq,
   digit:lens,
   eigen:primitive,
   eigen:transformers,
+  falsify:tasty,
   gi-cairo-connector:mtl,
   hedgehog:containers,
   hedgehog:resourcet,
+  indexed-traversable:containers,
   JuicyPixels:zlib,
   natural:lens,
   natural:semigroupoids,
   records-sop:deepseq,
+  th-abstraction:containers,
   waargonaut:bifunctors,
   waargonaut:bytestring,
   waargonaut:containers,