@@ -144,7 +144,7 @@ import Control.DeepSeq            (NFData (..), NFData1 (..), NFData2 (..))
144144import  Control.Monad.ST            (ST , runST )
145145import  Data.Bifoldable             (Bifoldable  (.. ))
146146import  Data.Bits                   (complement , popCount , unsafeShiftL ,
147-                                    unsafeShiftR , (.&.) , (.|.) )
147+                                    unsafeShiftR , (.&.) , (.|.) ,  countTrailingZeros )
148148import  Data.Coerce                 (coerce )
149149import  Data.Data                   (Constr , Data  (.. ), DataType )
150150import  Data.Functor.Classes        (Eq1  (.. ), Eq2  (.. ), Ord1  (.. ), Ord2  (.. ),
@@ -1622,26 +1622,27 @@ unionArrayBy :: (a -> a -> a) -> Bitmap -> Bitmap -> A.Array a -> A.Array a
16221622--  Core size reductions with GHC 9.2.2. See the Core diffs in
16231623--  https://github.com/haskell-unordered-containers/unordered-containers/pull/376.
16241624unionArrayBy f ! b1 ! b2 ! ary1 ! ary2 =  A. run $  do 
1625-     let  b'  =  b1 .|.  b2
1626-     mary <-  A. new_ (popCount b' )
1625+     let  bCombined  =  b1 .|.  b2
1626+     mary <-  A. new_ (popCount bCombined )
16271627    --  iterate over nonzero bits of b1 .|. b2
1628-     --  it would be nice if we could shift m by more than 1 each time
1629-     let  ba =  b1 .&.  b2
1630-         go ! i ! i1 ! i2 ! m
1631-             |  m >  b'        =  return  () 
1632-             |  b' .&.  m ==  0  =  go i i1 i2 (m `unsafeShiftL`  1 )
1633-             |  ba .&.  m /=  0  =  do 
1628+     let  go ! i ! i1 ! i2 ! b
1629+             |  b ==  0  =  return  () 
1630+             |  testBit (b1 .&.  b2) =  do 
16341631                x1 <-  A. indexM ary1 i1
16351632                x2 <-  A. indexM ary2 i2
16361633                A. write mary i $!  f x1 x2
1637-                 go (i+ 1 ) (i1+ 1 ) (i2+ 1 ) (m  `unsafeShiftL`   1 ) 
1638-             |  b1  .&.  m  /=   0  =  do 
1634+                 go (i+ 1 ) (i1+ 1 ) (i2+ 1 ) b' 
1635+             |  testBit b1  =  do 
16391636                A. write mary i =<<  A. indexM ary1 i1
1640-                 go (i+ 1 ) (i1+ 1 )   i2    (m  `unsafeShiftL`   1 ) 
1641-             |  otherwise       =  do 
1637+                 go (i+ 1 ) (i1+ 1 ) i2 b' 
1638+             |  otherwise  =  do 
16421639                A. write mary i =<<  A. indexM ary2 i2
1643-                 go (i+ 1 )  i1    (i2+ 1 ) (m `unsafeShiftL`  1 )
1644-     go 0  0  0  (b' .&.  negate  b') --  XXX: b' must be non-zero
1640+                 go (i+ 1 ) i1 (i2+ 1 ) b'
1641+           where 
1642+             m =  1  `unsafeShiftL`  (countTrailingZeros b)
1643+             testBit x =  x .&.  m /=  0 
1644+             b' =  b .&.  complement m
1645+     go 0  0  0  bCombined
16451646    return  mary
16461647    --  TODO: For the case where b1 .&. b2 == b1, i.e. when one is a
16471648    --  subset of the other, we could use a slightly simpler algorithm,
0 commit comments