From fda0ac9e5ffaae7b1bed575d2b58bbb7935bcb20 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Tue, 25 Mar 2025 20:36:48 +0530 Subject: [PATCH] Allow list fusion for Text and Text.Lazy unpack * Make Data.Text.unpack and Data.Text.Lazy.unpack good producers in list fusion. This allows them to fuse with good consumers of lists. Rewrite-back rules are included since the function bodies are large and we don't want to inline them if fusion doesn't occur. * For Data.Text.Lazy, this change means that `unpack`, which uses `unstreamList`, no longer fuses with `streamList` under Text's stream fusion framework. This scenario seems very unlikely, since nothing else must be done to the list in between the two functions. Even `pack . unpack` does not satisfy this rule. So we are not losing anything valuable here. * Add benchmarks for unpack, fusion and no fusion. --- benchmarks/haskell/Benchmarks/Pure.hs | 8 ++++++++ src/Data/Text/Lazy.hs | 18 ++++++++++++++++- src/Data/Text/Show.hs | 29 +++++++++++++++++++++++---- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/benchmarks/haskell/Benchmarks/Pure.hs b/benchmarks/haskell/Benchmarks/Pure.hs index cec8a2d1..b411c17d 100644 --- a/benchmarks/haskell/Benchmarks/Pure.hs +++ b/benchmarks/haskell/Benchmarks/Pure.hs @@ -208,6 +208,14 @@ benchmark kind ~Env{..} = [ benchT $ nf (T.zipWith min tb) ta , benchTL $ nf (TL.zipWith min tlb) tla ] + , bgroup "length . unpack" -- length should fuse with unpack + [ benchT $ nf (L.length . T.unpack) ta + , benchTL $ nf (L.length . TL.unpack) tla + ] + , bgroup "length . drop 1 . unpack" -- no list fusion because of drop 1 + [ benchT $ nf (L.length . L.drop 1 . T.unpack) ta + , benchTL $ nf (L.length . L.drop 1 . TL.unpack) tla + ] , bgroup "length" [ bgroup "cons" [ benchT $ nf (T.length . T.cons c) ta diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 4ea92ac7..0bb73972 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -424,7 +424,23 @@ unpack :: #endif Text -> String unpack t = S.unstreamList (stream t) -{-# INLINE [1] unpack #-} +{-# NOINLINE unpack #-} + +foldrFB :: (Char -> b -> b) -> b -> Text -> b +foldrFB = foldr +{-# INLINE [0] foldrFB #-} + +-- List fusion rules for `unpack`: +-- * `unpack` rewrites to `build` up till (but not including) phase 1. `build` +-- fuses if `foldr` is applied to it. +-- * If it doesn't fuse: In phase 1, `build` inlines to give us `foldrFB (:) []` +-- and we rewrite that back to `unpack`. +-- * If it fuses: In phase 0, `foldrFB` inlines and `foldr` inlines. GHC +-- optimizes the fused code. +{-# RULES +"Text.Lazy.unpack" [~1] forall t. unpack t = Exts.build (\lcons lnil -> foldrFB lcons lnil t) +"Text.Lazy.unpackBack" [1] foldrFB (:) [] = unpack + #-} -- | /O(n)/ Convert a literal string into a Text. unpackCString# :: Addr# -> Text diff --git a/src/Data/Text/Show.hs b/src/Data/Text/Show.hs index 696d1354..6a0efe70 100644 --- a/src/Data/Text/Show.hs +++ b/src/Data/Text/Show.hs @@ -31,6 +31,7 @@ import Data.Text.Internal.Encoding.Utf8 (utf8Length) import Data.Text.Internal.Unsafe.Char (unsafeWrite) import Data.Text.Unsafe (Iter(..), iterArray) import GHC.Exts (Ptr(..), Int(..), Addr#, indexWord8OffAddr#) +import qualified GHC.Exts as Exts import GHC.Word (Word8(..)) import qualified Data.Text.Array as A #if !MIN_VERSION_ghc_prim(0,7,0) @@ -53,12 +54,32 @@ unpack :: HasCallStack => #endif Text -> String -unpack (Text arr off len) = go off +unpack t = foldrText (:) [] t +{-# NOINLINE unpack #-} + +foldrText :: (Char -> b -> b) -> b -> Text -> b +foldrText f z (Text arr off len) = go off where go !i - | i >= off + len = [] - | otherwise = let !(Iter c l) = iterArray arr i in c : go (i + l) -{-# INLINE [1] unpack #-} + | i >= off + len = z + | otherwise = let !(Iter c l) = iterArray arr i in f c (go (i + l)) +{-# INLINE foldrText #-} + +foldrTextFB :: (Char -> b -> b) -> b -> Text -> b +foldrTextFB = foldrText +{-# INLINE [0] foldrTextFB #-} + +-- List fusion rules for `unpack`: +-- * `unpack` rewrites to `build` up till (but not including) phase 1. `build` +-- fuses if `foldr` is applied to it. +-- * If it doesn't fuse: In phase 1, `build` inlines to give us +-- `foldrTextFB (:) []` and we rewrite that back to `unpack`. +-- * If it fuses: In phase 0, `foldrTextFB` inlines and `foldrText` inlines. GHC +-- optimizes the fused code. +{-# RULES +"Text.unpack" [~1] forall t. unpack t = Exts.build (\lcons lnil -> foldrTextFB lcons lnil t) +"Text.unpackBack" [1] foldrTextFB (:) [] = unpack + #-} -- | /O(n)/ Convert a null-terminated --