Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve lazy performance of Data.Text.Lazy.inits
Browse files Browse the repository at this point in the history
The previous implementation, itself based on an earlier version of
`Data.List.inits`, inherited the flaw that accessing the i-th element took
quadratic time O(i²). This now takes linear time O(i) as expected.

The current version of `Data.List.inits` uses a banker's queue to
obtain good performance when generating very long lists.
For lazy text, consisting of a few big chunks, that benefit seems
negligible. So I chose a simpler implementation.
Lysxia committed Mar 14, 2024
1 parent a011d4a commit 4495cae
Showing 4 changed files with 37 additions and 4 deletions.
2 changes: 2 additions & 0 deletions benchmarks/haskell/Benchmarks.hs
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@ import qualified Benchmarks.EncodeUtf8 as EncodeUtf8
import qualified Benchmarks.Equality as Equality
import qualified Benchmarks.FileRead as FileRead
import qualified Benchmarks.FoldLines as FoldLines
import qualified Benchmarks.Micro as Micro
import qualified Benchmarks.Multilang as Multilang
import qualified Benchmarks.Pure as Pure
import qualified Benchmarks.ReadNumbers as ReadNumbers
@@ -61,6 +62,7 @@ main = do
defaultMain
[ Builder.benchmark
, Concat.benchmark
, Micro.benchmark
, bgroup "DecodeUtf8"
[ env (DecodeUtf8.initEnv (tf "libya-chinese.html")) (DecodeUtf8.benchmark "html")
, env (DecodeUtf8.initEnv (tf "yiwiki.xml")) (DecodeUtf8.benchmark "xml")
22 changes: 22 additions & 0 deletions benchmarks/haskell/Benchmarks/Micro.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- | Benchmarks on artificial data.

module Benchmarks.Micro (benchmark) where

import qualified Data.Text.Lazy as TL
import qualified Data.Text as T
import Test.Tasty.Bench (Benchmark, bgroup, bench, nf)

benchmark :: Benchmark
benchmark = bgroup "Micro"
[ -- Accessing i-th element should take O(i) time.
-- The 2k case should run in 2x the time of the 1k case.
bgroup "Lazy.inits"
[ bench "last 1k" $ nf (last . TL.inits) (chunks 1000)
, bench "last 2k" $ nf (last . TL.inits) (chunks 2000)
, bench "map-take1 1k" $ nf (map (TL.take 1) . TL.inits) (chunks 1000)
, bench "map-take1 2k" $ nf (map (TL.take 1) . TL.inits) (chunks 2000)
]
]

chunks :: Int -> TL.Text
chunks n = TL.fromChunks (replicate n (T.pack "a"))
16 changes: 12 additions & 4 deletions src/Data/Text/Lazy.hs
Original file line number Diff line number Diff line change
@@ -1436,10 +1436,18 @@ inits = (NE.toList P.$!) . initsNE
--
-- @since 2.1.2
initsNE :: Text -> NonEmpty Text
initsNE = (Empty NE.:|) . inits'
where inits' Empty = []
inits' (Chunk t ts) = L.map (\t' -> Chunk t' Empty) (NE.tail (T.initsNE t))
++ L.map (Chunk t) (inits' ts)
initsNE ts0 = Empty NE.:| inits' 0 ts0
where
inits' :: Int64 -- Number of previous chunks i
-> Text -- The remainder after dropping i chunks from ts0
-> [Text] -- Prefixes longer than the first i chunks of ts0.
inits' _ Empty = []
inits' i (Chunk t ts) = L.map (takeChunks i ts) (NE.tail (T.initsNE t))
++ inits' (i + 1) ts

takeChunks :: Int64 -> Text -> T.Text -> Text
takeChunks !i (Chunk t ts) lastChunk | i > 0 = Chunk t (takeChunks (i - 1) ts lastChunk)
takeChunks _ _ lastChunk = Chunk lastChunk Empty

-- | /O(n)/ Return all final segments of the given 'Text', longest
-- first.
1 change: 1 addition & 0 deletions text.cabal
Original file line number Diff line number Diff line change
@@ -333,6 +333,7 @@ benchmark text-benchmarks
Benchmarks.Equality
Benchmarks.FileRead
Benchmarks.FoldLines
Benchmarks.Micro
Benchmarks.Multilang
Benchmarks.Programs.BigTable
Benchmarks.Programs.Cut

0 comments on commit 4495cae

Please sign in to comment.