Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite intercalate in a more direct way #459

Merged
merged 3 commits into from
Jan 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 22 additions & 26 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -383,13 +383,13 @@ infixl 5 `snoc`
cons :: Word8 -> ByteString -> ByteString
cons c (BS x l) = unsafeCreate (l+1) $ \p -> unsafeWithForeignPtr x $ \f -> do
poke p c
memcpy (p `plusPtr` 1) f (fromIntegral l)
memcpy (p `plusPtr` 1) f l
{-# INLINE cons #-}

-- | /O(n)/ Append a byte to the end of a 'ByteString'
snoc :: ByteString -> Word8 -> ByteString
snoc (BS x l) c = unsafeCreate (l+1) $ \p -> unsafeWithForeignPtr x $ \f -> do
memcpy p f (fromIntegral l)
memcpy p f l
poke (p `plusPtr` l) c
{-# INLINE snoc #-}

Expand Down Expand Up @@ -1209,28 +1209,24 @@ groupBy k xs = case uncons xs of
-- 'ByteString's and concatenates the list after interspersing the first
-- argument between each element of the list.
intercalate :: ByteString -> [ByteString] -> ByteString
intercalate s = concat . List.intersperse s
intercalate _ [] = mempty
intercalate (BS fSepPtr sepLen) (BS fhPtr hLen : t) =
unsafeCreate totalLen $ \dstPtr0 ->
unsafeWithForeignPtr fSepPtr $ \sepPtr -> do
unsafeWithForeignPtr fhPtr $ \hPtr ->
memcpy dstPtr0 hPtr hLen
let go _ [] = pure ()
go dstPtr (BS fChunkPtr chunkLen : chunks) = do
memcpy dstPtr sepPtr sepLen
let destPtr' = dstPtr `plusPtr` sepLen
unsafeWithForeignPtr fChunkPtr $ \chunkPtr ->
memcpy destPtr' chunkPtr chunkLen
go (destPtr' `plusPtr` chunkLen) chunks
go (dstPtr0 `plusPtr` hLen) t
where
totalLen = List.foldl' (\acc (BS _ chunkLen) -> acc + chunkLen + sepLen) hLen t
{-# INLINE [1] intercalate #-}

{-# RULES
"ByteString specialise intercalate c -> intercalateByte" forall c s1 s2 .
intercalate (singleton c) [s1, s2] = intercalateWithByte c s1 s2
#-}

-- | /O(n)/ intercalateWithByte. An efficient way to join to two ByteStrings
-- with a char. Around 4 times faster than the generalised join.
--
intercalateWithByte :: Word8 -> ByteString -> ByteString -> ByteString
intercalateWithByte c f@(BS ffp l) g@(BS fgp m) = unsafeCreate len $ \ptr ->
unsafeWithForeignPtr ffp $ \fp ->
unsafeWithForeignPtr fgp $ \gp -> do
memcpy ptr fp (fromIntegral l)
poke (ptr `plusPtr` l) c
memcpy (ptr `plusPtr` (l + 1)) gp (fromIntegral m)
where
len = length f + length g + 1
{-# INLINE intercalateWithByte #-}

-- ---------------------------------------------------------------------
-- Indexing ByteStrings

Expand Down Expand Up @@ -1692,7 +1688,7 @@ sort :: ByteString -> ByteString
sort (BS input l)
-- qsort outperforms counting sort for small arrays
| l <= 20 = unsafeCreate l $ \ptr -> unsafeWithForeignPtr input $ \inp -> do
memcpy ptr inp (fromIntegral l)
memcpy ptr inp l
c_sort ptr (fromIntegral l)
| otherwise = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do

Expand Down Expand Up @@ -1729,7 +1725,7 @@ useAsCString (BS fp l) action =
allocaBytes (l+1) $ \buf ->
-- Cannot use unsafeWithForeignPtr, because action can diverge
withForeignPtr fp $ \p -> do
memcpy buf p (fromIntegral l)
memcpy buf p l
pokeByteOff buf l (0::Word8)
action (castPtr buf)

Expand All @@ -1756,7 +1752,7 @@ packCString cstr = do
-- Haskell heap.
packCStringLen :: CStringLen -> IO ByteString
packCStringLen (cstr, len) | len >= 0 = create len $ \p ->
memcpy p (castPtr cstr) (fromIntegral len)
memcpy p (castPtr cstr) len
packCStringLen (_, len) =
moduleErrorIO "packCStringLen" ("negative length: " ++ show len)

Expand All @@ -1770,7 +1766,7 @@ packCStringLen (_, len) =
--
copy :: ByteString -> ByteString
copy (BS x l) = unsafeCreate l $ \p -> unsafeWithForeignPtr x $ \f ->
memcpy p f (fromIntegral l)
memcpy p f l

-- ---------------------------------------------------------------------
-- Line IO
Expand Down
5 changes: 5 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@ main = do
[ bench "intersperse" $ whnf (S.intersperse 32) byteStringData
, bench "intersperse (unaligned)" $ whnf (S.intersperse 32) (S.drop 1 byteStringData)
]
, bgroup "intercalate"
[ bench "intercalate (large)" $ whnf (S.intercalate $ S8.pack " and also ") (replicate 300 (S8.pack "expression"))
, bench "intercalate (small)" $ whnf (S.intercalate $ S8.pack "&") (replicate 30 (S8.pack "foo"))
, bench "intercalate (tiny)" $ whnf (S.intercalate $ S8.pack "&") (S8.pack <$> ["foo", "bar", "baz"])
]
, bgroup "partition"
[
bgroup "strict"
Expand Down