diff --git a/Data/ByteString.hs b/Data/ByteString.hs index 801afcea8..313ac3483 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -138,8 +138,6 @@ module Data.ByteString ( -- ** Search for arbitrary substrings breakSubstring, -- :: ByteString -> ByteString -> (ByteString,ByteString) - findSubstring, -- :: ByteString -> ByteString -> Maybe Int - findSubstrings, -- :: ByteString -> ByteString -> [Int] -- * Searching ByteStrings @@ -1337,10 +1335,9 @@ stripSuffix bs1@(PS _ _ l1) bs2@(PS _ _ l2) | bs1 `isSuffixOf` bs2 = Just (unsafeTake (l2 - l1) bs2) | otherwise = Nothing --- | Check whether one string is a substring of another. @isInfixOf --- p s@ is equivalent to @not (null (findSubstrings p s))@. +-- | Check whether one string is a substring of another. isInfixOf :: ByteString -> ByteString -> Bool -isInfixOf p s = isJust (findSubstring p s) +isInfixOf p s = null p || not (null $ snd $ breakSubstring p s) -- | Break a string on a substring, returning a pair of the part of the -- string prior to the match, and the rest of the string. @@ -1349,14 +1346,6 @@ isInfixOf p s = isJust (findSubstring p s) -- -- > break (== c) l == breakSubstring (singleton c) l -- --- and: --- --- > findSubstring s l == --- > if null s then Just 0 --- > else case breakSubstring s l of --- > (x,y) | null y -> Nothing --- > | otherwise -> Just (length x) --- -- For example, to tokenise a string, dropping delimiters: -- -- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t) @@ -1426,51 +1415,6 @@ breakSubstring pat = w' = mask .&. ((w `shiftL` 8) .|. b) {-# INLINE shift #-} --- | Get the first index of a substring in another string, --- or 'Nothing' if the string is not found. --- @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@. -findSubstring :: ByteString -- ^ String to search for. - -> ByteString -- ^ String to seach in. - -> Maybe Int -findSubstring pat src - | null pat && null src = Just 0 - | null b = Nothing - | otherwise = Just (length a) - where (a, b) = breakSubstring pat src - -{-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-} - --- | Find the indices of all non-overlapping occurences of a substring in a --- string. --- --- Note, prior to @0.10.6.0@ this function returned the indices of all --- possibly-overlapping matches. -findSubstrings :: ByteString -- ^ String to search for. - -> ByteString -- ^ String to seach in. - -> [Int] -findSubstrings pat src - | null pat = [0 .. ls] - | otherwise = search 0 - where - lp = length pat - ls = length src - search !n - | (n > ls - lp) || null b = [] - | otherwise = let k = n + length a - in k : search (k + lp) - where - (a, b) = breakSubstring pat (unsafeDrop n src) - --- In --- [0.10.6.0]() --- 'findSubstrings' was refactored to call an improved 'breakString' --- implementation, but the refactored code no longer matches overlapping --- strings. The behaviour change appears to be inadvertent, but the function --- had already been deprecated for more than seven years. At this time --- (@0.10.10.1@), the deprecation was twelve years in the past. --- -{-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-} - -- --------------------------------------------------------------------- -- Zipping diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs index 6a01e6582..add5f4828 100644 --- a/Data/ByteString/Char8.hs +++ b/Data/ByteString/Char8.hs @@ -148,8 +148,6 @@ module Data.ByteString.Char8 ( -- ** Search for arbitrary substrings breakSubstring, -- :: ByteString -> ByteString -> (ByteString,ByteString) - findSubstring, -- :: ByteString -> ByteString -> Maybe Int - findSubstrings, -- :: ByteString -> ByteString -> [Int] -- * Searching ByteStrings @@ -249,7 +247,7 @@ import Data.ByteString (empty,null,length,tail,init,append ,concat,take,drop,splitAt,intercalate ,sort,isPrefixOf,isSuffixOf,isInfixOf ,stripPrefix,stripSuffix - ,findSubstring,findSubstrings,breakSubstring,copy,group + ,breakSubstring,copy,group ,getLine, getContents, putStr, interact ,readFile, writeFile, appendFile diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs index 773894322..a50cbf5ba 100644 --- a/Data/ByteString/Lazy.hs +++ b/Data/ByteString/Lazy.hs @@ -151,8 +151,6 @@ module Data.ByteString.Lazy ( -- ** Search for arbitrary substrings -- isSubstringOf, -- :: ByteString -> ByteString -> Bool --- findSubstring, -- :: ByteString -> ByteString -> Maybe Int --- findSubstrings, -- :: ByteString -> ByteString -> [Int] -- * Searching ByteStrings diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs index 91290e526..47408d08e 100644 --- a/bench/BenchAll.hs +++ b/bench/BenchAll.hs @@ -280,80 +280,6 @@ main = do ] ] - , bgroup "substrings" - [ bgroup "easy" - [ bench "easy1" . nf (uncurry S.findSubstrings) - $ easySubstrings 1 1000000 - , bench "easy4" . nf (uncurry S.findSubstrings) - $ easySubstrings 4 1000000 - , bench "easy16" . nf (uncurry S.findSubstrings) - $ easySubstrings 16 1000000 - , bench "easy64" . nf (uncurry S.findSubstrings) - $ easySubstrings 64 1000000 - , bench "easy128" . nf (uncurry S.findSubstrings) - $ easySubstrings 128 1000000 - , bench "easy1024" . nf (uncurry S.findSubstrings) - $ easySubstrings 1024 1000000 - ] - , bgroup "random" - [ bench "random1" . nf (uncurry S.findSubstrings) - $ randomSubstrings 1 1000000 - , bench "random4" . nf (uncurry S.findSubstrings) - $ randomSubstrings 4 1000000 - , bench "random16" . nf (uncurry S.findSubstrings) - $ randomSubstrings 16 1000000 - , bench "random64" . nf (uncurry S.findSubstrings) - $ randomSubstrings 64 1000000 - , bench "random128" . nf (uncurry S.findSubstrings) - $ randomSubstrings 128 1000000 - , bench "random1024" . nf (uncurry S.findSubstrings) - $ randomSubstrings 1024 1000000 - - ] - , bgroup "hard" - [ bench "hard1" . nf (uncurry S.findSubstrings) - $ hardSubstrings 1 1000000 - , bench "hard4" . nf (uncurry S.findSubstrings) - $ hardSubstrings 4 1000000 - , bench "hard16" . nf (uncurry S.findSubstrings) - $ hardSubstrings 16 1000000 - , bench "hard64" . nf (uncurry S.findSubstrings) - $ hardSubstrings 64 1000000 - , bench "hard128" . nf (uncurry S.findSubstrings) - $ hardSubstrings 128 1000000 - , bench "hard1024" . nf (uncurry S.findSubstrings) - $ hardSubstrings 1024 1000000 - ] - , bgroup "pathological" - [ bench "pathological1" . nf (uncurry S.findSubstrings) - $ pathologicalSubstrings 1 1000000 - , bench "pathological4" . nf (uncurry S.findSubstrings) - $ pathologicalSubstrings 4 1000000 - , bench "pathological16" . nf (uncurry S.findSubstrings) - $ pathologicalSubstrings 16 1000000 - , bench "pathological64" . nf (uncurry S.findSubstrings) - $ pathologicalSubstrings 64 1000000 - , bench "pathological128" . nf (uncurry S.findSubstrings) - $ pathologicalSubstrings 128 1000000 - , bench "pathological1024" . nf (uncurry S.findSubstrings) - $ pathologicalSubstrings 1024 1000000 - ] - , bgroup "html" - [ bench "html1" . nfIO . fmap (uncurry S.findSubstrings) - $ htmlSubstrings wikiPage 1 1000000 - , bench "html4" . nfIO . fmap (uncurry S.findSubstrings) - $ htmlSubstrings wikiPage 4 1000000 - , bench "html16" . nfIO . fmap (uncurry S.findSubstrings) - $ htmlSubstrings wikiPage 16 1000000 - , bench "html64" . nfIO . fmap (uncurry S.findSubstrings) - $ htmlSubstrings wikiPage 64 1000000 - , bench "html128" . nfIO . fmap (uncurry S.findSubstrings) - $ htmlSubstrings wikiPage 128 1000000 - , bench "html1024" . nfIO . fmap (uncurry S.findSubstrings) - $ htmlSubstrings wikiPage 1024 1000000 - ] - ] - , bgroup "Data.ByteString.Builder.Prim" [ benchFE "char7" $ toEnum >$< P.char7 , benchFE "char8" $ toEnum >$< P.char8 diff --git a/tests/Properties.hs b/tests/Properties.hs index 16b57ba3c..2ac652236 100644 --- a/tests/Properties.hs +++ b/tests/Properties.hs @@ -1252,48 +1252,6 @@ prop_initsBB xs = inits xs == map P.unpack (P.inits (P.pack xs)) prop_tailsBB xs = tails xs == map P.unpack (P.tails (P.pack xs)) --- The correspondence between the test 'ByteString' and naive test 'String' --- must be injective, otherwise the ByteString may find matches at positions --- that don't match in the "corresponding" string. To that end, we start --- with and pack a Word8 array, rather than a unicode String. --- -prop_findSubstringsBB :: [Word8] -> Int -> Int -> Bool -prop_findSubstringsBB ws x l - = let bstr = P.pack ws - -- we look for some random substring of the test string - slice = C.take l $ C.drop x bstr - str = C.unpack bstr - substr = C.unpack slice - in C.findSubstrings slice bstr == naive_findSubstrings substr str - where - -- naive reference implementation - -- Note, overlapping matches have been broken since 2015, so at this - -- point just test for the current behaviour. - naive_findSubstrings :: String -> String -> [Int] - naive_findSubstrings p q - | null p = [0..length q] - | otherwise = go 0 (length p) p (length q) q - go n !lp p !lq q = - if (lp > lq) - then [] - else if p `isPrefixOf` q - then n : go (n + lp) lp p (lq - lp) (drop lp q) - else go (n + 1) lp p (lq - 1) (tail q) - --- See above re injective string -> bytestring correspondence. -prop_findSubstringBB :: [Word8] -> Int -> Int -> Bool -prop_findSubstringBB ws x l - = let bstr = P.pack ws - -- we look for some random substring of the test string - slice = C.take l $ C.drop x bstr - str = C.unpack bstr - substr = C.unpack slice - in C.findSubstring slice bstr == naive_findSubstring substr str - where - -- naive reference implementation - naive_findSubstring :: String -> String -> Maybe Int - naive_findSubstring p q = listToMaybe [x | x <- [0..length q], p `isPrefixOf` drop x q] - -- correspondance between break and breakSubstring prop_breakSubstringBB c l = P.break (== c) l == P.breakSubstring (P.singleton c) l @@ -1304,12 +1262,6 @@ prop_breakSubstring_isInfixOf s l (x,y) | P.null y -> False | otherwise -> True -prop_breakSubstring_findSubstring s l - = P.findSubstring s l == if P.null s then Just 0 - else case P.breakSubstring s l of - (x,y) | P.null y -> Nothing - | otherwise -> Just (P.length x) - prop_replicate1BB c = forAll arbitrarySizedIntegral $ \n -> P.unpack (P.replicate n c) == replicate n c prop_replicate2BB c = forAll arbitrarySizedIntegral $ \n -> @@ -2277,10 +2229,7 @@ bb_tests = , testProperty "copy" prop_copyLL , testProperty "inits" prop_initsBB , testProperty "tails" prop_tailsBB - , testProperty "findSubstrings "prop_findSubstringsBB - , testProperty "findSubstring "prop_findSubstringBB , testProperty "breakSubstring 1"prop_breakSubstringBB - , testProperty "breakSubstring 2"prop_breakSubstring_findSubstring , testProperty "breakSubstring 3"prop_breakSubstring_isInfixOf , testProperty "replicate1" prop_replicate1BB