Skip to content

Commit

Permalink
Add Data.ByteString.Short.isValidUtf8 (#450)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexbiehl authored Dec 12, 2021
1 parent f6065fa commit c7cba39
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 6 deletions.
3 changes: 3 additions & 0 deletions Data/ByteString/Short.hs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ module Data.ByteString.Short (
-- * Other operations
empty, null, length, index, indexMaybe, (!?),

-- ** Encoding validation
isValidUtf8,

-- * Low level conversions
-- ** Packing 'Foreign.C.String.CString's and pointers
packCString,
Expand Down
14 changes: 14 additions & 0 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ module Data.ByteString.Short.Internal (
-- * Low level operations
createFromPtr, copyToPtr,

-- ** Encoding validation
isValidUtf8,

-- * Low level conversions
-- ** Packing 'CString's and pointers
packCString,
Expand Down Expand Up @@ -599,6 +602,17 @@ useAsCStringLen bs action =
action (buf, l)
where l = length bs

-- | /O(n)/ Determine whether the bytes held by a 'ShortByteString' form
-- valid UTF-8.
--
-- The check is delegated to the C routine bound as @cIsValidUtf8@, which is
-- handed the underlying byte array together with its length; any non-zero
-- result is reported as 'True'.
--
-- @since 0.11.3.0
isValidUtf8 :: ShortByteString -> Bool
isValidUtf8 sbs@(SBS ba#) =
  accursedUnutterablePerformIO $ do
    verdict <- cIsValidUtf8 ba# byteCount
    return (verdict /= 0)
  where
    -- Length of the input, converted to the size type the C side expects.
    byteCount = fromIntegral (length sbs)

-- Binding to the C routine @bytestring_is_valid_utf8@. 'isValidUtf8' passes
-- it the byte array and the array's length, and treats a non-zero result as
-- "valid".
--
-- NOTE(review): the import is marked @unsafe@; presumably this is required
-- because the 'ByteArray#' argument may be unpinned and must not move during
-- the call -- confirm against the GHC FFI documentation.
foreign import ccall unsafe "bytestring_is_valid_utf8" cIsValidUtf8
:: ByteArray# -> CSize -> IO CInt

-- ---------------------------------------------------------------------
-- Internal utilities

Expand Down
21 changes: 15 additions & 6 deletions tests/IsValidUtf8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ module IsValidUtf8 (testSuite) where

import Data.Bits (shiftR, (.&.), shiftL)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Short as SBS
import qualified Data.ByteString as B
import Data.Char (chr, ord)
import Data.Word (Word8)
Expand All @@ -17,17 +18,25 @@ import Test.Tasty.QuickCheck (testProperty, QuickCheckTests)

testSuite :: TestTree
testSuite = testGroup "UTF-8 validation" $ [
adjustOption (max testCount) . testProperty "Valid UTF-8" $ goValid,
adjustOption (max testCount) . testProperty "Invalid UTF-8" $ goInvalid,
adjustOption (max testCount) . testProperty "Valid UTF-8 ByteString" $ goValidBS,
adjustOption (max testCount) . testProperty "Invalid UTF-8 ByteString" $ goInvalidBS,
adjustOption (max testCount) . testProperty "Valid UTF-8 ShortByteString" $ goValidSBS,
adjustOption (max testCount) . testProperty "Invalid UTF-8 ShortByteString" $ goInvalidSBS,
testGroup "Regressions" checkRegressions
]
where
goValid :: Property
goValid = forAll arbitrary $
goValidBS :: Property
goValidBS = forAll arbitrary $
\(ValidUtf8 ss) -> (B.isValidUtf8 . foldMap sequenceToBS $ ss) === True
goInvalid :: Property
goInvalid = forAll arbitrary $
goInvalidBS :: Property
goInvalidBS = forAll arbitrary $
\inv -> (B.isValidUtf8 . toByteString $ inv) === False
goValidSBS :: Property
goValidSBS = forAll arbitrary $
\(ValidUtf8 ss) -> (SBS.isValidUtf8 . SBS.toShort . foldMap sequenceToBS $ ss) === True
goInvalidSBS :: Property
goInvalidSBS = forAll arbitrary $
\inv -> (SBS.isValidUtf8 . SBS.toShort . toByteString $ inv) === False
testCount :: QuickCheckTests
testCount = 1000

Expand Down

0 comments on commit c7cba39

Please sign in to comment.