diff --git a/Data/ByteString/Short.hs b/Data/ByteString/Short.hs index fd3e55118..3fd1aee0a 100644 --- a/Data/ByteString/Short.hs +++ b/Data/ByteString/Short.hs @@ -76,6 +76,9 @@ module Data.ByteString.Short ( -- * Other operations empty, null, length, index, indexMaybe, (!?), + -- ** Encoding validation + isValidUtf8, + -- * Low level conversions -- ** Packing 'Foreign.C.String.CString's and pointers packCString, diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 4e30656a8..c4f66eb76 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -35,6 +35,9 @@ module Data.ByteString.Short.Internal ( -- * Low level operations createFromPtr, copyToPtr, + -- ** Encoding validation + isValidUtf8, + -- * Low level conversions -- ** Packing 'CString's and pointers packCString, @@ -599,6 +602,17 @@ useAsCStringLen bs action = action (buf, l) where l = length bs +-- | /O(n)/ Check whether a 'ShortByteString' represents valid UTF-8. +-- +-- @since 0.11.3.0 +isValidUtf8 :: ShortByteString -> Bool +isValidUtf8 sbs@(SBS ba#) = accursedUnutterablePerformIO $ do + i <- cIsValidUtf8 ba# (fromIntegral (length sbs)) + return $ i /= 0 + +foreign import ccall unsafe "bytestring_is_valid_utf8" cIsValidUtf8 + :: ByteArray# -> CSize -> IO CInt + -- --------------------------------------------------------------------- -- Internal utilities diff --git a/tests/IsValidUtf8.hs b/tests/IsValidUtf8.hs index 2ae2157be..a09f587dc 100644 --- a/tests/IsValidUtf8.hs +++ b/tests/IsValidUtf8.hs @@ -4,6 +4,7 @@ module IsValidUtf8 (testSuite) where import Data.Bits (shiftR, (.&.), shiftL) import Data.ByteString (ByteString) +import qualified Data.ByteString.Short as SBS import qualified Data.ByteString as B import Data.Char (chr, ord) import Data.Word (Word8) @@ -17,17 +18,25 @@ import Test.Tasty.QuickCheck (testProperty, QuickCheckTests) testSuite :: TestTree testSuite = testGroup "UTF-8 validation" $ [ - adjustOption (max testCount) . testProperty "Valid UTF-8" $ goValid, - adjustOption (max testCount) . testProperty "Invalid UTF-8" $ goInvalid, + adjustOption (max testCount) . testProperty "Valid UTF-8 ByteString" $ goValidBS, + adjustOption (max testCount) . testProperty "Invalid UTF-8 ByteString" $ goInvalidBS, + adjustOption (max testCount) . testProperty "Valid UTF-8 ShortByteString" $ goValidSBS, + adjustOption (max testCount) . testProperty "Invalid UTF-8 ShortByteString" $ goInvalidSBS, testGroup "Regressions" checkRegressions ] where - goValid :: Property - goValid = forAll arbitrary $ + goValidBS :: Property + goValidBS = forAll arbitrary $ \(ValidUtf8 ss) -> (B.isValidUtf8 . foldMap sequenceToBS $ ss) === True - goInvalid :: Property - goInvalid = forAll arbitrary $ + goInvalidBS :: Property + goInvalidBS = forAll arbitrary $ \inv -> (B.isValidUtf8 . toByteString $ inv) === False + goValidSBS :: Property + goValidSBS = forAll arbitrary $ + \(ValidUtf8 ss) -> (SBS.isValidUtf8 . SBS.toShort . foldMap sequenceToBS $ ss) === True + goInvalidSBS :: Property + goInvalidSBS = forAll arbitrary $ + \inv -> (SBS.isValidUtf8 . SBS.toShort . toByteString $ inv) === False testCount :: QuickCheckTests testCount = 1000