Skip to content

Commit

Permalink
Unicode escape (#354)
Browse files Browse the repository at this point in the history
* [#334] parse and unparse tests

* removed parsing and unparing tests

* [#334] showUnicodeText

* [#334] escaping unicode character as well as regular characters

* [#334] resolved issue with escaping regular unescaped chars

* added tests, but they are not in use

* examples.hs revert to original content

* [#334] changes requested by chshersh
  • Loading branch information
dariodsa authored Dec 27, 2020
1 parent 3a661a1 commit ee5b8fa
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
25 changes: 23 additions & 2 deletions src/Toml/Type/Printer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@ module Toml.Type.Printer
) where

import Data.Bifunctor (first)
import Data.Char (isAscii, ord)
import Data.Coerce (coerce)
import Data.Function (on)
import Data.HashMap.Strict (HashMap)
import Data.List (sortBy)
import Data.List (sortBy, foldl')
import Data.List.NonEmpty (NonEmpty)
import Data.Semigroup (stimes)
import Data.Text (Text)
import Data.Time (ZonedTime, defaultTimeLocale, formatTime)

import Text.Printf (printf)

import Toml.Type.AnyValue (AnyValue (..))
import Toml.Type.Key (Key (..), Piece (..))
import Toml.Type.PrefixTree (PrefixMap, PrefixTree (..))
Expand All @@ -40,6 +43,7 @@ import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Text as Text



{- | Configures the pretty printer.
@since 0.5.0
Expand Down Expand Up @@ -161,7 +165,7 @@ prettyKeyValue options i = mapOrdered (\kv -> [kvText kv]) options . HashMap.toL
valText (Bool b) = Text.toLower $ showText b
valText (Integer n) = showText n
valText (Double d) = showDouble d
valText (Text s) = showText s
valText (Text s) = showTextUnicode s
valText (Zoned z) = showZonedTime z
valText (Local l) = showText l
valText (Day d) = showText d
Expand All @@ -171,6 +175,23 @@ prettyKeyValue options i = mapOrdered (\kv -> [kvText kv]) options . HashMap.toL
showText :: Show a => a -> Text
showText = Text.pack . show


-- | Function encodes all non-ascii characters in TOML defined form using the isAscii function
showTextUnicode :: Text -> Text
showTextUnicode text = Text.pack $ show finalText
where
xss = Text.unpack text
finalText = foldl' (\acc (ch, asciiCh) -> acc ++ getCh ch asciiCh) "" asciiArr

asciiArr = zip xss $ asciiStatus xss

getCh :: Char -> Bool -> String
getCh ch True = [ch] -- it is true ascii character
getCh ch False = printf "\\U%08x" (ord ch) :: String -- it is not true ascii character, it must be encoded

asciiStatus :: String -> [Bool]
asciiStatus = map isAscii

showDouble :: Double -> Text
showDouble d | isInfinite d && d < 0 = "-inf"
| isInfinite d = "inf"
Expand Down
9 changes: 9 additions & 0 deletions test/Test/Toml/Gen.hs
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,13 @@ genUniHex8Color = do
hex <- genDiffHex 8
pure . Text.pack $ "\\U" ++ hex

-- | Generates some unescaped unicode string
genUnicodeChar :: Gen Text
genUnicodeChar = Gen.element
[ "č", "ć", "š", "đ", "ž", "Ö", "ё"
, "в", "ь", "ж", "ю", "ч", "ü", "я"
]

-- | Generates text from different symbols.
genText :: Gen Text
genText = genNotEscape $ fmap Text.concat $ Gen.list (Range.constant 0 256) $ Gen.choice
Expand All @@ -307,8 +314,10 @@ genText = genNotEscape $ fmap Text.concat $ Gen.list (Range.constant 0 256) $ Ge
, genPunctuation
, genUniHex4Color
, genUniHex8Color
--, genUnicodeChar
]


genString :: Gen String
genString = Text.unpack <$> genText

Expand Down

0 comments on commit ee5b8fa

Please sign in to comment.