Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unescape printable characters #3140

Merged
merged 9 commits into from
Sep 15, 2022
Merged
6 changes: 3 additions & 3 deletions ghcide/src/Development/IDE/GHC/Compat/Outputable.hs
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ import qualified Outputable as Out
import SrcLoc
#endif
#if MIN_VERSION_ghc(9,3,0)
import GHC.Utils.Logger
import GHC.Driver.Config.Diagnostic
import Data.Maybe
import Data.Maybe
import GHC.Driver.Config.Diagnostic
import GHC.Utils.Logger
#endif

-- | A compatible function to print `Outputable` instances
Expand Down
14 changes: 11 additions & 3 deletions ghcide/src/Development/IDE/GHC/Util.hs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ import GHC.IO.Exception
import GHC.IO.Handle.Internals
import GHC.IO.Handle.Types
import GHC.Stack
import Ide.PluginUtils (unescape)
import System.Environment.Blank (getEnvDefault)
import System.FilePath
import System.IO.Unsafe
Expand Down Expand Up @@ -287,10 +288,17 @@ instance Outputable SDoc where
#endif

-- | Print a GHC value in `defaultUserStyle` without unique symbols.
-- It uses `showSDocUnsafe` with `unsafeGlobalDynFlags` internally.
--
-- This is the most common print utility, will print with a user-friendly style like: `a_a4ME` as `a`.
-- This is the most common print utility.
-- It will do something additionally compared to what the 'Outputable' instance does.
--
-- It internal using `showSDocUnsafe` with `unsafeGlobalDynFlags`.
-- 1. print with a user-friendly style: `a_a4ME` as `a`.
-- 2. unescape escape sequences of printable unicode characters within a pair of double quotes
printOutputable :: Outputable a => a -> T.Text
printOutputable = T.pack . printWithoutUniques
printOutputable =
-- IfaceTyLit from GHC.Iface.Type implements Outputable with 'show'.
-- Showing a String escapes non-ascii printable characters. We unescape it here.
-- More discussion at https://github.com/haskell/haskell-language-server/issues/3115.
unescape . T.pack . printWithoutUniques
{-# INLINE printOutputable #-}
2 changes: 2 additions & 0 deletions hls-plugin-api/hls-plugin-api.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ library
, text
, transformers
, unordered-containers
, megaparsec > 9

if os(windows)
build-depends: Win32
Expand Down Expand Up @@ -91,4 +92,5 @@ test-suite tests
, tasty
, tasty-hunit
, tasty-rerun
, text
, lsp-types
39 changes: 38 additions & 1 deletion hls-plugin-api/src/Ide/PluginUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module Ide.PluginUtils
handleMaybe,
handleMaybeM,
throwPluginError,
unescape,
)
where

Expand All @@ -43,10 +44,12 @@ import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Data.Algorithm.Diff
import Data.Algorithm.DiffOutput
import Data.Bifunctor (Bifunctor (first))
import Data.Char (isPrint, showLitChar)
import Data.Functor (void)
import qualified Data.HashMap.Strict as H
import Data.List (find)
import Data.String (IsString (fromString))
import qualified Data.Text as T
import Data.Void (Void)
import Ide.Plugin.Config
import Ide.Plugin.Properties
import Ide.Types
Expand All @@ -57,6 +60,9 @@ import Language.LSP.Types hiding
SemanticTokensEdit (_start))
import qualified Language.LSP.Types as J
import Language.LSP.Types.Capabilities
import qualified Text.Megaparsec as P
import qualified Text.Megaparsec.Char as P
import qualified Text.Megaparsec.Char.Lexer as P

-- ---------------------------------------------------------------------

Expand Down Expand Up @@ -255,3 +261,34 @@ pluginResponse :: Monad m => ExceptT String m a -> m (Either ResponseError a)
pluginResponse =
fmap (first (\msg -> ResponseError InternalError (fromString msg) Nothing))
. runExceptT

-- ---------------------------------------------------------------------

type TextParser = P.Parsec Void T.Text

-- | Unescape printable escape sequences within double quotes.
-- This is useful if you have to call 'show' indirectly, and it escapes some characters which you would prefer to
-- display as is.
unescape :: T.Text -> T.Text
unescape input =
case P.runParser escapedTextParser "inline" input of
Left _ -> input
Right strs -> T.pack strs

-- | Parser for a string that contains double quotes. Returns unescaped string.
escapedTextParser :: TextParser String
escapedTextParser = concat <$> P.many (outsideStringLiteral P.<|> stringLiteral)
where
outsideStringLiteral :: TextParser String
outsideStringLiteral = P.someTill (P.anySingleBut '"') (P.lookAhead (void (P.char '"') P.<|> P.eof))

stringLiteral :: TextParser String
stringLiteral = do
inside <- P.char '"' >> P.manyTill P.charLiteral (P.char '"')
let f '"' = "\\\"" -- double quote should still be escaped
-- Despite the docs, 'showLitChar' and 'showLitString' from 'Data.Char' DOES ESCAPE unicode printable
-- characters. So we need to call 'isPrint' from 'Data.Char' manually.
f ch = if isPrint ch then [ch] else showLitChar ch ""
inside' = concatMap f inside

pure $ "\"" <> inside' <> "\""
26 changes: 24 additions & 2 deletions hls-plugin-api/test/Ide/PluginUtilsTest.hs
Original file line number Diff line number Diff line change
@@ -1,13 +1,35 @@
{-# LANGUAGE OverloadedStrings #-}

module Ide.PluginUtilsTest
( tests
) where

import Ide.PluginUtils (positionInRange)
import Data.Char (isPrint)
import qualified Data.Text as T
import Ide.PluginUtils (positionInRange, unescape)
import Language.LSP.Types (Position (Position), Range (Range))
import Test.Tasty
import Test.Tasty.HUnit

tests :: TestTree
tests = testGroup "PluginUtils"
[
[ unescapeTest
]

unescapeTest :: TestTree
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯

unescapeTest = testGroup "unescape"
[ testCase "no double quote" $
unescape "hello世界" @?= "hello世界"
, testCase "whole string quoted" $
unescape "\"hello\\19990\\30028\"" @?= "\"hello世界\""
, testCase "text before quotes should not be unescaped" $
unescape "\\19990a\"hello\\30028\"" @?= "\\19990a\"hello界\""
, testCase "some text after quotes" $
unescape "\"hello\\19990\\30028\"abc" @?= "\"hello世界\"abc"
, testCase "many pairs of quote" $
unescape "oo\"hello\\19990\\30028\"abc\"\1087\1088\1080\1074\1077\1090\"hh" @?= "oo\"hello世界\"abc\"привет\"hh"
, testCase "double quote itself should not be unescaped" $
unescape "\"\\\"\\19990o\"" @?= "\"\\\"世o\""
, testCase "control characters should not be escaped" $
unescape "\"\\n\\t\"" @?= "\"\\n\\t\""
]