Skip to content

Commit 2bd1863

Browse files
authored
unescape printable characters (#3140)
* unescape printable characters * add comments * add tests * improve the parser * simplify code & add more docs
1 parent 1568ce7 commit 2bd1863

File tree

5 files changed

+78
-9
lines changed

5 files changed

+78
-9
lines changed

Diff for: ghcide/src/Development/IDE/GHC/Compat/Outputable.hs

+3-3
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ import qualified Outputable as Out
7979
import SrcLoc
8080
#endif
8181
#if MIN_VERSION_ghc(9,3,0)
82-
import GHC.Utils.Logger
83-
import GHC.Driver.Config.Diagnostic
84-
import Data.Maybe
82+
import Data.Maybe
83+
import GHC.Driver.Config.Diagnostic
84+
import GHC.Utils.Logger
8585
#endif
8686

8787
-- | A compatible function to print `Outputable` instances

Diff for: ghcide/src/Development/IDE/GHC/Util.hs

+11-3
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ import GHC.IO.Exception
8181
import GHC.IO.Handle.Internals
8282
import GHC.IO.Handle.Types
8383
import GHC.Stack
84+
import Ide.PluginUtils (unescape)
8485
import System.Environment.Blank (getEnvDefault)
8586
import System.FilePath
8687
import System.IO.Unsafe
@@ -287,10 +288,17 @@ instance Outputable SDoc where
287288
#endif
288289

289290
-- | Print a GHC value in `defaultUserStyle` without unique symbols.
291+
-- It uses `showSDocUnsafe` with `unsafeGlobalDynFlags` internally.
290292
--
291-
-- This is the most common print utility, will print with a user-friendly style like: `a_a4ME` as `a`.
293+
-- This is the most common print utility.
294+
-- It will do something additionally compared to what the 'Outputable' instance does.
292295
--
293-
-- It internal using `showSDocUnsafe` with `unsafeGlobalDynFlags`.
296+
-- 1. print with a user-friendly style: `a_a4ME` as `a`.
297+
-- 2. unescape escape sequences of printable unicode characters within a pair of double quotes
294298
printOutputable :: Outputable a => a -> T.Text
295-
printOutputable = T.pack . printWithoutUniques
299+
printOutputable =
300+
-- IfaceTyLit from GHC.Iface.Type implements Outputable with 'show'.
301+
-- Showing a String escapes non-ascii printable characters. We unescape it here.
302+
-- More discussion at https://github.com/haskell/haskell-language-server/issues/3115.
303+
unescape . T.pack . printWithoutUniques
296304
{-# INLINE printOutputable #-}

Diff for: hls-plugin-api/hls-plugin-api.cabal

+2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ library
5656
, text
5757
, transformers
5858
, unordered-containers
59+
, megaparsec > 9
5960

6061
if os(windows)
6162
build-depends: Win32
@@ -91,4 +92,5 @@ test-suite tests
9192
, tasty
9293
, tasty-hunit
9394
, tasty-rerun
95+
, text
9496
, lsp-types

Diff for: hls-plugin-api/src/Ide/PluginUtils.hs

+38-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ module Ide.PluginUtils
3232
handleMaybe,
3333
handleMaybeM,
3434
throwPluginError,
35+
unescape,
3536
)
3637
where
3738

@@ -43,10 +44,12 @@ import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
4344
import Data.Algorithm.Diff
4445
import Data.Algorithm.DiffOutput
4546
import Data.Bifunctor (Bifunctor (first))
47+
import Data.Char (isPrint, showLitChar)
48+
import Data.Functor (void)
4649
import qualified Data.HashMap.Strict as H
47-
import Data.List (find)
4850
import Data.String (IsString (fromString))
4951
import qualified Data.Text as T
52+
import Data.Void (Void)
5053
import Ide.Plugin.Config
5154
import Ide.Plugin.Properties
5255
import Ide.Types
@@ -57,6 +60,9 @@ import Language.LSP.Types hiding
5760
SemanticTokensEdit (_start))
5861
import qualified Language.LSP.Types as J
5962
import Language.LSP.Types.Capabilities
63+
import qualified Text.Megaparsec as P
64+
import qualified Text.Megaparsec.Char as P
65+
import qualified Text.Megaparsec.Char.Lexer as P
6066

6167
-- ---------------------------------------------------------------------
6268

@@ -255,3 +261,34 @@ pluginResponse :: Monad m => ExceptT String m a -> m (Either ResponseError a)
255261
pluginResponse =
256262
fmap (first (\msg -> ResponseError InternalError (fromString msg) Nothing))
257263
. runExceptT
264+
265+
-- ---------------------------------------------------------------------
266+
267+
type TextParser = P.Parsec Void T.Text
268+
269+
-- | Unescape printable escape sequences within double quotes.
270+
-- This is useful if you have to call 'show' indirectly, and it escapes some characters which you would prefer to
271+
-- display as is.
272+
unescape :: T.Text -> T.Text
273+
unescape input =
274+
case P.runParser escapedTextParser "inline" input of
275+
Left _ -> input
276+
Right strs -> T.pack strs
277+
278+
-- | Parser for a string that contains double quotes. Returns unescaped string.
279+
escapedTextParser :: TextParser String
280+
escapedTextParser = concat <$> P.many (outsideStringLiteral P.<|> stringLiteral)
281+
where
282+
outsideStringLiteral :: TextParser String
283+
outsideStringLiteral = P.someTill (P.anySingleBut '"') (P.lookAhead (void (P.char '"') P.<|> P.eof))
284+
285+
stringLiteral :: TextParser String
286+
stringLiteral = do
287+
inside <- P.char '"' >> P.manyTill P.charLiteral (P.char '"')
288+
let f '"' = "\\\"" -- double quote should still be escaped
289+
-- Despite the docs, 'showLitChar' and 'showLitString' from 'Data.Char' DOES ESCAPE unicode printable
290+
-- characters. So we need to call 'isPrint' from 'Data.Char' manually.
291+
f ch = if isPrint ch then [ch] else showLitChar ch ""
292+
inside' = concatMap f inside
293+
294+
pure $ "\"" <> inside' <> "\""

Diff for: hls-plugin-api/test/Ide/PluginUtilsTest.hs

+24-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,35 @@
1+
{-# LANGUAGE OverloadedStrings #-}
2+
13
module Ide.PluginUtilsTest
24
( tests
35
) where
46

5-
import Ide.PluginUtils (positionInRange)
7+
import Data.Char (isPrint)
8+
import qualified Data.Text as T
9+
import Ide.PluginUtils (positionInRange, unescape)
610
import Language.LSP.Types (Position (Position), Range (Range))
711
import Test.Tasty
812
import Test.Tasty.HUnit
913

1014
tests :: TestTree
1115
tests = testGroup "PluginUtils"
12-
[
16+
[ unescapeTest
17+
]
18+
19+
unescapeTest :: TestTree
20+
unescapeTest = testGroup "unescape"
21+
[ testCase "no double quote" $
22+
unescape "hello世界" @?= "hello世界"
23+
, testCase "whole string quoted" $
24+
unescape "\"hello\\19990\\30028\"" @?= "\"hello世界\""
25+
, testCase "text before quotes should not be unescaped" $
26+
unescape "\\19990a\"hello\\30028\"" @?= "\\19990a\"hello界\""
27+
, testCase "some text after quotes" $
28+
unescape "\"hello\\19990\\30028\"abc" @?= "\"hello世界\"abc"
29+
, testCase "many pairs of quote" $
30+
unescape "oo\"hello\\19990\\30028\"abc\"\1087\1088\1080\1074\1077\1090\"hh" @?= "oo\"hello世界\"abc\"привет\"hh"
31+
, testCase "double quote itself should not be unescaped" $
32+
unescape "\"\\\"\\19990o\"" @?= "\"\\\"世o\""
33+
, testCase "control characters should not be escaped" $
34+
unescape "\"\\n\\t\"" @?= "\"\\n\\t\""
1335
]

0 commit comments

Comments
 (0)