forked from haskell/haskell-language-server
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPragmas.hs
441 lines (417 loc) · 19.3 KB
/
Pragmas.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
{-# LANGUAGE CPP #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE MultiWayIf #-}
module Development.IDE.Spans.Pragmas
( NextPragmaInfo(..)
, LineSplitTextEdits(..)
, getNextPragmaInfo
, insertNewPragma ) where
import Data.Bits (Bits (setBit))
import Data.Function ((&))
import qualified Data.List as List
import qualified Data.Maybe as Maybe
import Data.Text (Text, pack)
import qualified Data.Text as Text
import Development.IDE (srcSpanToRange)
import Development.IDE.GHC.Compat
import Development.IDE.GHC.Compat.Util
import GHC.LanguageExtensions.Type (Extension)
import qualified Language.LSP.Types as LSP
getNextPragmaInfo :: DynFlags -> Maybe Text -> NextPragmaInfo
getNextPragmaInfo dynFlags sourceText =
if | Just sourceText <- sourceText
, let sourceStringBuffer = stringToStringBuffer (Text.unpack sourceText)
, POk _ parserState <- parsePreDecl dynFlags sourceStringBuffer
-> case parserState of
ParserStateNotDone{ nextPragma } -> nextPragma
ParserStateDone{ nextPragma } -> nextPragma
| otherwise
-> NextPragmaInfo 0 Nothing
insertNewPragma :: NextPragmaInfo -> Extension -> LSP.TextEdit
insertNewPragma (NextPragmaInfo _ (Just (LineSplitTextEdits ins _))) newPragma = ins { LSP._newText = "{-# LANGUAGE " <> pack (show newPragma) <> " #-}\n" } :: LSP.TextEdit
insertNewPragma (NextPragmaInfo nextPragmaLine _) newPragma = LSP.TextEdit pragmaInsertRange $ "{-# LANGUAGE " <> pack (show newPragma) <> " #-}\n"
where
pragmaInsertPosition = LSP.Position (fromIntegral nextPragmaLine) 0
pragmaInsertRange = LSP.Range pragmaInsertPosition pragmaInsertPosition
-- Pre-declaration comments parser -----------------------------------------------------
-- | Each mode represents the "strongest" thing we've seen so far.
-- From strongest to weakest:
-- ModePragma, ModeHaddock, ModeComment, ModeInitial
data Mode = ModePragma | ModeHaddock | ModeComment | ModeInitial deriving Show
data LineSplitTextEdits = LineSplitTextEdits {
lineSplitInsertTextEdit :: !LSP.TextEdit,
lineSplitDeleteTextEdit :: !LSP.TextEdit
} deriving Show
data NextPragmaInfo = NextPragmaInfo {
nextPragmaLine :: !Int,
lineSplitTextEdits :: !(Maybe LineSplitTextEdits)
} deriving Show
data ParserState
= ParserStateNotDone
{ nextPragma :: !NextPragmaInfo
, mode :: !Mode
, lastBlockCommentLine :: !Int
, lastPragmaLine :: !Int
, isLastTokenHash :: !Bool
}
| ParserStateDone { nextPragma :: NextPragmaInfo }
deriving Show
isPragma :: String -> Bool
isPragma = List.isPrefixOf "{-#"
isDownwardBlockHaddock :: String -> Bool
isDownwardBlockHaddock = List.isPrefixOf "{-|"
isDownwardLineHaddock :: String -> Bool
isDownwardLineHaddock = List.isPrefixOf "-- |"
-- need to merge tokens that are deleted/inserted into one TextEdit each
-- to work around some weird TextEdits applied in reversed order issue
updateLineSplitTextEdits :: LSP.Range -> String -> Maybe LineSplitTextEdits -> LineSplitTextEdits
updateLineSplitTextEdits tokenRange tokenString prevLineSplitTextEdits
| Just prevLineSplitTextEdits <- prevLineSplitTextEdits
, let LineSplitTextEdits
{ lineSplitInsertTextEdit = prevInsertTextEdit
, lineSplitDeleteTextEdit = prevDeleteTextEdit } = prevLineSplitTextEdits
, let LSP.TextEdit prevInsertRange prevInsertText = prevInsertTextEdit
, let LSP.TextEdit prevDeleteRange _prevDeleteText = prevDeleteTextEdit
, let LSP.Range prevInsertStartPos prevInsertEndPos = prevInsertRange
, let LSP.Position _prevInsertStartLine _prevInsertStartCol = prevInsertStartPos
, let LSP.Position _prevInsertEndLine _prevInsertEndCol = prevInsertEndPos
, let LSP.Range prevDeleteStartPos prevDeleteEndPos = prevDeleteRange
, let LSP.Position _prevDeleteStartLine _prevDeleteStartCol = prevDeleteStartPos
, let LSP.Position _prevDeleteEndLine prevDeleteEndCol = prevDeleteEndPos
, let currInsertRange = prevInsertRange
, let currInsertText =
Text.init prevInsertText
<> Text.replicate (fromIntegral $ startCol - prevDeleteEndCol) " "
<> Text.pack (List.take newLineCol tokenString)
<> "\n"
, let currInsertTextEdit = LSP.TextEdit currInsertRange currInsertText
, let currDeleteStartPos = prevDeleteStartPos
, let currDeleteEndPos = LSP.Position endLine endCol
, let currDeleteRange = LSP.Range currDeleteStartPos currDeleteEndPos
, let currDeleteTextEdit = LSP.TextEdit currDeleteRange ""
= LineSplitTextEdits currInsertTextEdit currDeleteTextEdit
| otherwise
, let LSP.Range startPos _ = tokenRange
, let deleteTextEdit = LSP.TextEdit (LSP.Range startPos startPos{ LSP._character = startCol + fromIntegral newLineCol }) ""
, let insertPosition = LSP.Position (startLine + 1) 0
, let insertRange = LSP.Range insertPosition insertPosition
, let insertText = Text.pack (List.take newLineCol tokenString) <> "\n"
, let insertTextEdit = LSP.TextEdit insertRange insertText
= LineSplitTextEdits insertTextEdit deleteTextEdit
where
LSP.Range (LSP.Position startLine startCol) (LSP.Position endLine endCol) = tokenRange
newLineCol = Maybe.fromMaybe (length tokenString) (List.elemIndex '\n' tokenString)
-- ITvarsym "#" after a block comment is a parse error so we don't need to worry about it
updateParserState :: Token -> LSP.Range -> ParserState -> ParserState
updateParserState token range prevParserState
| ParserStateNotDone
{ nextPragma = prevNextPragma@NextPragmaInfo{ lineSplitTextEdits = prevLineSplitTextEdits }
, mode = prevMode
, lastBlockCommentLine
, lastPragmaLine
} <- prevParserState
, let defaultParserState = prevParserState { isLastTokenHash = False }
, let LSP.Range (LSP.Position (fromIntegral -> startLine) _) (LSP.Position (fromIntegral -> endLine) _) = range
= case prevMode of
ModeInitial ->
case token of
ITvarsym "#" -> defaultParserState{ isLastTokenHash = True }
#if !MIN_VERSION_ghc(9,2,0)
ITlineComment s
#else
ITlineComment s _
#endif
| isDownwardLineHaddock s -> defaultParserState{ mode = ModeHaddock }
| otherwise ->
defaultParserState
{ nextPragma = NextPragmaInfo (endLine + 1) Nothing
, mode = ModeComment }
#if !MIN_VERSION_ghc(9,2,0)
ITblockComment s
#else
ITblockComment s _
#endif
| isPragma s ->
defaultParserState
{ nextPragma = NextPragmaInfo (endLine + 1) Nothing
, mode = ModePragma
, lastPragmaLine = endLine }
| isDownwardBlockHaddock s -> defaultParserState{ mode = ModeHaddock }
| otherwise ->
defaultParserState
{ nextPragma = NextPragmaInfo (endLine + 1) Nothing
, mode = ModeComment
, lastBlockCommentLine = endLine }
_ -> ParserStateDone prevNextPragma
ModeComment ->
case token of
ITvarsym "#" -> defaultParserState{ isLastTokenHash = True }
#if !MIN_VERSION_ghc(9,2,0)
ITlineComment s
#else
ITlineComment s _
#endif
| hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits
, let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| isDownwardLineHaddock s
, lastBlockCommentLine == startLine
, let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing ->
defaultParserState
{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits }
, mode = ModeHaddock }
| otherwise ->
defaultParserState { nextPragma = NextPragmaInfo (endLine + 1) Nothing }
#if !MIN_VERSION_ghc(9,2,0)
ITblockComment s
#else
ITblockComment s _
#endif
| isPragma s ->
defaultParserState
{ nextPragma = NextPragmaInfo (endLine + 1) Nothing
, mode = ModePragma
, lastPragmaLine = endLine }
| hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits
, let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| isDownwardBlockHaddock s
, lastBlockCommentLine == startLine
, let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing ->
defaultParserState{
nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits },
mode = ModeHaddock }
| otherwise ->
defaultParserState{
nextPragma = NextPragmaInfo (endLine + 1) Nothing,
lastBlockCommentLine = endLine }
_ -> ParserStateDone prevNextPragma
ModeHaddock ->
case token of
ITvarsym "#" ->
defaultParserState{ isLastTokenHash = True }
#if !MIN_VERSION_ghc(9,2,0)
ITlineComment s
#else
ITlineComment s _
#endif
| hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits
, let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| otherwise ->
defaultParserState
#if !MIN_VERSION_ghc(9,2,0)
ITblockComment s
#else
ITblockComment s _
#endif
| isPragma s ->
defaultParserState{
nextPragma = NextPragmaInfo (endLine + 1) Nothing,
mode = ModePragma,
lastPragmaLine = endLine }
| hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits
, let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| otherwise -> defaultParserState{ lastBlockCommentLine = endLine }
_ -> ParserStateDone prevNextPragma
ModePragma ->
case token of
ITvarsym "#" -> defaultParserState{ isLastTokenHash = True }
#if !MIN_VERSION_ghc(9,2,0)
ITlineComment s
#else
ITlineComment s _
#endif
| hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits
, let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| isDownwardLineHaddock s
, lastPragmaLine == startLine
, let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| otherwise ->
defaultParserState
#if !MIN_VERSION_ghc(9,2,0)
ITblockComment s
#else
ITblockComment s _
#endif
| isPragma s ->
defaultParserState{ nextPragma = NextPragmaInfo (endLine + 1) Nothing, lastPragmaLine = endLine }
| hasDeleteStartedOnSameLine startLine prevLineSplitTextEdits
, let currLineSplitTextEdits = updateLineSplitTextEdits range s prevLineSplitTextEdits ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| isDownwardBlockHaddock s
, lastPragmaLine == startLine
, let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| lastPragmaLine == startLine && startLine < endLine
, let currLineSplitTextEdits = updateLineSplitTextEdits range s Nothing ->
defaultParserState{ nextPragma = prevNextPragma{ lineSplitTextEdits = Just currLineSplitTextEdits } }
| otherwise ->
defaultParserState{ lastBlockCommentLine = endLine }
_ -> ParserStateDone prevNextPragma
| otherwise = prevParserState
where
hasDeleteStartedOnSameLine :: Int -> Maybe LineSplitTextEdits -> Bool
hasDeleteStartedOnSameLine line lineSplitTextEdits
| Just lineSplitTextEdits <- lineSplitTextEdits
, let LineSplitTextEdits{ lineSplitDeleteTextEdit } = lineSplitTextEdits
, let LSP.TextEdit deleteRange _ = lineSplitDeleteTextEdit
, let LSP.Range _ deleteEndPosition = deleteRange
, let LSP.Position deleteEndLine _ = deleteEndPosition
= fromIntegral deleteEndLine == line
| otherwise = False
lexUntilNextLineIncl :: P (Located Token)
lexUntilNextLineIncl = do
PState{ last_loc } <- getPState
#if MIN_VERSION_ghc(9,0,0)
let PsSpan{ psRealSpan = lastRealSrcSpan } = last_loc
#else
let lastRealSrcSpan = last_loc
#endif
let prevEndLine = lastRealSrcSpan & realSrcSpanEnd & srcLocLine
locatedToken@(L srcSpan _token) <- lexer False pure
if | RealSrcLoc currEndRealSrcLoc _ <- srcSpan & srcSpanEnd
, let currEndLine = currEndRealSrcLoc & srcLocLine
-> if prevEndLine < currEndLine then
pure locatedToken
else lexUntilNextLineIncl
| otherwise -> pure locatedToken
dropWhileStringBuffer :: (Char -> Bool) -> StringBuffer -> StringBuffer
dropWhileStringBuffer predicate buffer
| atEnd buffer = buffer
| let (c, remainingBuffer) = nextChar buffer
= if predicate c then
dropWhileStringBuffer predicate remainingBuffer
else
buffer
isHorizontalSpace :: Char -> Bool
isHorizontalSpace c = c == ' ' || c == '\t'
data ShebangParserState = ShebangParserState {
nextPragmaLine :: !Int,
newlineCount :: !Int,
prevCharIsHash :: !Bool,
buffer :: !StringBuffer
}
-- lexer seems to ignore shebangs completely hence this function
parseShebangs :: ShebangParserState -> ShebangParserState
parseShebangs prev@ShebangParserState{ newlineCount = prevNewlineCount, prevCharIsHash, buffer = prevBuffer }
| atEnd prevBuffer
= prev
| let (c, currBuffer) = nextChar (dropWhileStringBuffer isHorizontalSpace prevBuffer)
= if c == '#' then
parseShebangs prev{ prevCharIsHash = True, buffer = currBuffer }
else if c == '!' && prevCharIsHash then
parseShebangs prev{ nextPragmaLine = prevNewlineCount + 1, buffer = dropWhileStringBuffer (/= '\n') currBuffer }
else if c == '\n' then
parseShebangs prev{ newlineCount = prevNewlineCount + 1, buffer = currBuffer }
else
prev
-- | Parses blank lines, comments, haddock comments ("-- |"), lines that start
-- with "#!", lines that start with "#", pragma lines using the GHC API lexer.
-- When it doesn't find one of these things then it's assumed that we've found
-- a declaration, end-of-file, or a ghc parse error, and the parser stops.
-- Shebangs are parsed separately than the rest becaues the lexer ignores them.
--
-- The reason for custom parsing instead of using annotations, or turning on/off
-- extensions in the dynflags is because there are a number of extensions that
-- while removing parse errors, can also introduce them. Hence, there are
-- cases where the file cannot be parsed without error when we want to insert
-- extension (and other) pragmas. The compiler (8.10.7) doesn't include
-- annotations in its failure state. So if the compiler someday returns
-- annotation or equivalent information when it fails then we can replace this
-- with that.
--
-- The reason for using the compiler lexer is to reduce duplicated
-- implementation, particularly nested comments, but in retrospect this comes
-- with the disadvantage of the logic feeling more complex, and not being able
-- to handle whitespace directly.
--
-- The parser keeps track of state in order to place the next pragma line
-- according to some rules:
--
-- - Ignore lines starting with '#' except for shebangs.
-- - If pragmas exist place after last pragma
-- - else if haddock comments exist:
-- - If comments exist place after last comment
-- - else if shebangs exist place after last shebang
-- - else place at first line
-- - else if comments exist place after last comment
-- - else if shebangs exist place after last shebang
-- - else place at first line
--
-- Additionally the parser keeps track of information to be able to insert
-- pragmas inbetween lines.
--
-- For example the parser keeps track of information so that
--
-- > {- block comment -} -- | haddock
--
-- can become
--
-- > {- block comment -}
-- > {-# pragma #-}
-- > -- | haddock
--
-- This information does not respect the type of whitespace, because the lexer
-- strips whitespace and gives locations.
--
-- In this example the tabs are converted to spaces in the TextEdits:
--
-- > {- block comment -}<space><tab><tab><space>-- | haddock
--
parsePreDecl :: DynFlags -> StringBuffer -> ParseResult ParserState
parsePreDecl dynFlags buffer = unP (go initialParserState) pState
where
initialShebangParserState = ShebangParserState{
nextPragmaLine = 0,
newlineCount = 0,
prevCharIsHash = False,
buffer = buffer }
ShebangParserState{ nextPragmaLine } = parseShebangs initialShebangParserState
pState = mkLexerPState dynFlags buffer
initialParserState = ParserStateNotDone (NextPragmaInfo nextPragmaLine Nothing) ModeInitial (-1) (-1) False
go :: ParserState -> P ParserState
go prevParserState =
case prevParserState of
ParserStateDone _ -> pure prevParserState
ParserStateNotDone{..} -> do
L srcSpan token <-
if isLastTokenHash then
lexUntilNextLineIncl
else
lexer False pure
case srcSpanToRange srcSpan of
Just range -> go (updateParserState token range prevParserState)
Nothing -> pure prevParserState
mkLexerPState :: DynFlags -> StringBuffer -> PState
mkLexerPState dynFlags stringBuffer =
let
startRealSrcLoc = mkRealSrcLoc "asdf" 1 1
updateDynFlags = flip gopt_unset Opt_Haddock . flip gopt_set Opt_KeepRawTokenStream
finalDynFlags = updateDynFlags dynFlags
#if !MIN_VERSION_ghc(8,8,1)
pState = mkPState finalDynFlags stringBuffer startRealSrcLoc
finalPState = pState{ use_pos_prags = False }
#elif !MIN_VERSION_ghc(8,10,1)
mkLexerParserFlags =
mkParserFlags'
<$> warningFlags
<*> extensionFlags
<*> homeUnitId_
<*> safeImportsOn
<*> gopt Opt_Haddock
<*> gopt Opt_KeepRawTokenStream
<*> const False
finalPState = mkPStatePure (mkLexerParserFlags finalDynFlags) stringBuffer startRealSrcLoc
#else
pState = initParserState (initParserOpts finalDynFlags) stringBuffer startRealSrcLoc
PState{ options = pStateOptions } = pState
finalExtBitsMap = setBit (pExtsBitmap pStateOptions) (fromEnum UsePosPragsBit)
finalPStateOptions = pStateOptions{ pExtsBitmap = finalExtBitsMap }
finalPState = pState{ options = finalPStateOptions }
#endif
in
finalPState