@@ -15,6 +15,13 @@ hljs.debugMode();
15
15
*/
16
16
const expBacktrackingCache = { } ;
17
17
18
+ /**
19
+ * A map for a regex pattern to whether or not it it vulnerable to polynomial backtracking.
20
+ *
21
+ * @type {Record<string, boolean> }
22
+ */
23
+ const polyBacktrackingCache = { } ;
24
+
18
25
function retrieveRules ( language , { name } ) {
19
26
// first we need to get the language compiled so we have
20
27
// access to the raw regex
@@ -56,7 +63,7 @@ function testLanguage(languageName) {
56
63
// });
57
64
// });
58
65
59
- it ( `has ${ rules . length } regex matchers` , ( ) => { } ) ;
66
+ it ( `have ${ rules . length } regex matchers` , ( ) => { } ) ;
60
67
61
68
it ( 'should not use octal escapes' , function ( ) {
62
69
forEachPattern ( rules , ( { ast, rulePath, reportError } ) => {
@@ -194,7 +201,194 @@ function testLanguage(languageName) {
194
201
expBacktrackingCache [ patternStr ] = false ;
195
202
} ) ;
196
203
} ) ;
204
+ it ( 'should not cause polynomial backtracking' , function ( ) {
205
+ forEachPattern ( rules , ( { pattern, ast, rulePath, reportError } ) => {
206
+ const patternStr = String ( pattern ) ;
207
+ if ( polyBacktrackingCache [ patternStr ] === false ) {
208
+ // we know that the pattern won't cause poly backtracking because we checked before
209
+ return ;
210
+ }
211
+
212
+ const EMPTY = ast . flags . unicode ? CharSet . empty ( 0x10FFFF ) : CharSet . empty ( 0xFFFF ) ;
213
+
214
+ /**
215
+ * @param {Node } node
216
+ * @returns {CharSet }
217
+ */
218
+ function toCharSet ( node ) {
219
+ switch ( node . type ) {
220
+ case "Alternative" : {
221
+ if ( node . elements . length === 1 ) {
222
+ return toCharSet ( node . elements [ 0 ] ) ;
223
+ }
224
+ return EMPTY ;
225
+ }
226
+ case "CapturingGroup" :
227
+ case "Group" : {
228
+ let total = EMPTY ;
229
+ for ( const item of node . alternatives ) {
230
+ total = total . union ( toCharSet ( item ) ) ;
231
+ }
232
+ return total ;
233
+ }
234
+ case "Character" :
235
+ return JS . createCharSet ( [ node . value ] , ast . flags ) ;
236
+ case "CharacterClass" : {
237
+ const value = JS . createCharSet ( node . elements . map ( x => {
238
+ if ( x . type === "CharacterSet" ) {
239
+ return x ;
240
+ } else if ( x . type === "Character" ) {
241
+ return x . value ;
242
+ } else {
243
+ return { min : x . min . value , max : x . max . value } ;
244
+ }
245
+ } ) , ast . flags ) ;
246
+ if ( node . negate ) {
247
+ return value . negate ( ) ;
248
+ } else {
249
+ return value ;
250
+ }
251
+ }
252
+ case "CharacterSet" :
253
+ return JS . createCharSet ( [ node ] , ast . flags ) ;
254
+
255
+ default :
256
+ return EMPTY ;
257
+ }
258
+ }
259
+
260
+ /**
261
+ * @param {Element } from
262
+ * @returns {Element | null }
263
+ */
264
+ function getAfter ( from ) {
265
+ const parent = from . parent ;
266
+ if ( parent . type === "Quantifier" ) {
267
+ return getAfter ( parent ) ;
268
+ } else if ( parent . type === "Alternative" ) {
269
+ const index = parent . elements . indexOf ( from ) ;
270
+ const after = parent . elements [ index + 1 ] ;
271
+ if ( after ) {
272
+ return after ;
273
+ } else {
274
+ const grandParent = parent . parent ;
275
+ if ( grandParent . type === "Pattern" ) {
276
+ return null ;
277
+ } else {
278
+ return getAfter ( grandParent ) ;
279
+ }
280
+ }
281
+ } else {
282
+ throw Error ( "Unreachable" ) ;
283
+ }
284
+ }
285
+
286
+ visitRegExpAST ( ast . pattern , {
287
+ onQuantifierLeave ( node ) {
288
+ if ( node . max !== Infinity ) {
289
+ return ;
290
+ }
291
+ const char = toCharSet ( node . element ) ;
292
+ tryReachUntil ( getAfter ( node ) , char , null ) ;
293
+
294
+ /**
295
+ * @param {Quantifier } quantifier
296
+ * @param {CharSet } char
297
+ */
298
+ function assertNoPoly ( quantifier , char ) {
299
+ if ( quantifier . max === Infinity ) {
300
+ const qChar = toCharSet ( quantifier . element ) ;
301
+ if ( qChar && ! qChar . isDisjointWith ( char ) ) {
302
+ const intersection = qChar . intersect ( char ) ;
303
+ const literal = JS . toLiteral ( {
304
+ type : "Concatenation" ,
305
+ elements : [
306
+ { type : "CharacterClass" , characters : intersection }
307
+ ]
308
+ } )
309
+ const lang = `/${ literal . source } /${ literal . flags } ` ;
310
+
311
+ const rangeStr = patternStr . substring ( node . start + 1 , quantifier . end + 1 ) ;
312
+ const rangeHighlight = `^${ "~" . repeat ( node . end - node . start - 1 ) } ${ " " . repeat ( quantifier . start - node . end ) } ^${ "~" . repeat ( quantifier . end - quantifier . start - 1 ) } ` ;
313
+
314
+ reportError ( `${ rulePath } : Polynomial backtracking. By repeating any character that matches ${ lang } , an attack string can be created.\n\n ${ rangeStr } \n ${ rangeHighlight } \n\nFull pattern:\n${ patternStr } \n${ " " . repeat ( node . start + 1 ) } ${ rangeHighlight } ` ) ;
315
+ }
316
+ }
317
+ }
197
318
319
+ /**
320
+ * @param {Element | null | undefined } element
321
+ * @param {CharSet } char
322
+ * @param {Element | null | undefined } until
323
+ * @returns {CharSet }
324
+ */
325
+ function tryReachUntil ( element , char , until ) {
326
+ if ( ! element || element == until || char . isEmpty ) {
327
+ return char ;
328
+ }
329
+
330
+ const after = getAfter ( element ) ;
331
+
332
+ if ( element . type === "Quantifier" ) {
333
+ assertNoPoly ( element , char ) ;
334
+ }
335
+
336
+ return tryReachUntil ( after , goInto ( element , after , char ) , until ) ;
337
+ }
338
+
339
+ /**
340
+ * @param {Element } element
341
+ * @param {Element } after
342
+ * @param {CharSet } char
343
+ * @returns {CharSet }
344
+ */
345
+ function goInto ( element , after , char ) {
346
+ switch ( element . type ) {
347
+ case "Assertion" : {
348
+ if ( element . kind === "lookahead" || element . kind === "lookbehind" ) {
349
+ for ( const alt of element . alternatives ) {
350
+ if ( alt . elements . length > 0 ) {
351
+ tryReachUntil ( alt . elements [ 0 ] , char , after ) ;
352
+ }
353
+ }
354
+ }
355
+ return EMPTY ;
356
+ }
357
+ case "Group" :
358
+ case "CapturingGroup" : {
359
+ let total = EMPTY ;
360
+ for ( const alt of element . alternatives ) {
361
+ if ( alt . elements . length > 0 ) {
362
+ total = total . union ( tryReachUntil ( alt . elements [ 0 ] , char , after ) ) ;
363
+ } else {
364
+ total = char ;
365
+ }
366
+ }
367
+ return total ;
368
+ }
369
+ case "Character" :
370
+ case "CharacterClass" :
371
+ case "CharacterSet" : {
372
+ return char . intersect ( toCharSet ( element ) ) ;
373
+ }
374
+ case "Quantifier" : {
375
+ if ( element . min === 0 ) {
376
+ goInto ( element . element , after , char ) ;
377
+ return char ;
378
+ } else {
379
+ return goInto ( element . element , after , char ) ;
380
+ }
381
+ }
382
+ default :
383
+ return EMPTY ;
384
+ }
385
+ }
386
+ } ,
387
+ } ) ;
388
+
389
+ polyBacktrackingCache [ patternStr ] = false ;
390
+ } ) ;
391
+ } ) ;
198
392
} ) ;
199
393
}
200
394
@@ -209,5 +403,5 @@ for (const language of languages) {
209
403
}
210
404
211
405
describe ( "COMBINED: All grammars" , ( ) => {
212
- it ( `has ${ count } total regex` , ( ) => { } ) ;
406
+ it ( `have ${ count } total regex` , ( ) => { } ) ;
213
407
} ) ;
0 commit comments