Skip to content

Commit fb0a626

Browse files
committed
enh(ci): Add tests for polynomial regex issues
1 parent fa46dd1 commit fb0a626

File tree

1 file changed

+196
-2
lines changed

1 file changed

+196
-2
lines changed

test/regex/index.js

+196-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ hljs.debugMode();
1515
*/
1616
const expBacktrackingCache = {};
1717

18+
/**
19+
* A map for a regex pattern to whether or not it it vulnerable to polynomial backtracking.
20+
*
21+
* @type {Record<string, boolean>}
22+
*/
23+
const polyBacktrackingCache = {};
24+
1825
function retrieveRules(language, { name }) {
1926
// first we need to get the language compiled so we have
2027
// access to the raw regex
@@ -56,7 +63,7 @@ function testLanguage(languageName) {
5663
// });
5764
// });
5865

59-
it(`has ${rules.length} regex matchers`, () => {} );
66+
it(`have ${rules.length} regex matchers`, () => {} );
6067

6168
it('should not use octal escapes', function() {
6269
forEachPattern(rules, ({ ast, rulePath, reportError }) => {
@@ -194,7 +201,194 @@ function testLanguage(languageName) {
194201
expBacktrackingCache[patternStr] = false;
195202
});
196203
});
204+
it('should not cause polynomial backtracking', function () {
205+
forEachPattern(rules, ({ pattern, ast, rulePath, reportError }) => {
206+
const patternStr = String(pattern);
207+
if (polyBacktrackingCache[patternStr] === false) {
208+
// we know that the pattern won't cause poly backtracking because we checked before
209+
return;
210+
}
211+
212+
const EMPTY = ast.flags.unicode ? CharSet.empty(0x10FFFF) : CharSet.empty(0xFFFF);
213+
214+
/**
215+
* @param {Node} node
216+
* @returns {CharSet}
217+
*/
218+
function toCharSet(node) {
219+
switch (node.type) {
220+
case "Alternative": {
221+
if (node.elements.length === 1) {
222+
return toCharSet(node.elements[0]);
223+
}
224+
return EMPTY;
225+
}
226+
case "CapturingGroup":
227+
case "Group": {
228+
let total = EMPTY;
229+
for (const item of node.alternatives) {
230+
total = total.union(toCharSet(item));
231+
}
232+
return total;
233+
}
234+
case "Character":
235+
return JS.createCharSet([node.value], ast.flags);
236+
case "CharacterClass": {
237+
const value = JS.createCharSet(node.elements.map(x => {
238+
if (x.type === "CharacterSet") {
239+
return x;
240+
} else if (x.type === "Character") {
241+
return x.value;
242+
} else {
243+
return { min: x.min.value, max: x.max.value };
244+
}
245+
}), ast.flags);
246+
if (node.negate) {
247+
return value.negate();
248+
} else {
249+
return value;
250+
}
251+
}
252+
case "CharacterSet":
253+
return JS.createCharSet([node], ast.flags);
254+
255+
default:
256+
return EMPTY;
257+
}
258+
}
259+
260+
/**
261+
* @param {Element} from
262+
* @returns {Element | null}
263+
*/
264+
function getAfter(from) {
265+
const parent = from.parent;
266+
if (parent.type === "Quantifier") {
267+
return getAfter(parent);
268+
} else if (parent.type === "Alternative") {
269+
const index = parent.elements.indexOf(from);
270+
const after = parent.elements[index + 1];
271+
if (after) {
272+
return after;
273+
} else {
274+
const grandParent = parent.parent;
275+
if (grandParent.type === "Pattern") {
276+
return null;
277+
} else {
278+
return getAfter(grandParent);
279+
}
280+
}
281+
} else {
282+
throw Error("Unreachable");
283+
}
284+
}
285+
286+
visitRegExpAST(ast.pattern, {
287+
onQuantifierLeave(node) {
288+
if (node.max !== Infinity) {
289+
return;
290+
}
291+
const char = toCharSet(node.element);
292+
tryReachUntil(getAfter(node), char, null);
293+
294+
/**
295+
* @param {Quantifier} quantifier
296+
* @param {CharSet} char
297+
*/
298+
function assertNoPoly(quantifier, char) {
299+
if (quantifier.max === Infinity) {
300+
const qChar = toCharSet(quantifier.element);
301+
if (qChar && !qChar.isDisjointWith(char)) {
302+
const intersection = qChar.intersect(char);
303+
const literal = JS.toLiteral({
304+
type: "Concatenation",
305+
elements: [
306+
{ type: "CharacterClass", characters: intersection }
307+
]
308+
})
309+
const lang = `/${literal.source}/${literal.flags}`;
310+
311+
const rangeStr = patternStr.substring(node.start + 1, quantifier.end + 1);
312+
const rangeHighlight = `^${"~".repeat(node.end - node.start - 1)}${" ".repeat(quantifier.start - node.end)}^${"~".repeat(quantifier.end - quantifier.start - 1)}`;
313+
314+
reportError(`${rulePath}: Polynomial backtracking. By repeating any character that matches ${lang}, an attack string can be created.\n\n ${rangeStr}\n ${rangeHighlight}\n\nFull pattern:\n${patternStr}\n${" ".repeat(node.start + 1)}${rangeHighlight}`);
315+
}
316+
}
317+
}
197318

319+
/**
320+
* @param {Element | null | undefined} element
321+
* @param {CharSet} char
322+
* @param {Element | null | undefined} until
323+
* @returns {CharSet}
324+
*/
325+
function tryReachUntil(element, char, until) {
326+
if (!element || element == until || char.isEmpty) {
327+
return char;
328+
}
329+
330+
const after = getAfter(element);
331+
332+
if (element.type === "Quantifier") {
333+
assertNoPoly(element, char);
334+
}
335+
336+
return tryReachUntil(after, goInto(element, after, char), until);
337+
}
338+
339+
/**
340+
* @param {Element} element
341+
* @param {Element} after
342+
* @param {CharSet} char
343+
* @returns {CharSet}
344+
*/
345+
function goInto(element, after, char) {
346+
switch (element.type) {
347+
case "Assertion": {
348+
if (element.kind === "lookahead" || element.kind === "lookbehind") {
349+
for (const alt of element.alternatives) {
350+
if (alt.elements.length > 0) {
351+
tryReachUntil(alt.elements[0], char, after);
352+
}
353+
}
354+
}
355+
return EMPTY;
356+
}
357+
case "Group":
358+
case "CapturingGroup": {
359+
let total = EMPTY;
360+
for (const alt of element.alternatives) {
361+
if (alt.elements.length > 0) {
362+
total = total.union(tryReachUntil(alt.elements[0], char, after));
363+
} else {
364+
total = char;
365+
}
366+
}
367+
return total;
368+
}
369+
case "Character":
370+
case "CharacterClass":
371+
case "CharacterSet": {
372+
return char.intersect(toCharSet(element));
373+
}
374+
case "Quantifier": {
375+
if (element.min === 0) {
376+
goInto(element.element, after, char);
377+
return char;
378+
} else {
379+
return goInto(element.element, after, char);
380+
}
381+
}
382+
default:
383+
return EMPTY;
384+
}
385+
}
386+
},
387+
});
388+
389+
polyBacktrackingCache[patternStr] = false;
390+
});
391+
});
198392
});
199393
}
200394

@@ -209,5 +403,5 @@ for (const language of languages) {
209403
}
210404

211405
describe("COMBINED: All grammars", () => {
212-
it(`has ${count} total regex`, () => {});
406+
it(`have ${count} total regex`, () => {});
213407
});

0 commit comments

Comments
 (0)