Skip to content

Update regexpp and add support for ES2025 duplicate named capturing groups #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"typescript": "5.0"
},
"dependencies": {
"@eslint-community/regexpp": "^4.8.0",
"@eslint-community/regexpp": "^4.11.0",
"refa": "^0.12.1"
},
"files": [
Expand Down
109 changes: 64 additions & 45 deletions src/basic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -392,11 +392,15 @@ function backreferenceIsPotentiallyEmpty(
): boolean {
if (isEmptyBackreference(back, flags)) {
return true;
} else if (hasSomeAncestor(back.resolved, a => a === root)) {
return !isStrictBackreference(back) || isPotentiallyZeroLengthImpl(back.resolved, root, flags);
} else {
return false;
}
const groups = getReferencedGroupsFromBackreference(back);
if (groups.length === 0) return true;
for (const group of groups.filter(group => hasSomeAncestor(group, a => a === root))) {
if (!isStrictBackreference(back) || isPotentiallyZeroLengthImpl(group, root, flags)) {
return true;
}
}
return false;
}

/**
Expand Down Expand Up @@ -749,19 +753,8 @@ export function getMatchingDirectionFromAssertionKind(
* - The backreference might be before the capturing group. E.g. `/\1(a)/`, `/(?:\1(a))+/`, `/(?<=(a)\1)b/`
*/
export function isEmptyBackreference(backreference: Backreference, flags: ReadonlyFlags): boolean {
const group = backreference.resolved;

const closestAncestor = getClosestAncestor(backreference, group);

if (closestAncestor === group) {
// if the backreference is element of the referenced group
return true;
}

if (closestAncestor.type !== "Alternative") {
// if the closest common ancestor isn't an alternative => they're disjunctive.
return true;
}
const groups = getReferencedGroupsFromBackreference(backreference);
if (groups.length === 0) return true;

const backRefAncestors = new Set<Node>();
for (let a: Node | null = backreference; a; a = a.parent) {
Expand Down Expand Up @@ -812,7 +805,7 @@ export function isEmptyBackreference(backreference: Backreference, flags: Readon
}
}

return !findBackreference(group) || isZeroLength(group, flags);
return groups.every(group => !findBackreference(group) || isZeroLength(group, flags));
}

/**
Expand Down Expand Up @@ -840,19 +833,8 @@ export function isEmptyBackreference(backreference: Backreference, flags: Readon
* - `/(?!(a)).\1/`
*/
export function isStrictBackreference(backreference: Backreference): boolean {
const group = backreference.resolved;

const closestAncestor = getClosestAncestor(backreference, group);

if (closestAncestor === group) {
// if the backreference is element of the referenced group
return false;
}

if (closestAncestor.type !== "Alternative") {
// if the closest common ancestor isn't an alternative => they're disjunctive.
return false;
}
const groups = getReferencedGroupsFromBackreference(backreference);
if (groups.length === 0) return false;

const backRefAncestors = new Set<Node>();
for (let a: Node | null = backreference; a; a = a.parent) {
Expand Down Expand Up @@ -890,7 +872,15 @@ export function isStrictBackreference(backreference: Backreference): boolean {
// The captured text of a capturing group will be reset after leaving a negated lookaround
return false;
} else {
if (parentParent.alternatives.length > 1) {
if (
parentParent.alternatives.length > 1 &&
parentParent.alternatives.some(
alternative =>
!hasSomeDescendant(alternative, node => {
return node.type === "CapturingGroup" && groups.includes(node);
})
)
) {
// e.g.: (?:a|(a))+b\1
return false;
}
Expand All @@ -907,7 +897,7 @@ export function isStrictBackreference(backreference: Backreference): boolean {
}
}

return findBackreference(group);
return groups.every(findBackreference);
}

/**
Expand Down Expand Up @@ -1086,15 +1076,17 @@ function getLengthRangeElementImpl(
return getLengthRangeAlternativesImpl(element.alternatives, flags);

case "Backreference": {
if (isEmptyBackreference(element, flags)) {
const groups = getReferencedGroupsFromBackreference(element);
if (groups.length === 0) {
return ZERO_LENGTH_RANGE;
} else if (isEmptyBackreference(element, flags)) {
return ZERO_LENGTH_RANGE;
} else {
const resolvedRange = getLengthRangeElementImpl(element.resolved, flags);
if (resolvedRange.min > 0 && !isStrictBackreference(element)) {
return { min: 0, max: resolvedRange.max };
} else {
return resolvedRange;
}
const resolvedRanges = groups.map(group => getLengthRangeElementImpl(group, flags));
return {
min: isStrictBackreference(element) ? Math.min(...resolvedRanges.map(r => r.min)) : 0,
max: Math.max(...resolvedRanges.map(r => r.max)),
};
}
}

Expand Down Expand Up @@ -1189,11 +1181,11 @@ function isLengthRangeMinZeroElementImpl(
return isLengthRangeMinZeroAlternativesImpl(element.alternatives, flags);

case "Backreference": {
return (
isEmptyBackreference(element, flags) ||
!isStrictBackreference(element) ||
isLengthRangeMinZeroElementImpl(element.resolved, flags)
);
if (isEmptyBackreference(element, flags) || !isStrictBackreference(element)) {
return true;
}
const groups = getReferencedGroupsFromBackreference(element);
return groups.every(group => isLengthRangeMinZeroElementImpl(group, flags));
}

default:
Expand Down Expand Up @@ -1315,3 +1307,30 @@ export function getEffectiveMaximumRepetition(element: Node): number {
}
return max;
}

/**
 * Returns the capturing groups that the given backreference actually refers to.
 *
 * The candidates are the resolved group of the backreference, or all of its resolved groups if the backreference
 * is ambiguous. A candidate is kept only if it does not contain the backreference itself and if the closest
 * common ancestor of the candidate and the backreference is an alternative.
 *
 * ## Examples
 *
 * - `/(a)\1/`: returns `(a)`
 * - `/(a)(?:\1)/`: returns `(a)`
 * - `/(a)|\1/`: returns an empty array
 * - `/(a\1)/`: returns an empty array
 * - `/(?:(?<foo>a)|(?<foo>b))\k<foo>/`: returns `(?<foo>a)` and `(?<foo>b)`
 * - `/(?:(?<foo>a)|(?<foo>b)\k<foo>)/`: returns `(?<foo>b)`
 */
export function getReferencedGroupsFromBackreference(back: Backreference): CapturingGroup[] {
    // Normalize the unambiguous (single group) and ambiguous (array of groups) cases to one list.
    const candidates = back.ambiguous ? back.resolved : [back.resolved];

    const referenced: CapturingGroup[] = [];
    for (const candidate of candidates) {
        const commonAncestor = getClosestAncestor(back, candidate);

        // The backreference is an element of the candidate group, so it cannot refer to it.
        if (commonAncestor === candidate) {
            continue;
        }

        // If the closest common ancestor isn't an alternative, the two are disjunctive.
        if (commonAncestor.type !== "Alternative") {
            continue;
        }

        referenced.push(candidate);
    }
    return referenced;
}
10 changes: 6 additions & 4 deletions src/consumed-chars.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Alternative, Element, Pattern } from "@eslint-community/regexpp/ast";
import { CharSet } from "refa";
import { ReadonlyFlags } from "./flags";
import { hasSomeDescendant, isEmptyBackreference } from "./basic";
import { getReferencedGroupsFromBackreference, hasSomeDescendant, isEmptyBackreference } from "./basic";
import { Chars } from "./chars";
import { toUnicodeSet } from "./to-char-set";

Expand Down Expand Up @@ -49,9 +49,11 @@ export function getConsumedChars(element: Element | Pattern | Alternative, flags

exact = exact && !c.isEmpty;
} else if (d.type === "Backreference" && !isEmptyBackreference(d, flags)) {
const c = getConsumedChars(d.resolved, flags);
sets.push(c.chars);
exact = exact && c.exact && c.chars.size < 2;
for (const resolved of getReferencedGroupsFromBackreference(d)) {
const c = getConsumedChars(resolved, flags);
sets.push(c.chars);
exact = exact && c.exact && c.chars.size < 2;
}
}

// always continue to the next element
Expand Down
20 changes: 16 additions & 4 deletions src/equal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,22 @@ export function structurallyEqual(x: Node | null, y: Node | null): boolean {

case "Backreference": {
const other = y as Backreference;
return (
structurallyEqual(x.resolved, other.resolved) &&
isStrictBackreference(x) == isStrictBackreference(other)
);
const groupsX = x.ambiguous ? x.resolved : [x.resolved];
const groupsY = other.ambiguous ? other.resolved : [other.resolved];
/**
* Track the groups of `y` that no group of `x` has matched yet.
* If any of them remain after every group of `x` has been checked, the backreferences are not structurally equal.
*/
const unusedGroupsY = new Set(groupsY);
for (const groupX of groupsX) {
const matches = groupsY.filter(groupY => structurallyEqual(groupX, groupY));
if (matches.length === 0) return false;
for (const groupY of matches) {
unusedGroupsY.delete(groupY);
}
}
if (unusedGroupsY.size > 0) return false;
return isStrictBackreference(x) == isStrictBackreference(other);
}

case "Character": {
Expand Down
1 change: 0 additions & 1 deletion src/follow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,6 @@ export function followPaths<S>(
parent.type === "CharacterClassRange" ||
parent.type === "ClassIntersection" ||
parent.type === "ClassSubtraction" ||
parent.type === "ExpressionCharacterClass" ||
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this because of a TS error. I think this is due to the type of regexpp being fixed. In fact, ExpressionCharacterClass never appears here.

parent.type === "StringAlternative"
) {
throw new Error("The given element cannot be part of a character class.");
Expand Down
10 changes: 7 additions & 3 deletions src/longest-prefix.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { CharSet } from "refa";
import { Alternative, CapturingGroup, Element, Group, Quantifier } from "@eslint-community/regexpp/ast";
import {
getReferencedGroupsFromBackreference,
isEmptyBackreference,
isLengthRangeMinZero,
isStrictBackreference,
Expand Down Expand Up @@ -219,8 +220,12 @@ function getElementPrefix(
return EMPTY_COMPLETE;
}
if (isStrictBackreference(element)) {
const inner = getElementPrefix(element.resolved, direction, { ...options, includeAfter: false }, flags);
return inner;
const groups = getReferencedGroupsFromBackreference(element);
const prefixes = groups.map(resolved =>
getElementPrefix(resolved, direction, { ...options, includeAfter: false }, flags)
);

return getAlternationPrefix(element, prefixes, direction, options, flags);
}

if (!mayLookAhead(element, options, direction, flags)) {
Expand Down Expand Up @@ -459,7 +464,6 @@ function isNextCharacterInsideAfter(
parent.type === "CharacterClassRange" ||
parent.type === "ClassIntersection" ||
parent.type === "ClassSubtraction" ||
parent.type === "ExpressionCharacterClass" ||
parent.type === "StringAlternative"
) {
throw new Error("Expected an element outside a character class.");
Expand Down
24 changes: 15 additions & 9 deletions src/next-char.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
isEmptyBackreference,
MatchingDirection,
invertMatchingDirection,
getReferencedGroupsFromBackreference,
} from "./basic";
import { toUnicodeSet } from "./to-char-set";
import { followPaths } from "./follow";
Expand Down Expand Up @@ -718,21 +719,26 @@ function getFirstConsumedCharUncachedImpl(
if (isEmptyBackreference(element, flags)) {
return FirstConsumedChars.emptyConcat(flags);
}
let resolvedChar = getFirstConsumedCharImpl(element.resolved, direction, flags, options);
const groups = getReferencedGroupsFromBackreference(element);

// the resolved character is only exact if it is only a single character.
// i.e. /(\w)\1/ here the (\w) will capture exactly any word character, but the \1 can only match
// one word character and that is the only (\w) matched.
if (resolvedChar.exact && resolvedChar.char.size > 1) {
resolvedChar = { ...resolvedChar, exact: false };
}
const resolvedChars = groups.map(group => {
let resolvedChar = getFirstConsumedCharImpl(group, direction, flags, options);

if (isStrictBackreference(element)) {
// the resolved character is only exact if it is only a single character.
// i.e. /(\w)\1/ here the (\w) will capture exactly any word character, but the \1 can only match
// one word character and that is the only (\w) matched.
if (resolvedChar.exact && resolvedChar.char.size > 1) {
resolvedChar = { ...resolvedChar, exact: false };
}
return resolvedChar;
});

if (isStrictBackreference(element)) {
return FirstConsumedChars.union(resolvedChars, flags);
} else {
// there is at least one path through which the backreference will (possibly) be replaced with the
// empty string
return FirstConsumedChars.makeOptional(resolvedChar);
return FirstConsumedChars.makeOptional(FirstConsumedChars.union(resolvedChars, flags));
}
}

Expand Down
14 changes: 14 additions & 0 deletions tests/__snapshots__/consumed-chars.ts.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`getConsumedChars /(?:(?<foo>a)|(?<foo>b))\\k<foo>/ 1`] = `
Object {
"chars": CharSet (65535) [61..62],
"exact": true,
}
`;

exports[`getConsumedChars /(?:(?<foo>abc)|(?<foo>x))\\k<foo>/ 1`] = `
Object {
"chars": CharSet (65535) [61..63, 78],
"exact": false,
}
`;

exports[`getConsumedChars /(?:\\d*\\.\\d+|\\d+\\.\\d*)_/ 1`] = `
Object {
"chars": CharSet (65535) [2e, 30..39, 5f],
Expand Down
8 changes: 8 additions & 0 deletions tests/__snapshots__/next-char.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,14 @@ Object {
}
`;

exports[`getFirstConsumedChar /(?:(?<foo>a)|(?<foo>b))\\k<foo>/ (ltr) 1`] = `
Object {
"char": CharSet (65535) [61..62],
"empty": false,
"exact": true,
}
`;

exports[`getFirstConsumedChar /(?:(a)|b)\\1/ (rtl) 1`] = `
Object {
"char": CharSet (65535) [61..62],
Expand Down
Loading