Skip to content

Commit 99dd134

Browse files
Length: Slightly changed definition and improved docs
1 parent fbac83e commit 99dd134

File tree

2 files changed

+98
-23
lines changed

2 files changed

+98
-23
lines changed

src/basic.ts

+96-20
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ function isInvokeEvery(
2727
fn: (e: Element | Alternative) => boolean
2828
): boolean {
2929
if (Array.isArray(element)) {
30-
return (element as readonly Alternative[]).every(fn);
30+
const alternatives = element as readonly Alternative[];
31+
return alternatives.length > 0 && alternatives.every(fn);
3132
} else {
3233
return fn(element as Element | Alternative);
3334
}
@@ -43,7 +44,23 @@ function isInvokeSome(
4344
}
4445
}
4546
/**
46-
* Returns whether all paths of the given element don't move the position of the automaton.
47+
* Returns whether all (but at least one of the) paths of the given element do not consume characters.
48+
*
49+
* If this function returns `true`, then {@link isPotentiallyZeroLength} is guaranteed to return `true`.
50+
*
51+
* ## Backreferences
52+
*
53+
* This function uses the same condition for backreferences as {@link isEmpty}.
54+
*
55+
* ## Relations
56+
*
57+
* - `isZeroLength(e) -> isPotentiallyZeroLength(e)`
58+
* - `isZeroLength(e) -> (getLengthRange(e) !== undefined && getLengthRange(e).max == 0)`
59+
*
60+
* @see {@link isPotentiallyZeroLength}
61+
* @see {@link isEmpty}
62+
* @see {@link isPotentiallyEmpty}
63+
* @see {@link getLengthRange}
4764
*/
4865
export function isZeroLength(element: Element | Alternative | readonly Alternative[]): boolean {
4966
return isInvokeEvery(element, isZeroLengthImpl);
@@ -69,18 +86,27 @@ function isZeroLengthImpl(element: Element | Alternative): boolean {
6986

7087
case "CapturingGroup":
7188
case "Group":
72-
return element.alternatives.every(isZeroLengthImpl);
89+
return element.alternatives.length > 0 && element.alternatives.every(isZeroLengthImpl);
7390

7491
default:
7592
throw assertNever(element);
7693
}
7794
}
7895
/**
79-
* Returns whether at least one path of the given element does not move the position of the automation.
96+
* Returns whether at least one path of the given element does not consume characters.
8097
*
8198
* ## Backreferences
8299
*
83100
* This function uses the same condition for backreferences as {@link isPotentiallyEmpty}.
101+
*
102+
* ## Relations
103+
*
104+
* - `isPotentiallyZeroLength(e) -> (getLengthRange(e) !== undefined && getLengthRange(e).min == 0)`
105+
*
106+
* @see {@link isZeroLength}
107+
* @see {@link isEmpty}
108+
* @see {@link isPotentiallyEmpty}
109+
* @see {@link getLengthRange}
84110
*/
85111
export function isPotentiallyZeroLength(element: Element | Alternative | readonly Alternative[]): boolean {
86112
return isInvokeSome(element, e => isPotentiallyZeroLengthImpl(e, e));
@@ -122,8 +148,26 @@ function isPotentiallyZeroLengthImpl(e: Element | Alternative, root: Element | A
122148
}
123149

124150
/**
125-
* Returns whether all paths of the given element does not move the position of the automation and accept
126-
* regardless of prefix and suffix.
151+
* Returns whether all (but at least one of the) paths of the given element do not consume characters and do not
152+
* assert characters.
153+
*
154+
* If this function returns `true`, then {@link isZeroLength} and {@link isPotentiallyEmpty} are guaranteed to return
155+
* `true`.
156+
*
157+
* ## Backreferences
158+
*
159+
* A backreference will be considered potentially empty iff it is empty by the definition of
160+
* {@link isEmptyBackreference}.
161+
*
162+
* ## Relations
163+
*
164+
* - `isEmpty(e) -> isZeroLength(e)`
165+
* - `isEmpty(e) -> isPotentiallyEmpty(e)`
166+
*
167+
* @see {@link isZeroLength}
168+
* @see {@link isPotentiallyZeroLength}
169+
* @see {@link isPotentiallyEmpty}
170+
* @see {@link getLengthRange}
127171
*/
128172
export function isEmpty(element: Element | Alternative | readonly Alternative[]): boolean {
129173
return isInvokeEvery(element, isEmptyImpl);
@@ -153,7 +197,7 @@ function isEmptyImpl(element: Element | Alternative): boolean {
153197

154198
case "CapturingGroup":
155199
case "Group":
156-
return element.alternatives.every(isEmptyImpl);
200+
return element.alternatives.length > 0 && element.alternatives.every(isEmptyImpl);
157201

158202
case "Quantifier":
159203
return element.max === 0 || isEmptyImpl(element.element);
@@ -163,11 +207,8 @@ function isEmptyImpl(element: Element | Alternative): boolean {
163207
}
164208
}
165209
/**
166-
* Returns whether at least one path of the given element does not move the position of the automation and accepts
167-
* regardless of prefix and suffix.
168-
*
169-
* This basically means that it can match the empty string and that it does that at any position in any string.
170-
* Lookarounds do not affect this as (as mentioned above) all prefixes and suffixes are accepted.
210+
* Returns whether at least one path of the given element does not consume characters and does not assert
211+
* characters.
171212
*
172213
* ## Backreferences
173214
*
@@ -179,6 +220,15 @@ function isEmptyImpl(element: Element | Alternative): boolean {
179220
* * The referenced capturing group is potentially zero-length.
180221
* * The backreference is not always after its referenced capturing group.
181222
* (see {@link backreferenceAlwaysAfterGroup})
223+
*
224+
* ## Relations
225+
*
226+
* - `isPotentiallyEmpty(e) -> isPotentiallyZeroLength(e)`
227+
*
228+
* @see {@link isZeroLength}
229+
* @see {@link isPotentiallyZeroLength}
230+
* @see {@link isEmpty}
231+
* @see {@link getLengthRange}
182232
*/
183233
export function isPotentiallyEmpty(element: Element | Alternative | readonly Alternative[]): boolean {
184234
return isInvokeSome(element, isPotentiallyEmptyImpl);
@@ -626,6 +676,8 @@ export function backreferenceAlwaysAfterGroup(backreference: Backreference): boo
626676

627677
/**
628678
* The length range of the accepted strings. All accepted strings have a length of `min <= length <= max`.
679+
*
680+
* @see {@link getLengthRange}
629681
*/
630682
export interface LengthRange {
631683
readonly min: number;
@@ -636,7 +688,26 @@ const ONE_LENGTH_RANGE: LengthRange = { min: 1, max: 1 };
636688
/**
637689
* Returns how many characters the given element can consume at most and has to consume at least.
638690
*
639-
* If `undefined`, then the given element can't consume any characters.
691+
* If `undefined` is returned, then the given element can't consume any characters.
692+
*
693+
* **Note:** `undefined` is only returned for empty alternative arrays. All character classes/sets are assumed to
694+
* consume at least one character and all assertions are assumed to have some accepting path.
695+
*
696+
* ## Backreferences
697+
*
698+
* While {@link isPotentiallyEmpty} generally assumes the worst case for backreferences that reference capturing groups
699+
* outside the given element, this function does not/cannot. The length range of a backreference only depends on the
700+
* referenced capturing group and the relative positions of the backreference and the capturing group within the
701+
* pattern. It does not depend on the given element.
702+
*
703+
* This is an important distinction because it means that `isPotentiallyEmpty(e) -> getLengthRange(e).min == 0` is
704+
* guaranteed but `getLengthRange(e).min == 0 -> isPotentiallyEmpty(e)` is only guaranteed if `e` does not contain
705+
* backreferences.
706+
*
707+
* @see {@link isZeroLength}
708+
* @see {@link isPotentiallyZeroLength}
709+
* @see {@link isEmpty}
710+
* @see {@link isPotentiallyEmpty}
640711
*/
641712
export function getLengthRange(element: Element | Alternative | readonly Alternative[]): LengthRange | undefined {
642713
if (Array.isArray(element)) {
@@ -710,14 +781,19 @@ function getLengthRangeElementImpl(element: Element | Alternative): LengthRange
710781
case "Backreference": {
711782
if (isEmptyBackreference(element)) {
712783
return ZERO_LENGTH_RANGE;
713-
}
714-
const resolvedRange = getLengthRangeElementImpl(element.resolved);
715-
if (!resolvedRange) {
716-
return backreferenceAlwaysAfterGroup(element) ? undefined : ZERO_LENGTH_RANGE;
717-
} else if (resolvedRange.min > 0 && !backreferenceAlwaysAfterGroup(element)) {
718-
return { min: 0, max: resolvedRange.max };
719784
} else {
720-
return resolvedRange;
785+
const resolvedRange = getLengthRangeElementImpl(element.resolved);
786+
if (!resolvedRange) {
787+
if (backreferenceAlwaysAfterGroup(element)) {
788+
return ZERO_LENGTH_RANGE;
789+
} else {
790+
return undefined;
791+
}
792+
} else if (resolvedRange.min > 0 && !backreferenceAlwaysAfterGroup(element)) {
793+
return { min: 0, max: resolvedRange.max };
794+
} else {
795+
return resolvedRange;
796+
}
721797
}
722798
}
723799

tests/length.ts

+2-3
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,9 @@ const model = new Model<PredicateTestCaseInfo>();
2727
model.implication(isEmpty, isPotentiallyEmpty);
2828
model.implication(isEmpty, isZeroLength);
2929
model.implication(isZeroLength, isPotentiallyZeroLength);
30+
model.implication(isZeroLength, isLengthMaxZero);
3031
model.implication(isPotentiallyEmpty, isPotentiallyZeroLength);
31-
32-
model.equivalence(isZeroLength, isLengthMaxZero);
33-
model.equivalence(isPotentiallyZeroLength, isLengthMinZero);
32+
model.implication(isPotentiallyZeroLength, isLengthMinZero);
3433

3534
// test cases
3635

0 commit comments

Comments
 (0)