@@ -27,7 +27,8 @@ function isInvokeEvery(
27
27
fn : ( e : Element | Alternative ) => boolean
28
28
) : boolean {
29
29
if ( Array . isArray ( element ) ) {
30
- return ( element as readonly Alternative [ ] ) . every ( fn ) ;
30
+ const alternatives = element as readonly Alternative [ ] ;
31
+ return alternatives . length > 0 && alternatives . every ( fn ) ;
31
32
} else {
32
33
return fn ( element as Element | Alternative ) ;
33
34
}
@@ -43,7 +44,23 @@ function isInvokeSome(
43
44
}
44
45
}
45
46
/**
46
- * Returns whether all paths of the given element don't move the position of the automaton.
47
+ * Returns whether all (but at least one of the) paths of the given element do not consume characters.
48
+ *
49
+ * If this function returns `true`, then {@link isPotentiallyZeroLength} is guaranteed to return `true`.
50
+ *
51
+ * ## Backreferences
52
+ *
53
+ * This function uses the same condition for backreferences as {@link isEmpty}.
54
+ *
55
+ * ## Relations
56
+ *
57
+ * - `isZeroLength(e) -> isPotentiallyZeroLength(e)`
58
+ * - `isZeroLength(e) -> (getLengthRange(e) !== undefined && getLengthRange(e).max == 0)`
59
+ *
60
+ * @see {@link isPotentiallyZeroLength }
61
+ * @see {@link isEmpty }
62
+ * @see {@link isPotentiallyEmpty }
63
+ * @see {@link getLengthRange }
47
64
*/
48
65
export function isZeroLength ( element : Element | Alternative | readonly Alternative [ ] ) : boolean {
49
66
return isInvokeEvery ( element , isZeroLengthImpl ) ;
@@ -69,18 +86,27 @@ function isZeroLengthImpl(element: Element | Alternative): boolean {
69
86
70
87
case "CapturingGroup" :
71
88
case "Group" :
72
- return element . alternatives . every ( isZeroLengthImpl ) ;
89
+ return element . alternatives . length > 0 && element . alternatives . every ( isZeroLengthImpl ) ;
73
90
74
91
default :
75
92
throw assertNever ( element ) ;
76
93
}
77
94
}
78
95
/**
79
- * Returns whether at least one path of the given element does not move the position of the automation .
96
+ * Returns whether at least one path of the given element does not consume characters .
80
97
*
81
98
* ## Backreferences
82
99
*
83
100
* This function uses the same condition for backreferences as {@link isPotentiallyEmpty}.
101
+ *
102
+ * ## Relations
103
+ *
104
+ * - `isPotentiallyZeroLength(e) -> (getLengthRange(e) !== undefined && getLengthRange(e).min == 0)`
105
+ *
106
+ * @see {@link isZeroLength }
107
+ * @see {@link isEmpty }
108
+ * @see {@link isPotentiallyEmpty }
109
+ * @see {@link getLengthRange }
84
110
*/
85
111
export function isPotentiallyZeroLength ( element : Element | Alternative | readonly Alternative [ ] ) : boolean {
86
112
return isInvokeSome ( element , e => isPotentiallyZeroLengthImpl ( e , e ) ) ;
@@ -122,8 +148,26 @@ function isPotentiallyZeroLengthImpl(e: Element | Alternative, root: Element | A
122
148
}
123
149
124
150
/**
125
- * Returns whether all paths of the given element does not move the position of the automation and accept
126
- * regardless of prefix and suffix.
151
+ * Returns whether all (but at least one of the) paths of the given element do not consume characters and do not
152
+ * assert characters.
153
+ *
154
+ * If this function returns `true`, then {@link isZeroLength} and {@link isPotentiallyEmpty} are guaranteed to return
155
+ * `true`.
156
+ *
157
+ * ## Backreferences
158
+ *
159
+ * A backreference will only be considered potentially empty iff it is empty by the definition of
160
+ * {@link isEmptyBackreference}.
161
+ *
162
+ * ## Relations
163
+ *
164
+ * - `isEmpty(e) -> isZeroLength(e)`
165
+ * - `isEmpty(e) -> isPotentiallyEmpty(e)`
166
+ *
167
+ * @see {@link isZeroLength }
168
+ * @see {@link isPotentiallyZeroLength }
169
+ * @see {@link isPotentiallyEmpty }
170
+ * @see {@link getLengthRange }
127
171
*/
128
172
export function isEmpty ( element : Element | Alternative | readonly Alternative [ ] ) : boolean {
129
173
return isInvokeEvery ( element , isEmptyImpl ) ;
@@ -153,7 +197,7 @@ function isEmptyImpl(element: Element | Alternative): boolean {
153
197
154
198
case "CapturingGroup" :
155
199
case "Group" :
156
- return element . alternatives . every ( isEmptyImpl ) ;
200
+ return element . alternatives . length > 0 && element . alternatives . every ( isEmptyImpl ) ;
157
201
158
202
case "Quantifier" :
159
203
return element . max === 0 || isEmptyImpl ( element . element ) ;
@@ -163,11 +207,8 @@ function isEmptyImpl(element: Element | Alternative): boolean {
163
207
}
164
208
}
165
209
/**
166
- * Returns whether at least one path of the given element does not move the position of the automation and accepts
167
- * regardless of prefix and suffix.
168
- *
169
- * This basically means that it can match the empty string and that it does that at any position in any string.
170
- * Lookarounds do not affect this as (as mentioned above) all prefixes and suffixes are accepted.
210
+ * Returns whether at least one path of the given element does not consume characters and does not assert
211
+ * characters.
171
212
*
172
213
* ## Backreferences
173
214
*
@@ -179,6 +220,15 @@ function isEmptyImpl(element: Element | Alternative): boolean {
179
220
* * The referenced capturing group is potentially zero-length.
180
221
* * The backreference is not always after its referenced capturing group.
181
222
* (see {@link backreferenceAlwaysAfterGroup})
223
+ *
224
+ * ## Relations
225
+ *
226
+ * - `isPotentiallyEmpty(e) -> isPotentiallyZeroLength(e)`
227
+ *
228
+ * @see {@link isZeroLength }
229
+ * @see {@link isPotentiallyZeroLength }
230
+ * @see {@link isEmpty }
231
+ * @see {@link getLengthRange }
182
232
*/
183
233
export function isPotentiallyEmpty ( element : Element | Alternative | readonly Alternative [ ] ) : boolean {
184
234
return isInvokeSome ( element , isPotentiallyEmptyImpl ) ;
@@ -626,6 +676,8 @@ export function backreferenceAlwaysAfterGroup(backreference: Backreference): boo
626
676
627
677
/**
628
678
* The length range of accepted strings. All accepted strings have a length of `min <= length <= max`.
679
+ *
680
+ * @see {@link getLengthRange }
629
681
*/
630
682
export interface LengthRange {
631
683
readonly min : number ;
@@ -636,7 +688,26 @@ const ONE_LENGTH_RANGE: LengthRange = { min: 1, max: 1 };
636
688
/**
637
689
* Returns how many characters the given element can consume at most and has to consume at least.
638
690
*
639
- * If `undefined`, then the given element can't consume any characters.
691
+ * If `undefined` is returned, then the given element can't consume any characters.
692
+ *
693
+ * **Note:** `undefined` is only returned for empty alternative arrays. All character classes/sets are assumed to
694
+ * consume at least one character and all assertions are assumed to have some accepting path.
695
+ *
696
+ * ## Backreferences
697
+ *
698
+ * While {@link isPotentiallyEmpty} generally assumes the worst-case for backreferences that reference capturing groups
699
+ * outside the given element, this function does not/cannot. The length range of a backreference only depends on the
700
+ * referenced capturing group and the relative positions of the backreference and the capturing group within the
701
+ * pattern. It does not depend on the given element.
702
+ *
703
+ * This is an important distinction because it means that `isPotentiallyEmpty(e) -> getLengthRange(e).min == 0` is
704
+ * guaranteed but `getLengthRange(e).min == 0 -> isPotentiallyEmpty(e)` is only guaranteed if `e` does not contain
705
+ * backreferences.
706
+ *
707
+ * @see {@link isZeroLength }
708
+ * @see {@link isPotentiallyZeroLength }
709
+ * @see {@link isEmpty }
710
+ * @see {@link isPotentiallyEmpty }
640
711
*/
641
712
export function getLengthRange ( element : Element | Alternative | readonly Alternative [ ] ) : LengthRange | undefined {
642
713
if ( Array . isArray ( element ) ) {
@@ -710,14 +781,19 @@ function getLengthRangeElementImpl(element: Element | Alternative): LengthRange
710
781
case "Backreference" : {
711
782
if ( isEmptyBackreference ( element ) ) {
712
783
return ZERO_LENGTH_RANGE ;
713
- }
714
- const resolvedRange = getLengthRangeElementImpl ( element . resolved ) ;
715
- if ( ! resolvedRange ) {
716
- return backreferenceAlwaysAfterGroup ( element ) ? undefined : ZERO_LENGTH_RANGE ;
717
- } else if ( resolvedRange . min > 0 && ! backreferenceAlwaysAfterGroup ( element ) ) {
718
- return { min : 0 , max : resolvedRange . max } ;
719
784
} else {
720
- return resolvedRange ;
785
+ const resolvedRange = getLengthRangeElementImpl ( element . resolved ) ;
786
+ if ( ! resolvedRange ) {
787
+ if ( backreferenceAlwaysAfterGroup ( element ) ) {
788
+ return ZERO_LENGTH_RANGE ;
789
+ } else {
790
+ return undefined ;
791
+ }
792
+ } else if ( resolvedRange . min > 0 && ! backreferenceAlwaysAfterGroup ( element ) ) {
793
+ return { min : 0 , max : resolvedRange . max } ;
794
+ } else {
795
+ return resolvedRange ;
796
+ }
721
797
}
722
798
}
723
799
0 commit comments