30
30
use function str_replace ;
31
31
use function strlen ;
32
32
use function substr ;
33
+ use function trim ;
33
34
34
35
final class RegexGroupParser
35
36
{
@@ -126,7 +127,11 @@ private function walkRegexAst(
126
127
$ inAlternation ? $ alternationId : null ,
127
128
$ inOptionalQuantification ,
128
129
$ parentGroup ,
129
- $ this ->createGroupType ($ ast , $ this ->allowConstantTypes ($ patternModifiers , $ repeatedMoreThanOnce , $ parentGroup )),
130
+ $ this ->createGroupType (
131
+ $ ast ,
132
+ $ this ->allowConstantTypes ($ patternModifiers , $ repeatedMoreThanOnce , $ parentGroup ),
133
+ $ patternModifiers ,
134
+ ),
130
135
);
131
136
$ parentGroup = $ group ;
132
137
} elseif ($ ast ->getId () === '#namedcapturing ' ) {
@@ -137,7 +142,11 @@ private function walkRegexAst(
137
142
$ inAlternation ? $ alternationId : null ,
138
143
$ inOptionalQuantification ,
139
144
$ parentGroup ,
140
- $ this ->createGroupType ($ ast , $ this ->allowConstantTypes ($ patternModifiers , $ repeatedMoreThanOnce , $ parentGroup )),
145
+ $ this ->createGroupType (
146
+ $ ast ,
147
+ $ this ->allowConstantTypes ($ patternModifiers , $ repeatedMoreThanOnce , $ parentGroup ),
148
+ $ patternModifiers ,
149
+ ),
141
150
);
142
151
$ parentGroup = $ group ;
143
152
} elseif ($ ast ->getId () === '#noncapturing ' ) {
@@ -293,7 +302,7 @@ private function getQuantificationRange(TreeNode $node): array
293
302
return [$ min , $ max ];
294
303
}
295
304
296
- private function createGroupType (TreeNode $ group , bool $ maybeConstant ): Type
305
+ private function createGroupType (TreeNode $ group , bool $ maybeConstant, string $ patternModifiers ): Type
297
306
{
298
307
$ isNonEmpty = TrinaryLogic::createMaybe ();
299
308
$ isNonFalsy = TrinaryLogic::createMaybe ();
@@ -310,6 +319,7 @@ private function createGroupType(TreeNode $group, bool $maybeConstant): Type
310
319
$ inOptionalQuantification ,
311
320
$ onlyLiterals ,
312
321
false ,
322
+ $ patternModifiers ,
313
323
);
314
324
315
325
if ($ maybeConstant && $ onlyLiterals !== null && $ onlyLiterals !== []) {
@@ -356,6 +366,7 @@ private function walkGroupAst(
356
366
bool &$ inOptionalQuantification ,
357
367
?array &$ onlyLiterals ,
358
368
bool $ inClass ,
369
+ string $ patternModifiers ,
359
370
): void
360
371
{
361
372
$ children = $ ast ->getChildren ();
@@ -364,9 +375,31 @@ private function walkGroupAst(
364
375
$ ast ->getId () === '#concatenation '
365
376
&& count ($ children ) > 0
366
377
) {
367
- $ isNonEmpty = TrinaryLogic::createYes ();
368
- if (!$ inAlternation ) {
378
+ $ meaningfulTokens = 0 ;
379
+ foreach ($ children as $ child ) {
380
+ $ nonFalsy = false ;
381
+ if ($ this ->isMaybeEmptyNode ($ child , $ patternModifiers , $ nonFalsy )) {
382
+ continue ;
383
+ }
384
+
385
+ $ meaningfulTokens ++;
386
+
387
+ if (!$ nonFalsy || $ inAlternation ) {
388
+ continue ;
389
+ }
390
+
391
+ // a single token non-falsy on its own
369
392
$ isNonFalsy = TrinaryLogic::createYes ();
393
+ break ;
394
+ }
395
+
396
+ if ($ meaningfulTokens > 0 ) {
397
+ $ isNonEmpty = TrinaryLogic::createYes ();
398
+
399
+ // two non-empty tokens concatenated results in a non-falsy string
400
+ if ($ meaningfulTokens > 1 && !$ inAlternation ) {
401
+ $ isNonFalsy = TrinaryLogic::createYes ();
402
+ }
370
403
}
371
404
} elseif ($ ast ->getId () === '#quantification ' ) {
372
405
[$ min ] = $ this ->getQuantificationRange ($ ast );
@@ -390,17 +423,14 @@ private function walkGroupAst(
390
423
foreach ($ children as $ child ) {
391
424
$ oldLiterals = $ onlyLiterals ;
392
425
393
- if ($ child ->getId () === 'token ' ) {
394
- $ this ->getLiteralValue ($ child , $ oldLiterals , true );
395
- }
396
-
426
+ $ this ->getLiteralValue ($ child , $ oldLiterals , true , $ patternModifiers );
397
427
foreach ($ oldLiterals ?? [] as $ oldLiteral ) {
398
428
$ newLiterals [] = $ oldLiteral ;
399
429
}
400
430
}
401
431
$ onlyLiterals = $ newLiterals ;
402
432
} elseif ($ ast ->getId () === 'token ' ) {
403
- $ literalValue = $ this ->getLiteralValue ($ ast , $ onlyLiterals , !$ inClass );
433
+ $ literalValue = $ this ->getLiteralValue ($ ast , $ onlyLiterals , !$ inClass, $ patternModifiers );
404
434
if ($ literalValue !== null ) {
405
435
if (Strings::match ($ literalValue , '/^\d+$/ ' ) === null ) {
406
436
$ isNumeric = TrinaryLogic::createNo ();
@@ -439,14 +469,46 @@ private function walkGroupAst(
439
469
$ inOptionalQuantification ,
440
470
$ onlyLiterals ,
441
471
$ inClass ,
472
+ $ patternModifiers ,
442
473
);
443
474
}
444
475
}
445
476
477
+ private function isMaybeEmptyNode (TreeNode $ node , string $ patternModifiers , bool &$ isNonFalsy ): bool
478
+ {
479
+ if ($ node ->getId () === '#quantification ' ) {
480
+ [$ min ] = $ this ->getQuantificationRange ($ node );
481
+
482
+ if ($ min > 0 ) {
483
+ return false ;
484
+ }
485
+
486
+ if ($ min === 0 ) {
487
+ return true ;
488
+ }
489
+ }
490
+
491
+ $ literal = $ this ->getLiteralValue ($ node , $ onlyLiterals , false , $ patternModifiers );
492
+ if ($ literal !== null ) {
493
+ if ($ literal !== '' && $ literal !== '0 ' ) {
494
+ $ isNonFalsy = true ;
495
+ }
496
+ return false ;
497
+ }
498
+
499
+ foreach ($ node ->getChildren () as $ child ) {
500
+ if (!$ this ->isMaybeEmptyNode ($ child , $ patternModifiers , $ isNonFalsy )) {
501
+ return false ;
502
+ }
503
+ }
504
+
505
+ return true ;
506
+ }
507
+
446
508
/**
447
509
* @param array<string>|null $onlyLiterals
448
510
*/
449
- private function getLiteralValue (TreeNode $ node , ?array &$ onlyLiterals , bool $ appendLiterals ): ?string
511
+ private function getLiteralValue (TreeNode $ node , ?array &$ onlyLiterals , bool $ appendLiterals, string $ patternModifiers ): ?string
450
512
{
451
513
if ($ node ->getId () !== 'token ' ) {
452
514
return null ;
@@ -457,6 +519,10 @@ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $ap
457
519
$ value = $ node ->getValueValue ();
458
520
459
521
if (in_array ($ token , ['literal ' , 'escaped_end_class ' ], true )) {
522
+ if (str_contains ($ patternModifiers , 'x ' ) && trim ($ value ) === '' ) {
523
+ return null ;
524
+ }
525
+
460
526
if (strlen ($ value ) > 1 && $ value [0 ] === '\\' ) {
461
527
return substr ($ value , 1 );
462
528
} elseif (
0 commit comments