Skip to content

Commit 2229deb

Browse files
authored
Bleeding edge - Precise array shape for preg_match_all() $matches
1 parent e19e6e5 commit 2229deb

6 files changed

+270
-33
lines changed

Diff for: src/Type/Php/PregMatchParameterOutTypeExtension.php

+5 -2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ public function __construct(
2323

2424
public function isFunctionSupported(FunctionReflection $functionReflection, ParameterReflection $parameter): bool
2525
{
26-
return in_array(strtolower($functionReflection->getName()), ['preg_match'], true)
26+
return in_array(strtolower($functionReflection->getName()), ['preg_match', 'preg_match_all'], true)
2727
// the parameter is named differently depending on the PHP version.
2828
&& in_array($parameter->getName(), ['subpatterns', 'matches'], true);
2929
}
@@ -46,7 +46,10 @@ public function getParameterOutTypeFromFunctionCall(FunctionReflection $function
4646
$flagsType = $scope->getType($flagsArg->value);
4747
}
4848

49-
return $this->regexShapeMatcher->matchExpr($patternArg->value, $flagsType, TrinaryLogic::createMaybe(), $scope);
49+
if ($functionReflection->getName() === 'preg_match') {
50+
return $this->regexShapeMatcher->matchExpr($patternArg->value, $flagsType, TrinaryLogic::createMaybe(), $scope);
51+
}
52+
return $this->regexShapeMatcher->matchAllExpr($patternArg->value, $flagsType, TrinaryLogic::createMaybe(), $scope);
5053
}
5154

5255
}

Diff for: src/Type/Php/PregMatchTypeSpecifyingExtension.php

+6 -2
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ public function setTypeSpecifier(TypeSpecifier $typeSpecifier): void
3232

3333
public function isFunctionSupported(FunctionReflection $functionReflection, FuncCall $node, TypeSpecifierContext $context): bool
3434
{
35-
return in_array(strtolower($functionReflection->getName()), ['preg_match'], true) && !$context->null();
35+
return in_array(strtolower($functionReflection->getName()), ['preg_match', 'preg_match_all'], true) && !$context->null();
3636
}
3737

3838
public function specifyTypes(FunctionReflection $functionReflection, FuncCall $node, Scope $scope, TypeSpecifierContext $context): SpecifiedTypes
@@ -53,7 +53,11 @@ public function specifyTypes(FunctionReflection $functionReflection, FuncCall $n
5353
$flagsType = $scope->getType($flagsArg->value);
5454
}
5555

56-
$matchedType = $this->regexShapeMatcher->matchExpr($patternArg->value, $flagsType, TrinaryLogic::createFromBoolean($context->true()), $scope);
56+
if ($functionReflection->getName() === 'preg_match') {
57+
$matchedType = $this->regexShapeMatcher->matchExpr($patternArg->value, $flagsType, TrinaryLogic::createFromBoolean($context->true()), $scope);
58+
} else {
59+
$matchedType = $this->regexShapeMatcher->matchAllExpr($patternArg->value, $flagsType, TrinaryLogic::createFromBoolean($context->true()), $scope);
60+
}
5761
if ($matchedType === null) {
5862
return new SpecifiedTypes();
5963
}

Diff for: src/Type/Php/RegexArrayShapeMatcher.php

+121 -26
Original file line number | Diff line number | Diff line change
@@ -14,13 +14,16 @@
1414
use PHPStan\Php\PhpVersion;
1515
use PHPStan\ShouldNotHappenException;
1616
use PHPStan\TrinaryLogic;
17+
use PHPStan\Type\Accessory\AccessoryArrayListType;
1718
use PHPStan\Type\Accessory\AccessoryNonEmptyStringType;
1819
use PHPStan\Type\Accessory\AccessoryNumericStringType;
20+
use PHPStan\Type\ArrayType;
1921
use PHPStan\Type\Constant\ConstantArrayType;
2022
use PHPStan\Type\Constant\ConstantArrayTypeBuilder;
2123
use PHPStan\Type\Constant\ConstantIntegerType;
2224
use PHPStan\Type\Constant\ConstantStringType;
2325
use PHPStan\Type\IntegerRangeType;
26+
use PHPStan\Type\IntegerType;
2427
use PHPStan\Type\IntersectionType;
2528
use PHPStan\Type\StringType;
2629
use PHPStan\Type\Type;
@@ -38,6 +41,8 @@
3841
use function strlen;
3942
use function substr;
4043
use const PREG_OFFSET_CAPTURE;
44+
use const PREG_PATTERN_ORDER;
45+
use const PREG_SET_ORDER;
4146
use const PREG_UNMATCHED_AS_NULL;
4247

4348
/**
@@ -60,20 +65,25 @@ public function __construct(
6065
{
6166
}
6267

68+
public function matchAllExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
69+
{
70+
return $this->matchPatternType($this->getPatternType($patternExpr, $scope), $flagsType, $wasMatched, true);
71+
}
72+
6373
public function matchExpr(Expr $patternExpr, ?Type $flagsType, TrinaryLogic $wasMatched, Scope $scope): ?Type
6474
{
65-
return $this->matchPatternType($this->getPatternType($patternExpr, $scope), $flagsType, $wasMatched);
75+
return $this->matchPatternType($this->getPatternType($patternExpr, $scope), $flagsType, $wasMatched, false);
6676
}
6777

6878
/**
6979
* @deprecated use matchExpr() instead for a more precise result
7080
*/
7181
public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched): ?Type
7282
{
73-
return $this->matchPatternType($patternType, $flagsType, $wasMatched);
83+
return $this->matchPatternType($patternType, $flagsType, $wasMatched, false);
7484
}
7585

76-
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched): ?Type
86+
private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
7787
{
7888
if ($wasMatched->no()) {
7989
return new ConstantArrayType([], []);
@@ -90,8 +100,8 @@ private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLo
90100
return null;
91101
}
92102

93-
/** @var int-mask<PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
94-
$flags = $flagsType->getValue() & (PREG_OFFSET_CAPTURE | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73);
103+
/** @var int-mask<PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73> $flags */
104+
$flags = $flagsType->getValue() & (PREG_OFFSET_CAPTURE | PREG_PATTERN_ORDER | PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL | self::PREG_UNMATCHED_AS_NULL_ON_72_73);
95105

96106
// some other unsupported/unexpected flag was passed in
97107
if ($flags !== $flagsType->getValue()) {
@@ -101,7 +111,7 @@ private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLo
101111

102112
$matchedTypes = [];
103113
foreach ($constantStrings as $constantString) {
104-
$matched = $this->matchRegex($constantString->getValue(), $flags, $wasMatched);
114+
$matched = $this->matchRegex($constantString->getValue(), $flags, $wasMatched, $matchesAll);
105115
if ($matched === null) {
106116
return null;
107117
}
@@ -117,9 +127,9 @@ private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLo
117127
}
118128

119129
/**
120-
* @param int-mask<PREG_OFFSET_CAPTURE|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
130+
* @param int-mask<PREG_OFFSET_CAPTURE|PREG_PATTERN_ORDER|PREG_SET_ORDER|PREG_UNMATCHED_AS_NULL|self::PREG_UNMATCHED_AS_NULL_ON_72_73>|null $flags
121131
*/
122-
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
132+
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
123133
{
124134
$parseResult = $this->parseGroups($regex);
125135
if ($parseResult === null) {
@@ -140,7 +150,8 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
140150
$onlyTopLevelAlternationId = $this->getOnlyTopLevelAlternationId($groupList);
141151

142152
if (
143-
$wasMatched->yes()
153+
!$matchesAll
154+
&& $wasMatched->yes()
144155
&& $onlyOptionalTopLevelGroup !== null
145156
) {
146157
// if only one top level capturing optional group exists
@@ -154,17 +165,20 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
154165
$trailingOptionals,
155166
$flags ?? 0,
156167
$markVerbs,
168+
$matchesAll,
157169
);
158170

159-
if (!$this->containsUnmatchedAsNull($flags ?? 0)) {
171+
if (!$this->containsUnmatchedAsNull($flags ?? 0, $matchesAll)) {
160172
$combiType = TypeCombinator::union(
161173
new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()], [0], [], true),
162174
$combiType,
163175
);
164176
}
177+
165178
return $combiType;
166179
} elseif (
167-
$wasMatched->yes()
180+
!$matchesAll
181+
&& $wasMatched->yes()
168182
&& $onlyTopLevelAlternationId !== null
169183
&& array_key_exists($onlyTopLevelAlternationId, $groupCombinations)
170184
) {
@@ -181,7 +195,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
181195
$beforeCurrentCombo = false;
182196
} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
183197
$group->forceNonOptional();
184-
} elseif ($group->getAlternationId() === $onlyTopLevelAlternationId && !$this->containsUnmatchedAsNull($flags ?? 0)) {
198+
} elseif ($group->getAlternationId() === $onlyTopLevelAlternationId && !$this->containsUnmatchedAsNull($flags ?? 0, $matchesAll)) {
185199
unset($comboList[$groupId]);
186200
}
187201
}
@@ -192,6 +206,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
192206
$trailingOptionals,
193207
$flags ?? 0,
194208
$markVerbs,
209+
$matchesAll,
195210
);
196211

197212
$combiTypes[] = $combiType;
@@ -202,7 +217,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
202217
}
203218
}
204219

205-
if ($isOptionalAlternation && !$this->containsUnmatchedAsNull($flags ?? 0)) {
220+
if ($isOptionalAlternation && !$this->containsUnmatchedAsNull($flags ?? 0, $matchesAll)) {
206221
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()], [0], [], true);
207222
}
208223

@@ -215,6 +230,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
215230
$trailingOptionals,
216231
$flags ?? 0,
217232
$markVerbs,
233+
$matchesAll,
218234
);
219235
}
220236

@@ -278,23 +294,24 @@ private function buildArrayType(
278294
int $trailingOptionals,
279295
int $flags,
280296
array $markVerbs,
297+
bool $matchesAll,
281298
): Type
282299
{
283300
$builder = ConstantArrayTypeBuilder::createEmpty();
284301

285302
// first item in matches contains the overall match.
286303
$builder->setOffsetValueType(
287304
$this->getKeyType(0),
288-
TypeCombinator::removeNull($this->getValueType(new StringType(), $flags)),
289-
!$wasMatched->yes(),
305+
$this->createSubjectValueType($wasMatched, $flags, $matchesAll),
306+
$this->isSubjectOptional($wasMatched, $matchesAll),
290307
);
291308

292309
$countGroups = count($captureGroups);
293310
$i = 0;
294311
foreach ($captureGroups as $captureGroup) {
295312
$isTrailingOptional = $i >= $countGroups - $trailingOptionals;
296-
$groupValueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional);
297-
$optional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional);
313+
$groupValueType = $this->createGroupValueType($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);
314+
$optional = $this->isGroupOptional($captureGroup, $wasMatched, $flags, $isTrailingOptional, $matchesAll);
298315

299316
if ($captureGroup->isNamed()) {
300317
$builder->setOffsetValueType(
@@ -325,17 +342,61 @@ private function buildArrayType(
325342
);
326343
}
327344

345+
if ($matchesAll && $this->containsSetOrder($flags)) {
346+
$arrayType = AccessoryArrayListType::intersectWith(new ArrayType(new IntegerType(), $builder->getArray()));
347+
if (!$wasMatched->yes()) {
348+
$arrayType = TypeCombinator::union(
349+
new ConstantArrayType([], []),
350+
$arrayType,
351+
);
352+
}
353+
return $arrayType;
354+
}
355+
328356
return $builder->getArray();
329357
}
330358

331-
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional): bool
359+
private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll): bool
360+
{
361+
if ($matchesAll) {
362+
return false;
363+
}
364+
365+
return !$wasMatched->yes();
366+
}
367+
368+
private function createSubjectValueType(TrinaryLogic $wasMatched, int $flags, bool $matchesAll): Type
332369
{
370+
$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));
371+
372+
if ($matchesAll) {
373+
if (!$wasMatched->yes()) {
374+
$subjectValueType = TypeCombinator::union($subjectValueType, new ConstantStringType(''));
375+
}
376+
if ($this->containsPatternOrder($flags)) {
377+
$subjectValueType = AccessoryArrayListType::intersectWith(new ArrayType(new IntegerType(), $subjectValueType));
378+
}
379+
}
380+
381+
return $subjectValueType;
382+
}
383+
384+
private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): bool
385+
{
386+
if ($matchesAll) {
387+
if ($isTrailingOptional && !$this->containsUnmatchedAsNull($flags, $matchesAll) && $this->containsSetOrder($flags)) {
388+
return true;
389+
}
390+
391+
return false;
392+
}
393+
333394
if (!$wasMatched->yes()) {
334395
$optional = true;
335396
} else {
336397
if (!$isTrailingOptional) {
337398
$optional = false;
338-
} elseif ($this->containsUnmatchedAsNull($flags)) {
399+
} elseif ($this->containsUnmatchedAsNull($flags, $matchesAll)) {
339400
$optional = false;
340401
} else {
341402
$optional = $captureGroup->isOptional();
@@ -345,25 +406,59 @@ private function isGroupOptional(RegexCapturingGroup $captureGroup, TrinaryLogic
345406
return $optional;
346407
}
347408

348-
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional): Type
409+
private function createGroupValueType(RegexCapturingGroup $captureGroup, TrinaryLogic $wasMatched, int $flags, bool $isTrailingOptional, bool $matchesAll): Type
349410
{
350-
$groupValueType = $this->getValueType($captureGroup->getType(), $flags);
411+
$groupValueType = $this->getValueType($captureGroup->getType(), $flags, $matchesAll);
412+
413+
if ($matchesAll) {
414+
if (!$isTrailingOptional && $this->containsUnmatchedAsNull($flags, $matchesAll) && !$captureGroup->isOptional()) {
415+
$groupValueType = TypeCombinator::removeNull($groupValueType);
416+
}
417+
418+
if (!$this->containsSetOrder($flags) && !$this->containsUnmatchedAsNull($flags, $matchesAll) && $captureGroup->isOptional()) {
419+
$groupValueType = TypeCombinator::removeNull($groupValueType);
420+
$groupValueType = TypeCombinator::union($groupValueType, new ConstantStringType(''));
421+
}
422+
423+
if ($this->containsPatternOrder($flags)) {
424+
$groupValueType = AccessoryArrayListType::intersectWith(new ArrayType(new IntegerType(), $groupValueType));
425+
}
426+
427+
return $groupValueType;
428+
}
351429

352430
if ($wasMatched->yes()) {
353-
if (!$isTrailingOptional && $this->containsUnmatchedAsNull($flags) && !$captureGroup->isOptional()) {
431+
if (!$isTrailingOptional && $this->containsUnmatchedAsNull($flags, $matchesAll) && !$captureGroup->isOptional()) {
354432
$groupValueType = TypeCombinator::removeNull($groupValueType);
355433
}
356434
}
357435

358-
if (!$isTrailingOptional && !$this->containsUnmatchedAsNull($flags) && $captureGroup->isOptional()) {
436+
if (!$isTrailingOptional && !$this->containsUnmatchedAsNull($flags, $matchesAll) && $captureGroup->isOptional()) {
359437
$groupValueType = TypeCombinator::union($groupValueType, new ConstantStringType(''));
360438
}
361439

362440
return $groupValueType;
363441
}
364442

365-
private function containsUnmatchedAsNull(int $flags): bool
443+
private function containsPatternOrder(int $flags): bool
444+
{
445+
// If no order flag is given, PREG_PATTERN_ORDER is assumed.
446+
return !$this->containsSetOrder($flags);
447+
}
448+
449+
private function containsSetOrder(int $flags): bool
366450
{
451+
return ($flags & PREG_SET_ORDER) !== 0;
452+
}
453+
454+
private function containsUnmatchedAsNull(int $flags, bool $matchesAll): bool
455+
{
456+
if ($matchesAll) {
457+
// preg_match_all() with PREG_UNMATCHED_AS_NULL behaves consistently across PHP versions
458+
// https://3v4l.org/tKmPn
459+
return ($flags & PREG_UNMATCHED_AS_NULL) !== 0;
460+
}
461+
367462
return ($flags & PREG_UNMATCHED_AS_NULL) !== 0 && (($flags & self::PREG_UNMATCHED_AS_NULL_ON_72_73) !== 0 || $this->phpVersion->supportsPregUnmatchedAsNull());
368463
}
369464

@@ -376,12 +471,12 @@ private function getKeyType(int|string $key): Type
376471
return new ConstantIntegerType($key);
377472
}
378473

379-
private function getValueType(Type $baseType, int $flags): Type
474+
private function getValueType(Type $baseType, int $flags, bool $matchesAll): Type
380475
{
381476
$valueType = $baseType;
382477

383478
$offsetType = IntegerRangeType::fromInterval(0, null);
384-
if ($this->containsUnmatchedAsNull($flags)) {
479+
if ($this->containsUnmatchedAsNull($flags, $matchesAll)) {
385480
$valueType = TypeCombinator::addNull($valueType);
386481
// unmatched groups return -1 as offset
387482
$offsetType = IntegerRangeType::fromInterval(-1, null);

Diff for: tests/PHPStan/Analyser/data/param-out.php

+2 -2
Original file line number | Diff line number | Diff line change
@@ -488,10 +488,10 @@ function ($s, $t): void {
488488

489489
function fooMatch(string $input): void {
490490
preg_match_all('/@[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}(?!\w)/', $input, $matches, PREG_PATTERN_ORDER);
491-
assertType('array<list<string>>', $matches);
491+
assertType('array{list<string>}', $matches);
492492

493493
preg_match_all('/@[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}(?!\w)/', $input, $matches, PREG_SET_ORDER);
494-
assertType('list<array<string>>', $matches);
494+
assertType('list<array{string}>', $matches);
495495

496496
preg_match('/@[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}(?!\w)/', $input, $matches, PREG_UNMATCHED_AS_NULL);
497497
assertType("array{0?: string}", $matches);

Diff for: tests/PHPStan/Analyser/nsrt/param-out-default.php

+1 -1
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ public function doBar(): void
3030
public function sayHello(string $row): void
3131
{
3232
preg_match_all('#// error:(.+)#', $row, $matches);
33-
assertType('array<list<string>>', $matches);
33+
assertType('array{list<string>, list<non-empty-string>}', $matches);
3434
}
3535

3636
}

0 commit comments

Comments
 (0)