Skip to content

RegexArrayShapeMatcher - more precise subject types #3897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Mar 24, 2025
35 changes: 26 additions & 9 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
@@ -107,12 +107,17 @@ private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLo
*/
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
$parseResult = $this->regexGroupParser->parseGroups($regex);
if ($parseResult === null) {
$astWalkResult = $this->regexGroupParser->parseGroups($regex);
if ($astWalkResult === null) {
// regex could not be parsed by Hoa/Regex
return null;
}
[$groupList, $markVerbs] = $parseResult;
$groupList = $astWalkResult->getCapturingGroups();
$markVerbs = $astWalkResult->getMarkVerbs();
$subjectBaseType = new StringType();
if ($wasMatched->yes()) {
$subjectBaseType = $astWalkResult->getSubjectBaseType();
}

$regexGroupList = new RegexGroupList($groupList);
$trailingOptionals = $regexGroupList->countTrailingOptionals();
@@ -130,6 +135,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
$regexGroupList = $regexGroupList->forceGroupNonOptional($onlyOptionalTopLevelGroup);

$combiType = $this->buildArrayType(
$subjectBaseType,
$regexGroupList,
$wasMatched,
$trailingOptionals,
@@ -141,7 +147,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
// positive match has a subject but not any capturing group
$combiType = TypeCombinator::union(
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes()),
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll)], [1], [], TrinaryLogic::createYes()),
$combiType,
);
}
@@ -180,6 +186,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
}

$combiType = $this->buildArrayType(
$subjectBaseType,
$comboList,
$wasMatched,
$trailingOptionals,
@@ -199,7 +206,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
)
) {
// positive match has a subject but not any capturing group
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes());
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll)], [1], [], TrinaryLogic::createYes());
}

return TypeCombinator::union(...$combiTypes);
@@ -208,6 +215,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
// the general case, which should work in all cases but does not yield the most
// precise result possible in some cases
return $this->buildArrayType(
$subjectBaseType,
$regexGroupList,
$wasMatched,
$trailingOptionals,
@@ -221,6 +229,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
* @param list<string> $markVerbs
*/
private function buildArrayType(
Type $subjectBaseType,
RegexGroupList $captureGroups,
TrinaryLogic $wasMatched,
int $trailingOptionals,
@@ -234,7 +243,7 @@ private function buildArrayType(
// first item in matches contains the overall match.
$builder->setOffsetValueType(
$this->getKeyType(0),
$this->createSubjectValueType($flags, $matchesAll),
$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
$this->isSubjectOptional($wasMatched, $matchesAll),
);

@@ -298,13 +307,21 @@ private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll):
return !$wasMatched->yes();
}

private function createSubjectValueType(int $flags, bool $matchesAll): Type
/**
* @param Type $baseType A string type (or string variant) representing the subject of the match
*/
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));
$subjectValueType = TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));

if ($matchesAll) {
$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));

if ($this->containsPatternOrder($flags)) {
$subjectValueType = TypeCombinator::intersect(new ArrayType(new IntegerType(), $subjectValueType), new AccessoryArrayListType());
$subjectValueType = TypeCombinator::intersect(
new ArrayType(new IntegerType(), $subjectValueType),
new AccessoryArrayListType(),
);
}
}

25 changes: 25 additions & 0 deletions src/Type/Regex/RegexAstWalkResult.php
Original file line number Diff line number Diff line change
@@ -2,6 +2,9 @@

namespace PHPStan\Type\Regex;

use PHPStan\Type\StringType;
use PHPStan\Type\Type;

/** @immutable */
final class RegexAstWalkResult
{
@@ -15,6 +18,7 @@ public function __construct(
private int $captureGroupId,
private array $capturingGroups,
private array $markVerbs,
private Type $subjectBaseType,
)
{
}
@@ -27,6 +31,7 @@ public static function createEmpty(): self
100,
[],
[],
new StringType(),
);
}

@@ -37,6 +42,7 @@ public function nextAlternationId(): self
$this->captureGroupId,
$this->capturingGroups,
$this->markVerbs,
$this->subjectBaseType,
);
}

@@ -47,6 +53,7 @@ public function nextCaptureGroupId(): self
$this->captureGroupId + 1,
$this->capturingGroups,
$this->markVerbs,
$this->subjectBaseType,
);
}

@@ -60,6 +67,7 @@ public function addCapturingGroup(RegexCapturingGroup $group): self
$this->captureGroupId,
$capturingGroups,
$this->markVerbs,
$this->subjectBaseType,
);
}

@@ -73,6 +81,18 @@ public function markVerb(string $markVerb): self
$this->captureGroupId,
$this->capturingGroups,
$verbs,
$this->subjectBaseType,
);
}

public function withSubjectBaseType(Type $subjectBaseType): self
{
return new self(
$this->alternationId,
$this->captureGroupId,
$this->capturingGroups,
$this->markVerbs,
$subjectBaseType,
);
}

@@ -102,4 +122,9 @@ public function getMarkVerbs(): array
return $this->markVerbs;
}

public function getSubjectBaseType(): Type
{
return $this->subjectBaseType;
}

}
28 changes: 23 additions & 5 deletions src/Type/Regex/RegexGroupParser.php
Original file line number Diff line number Diff line change
@@ -49,10 +49,7 @@ public function __construct(
{
}

/**
* @return array{array<int, RegexCapturingGroup>, list<string>}|null
*/
public function parseGroups(string $regex): ?array
public function parseGroups(string $regex): ?RegexAstWalkResult
{
if (self::$parser === null) {
/** @throws void */
@@ -105,7 +102,28 @@ public function parseGroups(string $regex): ?array
RegexAstWalkResult::createEmpty(),
);

return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()];
$subjectAsGroupResult = $this->walkGroupAst(
$ast,
false,
false,
$modifiers,
RegexGroupWalkResult::createEmpty(),
);

if (!$subjectAsGroupResult->mightContainEmptyStringLiteral()) {
// we could handle numeric-string, in case we know the regex is delimited by ^ and $
if ($subjectAsGroupResult->isNonFalsy()->yes()) {
$astWalkResult = $astWalkResult->withSubjectBaseType(
TypeCombinator::intersect(new StringType(), new AccessoryNonFalsyStringType()),
);
} elseif ($subjectAsGroupResult->isNonEmpty()->yes()) {
$astWalkResult = $astWalkResult->withSubjectBaseType(
TypeCombinator::intersect(new StringType(), new AccessoryNonEmptyStringType()),
);
}
}

return $astWalkResult;
}

private function createEmptyTokenTreeNode(TreeNode $parentAst): TreeNode
14 changes: 14 additions & 0 deletions src/Type/Regex/RegexGroupWalkResult.php
Original file line number Diff line number Diff line change
@@ -103,6 +103,20 @@ public function getOnlyLiterals(): ?array
return $this->onlyLiterals;
}

public function mightContainEmptyStringLiteral(): bool
{
if ($this->onlyLiterals === null) {
return false;
}
foreach ($this->onlyLiterals as $onlyLiteral) {
if ($onlyLiteral === '') {
return true;
}
}

return false;
}

public function isNonEmpty(): TrinaryLogic
{
return $this->isNonEmpty;
12 changes: 6 additions & 6 deletions tests/PHPStan/Analyser/nsrt/bug-11293.php
Original file line number Diff line number Diff line change
@@ -9,21 +9,21 @@ class HelloWorld
public function sayHello(string $s): void
{
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) > 0) {
assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}

public function sayHello2(string $s): void
{
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) === 1) {
assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}

public function sayHello3(string $s): void
{
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) >= 1) {
assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}

@@ -35,7 +35,7 @@ public function sayHello4(string $s): void
return;
}

assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}

public function sayHello5(string $s): void
@@ -46,7 +46,7 @@ public function sayHello5(string $s): void
return;
}

assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}

public function sayHello6(string $s): void
@@ -57,6 +57,6 @@ public function sayHello6(string $s): void
return;
}

assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}
Loading