Skip to content

Commit ec261f1

Browse files
committed
Fix output type mismatch with RegexBuilder
Some regex literals (and presumably other `Regex` instances) lose their output type information when used in a RegexBuilder closure due to the way the concatenating builder calls are overloaded. In particular, any output type with labeled tuples or where the sum of tuple components in the accumulated and new output types is greater than 10 will be ignored.

Regex internals don't make this distinction, however, so there ends up being a mismatch between what a `Regex.Match` instance tries to produce and the output type of the outermost regex. For example, this code results in a crash, because `regex` is a `Regex<Substring>` but the match tries to produce a `(Substring, number: Substring)`:

    let regex = Regex {
        ZeroOrMore(.whitespace)
        /:(?<number>\d+):/
        ZeroOrMore(.whitespace)
    }
    let match = try regex.wholeMatch(in: " :21: ")
    print(match!.output)

To fix this, we add a new `ignoreCapturesInTypedOutput` DSLTree node to mark situations where the output type is discarded. This status is propagated through the capture list into the match's storage, which lets us produce the correct output type.

Note that we can't just drop the capture groups when building the compiled program because (1) different parts of the regex might reference the capture group and (2) all capture groups are available if a developer converts the output to `AnyRegexOutput`.

    let anyOutput = AnyRegexOutput(match)
    // anyOutput[1] == "21"
    // anyOutput["number"] == Optional("21")

Fixes swiftlang#625. rdar://104823356
1 parent 3ca8b13 commit ec261f1

File tree

14 files changed

+272
-50
lines changed

14 files changed

+272
-50
lines changed

Package.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([
88
"-define-availability",
99
"-Xfrontend",
1010
"SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
11+
"-Xfrontend",
12+
"-define-availability",
13+
"-Xfrontend",
14+
"SwiftStdlib 5.8:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
1115
])
1216

1317
/// Swift settings for building a private stdlib-like module that is to be used

Sources/RegexBuilder/Variadics.swift

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2021-2023 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
@@ -571,7 +571,11 @@ extension RegexComponentBuilder {
571571
accumulated: R0, next: R1
572572
) -> Regex<Substring> where R0.RegexOutput == W0 {
573573
let factory = makeFactory()
574-
return factory.accumulate(accumulated, next)
574+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
575+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
576+
} else {
577+
return factory.accumulate(accumulated, next)
578+
}
575579
}
576580
}
577581
@available(SwiftStdlib 5.7, *)
@@ -582,7 +586,11 @@ extension RegexComponentBuilder {
582586
accumulated: R0, next: R1
583587
) -> Regex<(Substring, C0)> where R0.RegexOutput == (W0, C0) {
584588
let factory = makeFactory()
585-
return factory.accumulate(accumulated, next)
589+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
590+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
591+
} else {
592+
return factory.accumulate(accumulated, next)
593+
}
586594
}
587595
}
588596
@available(SwiftStdlib 5.7, *)
@@ -593,7 +601,11 @@ extension RegexComponentBuilder {
593601
accumulated: R0, next: R1
594602
) -> Regex<(Substring, C0, C1)> where R0.RegexOutput == (W0, C0, C1) {
595603
let factory = makeFactory()
596-
return factory.accumulate(accumulated, next)
604+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
605+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
606+
} else {
607+
return factory.accumulate(accumulated, next)
608+
}
597609
}
598610
}
599611
@available(SwiftStdlib 5.7, *)
@@ -604,7 +616,11 @@ extension RegexComponentBuilder {
604616
accumulated: R0, next: R1
605617
) -> Regex<(Substring, C0, C1, C2)> where R0.RegexOutput == (W0, C0, C1, C2) {
606618
let factory = makeFactory()
607-
return factory.accumulate(accumulated, next)
619+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
620+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
621+
} else {
622+
return factory.accumulate(accumulated, next)
623+
}
608624
}
609625
}
610626
@available(SwiftStdlib 5.7, *)
@@ -615,7 +631,11 @@ extension RegexComponentBuilder {
615631
accumulated: R0, next: R1
616632
) -> Regex<(Substring, C0, C1, C2, C3)> where R0.RegexOutput == (W0, C0, C1, C2, C3) {
617633
let factory = makeFactory()
618-
return factory.accumulate(accumulated, next)
634+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
635+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
636+
} else {
637+
return factory.accumulate(accumulated, next)
638+
}
619639
}
620640
}
621641
@available(SwiftStdlib 5.7, *)
@@ -626,7 +646,11 @@ extension RegexComponentBuilder {
626646
accumulated: R0, next: R1
627647
) -> Regex<(Substring, C0, C1, C2, C3, C4)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4) {
628648
let factory = makeFactory()
629-
return factory.accumulate(accumulated, next)
649+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
650+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
651+
} else {
652+
return factory.accumulate(accumulated, next)
653+
}
630654
}
631655
}
632656
@available(SwiftStdlib 5.7, *)
@@ -637,7 +661,11 @@ extension RegexComponentBuilder {
637661
accumulated: R0, next: R1
638662
) -> Regex<(Substring, C0, C1, C2, C3, C4, C5)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5) {
639663
let factory = makeFactory()
640-
return factory.accumulate(accumulated, next)
664+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
665+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
666+
} else {
667+
return factory.accumulate(accumulated, next)
668+
}
641669
}
642670
}
643671
@available(SwiftStdlib 5.7, *)
@@ -648,7 +676,11 @@ extension RegexComponentBuilder {
648676
accumulated: R0, next: R1
649677
) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6) {
650678
let factory = makeFactory()
651-
return factory.accumulate(accumulated, next)
679+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
680+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
681+
} else {
682+
return factory.accumulate(accumulated, next)
683+
}
652684
}
653685
}
654686
@available(SwiftStdlib 5.7, *)
@@ -659,7 +691,11 @@ extension RegexComponentBuilder {
659691
accumulated: R0, next: R1
660692
) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7) {
661693
let factory = makeFactory()
662-
return factory.accumulate(accumulated, next)
694+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
695+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
696+
} else {
697+
return factory.accumulate(accumulated, next)
698+
}
663699
}
664700
}
665701
@available(SwiftStdlib 5.7, *)
@@ -670,7 +706,11 @@ extension RegexComponentBuilder {
670706
accumulated: R0, next: R1
671707
) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7, C8) {
672708
let factory = makeFactory()
673-
return factory.accumulate(accumulated, next)
709+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
710+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
711+
} else {
712+
return factory.accumulate(accumulated, next)
713+
}
674714
}
675715
}
676716
@available(SwiftStdlib 5.7, *)
@@ -681,7 +721,11 @@ extension RegexComponentBuilder {
681721
accumulated: R0, next: R1
682722
) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) {
683723
let factory = makeFactory()
684-
return factory.accumulate(accumulated, next)
724+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
725+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
726+
} else {
727+
return factory.accumulate(accumulated, next)
728+
}
685729
}
686730
}
687731

Sources/VariadicsGenerator/VariadicsGenerator.swift

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ struct VariadicsGenerator: ParsableCommand {
132132
//
133133
// This source file is part of the Swift.org open source project
134134
//
135-
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
135+
// Copyright (c) 2021-2023 Apple Inc. and the Swift project authors
136136
// Licensed under Apache License v2.0 with Runtime Library Exception
137137
//
138138
// See https://swift.org/LICENSE.txt for license information
@@ -308,7 +308,11 @@ struct VariadicsGenerator: ParsableCommand {
308308
output("""
309309
{
310310
let factory = makeFactory()
311-
return factory.accumulate(accumulated, next)
311+
if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) {
312+
return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next))
313+
} else {
314+
return factory.accumulate(accumulated, next)
315+
}
312316
}
313317
}
314318

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,19 @@ extension CaptureList {
2727
public var type: Any.Type
2828
public var optionalDepth: Int
2929
public var location: SourceLocation
30+
public var visibleInTypedOutput: Bool
3031

3132
public init(
3233
name: String? = nil,
3334
type: Any.Type = Substring.self,
3435
optionalDepth: Int,
36+
visibleInTypedOutput: Bool,
3537
_ location: SourceLocation
3638
) {
3739
self.name = name
3840
self.type = type
3941
self.optionalDepth = optionalDepth
42+
self.visibleInTypedOutput = visibleInTypedOutput
4043
self.location = location
4144
}
4245
}
@@ -104,58 +107,60 @@ extension CaptureList {
104107

105108
extension CaptureList.Builder {
106109
public mutating func addCaptures(
107-
of node: AST.Node, optionalNesting nesting: OptionalNesting
110+
of node: AST.Node,
111+
optionalNesting nesting: OptionalNesting,
112+
visibleInTypedOutput: Bool
108113
) {
109114
switch node {
110115
case let .alternation(a):
111116
for child in a.children {
112-
addCaptures(of: child, optionalNesting: nesting.addingOptional)
117+
addCaptures(of: child, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput)
113118
}
114119

115120
case let .concatenation(c):
116121
for child in c.children {
117-
addCaptures(of: child, optionalNesting: nesting)
122+
addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput)
118123
}
119124

120125
case let .group(g):
121126
switch g.kind.value {
122127
case .capture:
123-
captures.append(.init(optionalDepth: nesting.depth, g.location))
128+
captures.append(.init(optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, g.location))
124129

125130
case .namedCapture(let name):
126131
captures.append(.init(
127-
name: name.value, optionalDepth: nesting.depth, g.location))
132+
name: name.value, optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, g.location))
128133

129134
case .balancedCapture(let b):
130135
captures.append(.init(
131-
name: b.name?.value, optionalDepth: nesting.depth, g.location))
136+
name: b.name?.value, optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, g.location))
132137

133138
default: break
134139
}
135-
addCaptures(of: g.child, optionalNesting: nesting)
140+
addCaptures(of: g.child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput)
136141

137142
case .conditional(let c):
138143
switch c.condition.kind {
139144
case .group(let g):
140-
addCaptures(of: .group(g), optionalNesting: nesting)
145+
addCaptures(of: .group(g), optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput)
141146
default:
142147
break
143148
}
144149

145-
addCaptures(of: c.trueBranch, optionalNesting: nesting.addingOptional)
146-
addCaptures(of: c.falseBranch, optionalNesting: nesting.addingOptional)
150+
addCaptures(of: c.trueBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput)
151+
addCaptures(of: c.falseBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput)
147152

148153
case .quantification(let q):
149154
var optNesting = nesting
150155
if q.amount.value.bounds.atLeast == 0 {
151156
optNesting = optNesting.addingOptional
152157
}
153-
addCaptures(of: q.child, optionalNesting: optNesting)
158+
addCaptures(of: q.child, optionalNesting: optNesting, visibleInTypedOutput: visibleInTypedOutput)
154159

155160
case .absentFunction(let abs):
156161
switch abs.kind {
157162
case .expression(_, _, let child):
158-
addCaptures(of: child, optionalNesting: nesting)
163+
addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput)
159164
case .clearer, .repeater, .stopper:
160165
break
161166
}
@@ -166,8 +171,8 @@ extension CaptureList.Builder {
166171
}
167172
public static func build(_ ast: AST) -> CaptureList {
168173
var builder = Self()
169-
builder.captures.append(.init(optionalDepth: 0, .fake))
170-
builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false))
174+
builder.captures.append(.init(optionalDepth: 0, visibleInTypedOutput: true, .fake))
175+
builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false), visibleInTypedOutput: true)
171176
return builder.captures
172177
}
173178
}

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,7 @@ fileprivate extension Compiler.ByteCodeGen {
874874
switch node {
875875
case .concatenation(let ch):
876876
return ch.flatMap(flatten)
877-
case .convertedRegexLiteral(let n, _):
877+
case .convertedRegexLiteral(let n, _), .ignoreCapturesInTypedOutput(let n):
878878
return flatten(n)
879879
default:
880880
return [node]
@@ -951,6 +951,9 @@ fileprivate extension Compiler.ByteCodeGen {
951951
case let .nonCapturingGroup(kind, child):
952952
try emitNoncapturingGroup(kind.ast, child)
953953

954+
case let .ignoreCapturesInTypedOutput(child):
955+
try emitNode(child)
956+
954957
case .conditional:
955958
throw Unsupported("Conditionals")
956959

Sources/_StringProcessing/Capture.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ extension Sequence where Element == AnyRegexOutput.Element {
6161
// and traffic through existentials
6262
@available(SwiftStdlib 5.7, *)
6363
func existentialOutput(from input: String) -> Any {
64-
let elements = map {
64+
let elements = filter(\.representation.visibleInTypedOutput).map {
6565
$0.existentialOutputComponent(from: input)
6666
}
6767
return elements.count == 1

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ extension DSLTree.Node {
4242
case .orderedChoice, .conditional, .concatenation,
4343
.capture, .nonCapturingGroup,
4444
.quantification, .trivia, .empty,
45-
.absentFunction: return nil
45+
.ignoreCapturesInTypedOutput, .absentFunction: return nil
4646

4747
case .consumer:
4848
fatalError("FIXME: Is this where we handle them?")

Sources/_StringProcessing/Engine/Structuralize.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@ extension CaptureList {
1414
optionalDepth: cap.optionalDepth,
1515
content: meStored.deconstructed,
1616
name: cap.name,
17-
referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key
17+
referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key,
18+
visibleInTypedOutput: cap.visibleInTypedOutput
1819
)
1920

2021
result.append(element)

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ extension PrettyPrinter {
131131
printer.printAsPattern(convertedFromAST: child)
132132
}
133133

134+
case let .ignoreCapturesInTypedOutput(child):
135+
printAsPattern(convertedFromAST: child, isTopLevel: isTopLevel)
136+
134137
case .conditional:
135138
print("/* TODO: conditional */")
136139

Sources/_StringProcessing/Regex/AnyRegexOutput.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,10 @@ extension AnyRegexOutput {
359359

360360
/// The capture reference this element refers to.
361361
var referenceID: ReferenceID? = nil
362+
363+
/// A Boolean value indicating whether this capture should be included in
364+
/// the typed output.
365+
var visibleInTypedOutput: Bool
362366
}
363367

364368
internal init(input: String, elements: [ElementRepresentation]) {

0 commit comments

Comments
 (0)