Skip to content

Commit 1356e8c

Browse files
authored
More optimizations, remove history preservation (#495)
* Re-use the same executor, remember semantic mode.

  Gives around a 20% perf improvement to first-match style benchmarks.

* Remove history preservation

  Cuts down on memory usage and avoids some ARC overhead. ~20% gains on "AllMatches" and related benchmarks.

* Lower-level matchSeq

  Avoid collection algorithms inside matchSeq, which are liable to add ARC and inefficiencies. Results in a 3x improvement to ReluctantQuantWithTerminal.
1 parent b4f12bb commit 1356e8c

File tree

5 files changed

+71
-78
lines changed

5 files changed

+71
-78
lines changed

Diff for: Sources/_StringProcessing/ByteCodeGen.swift

+8-4
Original file line numberDiff line numberDiff line change
@@ -665,15 +665,19 @@ fileprivate extension Compiler.ByteCodeGen {
665665
}
666666
// If there's a capture transform, apply it now.
667667
if let transform = transform {
668-
let fn = builder.makeTransformFunction { input, storedCapture in
668+
let fn = builder.makeTransformFunction { input, cap in
669669
// If it's a substring capture with no custom value, apply the
670670
// transform directly to the substring to avoid existential traffic.
671-
if let cap = storedCapture.latest, cap.value == nil {
672-
return try transform(input[cap.range])
671+
//
672+
// FIXME: separate out this code path. This is fragile,
673+
// slow, and these are clearly different constructs
674+
if let range = cap.range, cap.value == nil {
675+
return try transform(input[range])
673676
}
677+
674678
let value = constructExistentialOutputComponent(
675679
from: input,
676-
component: storedCapture.latest,
680+
component: cap.deconstructed,
677681
optionalCount: 0)
678682
return try transform(value)
679683
}

Diff for: Sources/_StringProcessing/Engine/MECapture.swift

+22-46
Original file line numberDiff line numberDiff line change
@@ -32,59 +32,48 @@
3232

3333
extension Processor {
3434
struct _StoredCapture {
35-
// Set whenever we push the very first capture, allows us
36-
// to theoretically re-compute anything we want to later.
37-
fileprivate var startState: SavePoint? = nil
38-
39-
// Save the entire history as we go, so that backtracking
40-
// can just lop-off aborted runs.
41-
//
42-
// Backtracking entries can specify a per-capture stack
43-
// index so that we can abort anything that came after.
44-
//
45-
// By remembering the entire history, we waste space, but
46-
// we get flexibility for now.
47-
//
48-
fileprivate var history: Array<(range: Range<Position>, value: Any?)> = []
35+
var range: Range<Position>? = nil
36+
37+
var value: Any? = nil
4938

5039
// An in-progress capture start
5140
fileprivate var currentCaptureBegin: Position? = nil
5241

5342
fileprivate func _invariantCheck() {
54-
if startState == nil {
55-
assert(history.isEmpty)
56-
assert(currentCaptureBegin == nil)
57-
} else if currentCaptureBegin == nil {
58-
assert(!history.isEmpty)
43+
if range == nil {
44+
assert(value == nil)
5945
}
6046
}
6147

6248
// MARK: - IPI
6349

64-
var isEmpty: Bool { history.isEmpty }
65-
66-
var latest: (range: Range<Position>, value: Any?)? { history.last }
50+
var deconstructed: (range: Range<Position>, value: Any?)? {
51+
guard let r = range else { return nil }
52+
return (r, value)
53+
}
6754

6855
/// Start a new capture. If the previously started one was un-ended,
69-
/// will clear it and restart. If this is the first start, will save `initial`.
56+
/// will clear it and restart.
7057
mutating func startCapture(
71-
_ idx: Position, initial: SavePoint
58+
_ idx: Position
7259
) {
7360
_invariantCheck()
7461
defer { _invariantCheck() }
7562

76-
if self.startState == nil {
77-
self.startState = initial
78-
}
7963
currentCaptureBegin = idx
8064
}
8165

8266
mutating func endCapture(_ idx: Position) {
8367
_invariantCheck()
84-
assert(currentCaptureBegin != nil)
8568
defer { _invariantCheck() }
8669

87-
history.append((currentCaptureBegin! ..< idx, value: nil))
70+
guard let low = currentCaptureBegin else {
71+
fatalError("Invariant violated: ending unstarted capture")
72+
}
73+
74+
range = low..<idx
75+
value = nil // TODO: cleaner IPI around this...
76+
currentCaptureBegin = nil
8877
}
8978

9079
mutating func registerValue(
@@ -93,28 +82,15 @@ extension Processor {
9382
) {
9483
_invariantCheck()
9584
defer { _invariantCheck() }
96-
if let sp = overwriteInitial {
97-
self.startState = sp
98-
}
99-
history[history.endIndex - 1].value = value
100-
}
101-
102-
mutating func fail(truncatingAt stackIdx: Int) {
103-
_invariantCheck()
104-
assert(stackIdx <= history.endIndex)
105-
defer { _invariantCheck() }
10685

107-
history.removeSubrange(stackIdx...)
108-
if history.isEmpty {
109-
startState = nil
110-
}
86+
self.value = value
11187
}
11288
}
11389
}
11490

11591
extension Processor._StoredCapture: CustomStringConvertible {
11692
var description: String {
117-
return String(describing: history)
93+
return String(describing: self)
11894
}
11995
}
12096

@@ -124,10 +100,10 @@ struct MECaptureList {
124100

125101
func latestUntyped(from input: String) -> Array<Substring?> {
126102
values.map {
127-
guard let last = $0.latest else {
103+
guard let range = $0.range else {
128104
return nil
129105
}
130-
return input[last.0]
106+
return input[range]
131107
}
132108
}
133109
}

Diff for: Sources/_StringProcessing/Engine/Processor.swift

+33-25
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,12 @@ extension Processor {
103103
input[bounds]
104104
}
105105

106+
// Advance in our input, without any checks or failure signalling
107+
mutating func _uncheckedForcedConsumeOne() {
108+
assert(currentPosition != end)
109+
input.formIndex(after: &currentPosition)
110+
}
111+
106112
// Advance in our input
107113
//
108114
// Returns whether the advance succeeded. On failure, our
@@ -145,30 +151,26 @@ extension Processor {
145151
return slice
146152
}
147153

148-
mutating func match(_ e: Element) {
154+
// Match against the current input element. Returns whether
155+
// it succeeded vs signaling an error.
156+
mutating func match(_ e: Element) -> Bool {
149157
guard let cur = load(), cur == e else {
150158
signalFailure()
151-
return
152-
}
153-
if consume(1) {
154-
controller.step()
159+
return false
155160
}
161+
_uncheckedForcedConsumeOne()
162+
return true
156163
}
164+
165+
// Match against the current input prefix. Returns whether
166+
// it succeeded vs signaling an error.
157167
mutating func matchSeq<C: Collection>(
158168
_ seq: C
159-
) where C.Element == Input.Element {
160-
let count = seq.count
161-
162-
guard let inputSlice = load(count: count),
163-
seq.elementsEqual(inputSlice)
164-
else {
165-
signalFailure()
166-
return
167-
}
168-
guard consume(.init(count)) else {
169-
fatalError("unreachable")
169+
) -> Bool where C.Element == Input.Element {
170+
for e in seq {
171+
guard match(e) else { return false }
170172
}
171-
controller.step()
173+
return true
172174
}
173175

174176
mutating func signalFailure() {
@@ -356,18 +358,24 @@ extension Processor {
356358

357359
case .match:
358360
let reg = payload.element
359-
match(registers[reg])
361+
if match(registers[reg]) {
362+
controller.step()
363+
}
360364

361365
case .matchSequence:
362366
let reg = payload.sequence
363367
let seq = registers[reg]
364-
matchSeq(seq)
368+
if matchSeq(seq) {
369+
controller.step()
370+
}
365371

366372
case .matchSlice:
367373
let (lower, upper) = payload.pairedPosPos
368374
let range = registers[lower]..<registers[upper]
369375
let slice = input[range]
370-
matchSeq(slice)
376+
if matchSeq(slice) {
377+
controller.step()
378+
}
371379

372380
case .consumeBy:
373381
let reg = payload.consumer
@@ -439,19 +447,19 @@ extension Processor {
439447
// Should we assert it's not finished yet?
440448
// What's the behavior there?
441449
let cap = storedCaptures[capNum]
442-
guard let range = cap.latest?.range else {
450+
guard let range = cap.range else {
443451
signalFailure()
444452
return
445453
}
446-
matchSeq(input[range])
454+
if matchSeq(input[range]) {
455+
controller.step()
456+
}
447457

448458
case .beginCapture:
449459
let capNum = Int(
450460
asserting: payload.capture.rawValue)
451461

452-
let sp = makeSavePoint(self.currentPC)
453-
storedCaptures[capNum].startCapture(
454-
currentPosition, initial: sp)
462+
storedCaptures[capNum].startCapture(currentPosition)
455463
controller.step()
456464

457465
case .endCapture:

Diff for: Sources/_StringProcessing/Engine/Structuralize.swift

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ extension CaptureList {
1212
for (i, (cap, meStored)) in zip(captures, list.values).enumerated() {
1313
let element = AnyRegexOutput.ElementRepresentation(
1414
optionalDepth: cap.optionalDepth,
15-
content: meStored.latest,
15+
content: meStored.deconstructed,
1616
name: cap.name,
1717
referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key
1818
)

Diff for: Sources/_StringProcessing/Regex/Match.swift

+7-2
Original file line numberDiff line numberDiff line change
@@ -140,14 +140,19 @@ extension Regex {
140140
// FIXME: Something more efficient, likely an engine interface, and we
141141
// should scrap the RegexConsumer crap and call this
142142

143+
let executor = Executor(program: regex.program.loweredProgram)
144+
let graphemeSemantic = regex.initialOptions.semanticLevel == .graphemeCluster
145+
143146
var low = inputRange.lowerBound
144147
let high = inputRange.upperBound
145148
while true {
146-
if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
149+
if let m: Regex<Output>.Match = try executor.match(
150+
input, in: low..<high, .partialFromFront
151+
) {
147152
return m
148153
}
149154
if low >= high { return nil }
150-
if regex.initialOptions.semanticLevel == .graphemeCluster {
155+
if graphemeSemantic {
151156
input.formIndex(after: &low)
152157
} else {
153158
input.unicodeScalars.formIndex(after: &low)

0 commit comments

Comments
 (0)