Skip to content

Speed up quantification optimizations by unswitching #706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions Sources/_StringProcessing/ConsumerInterface.swift
Original file line number Diff line number Diff line change
Expand Up @@ -391,9 +391,8 @@ extension DSLTree.CustomCharacterClass.Member {

return { input, bounds in
let curIdx = bounds.lowerBound
let nextIndex = isCharacterSemantic
? input.index(after: curIdx)
: input.unicodeScalars.index(after: curIdx)
let nextIndex = input.index(
after: curIdx, isScalarSemantics: !isCharacterSemantic)

// Under grapheme semantics, we compare based on single NFC scalars. If
// such a character is not single scalar under NFC, the match fails. In
Expand Down Expand Up @@ -603,9 +602,9 @@ extension AST.Atom.CharacterProperty {
if p(input, bounds) != nil { return nil }

// TODO: bounds check
return opts.semanticLevel == .graphemeCluster
? input.index(after: bounds.lowerBound)
: input.unicodeScalars.index(after: bounds.lowerBound)
return input.index(
after: bounds.lowerBound,
isScalarSemantics: opts.semanticLevel == .unicodeScalar)
}
}

Expand Down
19 changes: 19 additions & 0 deletions Sources/_StringProcessing/Engine/MEBuiltins.swift
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,25 @@ extension String {
else { return nil }
return next
}

/// Matches the regex "dot" (any character) at `currentPosition`.
///
/// - Parameters:
///   - currentPosition: The position at which to attempt the match.
///   - end: The exclusive upper limit; nothing matches at or past it.
///   - anyMatchesNewline: When `true`, dot matches unconditionally and the
///     position is simply advanced by one element.
///   - isScalarSemantics: Selects the unicode-scalar view (`true`) or the
///     character view (`false`) when advancing.
/// - Returns: The position after the matched element, or `nil` if
///   `currentPosition` is not before `end` or the non-newline matcher
///   rejects the input.
internal func matchRegexDot(
  at currentPosition: Index,
  limitedBy end: Index,
  anyMatchesNewline: Bool,
  isScalarSemantics: Bool
) -> Index? {
  // Nothing left to consume.
  if currentPosition >= end { return nil }

  // Newlines are excluded: defer to the dedicated non-newline matcher.
  guard anyMatchesNewline else {
    return matchAnyNonNewline(
      at: currentPosition,
      limitedBy: end,
      isScalarSemantics: isScalarSemantics)
  }

  // Dot matches everything: just step forward in the selected view.
  return index(
    after: currentPosition, isScalarSemantics: isScalarSemantics)
}
}

// MARK: - Built-in character class matching
Expand Down
174 changes: 134 additions & 40 deletions Sources/_StringProcessing/Engine/MEQuantify.swift
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
private typealias ASCIIBitset = DSLTree.CustomCharacterClass.AsciiBitset

extension Processor {
func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? {
/// Entry point for the `.quantify` instruction: dispatches to the
/// interpreter specialized for the payload's kind and trip counts.
///
/// - `.reluctant` (any trip counts): unsupported; asserts in debug builds
///   and reports no match.
/// - `.eager` with `minTrips == 0`, unbounded extra trips (`*`): always
///   matches.
/// - `.eager` with `minTrips == 1`, unbounded extra trips (`+`).
/// - Any remaining kind with `minTrips == 0`, `maxExtraTrips == 1` (`?`):
///   always matches.
/// - Everything else: the general interpreter.
///
/// - Parameter payload: The quantification payload of the instruction.
/// - Returns: `true` if the quantification matched, `false` otherwise.
internal mutating func runQuantify(_ payload: QuantifyPayload) -> Bool {
  switch (payload.quantKind, payload.minTrips, payload.maxExtraTrips) {
  case (.reluctant, _, _):
    assertionFailure(".reluctant is not supported by .quantify")
    // TODO: this was pre-refactoring behavior, should we fatal error
    // instead?
    return false
  case (.eager, 0, nil):
    runEagerZeroOrMoreQuantify(payload)
    return true
  case (.eager, 1, nil):
    return runEagerOneOrMoreQuantify(payload)
  case (_, 0, 1):
    runZeroOrOneQuantify(payload)
    return true
  default:
    return runGeneralQuantify(payload)
  }
}

private func doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? {
let isScalarSemantics = payload.isScalarSemantics

switch payload.type {
Expand All @@ -17,8 +40,6 @@ extension Processor {
boundaryCheck: !isScalarSemantics,
isCaseInsensitive: false)
case .builtin:
guard currentPosition < end else { return nil }

// We only emit .quantify if it consumes a single character
return input.matchBuiltinCC(
payload.builtin,
Expand All @@ -28,33 +49,25 @@ extension Processor {
isStrictASCII: payload.builtinIsStrict,
isScalarSemantics: isScalarSemantics)
case .any:
guard currentPosition < end else { return nil }

if payload.anyMatchesNewline {
if isScalarSemantics {
return input.unicodeScalars.index(after: currentPosition)
}
return input.index(after: currentPosition)
}

return input.matchAnyNonNewline(
return input.matchRegexDot(
at: currentPosition,
limitedBy: end,
anyMatchesNewline: payload.anyMatchesNewline,
isScalarSemantics: isScalarSemantics)
}
}

/// Generic quantify instruction interpreter
/// - Handles .eager and .possessive
/// - Handles arbitrary minTrips and maxExtraTrips
mutating func runQuantify(_ payload: QuantifyPayload) -> Bool {
private mutating func runGeneralQuantify(_ payload: QuantifyPayload) -> Bool {
assert(payload.quantKind != .reluctant)

var trips = 0
var maxExtraTrips = payload.maxExtraTrips

while trips < payload.minTrips {
guard let next = _doQuantifyMatch(payload) else {
guard let next = doQuantifyMatch(payload) else {
signalFailure()
return false
}
Expand All @@ -67,7 +80,7 @@ extension Processor {
return true
}

guard let next = _doQuantifyMatch(payload) else {
guard let next = doQuantifyMatch(payload) else {
return true
}
maxExtraTrips = maxExtraTrips.map { $0 - 1 }
Expand All @@ -81,7 +94,7 @@ extension Processor {
while true {
if maxExtraTrips == 0 { break }

guard let next = _doQuantifyMatch(payload) else {
guard let next = doQuantifyMatch(payload) else {
break
}
maxExtraTrips = maxExtraTrips.map({$0 - 1})
Expand All @@ -100,67 +113,148 @@ extension Processor {
}

/// Specialized quantify instruction interpreter for `*`, always succeeds
mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) {
private mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) {
assert(payload.quantKind == .eager
&& payload.minTrips == 0
&& payload.maxExtraTrips == nil)
_doRunEagerZeroOrMoreQuantify(payload)
_ = doRunEagerZeroOrMoreQuantify(payload)
}

// NOTE: So-as to inline into one-or-more call, which makes a significant
// performance difference
// Returns whether it matched at least once
//
// NOTE: inline-always so-as to inline into one-or-more call, which makes a
// significant performance difference
@inline(__always)
mutating func _doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) {
guard let next = _doQuantifyMatch(payload) else {
// Consumed no input, no point saved
return
}

private mutating func doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) -> Bool {
// Create a quantified save point for every part of the input matched up
// to the final position.
let isScalarSemantics = payload.isScalarSemantics
let rangeStart = currentPosition
var rangeEnd = currentPosition
currentPosition = next
while true {
guard let next = _doQuantifyMatch(payload) else { break }
rangeEnd = currentPosition
currentPosition = next
var matchedOnce = false

switch payload.type {
case .asciiBitset:
let bitset = registers[payload.bitset]
while true {
guard let next = input.matchASCIIBitset(
bitset,
at: currentPosition,
limitedBy: end,
isScalarSemantics: isScalarSemantics)
else {
break
}
matchedOnce = true
rangeEnd = currentPosition
currentPosition = next
assert(currentPosition > rangeEnd)
}
case .asciiChar:
let asciiScalar = UnicodeScalar.init(_value: UInt32(payload.asciiChar))
while true {
guard let next = input.matchScalar(
asciiScalar,
at: currentPosition,
limitedBy: end,
boundaryCheck: !isScalarSemantics,
isCaseInsensitive: false)
else {
break
}
matchedOnce = true
rangeEnd = currentPosition
currentPosition = next
assert(currentPosition > rangeEnd)
}
case .builtin:
let builtin = payload.builtin
let isInverted = payload.builtinIsInverted
let isStrictASCII = payload.builtinIsStrict
while true {
guard let next = input.matchBuiltinCC(
builtin,
at: currentPosition,
limitedBy: end,
isInverted: isInverted,
isStrictASCII: isStrictASCII,
isScalarSemantics: isScalarSemantics)
else {
break
}
matchedOnce = true
rangeEnd = currentPosition
currentPosition = next
assert(currentPosition > rangeEnd)
}
case .any:
let anyMatchesNewline = payload.anyMatchesNewline
while true {
guard let next = input.matchRegexDot(
at: currentPosition,
limitedBy: end,
anyMatchesNewline: anyMatchesNewline,
isScalarSemantics: isScalarSemantics)
else {
break
}
matchedOnce = true
rangeEnd = currentPosition
currentPosition = next
assert(currentPosition > rangeEnd)
}
}

guard matchedOnce else {
// Consumed no input, no point saved
return false
}

savePoints.append(makeQuantifiedSavePoint(rangeStart..<rangeEnd, isScalarSemantics: payload.isScalarSemantics))
// NOTE: We can't assert that rangeEnd trails currentPosition by one
// position, because newline-sequence in scalar semantic mode still
// matches two scalars

savePoints.append(makeQuantifiedSavePoint(
rangeStart..<rangeEnd, isScalarSemantics: payload.isScalarSemantics))
return true
}

/// Specialized quantify instruction interpreter for `+`
mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool {
private mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool {
assert(payload.quantKind == .eager
&& payload.minTrips == 1
&& payload.maxExtraTrips == nil)

// Match at least once
guard let next = _doQuantifyMatch(payload) else {
//
// NOTE: Due to newline-sequence in scalar-semantic mode advancing two
// positions, we can't just have doRunEagerZeroOrMoreQuantify return the
// range-end and advance the range-start ourselves. Instead, we do one
// call before looping.
guard let next = doQuantifyMatch(payload) else {
signalFailure()
return false
}

// Run `a+` as `aa*`
currentPosition = next
_doRunEagerZeroOrMoreQuantify(payload)
doRunEagerZeroOrMoreQuantify(payload)
return true
}

/// Specialized quantify instruction interpreter for ?
mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool {
private mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) {
assert(payload.minTrips == 0
&& payload.maxExtraTrips == 1)
let next = _doQuantifyMatch(payload)
let next = doQuantifyMatch(payload)
guard let idx = next else {
return true // matched zero times
return // matched zero times
}
if payload.quantKind != .possessive {
// Save the zero match
savePoints.append(makeSavePoint(resumingAt: currentPC+1))
}
currentPosition = idx
return true
return
}
}
20 changes: 3 additions & 17 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -515,26 +515,12 @@ extension Processor {
controller.step()
}
case .quantify:
let quantPayload = payload.quantify
let matched: Bool
switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.maxExtraTrips) {
case (.reluctant, _, _):
assertionFailure(".reluctant is not supported by .quantify")
return
case (.eager, 0, nil):
runEagerZeroOrMoreQuantify(quantPayload)
matched = true
case (.eager, 1, nil):
matched = runEagerOneOrMoreQuantify(quantPayload)
case (_, 0, 1):
matched = runZeroOrOneQuantify(quantPayload)
default:
matched = runQuantify(quantPayload)
}
if matched {
if runQuantify(payload.quantify) {
controller.step()
}



case .consumeBy:
let reg = payload.consumer
let consumer = registers[reg]
Expand Down
12 changes: 12 additions & 0 deletions Sources/_StringProcessing/Utility/Misc.swift
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,15 @@ enum QuickResult<R> {
case unknown
}

extension String {
  /// Returns the index immediately following `idx`, stepping by one Unicode
  /// scalar when `isScalarSemantics` is `true` and by one `Character`
  /// (grapheme cluster) otherwise.
  func index(after idx: Index, isScalarSemantics: Bool) -> Index {
    guard isScalarSemantics else {
      // Grapheme semantics: advance in the character view.
      return index(after: idx)
    }
    // Scalar semantics: advance in the unicode-scalar view.
    return unicodeScalars.index(after: idx)
  }
}