Skip to content

Reuse the executor in firstMatch #489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,22 @@ extension Processor {
assert(currentPosition >= start)
assert(currentPosition <= end)
}

/// Returns this processor to a fresh, runnable state positioned at
/// `newPosition`, so the same instance can be used for another attempt
/// instead of constructing a new one.
///
/// - Parameter newPosition: The position assigned to `currentPosition`.
mutating func reset(newPosition: Position) {
  // Start over at pc 0 with the caller-supplied position.
  controller = Controller(pc: 0)
  currentPosition = newPosition

  // The upper bound of `bounds` is handed to the register file as its
  // sentinel position.
  registers.reset(bounds.upperBound)

  // Replace every stored capture with a default-initialized value while
  // keeping the number of capture slots unchanged.
  for slot in storedCaptures.indices {
    storedCaptures[slot] = .init()
  }

  // Empty both stacks but keep their allocations for the next run.
  savePoints.removeAll(keepingCapacity: true)
  callStack.removeAll(keepingCapacity: true)

  // Clear the outcome of any previous run.
  state = .inProgress
  failureReason = nil
}
}

extension Processor {
Expand Down
24 changes: 23 additions & 1 deletion Sources/_StringProcessing/Engine/Registers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ struct SentinelValue: Hashable, CustomStringConvertible {
extension Processor {
/// Our register file
struct Registers {
var info: MEProgram<Input>.RegisterInfo

// currently, these are static readonly
var elements: [Element]

Expand Down Expand Up @@ -114,7 +116,7 @@ extension Processor.Registers {
_ program: MEProgram<Input>,
_ sentinel: Input.Index
) {
let info = program.registerInfo
self.info = program.registerInfo

self.elements = program.staticElements
assert(elements.count == info.elements)
Expand Down Expand Up @@ -156,6 +158,26 @@ extension Processor.Registers {

self.savePointAddresses = Array(repeating: 0, count: info.savePointAddresses)
}

mutating func reset(_ sentinel: Input.Index) {
// note: Is there any issue with the program transform functions holding
// state and not getting reset here? Do we care?
func clear<T>(_ xs: inout [T], _ v: T) {
for idx in xs.indices {
xs[idx] = v
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or as an extension on MutableCollection. @natecook1000 does such an algorithm exist?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could try these out here before adding them to swift-algorithms:

extension MutableCollection {
  /// Calls `body` once per element, in order from `startIndex` up to
  /// `endIndex`, passing each element `inout` so it can be modified in place.
  ///
  /// - Parameter body: A closure that may mutate the element it receives.
  /// - Throws: Rethrows any error thrown by `body`.
  mutating func withEach(_ body: (inout Element) throws -> Void) rethrows {
    var cursor = startIndex
    while cursor < endIndex {
      try body(&self[cursor])
      formIndex(after: &cursor)
    }
  }

  /// Overwrites every element of the collection with `element`.
  ///
  /// - Parameter element: The value assigned to each position.
  mutating func setAll(to element: Element) {
    withEach { slot in
      slot = element
    }
  }
}

// usage:
self.bools.setAll(to: false)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also considering underscoring them so that no one accidentally ships them 😅


clear(&self.bools, false)
clear(&self.ints, 0)
clear(&self.floats, 0)
clear(&self.positions, sentinel)
clear(&self.values, SentinelValue())
clear(&self.instructionAddresses, 0)
clear(&self.classStackAddresses, 0)
clear(&self.positionStackAddresses, 0)
clear(&self.savePointAddresses, 0)
}
}

extension MEProgram {
Expand Down
42 changes: 41 additions & 1 deletion Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,47 @@ struct Executor {
) throws -> Regex<Output>.Match? {
var cpu = engine.makeProcessor(
input: input, bounds: inputRange, matchMode: mode)
return try consume(input, &cpu, startingFrom: inputRange.lowerBound)
}

/// Searches `input` for the first match, trying successive start positions
/// inside `inputRange` and reusing one processor for every attempt.
///
/// - Parameters:
///   - input: The string to search.
///   - inputRange: The range of `input` to search; its lower bound is the
///     first start position tried and the range is passed to the processor
///     as its bounds.
///   - level: Selects how the start position advances after a failed
///     attempt: via `input.formIndex(after:)` for `.graphemeCluster`,
///     otherwise via `input.unicodeScalars.formIndex(after:)`.
/// - Returns: The first match found, or `nil` if an attempt fails with the
///   start position at or past the upper bound of `inputRange`.
/// - Throws: Whatever `consume` throws.
@available(SwiftStdlib 5.7, *)
func firstMatch<Output>(
_ input: String,
in inputRange: Range<String.Index>,
level: MatchingOptions.SemanticLevel
) throws -> Regex<Output>.Match? {
var low = inputRange.lowerBound
let high = inputRange.upperBound
let mode: MatchMode = .partialFromFront
// The processor is created once here and reset between attempts below.
var cpu = engine.makeProcessor(
input: input, bounds: inputRange, matchMode: mode)

// Each iteration makes one attempt starting at `low`.
while true {
if let m: Regex<Output>.Match = try consume(
input,
&cpu,
startingFrom: low
Comment on lines +47 to +49
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
input,
&cpu,
startingFrom: low
input, &cpu, startingFrom: low

) {
return m
}

// The bound check comes after the attempt, so an attempt is still made
// when `low` equals `high` before giving up.
if low >= high { return nil }
if level == .graphemeCluster {
input.formIndex(after: &low)
} else {
input.unicodeScalars.formIndex(after: &low)
}

// Reset the processor so the next attempt starts at the advanced `low`.
cpu.reset(newPosition: low)
}
}

@available(SwiftStdlib 5.7, *)
func consume<Output>(
_ input: String,
_ cpu: inout Processor<String>,
startingFrom startIdx: String.Index
) throws -> Regex<Output>.Match? {
guard let endIdx = cpu.consume() else {
if let e = cpu.failureReason {
throw e
Expand All @@ -40,7 +80,7 @@ struct Executor {
referencedCaptureOffsets: engine.program.referencedCaptureOffsets,
namedCaptureOffsets: engine.program.namedCaptureOffsets)

let range = inputRange.lowerBound..<endIdx
let range = startIdx..<endIdx
let caps = engine.program.captureList.createElements(capList, input)

// FIXME: This is a workaround for not tracking (or
Expand Down
21 changes: 5 additions & 16 deletions Sources/_StringProcessing/Regex/Match.swift
Original file line number Diff line number Diff line change
Expand Up @@ -151,22 +151,11 @@ extension Regex {
_ input: String,
in inputRange: Range<String.Index>
) throws -> Regex<Output>.Match? {
// FIXME: Something more efficient, likely an engine interface, and we
// should scrap the RegexConsumer crap and call this

var low = inputRange.lowerBound
let high = inputRange.upperBound
while true {
if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
return m
}
if low >= high { return nil }
if regex.initialOptions.semanticLevel == .graphemeCluster {
input.formIndex(after: &low)
} else {
input.unicodeScalars.formIndex(after: &low)
}
}
let executor = Executor(program: regex.program.loweredProgram)
return try executor.firstMatch(
input, in: inputRange,
level: regex.initialOptions.semanticLevel
)
}
}

Expand Down