-
Notifications
You must be signed in to change notification settings - Fork 49
Add regex benchmarker #491
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
98992b0
3e650cb
4ea430c
1384a69
fb7459d
4292b7a
73b0482
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import _StringProcessing | ||
import Foundation | ||
|
||
/// A named unit of work that can be executed on demand.
public protocol RegexBenchmark {
  /// Performs the benchmarked operation once.
  func run()

  /// Label identifying this benchmark.
  var name: String { get }
}
|
||
/// A benchmark that applies a Swift `Regex` to a fixed target string.
public struct Benchmark: RegexBenchmark {
  /// Selects which matching entry point `run()` exercises.
  public enum MatchType {
    case whole
    case first
    case allMatches
  }

  // Stored properties are kept in this order because it determines the
  // parameter order of the synthesized memberwise initializer.
  public let name: String
  let regex: Regex<Substring>
  let ty: MatchType
  let target: String

  /// Runs the match operation chosen by `ty` once against `target` and hands
  /// the result to `blackHole`.
  public func run() {
    switch ty {
    case .first:
      blackHole(target.firstMatch(of: regex))
    case .whole:
      blackHole(target.wholeMatch(of: regex))
    case .allMatches:
      blackHole(target.matches(of: regex))
    }
  }
}
|
||
/// A benchmark that applies an `NSRegularExpression` to a fixed target string.
public struct NSBenchmark: RegexBenchmark {
  /// Selects which `NSRegularExpression` entry point `run()` exercises.
  public enum NSMatchType {
    case all
    case first
  }

  // Stored properties are kept in this order because it determines the
  // parameter order of the synthesized memberwise initializer.
  public let name: String
  let regex: NSRegularExpression
  let ty: NSMatchType
  let target: String

  /// The `NSRange` spanning all of `target`; recomputed on every access.
  var range: NSRange {
    let entireTarget = target.startIndex..<target.endIndex
    return NSRange(entireTarget, in: target)
  }

  /// Runs the search chosen by `ty` once over the whole of `target` and hands
  /// the result to `blackHole`.
  public func run() {
    // The range is computed inside run(), so its cost is part of whatever the
    // caller measures around this call.
    let searchRange = range
    switch ty {
    case .first:
      blackHole(regex.firstMatch(in: target, range: searchRange))
    case .all:
      blackHole(regex.matches(in: target, range: searchRange))
    }
  }
}
|
||
/// Holds a named suite of `RegexBenchmark`s and either times them (`run()`)
/// or executes each one once (`profile()`).
public struct BenchmarkRunner {
  /// Name of the suite.
  let suiteName: String
  /// Registered benchmarks, in registration order.
  var suite: [any RegexBenchmark]
  /// Number of timed runs collected per benchmark by `measure(benchmark:)`.
  let samples: Int

  /// Creates an empty suite that collects 20 samples per benchmark.
  public init(_ suiteName: String) {
    self.init(suiteName, 20)
  }

  /// Creates an empty suite that collects `n` samples per benchmark.
  ///
  /// - Parameters:
  ///   - suiteName: Name of the suite.
  ///   - n: Samples per benchmark. Must be positive for `run()` to succeed.
  public init(_ suiteName: String, _ n: Int) {
    self.suiteName = suiteName
    self.suite = []
    self.samples = n
  }

  /// Appends `new` to the end of the suite.
  public mutating func register(_ new: some RegexBenchmark) {
    suite.append(new)
  }

  /// Times `benchmark` over `samples` runs and returns the median.
  ///
  /// - Precondition: `samples` is greater than zero; a median of nothing does
  ///   not exist, so this fails with an explicit message rather than an
  ///   out-of-bounds trap.
  func measure(benchmark: some RegexBenchmark) -> Time {
    precondition(
      samples > 0,
      "BenchmarkRunner needs at least one sample to report a median (got \(samples))")

    // initial run to make sure the regex has been compiled
    // Future work (from review): we'll want to know how much time is spent
    // compiling vs not.
    benchmark.run()

    var times: [Time] = []
    times.reserveCapacity(samples)

    // fixme: use suspendingclock?
    for _ in 0..<samples {
      let start = Tick.now
      benchmark.run()
      let end = Tick.now
      times.append(end.elapsedTime(since: start))
    }
    // todo: compute stdev and warn if it's too large

    // Return the median time (the upper middle element for an even count).
    // Future work (from review): provide the times as a type which the caller
    // can ask for the median of.
    times.sort()
    return times[times.count / 2]
  }

  /// Measures every registered benchmark and prints its name and median time.
  public func run() {
    print("Running")
    for b in suite {
      print("- \(b.name) \(measure(benchmark: b))")
    }
  }

  /// Runs every registered benchmark exactly once, with no warm-up run and no
  /// timing, printing a marker before and after each.
  public func profile() {
    print("Starting")
    for b in suite {
      print("- \(b.name)")
      b.run()
      print("- done")
    }
  }
}
|
||
/// Consumes its argument and does nothing with it.
///
/// The function is marked `@inline(never)`. Presumably this keeps the work
/// that produced `x` from being optimized away at the call site; confirm
/// against the build settings if that matters.
@inline(never)
public func blackHole<Value>(_ x: Value) {
  // nom nom nom
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import ArgumentParser | ||
|
||
/// Command-line entry point that builds the regex benchmark suite and either
/// measures it or runs it once for profiling.
@main
struct Runner: ParsableCommand {
  @Argument(help: "Names of benchmarks to run")
  var specificBenchmarks: [String] = []

  // NOTE(review): declared as an @Option, so a value is expected on the
  // command line; a @Flag may have been intended. Left unchanged to keep the
  // existing command-line interface intact.
  //
  // Review note: "Sometimes it helps profiling to run many samples since
  // you'll see what warm behavior looks like and the hot parts become more
  // pronounced. What's the difference between this flag and a sample count
  // of 1?"
  // As written: profile mode calls each benchmark once with no warm-up and no
  // timing, while the sampling path performs one warm-up run plus `samples`
  // timed runs.
  @Option(help: "Run only once for profiling purposes")
  var profile = false

  @Option(help: "How many samples to collect for each benchmark")
  var samples = 20

  /// Rejects sample counts that cannot yield a measurement.
  ///
  /// Profile mode never samples, so the count is only checked when measuring.
  ///
  /// - Throws: `ValidationError` when measuring with fewer than one sample.
  mutating func validate() throws {
    guard profile || samples > 0 else {
      throw ValidationError("'samples' must be at least 1 (got \(samples)).")
    }
  }

  /// Builds the "RegexBench" runner and registers every benchmark group.
  ///
  /// Review note: "Could we make it an array or some other way to make
  /// registration easier? Seems like we could then filter that array when
  /// creating the runner."
  func makeRunner() -> BenchmarkRunner {
    var benchmark = BenchmarkRunner("RegexBench", samples)
    benchmark.addReluctantQuant()
    benchmark.addBacktracking()
    benchmark.addCSS()
    benchmark.addFirstMatch()
    return benchmark
  }

  /// Runs the suite, restricted to `specificBenchmarks` when any were named.
  mutating func run() throws {
    var runner = makeRunner()
    if !specificBenchmarks.isEmpty {
      // A set gives constant-time name lookups while filtering.
      let requested = Set(specificBenchmarks)
      runner.suite = runner.suite.filter { requested.contains($0.name) }
    }
    if profile {
      runner.profile()
    } else {
      runner.run()
    }
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import _StringProcessing | ||
import RegexBuilder | ||
import Foundation | ||
|
||
// Tests that involve heavy backtracking | ||
|
||
extension BenchmarkRunner {
  /// Registers the four "BasicBacktrack" benchmarks: all-matches and
  /// first-match, each for Swift `Regex` and for `NSRegularExpression`.
  ///
  /// Every benchmark applies the same pattern to a target of 10000 spaces.
  /// The target contains no `A`, so the pattern cannot match it.
  mutating func addBacktracking() {
    let pattern = "^ +A"
    let input = String(repeating: " ", count: 10000)

    // Review note: "Hmm... we want whole-match benchmarks, match-from-front
    // benchmarks, first-match, and all-matches (which is repeated first-match
    // calls). We also want to make the NSRegularExpression equivalent of
    // each, at least if there can be an equivalent."

    // Each benchmark compiles its own regex instance. Registration order is
    // preserved, since it is the order in which the suite runs.
    register(
      Benchmark(
        name: "BasicBacktrack",
        regex: try! Regex(pattern),
        // Review note (truncated in the captured page): "Go ahead and give the"
        ty: .allMatches,
        target: input
      )
    )

    register(
      NSBenchmark(
        name: "BasicBacktrackNS",
        regex: try! NSRegularExpression(pattern: pattern),
        ty: .all,
        target: input
      )
    )

    register(
      Benchmark(
        name: "BasicBacktrackFirstMatch",
        regex: try! Regex(pattern),
        ty: .first,
        target: input
      )
    )

    register(
      NSBenchmark(
        name: "BasicBacktrackNSFirstMatch",
        regex: try! NSRegularExpression(pattern: pattern),
        ty: .first,
        target: input
      )
    )
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think you need this dependency