Skip to content

Add regex benchmarker #491

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ let package = Package(
targets: ["_RegexParser"]),
.executable(
name: "VariadicsGenerator",
targets: ["VariadicsGenerator"])
targets: ["VariadicsGenerator"]),
.executable(
name: "RegexBenchmark",
targets: ["RegexBenchmark"])
],
dependencies: [
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.0.0"),
Expand Down Expand Up @@ -112,6 +115,17 @@ let package = Package(
"_RegexParser",
"_StringProcessing"
]),
.executableTarget(
name: "RegexBenchmark",
dependencies: [
.product(name: "ArgumentParser", package: "swift-argument-parser"),
"_RegexParser",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need this dependency

"_StringProcessing",
"RegexBuilder"
],
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
]),

// MARK: Exercises
.target(
Expand Down
116 changes: 116 additions & 0 deletions Sources/RegexBenchmark/Benchmark.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import _StringProcessing
import Foundation

/// A named piece of work that can be executed by a benchmark runner.
///
/// Conforming types in this file wrap a regex, a target string, and a choice
/// of matching operation.
public protocol RegexBenchmark {
/// Human-readable identifier for the benchmark. It is printed alongside the
/// results and compared against the benchmark names requested on the
/// command line.
var name: String { get }
/// Executes the benchmarked operation. Takes no arguments and returns
/// nothing.
func run()
}

/// A benchmark that applies a Swift `Regex` to a fixed target string.
public struct Benchmark: RegexBenchmark {
  /// The matching operation performed by `run()`.
  public enum MatchType {
    /// Match the regex against the entire target (`wholeMatch(of:)`).
    case whole
    /// Find the first occurrence in the target (`firstMatch(of:)`).
    case first
    /// Collect every occurrence in the target (`matches(of:)`).
    case allMatches
  }

  /// Name under which the benchmark is reported.
  public let name: String
  /// The regex under test.
  let regex: Regex<Substring>
  /// Which matching operation to perform.
  let ty: MatchType
  /// The string the regex is applied to.
  let target: String

  /// Performs the configured matching operation once and hands the result to
  /// `blackHole`.
  public func run() {
    switch ty {
    case .first:
      let firstHit = target.firstMatch(of: regex)
      blackHole(firstHit)
    case .allMatches:
      let everyHit = target.matches(of: regex)
      blackHole(everyHit)
    case .whole:
      let fullHit = target.wholeMatch(of: regex)
      blackHole(fullHit)
    }
  }
}

/// A benchmark that applies an `NSRegularExpression` to a fixed target string.
public struct NSBenchmark: RegexBenchmark {
  /// The matching operation performed by `run()`.
  public enum NSMatchType {
    /// Collect every match in the target (`matches(in:range:)`).
    case all
    /// Find only the first match in the target (`firstMatch(in:range:)`).
    case first
  }

  /// Name under which the benchmark is reported.
  public let name: String
  /// The regular expression under test.
  let regex: NSRegularExpression
  /// Which matching operation to perform.
  let ty: NSMatchType
  /// The string the regular expression is applied to.
  let target: String

  /// The `NSRange` covering all of `target`. Recomputed on every access.
  var range: NSRange {
    let entireTarget = target.startIndex..<target.endIndex
    return NSRange(entireTarget, in: target)
  }

  /// Performs the configured matching operation once and hands the result to
  /// `blackHole`.
  public func run() {
    switch ty {
    case .first:
      blackHole(regex.firstMatch(in: target, range: range))
    case .all:
      blackHole(regex.matches(in: target, range: range))
    }
  }
}

/// Holds a named collection of benchmarks and runs them, either timed
/// (`run()`) or once each for profiling (`profile()`).
public struct BenchmarkRunner {
  // Register instances of Benchmark and run them

  /// Name of the suite.
  let suiteName: String
  /// The registered benchmarks, in registration order.
  var suite: [any RegexBenchmark]
  /// Number of timed runs `measure` collects per benchmark.
  let samples: Int

  /// Creates an empty runner that collects 20 samples per benchmark.
  ///
  /// - Parameter suiteName: Name of the suite.
  public init(_ suiteName: String) {
    self.init(suiteName, 20)
  }

  /// Creates an empty runner with an explicit sample count.
  ///
  /// - Parameters:
  ///   - suiteName: Name of the suite.
  ///   - n: Number of timed runs to collect per benchmark. Must be positive
  ///     for `run()` to be usable; `profile()` does not consult it.
  public init(_ suiteName: String, _ n: Int) {
    self.suiteName = suiteName
    self.suite = []
    self.samples = n
  }

  /// Appends a benchmark to the end of the suite.
  public mutating func register(_ new: some RegexBenchmark) {
    suite.append(new)
  }

  /// Runs `benchmark` once untimed, then `samples` more times under the
  /// clock, and returns the median of the timed runs.
  ///
  /// For an even number of samples the upper of the two middle values is
  /// returned.
  ///
  /// - Precondition: `samples > 0`. Without at least one timed run there is
  ///   no median to report.
  func measure(benchmark: some RegexBenchmark) -> Time {
    // Fail with a readable message instead of an opaque range or
    // index-out-of-bounds trap further down.
    precondition(
      samples > 0,
      "BenchmarkRunner needs at least one sample to compute a median (got \(samples))")

    // initial run to make sure the regex has been compiled
    // Future work (review, Member): We'll want to know how much time is
    // spent compiling vs not
    benchmark.run()

    var times: [Time] = []
    times.reserveCapacity(samples)

    // fixme: use suspendingclock?
    for _ in 0..<samples {
      let start = Tick.now
      benchmark.run()
      let end = Tick.now
      times.append(end.elapsedTime(since: start))
    }
    // todo: compute stdev and warn if it's too large

    // return median time
    // Future work (review, Member): provide the times as a type which the
    // caller can ask for the median of.
    times.sort()
    return times[times.count / 2]
  }

  /// Measures every registered benchmark and prints one line per benchmark
  /// with its name and median time.
  public func run() {
    print("Running")
    for b in suite {
      print("- \(b.name) \(measure(benchmark: b))")
    }
  }

  /// Runs every registered benchmark exactly once, untimed, printing a
  /// marker before and after each one.
  public func profile() {
    print("Starting")
    for b in suite {
      print("- \(b.name)")
      b.run()
      print("- done")
    }
  }
}

/// Consumes its argument and does nothing with it (nom nom nom).
///
/// Marked `@inline(never)` so calls are not inlined; presumably this keeps
/// the optimizer from discarding the work that produced `x` — confirm.
@inline(never)
public func blackHole<T>(_ value: T) {}
33 changes: 33 additions & 0 deletions Sources/RegexBenchmark/CLI.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import ArgumentParser

/// Command-line entry point: builds the benchmark suite, optionally narrows
/// it to the benchmarks named on the command line, and either measures or
/// profiles it.
@main
struct Runner: ParsableCommand {
  @Argument(help: "Names of benchmarks to run")
  var specificBenchmarks: [String] = []

  // Review note (Member): Sometimes it helps profiling to run many samples
  // since you'll see what warm behavior looks like and the hot parts become
  // more pronounced. What's the difference between this flag and a sample
  // count of 1?
  //
  // NOTE(review): declared as an `@Option`, so this presumably needs an
  // explicit value on the command line (`--profile true`); `@Flag` may be the
  // intended wrapper — confirm before changing the CLI.
  @Option(help: "Run only once for profiling purposes")
  var profile = false

  @Option(help: "How many samples to collect for each benchmark")
  var samples = 20

  /// Rejects a non-positive sample count up front with a usage error.
  ///
  /// Profiling never reads the sample count, so the check only applies to
  /// measured runs.
  ///
  /// - Throws: `ValidationError` when measuring with `samples < 1`.
  func validate() throws {
    guard profile || samples > 0 else {
      throw ValidationError("--samples must be at least 1 (got \(samples))")
    }
  }

  /// Builds a runner named "RegexBench" with the requested sample count and
  /// registers every benchmark group.
  func makeRunner() -> BenchmarkRunner {
    var benchmark = BenchmarkRunner("RegexBench", samples)
    benchmark.addReluctantQuant()
    benchmark.addBacktracking()
    benchmark.addCSS()
    benchmark.addFirstMatch()
    // Review note (Member): Could we make it an array or some other way to
    // make registration easier? Seems like we could then filter that array
    // when creating the runner
    return benchmark
  }

  /// Runs the selected benchmarks, in profile mode if `profile` is set and in
  /// measurement mode otherwise.
  mutating func run() throws {
    var runner = makeRunner()
    if !specificBenchmarks.isEmpty {
      // Keep only the benchmarks whose name was requested; suite order is
      // preserved.
      let requested = Set(specificBenchmarks)
      runner.suite = runner.suite.filter { requested.contains($0.name) }
    }
    if profile {
      runner.profile()
    } else {
      runner.run()
    }
  }
}
45 changes: 45 additions & 0 deletions Sources/RegexBenchmark/Suite/Backtracking.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import _StringProcessing
import RegexBuilder
import Foundation

// Tests that involve heavy backtracking

extension BenchmarkRunner {
  /// Registers the basic backtracking benchmarks: the pattern `^ +A` applied
  /// to a string of 10000 spaces, once each for Swift `Regex` and
  /// `NSRegularExpression`, in all-matches and first-match form.
  mutating func addBacktracking() {
    let pattern = "^ +A"
    let spaces = String(repeating: " ", count: 10000)

    // Review note (Member): Go ahead and give the ty argument label a full
    // name. That is ok as an argument name, but the label should clarify the
    // use site.
    //
    // Review note (Member): Hmm... we want whole-match benchmarks,
    // match-from-front benchmarks, first-match, and all-matches (which is
    // repeated first-match calls). We also want to make the
    // NSRegularExpression equivalent of each, at least if there can be an
    // equivalent.

    // Each benchmark gets its own freshly constructed regex; registration
    // order determines reporting order.
    register(
      Benchmark(
        name: "BasicBacktrack",
        regex: try! Regex(pattern),
        ty: .allMatches,
        target: spaces))

    register(
      NSBenchmark(
        name: "BasicBacktrackNS",
        regex: try! NSRegularExpression(pattern: pattern),
        ty: .all,
        target: spaces))

    register(
      Benchmark(
        name: "BasicBacktrackFirstMatch",
        regex: try! Regex(pattern),
        ty: .first,
        target: spaces))

    register(
      NSBenchmark(
        name: "BasicBacktrackNSFirstMatch",
        regex: try! NSRegularExpression(pattern: pattern),
        ty: .first,
        target: spaces))
  }
}
Loading