swiftlang · rctcwyvrn · Jun 16, 2022 · Jun 14, 2022 · Jun 15, 2022 · Jun 15, 2022
diff --git a/Package.swift b/Package.swift
@@ -41,7 +41,10 @@ let package = Package(
             targets: ["_RegexParser"]),
         .executable(
             name: "VariadicsGenerator",
-            targets: ["VariadicsGenerator"])
+            targets: ["VariadicsGenerator"]),
+        .executable(
+            name: "RegexBenchmark",
+            targets: ["RegexBenchmark"])
     ],
     dependencies: [
         .package(url: "https://github.com/apple/swift-argument-parser", from: "1.0.0"),
@@ -112,6 +115,17 @@ let package = Package(
                 "_RegexParser",
                 "_StringProcessing"
             ]),
+        .executableTarget(
+            name: "RegexBenchmark",
+            dependencies: [
+                .product(name: "ArgumentParser", package: "swift-argument-parser"),
+                "_RegexParser",
+                "_StringProcessing",
+                "RegexBuilder"
+            ],
+            swiftSettings: [
+                .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
+            ]),
 
         // MARK: Exercises
         .target(

diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift
@@ -0,0 +1,116 @@
+import _StringProcessing
+import Foundation
+
+/// A named unit of work that a `BenchmarkRunner` can execute.
+public protocol RegexBenchmark {
+  /// Label identifying this benchmark; printed next to its result and used
+  /// to select benchmarks by name.
+  var name: String { get }
+
+  /// Executes the benchmarked work once.
+  func run()
+}
+
+/// A benchmark that applies a Swift `Regex` to a fixed input string.
+public struct Benchmark: RegexBenchmark {
+  /// Selects which matching entry point `run()` calls.
+  public enum MatchType {
+    case whole
+    case first
+    case allMatches
+  }
+
+  public let name: String
+  let regex: Regex<Substring>
+  let ty: MatchType
+  let target: String
+
+  /// Matches `regex` against `target` once, in the mode chosen by `ty`, and
+  /// hands the result to `blackHole`.
+  public func run() {
+    switch ty {
+    case .whole:
+      blackHole(target.wholeMatch(of: regex))
+    case .first:
+      blackHole(target.firstMatch(of: regex))
+    case .allMatches:
+      blackHole(target.matches(of: regex))
+    }
+  }
+}
+
+/// A benchmark that applies an `NSRegularExpression` to a fixed input string.
+public struct NSBenchmark: RegexBenchmark {
+  /// Selects which `NSRegularExpression` entry point `run()` calls.
+  public enum NSMatchType {
+    case all
+    case first
+  }
+
+  public let name: String
+  let regex: NSRegularExpression
+  let ty: NSMatchType
+  let target: String
+
+  /// `NSRange` covering all of `target`; rebuilt on every access.
+  var range: NSRange {
+    let wholeTarget = target.startIndex..<target.endIndex
+    return NSRange(wholeTarget, in: target)
+  }
+
+  /// Searches `target` with `regex` once, in the mode chosen by `ty`, and
+  /// hands the result to `blackHole`.
+  public func run() {
+    switch ty {
+    case .all:
+      blackHole(regex.matches(in: target, range: range))
+    case .first:
+      blackHole(regex.firstMatch(in: target, range: range))
+    }
+  }
+}
+
+/// Holds a list of registered `RegexBenchmark` instances and runs them,
+/// either timed (`run()`) or once each for profiling (`profile()`).
+public struct BenchmarkRunner {
+  let suiteName: String
+  var suite: [any RegexBenchmark]
+  /// Number of timed executions per benchmark; always at least 1.
+  let samples: Int
+
+  /// Creates an empty suite that collects 20 samples per benchmark.
+  public init(_ suiteName: String) {
+    self.init(suiteName, 20)
+  }
+
+  /// Creates an empty suite that collects `n` samples per benchmark.
+  ///
+  /// A zero or negative `n` is raised to 1: `measure` reports a median, which
+  /// needs at least one sample, and would otherwise trap on an empty array
+  /// (or on the invalid range `0..<n` for a negative `n`).
+  public init(_ suiteName: String, _ n: Int) {
+    self.suiteName = suiteName
+    self.suite = []
+    self.samples = max(1, n)
+  }
+
+  /// Appends `new` to the end of the suite.
+  public mutating func register(_ new: some RegexBenchmark) {
+    suite.append(new)
+  }
+
+  /// Runs `benchmark` once untimed, then times `samples` further runs.
+  ///
+  /// - Returns: The median of the collected times (the upper of the two
+  ///   middle values when `samples` is even).
+  func measure(benchmark: some RegexBenchmark) -> Time {
+    var times: [Time] = []
+    times.reserveCapacity(samples)
+
+    // initial run to make sure the regex has been compiled
+    benchmark.run()
+
+    // FIXME: use SuspendingClock?
+    for _ in 0..<samples {
+      let start = Tick.now
+      benchmark.run()
+      let end = Tick.now
+      times.append(end.elapsedTime(since: start))
+    }
+    // TODO: compute stdev and warn if it's too large
+
+    // `samples` is clamped to >= 1 in the initializer, so `times` is never
+    // empty and the subscript below is always in bounds.
+    times.sort()
+    return times[times.count / 2]
+  }
+
+  /// Measures every registered benchmark in order and prints one line per
+  /// benchmark with its name and measured time.
+  public func run() {
+    print("Running")
+    for b in suite {
+      print("- \(b.name) \(measure(benchmark: b))")
+    }
+  }
+
+  /// Executes every registered benchmark exactly once, untimed, printing its
+  /// name before and a marker after each run.
+  public func profile() {
+    print("Starting")
+    for b in suite {
+      print("- \(b.name)")
+      b.run()
+      print("- done")
+    }
+  }
+}
+
+/// Accepts a value of any type and does nothing with it.
+///
+/// Marked `@inline(never)` so calls are not inlined into the caller —
+/// presumably to keep the optimizer from discarding the work that produced
+/// the argument.
+@inline(never)
+public func blackHole<T>(_ x: T) {}
diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift
@@ -0,0 +1,33 @@
+import ArgumentParser
+
+/// Command-line entry point: builds the benchmark suite, optionally narrows
+/// it to the named benchmarks, then either measures or profiles it.
+@main
+struct Runner: ParsableCommand {
+  @Argument(help: "Names of benchmarks to run")
+  var specificBenchmarks: [String] = []
+
+  // A Boolean switch is declared with @Flag so it is enabled by passing
+  // `--profile` alone; as an @Option it required an explicit value
+  // (`--profile true`).
+  @Flag(help: "Run only once for profiling purposes")
+  var profile = false
+
+  @Option(help: "How many samples to collect for each benchmark")
+  var samples = 20
+
+  /// Creates a `BenchmarkRunner` using the requested sample count and calls
+  /// each of the suite's `add…` registration methods on it.
+  func makeRunner() -> BenchmarkRunner {
+    var benchmark = BenchmarkRunner("RegexBench", samples)
+    benchmark.addReluctantQuant()
+    benchmark.addBacktracking()
+    benchmark.addCSS()
+    benchmark.addFirstMatch()
+    return benchmark
+  }
+
+  /// Runs the suite. When benchmark names were given on the command line,
+  /// only benchmarks whose `name` exactly equals one of them are kept.
+  mutating func run() throws {
+    var runner = makeRunner()
+    if !specificBenchmarks.isEmpty {
+      let requested = Set(specificBenchmarks)
+      runner.suite = runner.suite.filter { requested.contains($0.name) }
+    }
+    if profile {
+      runner.profile()
+    } else {
+      runner.run()
+    }
+  }
+}
diff --git a/Sources/RegexBenchmark/Suite/Backtracking.swift b/Sources/RegexBenchmark/Suite/Backtracking.swift
@@ -0,0 +1,45 @@
+import _StringProcessing
+import RegexBuilder
+import Foundation
+
+// Tests that involve heavy backtracking
+
+extension BenchmarkRunner {
+  /// Registers four benchmarks that apply the pattern `^ +A` to a string of
+  /// 10000 spaces: all-matches and first-match, each once with `Regex` and
+  /// once with `NSRegularExpression`.
+  mutating func addBacktracking() {
+    let pattern = "^ +A"
+    let input = String(repeating: " ", count: 10000)
+
+    // All matches, Swift Regex.
+    register(Benchmark(
+      name: "BasicBacktrack",
+      regex: try! Regex(pattern),
+      ty: .allMatches,
+      target: input
+    ))
+
+    // All matches, NSRegularExpression.
+    register(NSBenchmark(
+      name: "BasicBacktrackNS",
+      regex: try! NSRegularExpression(pattern: pattern),
+      ty: .all,
+      target: input
+    ))
+
+    // First match, Swift Regex.
+    register(Benchmark(
+      name: "BasicBacktrackFirstMatch",
+      regex: try! Regex(pattern),
+      ty: .first,
+      target: input
+    ))
+
+    // First match, NSRegularExpression.
+    register(NSBenchmark(
+      name: "BasicBacktrackNSFirstMatch",
+      regex: try! NSRegularExpression(pattern: pattern),
+      ty: .first,
+      target: input
+    ))
+  }
+}