Skip to content

Commit c5318ae

Browse files
Lance Parker authored and milseman committed
Add benchmarks and tests for the normalized iterator (swiftlang#32)
Add benchmarks and tests for the normalized iterator
1 parent d0c6f79 commit c5318ae

File tree

8 files changed

+19297
-112
lines changed

8 files changed

+19297
-112
lines changed

benchmark/single-source/StringComparison.swift

+255-98
Large diffs are not rendered by default.

benchmark/single-source/StringComparison.swift.gyb

+48-10
Original file line numberDiff line numberDiff line change
@@ -30,32 +30,50 @@ extension String {
3030
}
3131
}
3232

33-
% Names = ["ascii", "latin1", "fastPrenormal", "slowerPrenormal", "nonBMPSlowestPrenormal", "emoji", "abnormal", "zalgo", "longSharedPrefix"]
33+
% AllWorkloads = ["ascii", "latin1", "fastPrenormal", "slowerPrenormal", "nonBMPSlowestPrenormal", "emoji", "abnormal", "zalgo", "longSharedPrefix"]
34+
% ComparisonWorkloads = AllWorkloads
35+
% HashingWorkloads = ["ascii", "latin1", "fastPrenormal", "slowerPrenormal", "nonBMPSlowestPrenormal", "emoji", "abnormal", "zalgo"]
36+
% NormalizedIteratorWorkloads = ["ascii", "latin1", "fastPrenormal", "slowerPrenormal", "nonBMPSlowestPrenormal", "emoji", "abnormal", "zalgo"]
3437

3538
public let StringComparison = [
36-
% for Name in Names:
39+
% for Name in ComparisonWorkloads:
3740
BenchmarkInfo(
3841
name: "StringComparison_${Name}",
3942
runFunction: run_StringComparison_${Name},
4043
tags: [.validation, .api, .String],
41-
setUpFunction: { blackHole(Workload_${Name}) }),
42-
% end # Names
44+
setUpFunction: { blackHole(Workload_${Name}) }
45+
),
46+
% end # ComparisonWorkloads
4347
]
4448

4549
public let StringHashing = [
46-
% for Name in Names:
50+
% for Name in HashingWorkloads:
4751
BenchmarkInfo(
4852
name: "StringHashing_${Name}",
4953
runFunction: run_StringHashing_${Name},
5054
tags: [.validation, .api, .String],
51-
setUpFunction: { blackHole(Workload_${Name}) }),
52-
% end # Names
55+
setUpFunction: { blackHole(Workload_${Name}) }
56+
),
57+
% end # HashingWorkloads
5358
]
5459

55-
% for Name in Names:
60+
/// Benchmark table for the normalized-iterator benchmarks: one `BenchmarkInfo`
/// is generated per name in `NormalizedIteratorWorkloads`. Each entry's set-up
/// function passes the matching `Workload_${Name}` global to `blackHole`.
public let NormalizedIterator = [
% for Name in NormalizedIteratorWorkloads:
  BenchmarkInfo(
    name: "NormalizedIterator_${Name}",
    runFunction: run_NormalizedIterator_${Name},
    tags: [.validation, .String],
    setUpFunction: { blackHole(Workload_${Name}) }),
% end # NormalizedIteratorWorkloads
]
5670

71+
% for Name in AllWorkloads:
5772
var Workload_${Name}: Workload! = Workload.${Name}
5873

74+
% end # AllWorkloads
75+
76+
%for Name in ComparisonWorkloads:
5977
@inline(never)
6078
public func run_StringComparison_${Name}(_ N: Int) {
6179
let workload: Workload = Workload_${Name}
@@ -70,6 +88,9 @@ public func run_StringComparison_${Name}(_ N: Int) {
7088
}
7189
}
7290

91+
% end # ComparisonWorkloads
92+
93+
%for Name in HashingWorkloads:
7394
@inline(never)
7495
public func run_StringHashing_${Name}(_ N: Int) {
7596
let workload: Workload = Workload.${Name}
@@ -81,8 +102,25 @@ public func run_StringHashing_${Name}(_ N: Int) {
81102
}
82103
}
83104
}
84-
85-
% end # Names
105+
106+
% end # HashingWorkloads
107+
108+
%for Name in NormalizedIteratorWorkloads:
109+
/// Runs the normalized-iterator benchmark for the `${Name}` workload.
///
/// Every string in the workload's payload has its NFC code units delivered, one
/// at a time, to `blackHole`. The whole payload is walked
/// `workload.tripCount * N` times.
///
/// - Parameter N: The benchmark scaling factor supplied by the driver.
@inline(never)
public func run_NormalizedIterator_${Name}(_ N: Int) {
  let workload: Workload = Workload.${Name}
  let repetitions = workload.tripCount
  let strings = workload.payload
  for _ in 1...(repetitions * N) {
    for string in strings {
      string._withNFCCodeUnits { codeUnit in
        blackHole(codeUnit)
      }
    }
  }
}
122+
123+
% end # NormalizedIteratorWorkloads
86124

87125
struct Workload {
88126
static let N = 100

benchmark/utils/main.swift

+1
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ registerBenchmark(NSErrorTest)
257257
registerBenchmark(NSStringConversion)
258258
registerBenchmark(NibbleSort)
259259
registerBenchmark(NopDeinit)
260+
registerBenchmark(NormalizedIterator)
260261
registerBenchmark(ObjectAllocation)
261262
registerBenchmark(ObjectiveCBridging)
262263
registerBenchmark(ObjectiveCBridgingStubs)

stdlib/private/StdlibUnicodeUnittest/StdlibUnicodeUnittest.swift

+83
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,89 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
import StdlibUnittest
14+
import Foundation
15+
16+
extension String {
  /// Parses `self` as space-separated hexadecimal code points and returns the
  /// resulting Unicode scalars in order.
  ///
  /// A token that is not valid hexadecimal stops the process with a message
  /// naming the token. A token whose value `Unicode.Scalar.init(_:)` rejects
  /// (the initializer returns `nil`) is skipped.
  private func parseHexScalars() -> [Unicode.Scalar] {
    return self.split(separator: " ").compactMap { token -> Unicode.Scalar? in
      guard let value = Int(token, radix: 16) else {
        fatalError("Malformed hexadecimal code point: '\(token)'")
      }
      return Unicode.Scalar(value)
    }
  }

  /// Interprets `self` as space-separated hexadecimal code points and returns
  /// their concatenated UTF-8 encoding.
  ///
  /// - Returns: The UTF-8 code units of every parsed scalar, in source order;
  ///   empty when `self` contains no tokens.
  func parseUTF8CodeUnits() -> [UInt8] {
    return parseHexScalars().flatMap { String($0).utf8 }
  }

  /// Interprets `self` as space-separated hexadecimal code points and returns
  /// their concatenated UTF-16 encoding.
  ///
  /// - Returns: The UTF-16 code units of every parsed scalar, in source order;
  ///   empty when `self` contains no tokens.
  func parseUTF16CodeUnits() -> [UInt16] {
    return parseHexScalars().flatMap { $0.utf16 }
  }
}
46+
47+
/// One normalization test case: a source sequence plus the code units stored
/// for each of the `NFC`, `NFD`, `NFKC` and `NFKD` columns.
///
/// The source is kept both as UTF-16 and as UTF-8; the other four columns are
/// kept as UTF-8 only.
public struct NormalizationTest {
  public var sourceUTF16: [UInt16]
  public var source: [UInt8]
  public var NFC: [UInt8]
  public var NFD: [UInt8]
  public var NFKC: [UInt8]
  public var NFKD: [UInt8]

  /// Builds a test case from textual columns.
  ///
  /// Each argument is handed to `parseUTF8CodeUnits()` (and, for the source,
  /// also to `parseUTF16CodeUnits()`), so each is expected to be a list of
  /// space-separated hexadecimal code points.
  init(
    source sourceField: String,
    NFC nfcField: String,
    NFD nfdField: String,
    NFKC nfkcField: String,
    NFKD nfkdField: String
  ) {
    sourceUTF16 = sourceField.parseUTF16CodeUnits()
    source = sourceField.parseUTF8CodeUnits()
    NFC = nfcField.parseUTF8CodeUnits()
    NFD = nfdField.parseUTF8CodeUnits()
    NFKC = nfkcField.parseUTF8CodeUnits()
    NFKD = nfkdField.parseUTF8CodeUnits()
  }
}
64+
65+
/// The normalization test cases parsed from the data file whose path is the
/// third command-line argument (`CommandLine.arguments[2]`).
///
/// File format, as consumed here:
/// - lines beginning with `#` are ignored;
/// - everything from the first `#` on a line onwards is dropped;
/// - lines whose remaining content is empty or begins with `@` are ignored;
/// - the remaining content is split on `;`, columns equal to a single space
///   are discarded, and the first five columns become `source`, `NFC`, `NFD`,
///   `NFKC` and `NFKD`.
///
/// A missing argument, an unreadable file, or a data line with fewer than five
/// columns stops the process with a descriptive message.
public let normalizationTests: [NormalizationTest] = {
  // Fail with a clear message instead of an index-out-of-range trap when the
  // data file path was not supplied.
  precondition(
    CommandLine.arguments.count > 2,
    "Expected the normalization test data file path as command-line argument 2")
  let fileURL = URL(fileURLWithPath: CommandLine.arguments[2])

  let fileContents: String
  do {
    // Bridged String grapheme breaking is slow. NOTE(review): the `+ ""`
    // presumably forces a native (non-bridged) String; confirm before removing.
    fileContents = try String(contentsOf: fileURL) + ""
  } catch {
    fatalError(
      "Unable to read normalization test data at '\(fileURL.path)': \(error)")
  }

  var tests = [NormalizationTest]()

  for line in fileContents.split(separator: "\n") {
    // Skip whole-line comments.
    guard !line.hasPrefix("#") else {
      continue
    }

    // Keep only the text before a trailing comment; skip lines with no data
    // and lines that start with "@".
    guard let content = line.split(separator: "#").first,
      !content.isEmpty,
      !content.hasPrefix("@")
    else {
      continue
    }

    let columns = content.split(separator: ";")
      .filter { $0 != " " }
      .map(String.init)
    precondition(
      columns.count >= 5,
      "Malformed normalization test line (expected 5 columns): '\(line)'")

    tests.append(
      NormalizationTest(
        source: columns[0],
        NFC: columns[1], NFD: columns[2],
        NFKC: columns[3], NFKD: columns[4]))
  }

  return tests
}()
1497

1598
public struct UTFTest {
1699
public struct Flags : OptionSet {

stdlib/public/core/String.swift

+11
Original file line numberDiff line numberDiff line change
@@ -1246,3 +1246,14 @@ extension String : LosslessStringConvertible {
12461246
self = content
12471247
}
12481248
}
1249+
1250+
extension String {
  /// Calls `f` once for each element produced by iterating the value that
  /// `_slicedGuts.withNFCCodeUnitsIterator` hands to its closure, in order.
  ///
  /// Any error thrown by `f` stops the iteration and is rethrown.
  ///
  /// - Parameter f: Receives each code unit in turn.
  public // @testable
  func _withNFCCodeUnits(_ f: (UInt8) throws -> Void) rethrows {
    try _slicedGuts.withNFCCodeUnitsIterator { codeUnits in
      for codeUnit in codeUnits {
        try f(codeUnit)
      }
    }
  }
}

stdlib/public/core/StringComparison.swift

+4-4
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ internal enum _StringComparisonResult: Int {
3636
extension _SlicedStringGuts {
3737
@inline(__always)
3838
@_effects(readonly)
39-
internal func withNFCCodeUnits<R>(
39+
internal func withNFCCodeUnitsIterator<R>(
4040
_ f: (_NormalizedUTF8CodeUnitIterator) throws -> R
4141
) rethrows -> R {
4242
if self.isNFCFastUTF8 {
@@ -97,10 +97,10 @@ extension _SlicedStringGuts {
9797
internal func _slowCompare(
9898
with other: _SlicedStringGuts
9999
) -> _StringComparisonResult {
100-
return self.withNFCCodeUnits {
100+
return self.withNFCCodeUnitsIterator {
101101
var selfIter = $0
102-
return other.withNFCCodeUnits {
103-
var otherIter = $0
102+
return other.withNFCCodeUnitsIterator {
103+
let otherIter = $0
104104
return selfIter.compare(with: otherIter)
105105
}
106106
}

0 commit comments

Comments
 (0)