Skip to content

Validate optimizations when a match fails #559

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 7, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions Tests/RegexTests/MatchTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,33 @@ func _firstMatch(
input: String,
validateOptimizations: Bool,
syntax: SyntaxOptions = .traditional
) throws -> (String, [String?]) {
) throws -> (String, [String?])? {
var regex = try Regex(regexStr, syntax: syntax)
guard let result = try regex.firstMatch(in: input) else {
throw MatchError("match not found for \(regexStr) in \(input)")
}
let caps = result.output.slices(from: input)

let result = try regex.firstMatch(in: input)

if validateOptimizations {
regex._setCompilerOptionsForTesting(.disableOptimizations)
guard let unoptResult = try regex.firstMatch(in: input) else {
let unoptResult = try regex.firstMatch(in: input)
if result != nil && unoptResult == nil {
throw MatchError("match not found for unoptimized \(regexStr) in \(input)")
}
XCTAssertEqual(
String(input[result.range]),
String(input[unoptResult.range]),
"Unoptimized regex returned a different result")
if result == nil && unoptResult != nil {
throw MatchError("match not found in optimized \(regexStr) in \(input)")
}
if let result = result, let unoptResult = unoptResult {
let optMatch = String(input[result.range])
let unoptMatch = String(input[unoptResult.range])
if optMatch != unoptMatch {
throw MatchError("""

Unoptimized regex returned: '\(unoptMatch)'
Optimized regex returned: '\(optMatch)'
""")
}
}
}
guard let result = result else { return nil }
let caps = result.output.slices(from: input)
return (String(input[result.range]), caps.map { $0.map(String.init) })
}

Expand Down Expand Up @@ -147,21 +157,19 @@ func firstMatchTest(
line: UInt = #line
) {
do {
let (found, _) = try _firstMatch(
let found = try _firstMatch(
regex,
input: input,
validateOptimizations: validateOptimizations,
syntax: syntax)
syntax: syntax)?.0

if xfail {
XCTAssertNotEqual(found, match, file: file, line: line)
} else {
XCTAssertEqual(found, match, file: file, line: line)
}
} catch {
// FIXME: This allows non-matches to succeed even when xfail'd
// When xfail == true, this should report failure for match == nil
if !xfail && match != nil {
if !xfail {
XCTFail("\(error)", file: file, line: line)
}
return
Expand Down Expand Up @@ -421,8 +429,7 @@ extension RegexTests {
"a++a",
("babc", nil),
("baaabc", nil),
("bb", nil),
xfail: true)
("bb", nil))
firstMatchTests(
"a+?a",
("babc", nil),
Expand Down Expand Up @@ -498,23 +505,19 @@ extension RegexTests {
("baabc", nil),
("bb", nil))

// XFAIL'd versions of the above
firstMatchTests(
"a{2,4}+a",
("baaabc", nil),
xfail: true)
("baaabc", nil))
firstMatchTests(
"a{,4}+a",
("babc", nil),
("baabc", nil),
("baaabc", nil),
xfail: true)
("baaabc", nil))
firstMatchTests(
"a{2,}+a",
("baaabc", nil),
("baaaaabc", nil),
("baaaaaaaabc", nil),
xfail: true)
("baaaaaaaabc", nil))

// XFAIL'd possessive tests
firstMatchTests(
Expand Down Expand Up @@ -709,6 +712,11 @@ extension RegexTests {
}
firstMatchTest(#"[\t-\t]"#, input: "\u{8}\u{A}\u{9}", match: "\u{9}")

// FIXME: This produces a different result with and without optimizations.
firstMatchTest(#"[1-2]"#, input: "1️⃣", match: nil, xfail: true)
firstMatchTest(#"[1-2]"#, input: "1️⃣", match: nil,
validateOptimizations: false)

// Currently not supported in the matching engine.
for c: UnicodeScalar in ["a", "b", "c"] {
firstMatchTest(#"[\c!-\C-#]"#, input: "def\(c)", match: "\(c)",
Expand Down Expand Up @@ -1054,8 +1062,8 @@ extension RegexTests {
// TODO: Oniguruma \y and \Y
firstMatchTests(
#"\u{65}"#, // Scalar 'e' is present in both
("Cafe\u{301}", nil), // but scalar mode requires boundary at end of match
xfail: true)
("Cafe\u{301}", nil)) // but scalar mode requires boundary at end of match

firstMatchTests(
#"\u{65}"#, // Scalar 'e' is present in both
("Sol Cafe", "e")) // standalone is okay
Expand Down Expand Up @@ -1647,19 +1655,15 @@ extension RegexTests {
firstMatchTest(#"\u{65 301}$"#, input: eComposed, match: eComposed)

// FIXME: Implicit \y at end of match
firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil,
xfail: true)
firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil)
firstMatchTest(#"\u{65}$"#, input: eDecomposed, match: nil)
// FIXME: \y is unsupported
firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil,
xfail: true)
firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil)

// FIXME: Unicode scalars are only matched at the start of a grapheme cluster
firstMatchTest(#"\u{301}"#, input: eDecomposed, match: "\u{301}",
xfail: true)
// FIXME: \y is unsupported
firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil,
xfail: true)

firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil)
}

func testCanonicalEquivalence() throws {
Expand Down Expand Up @@ -1717,13 +1721,11 @@ extension RegexTests {
// \s
firstMatchTest(#"\s"#, input: " ", match: " ")
// FIXME: \s shouldn't match whitespace composed with a non-whitespace character
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
xfail: true)
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
// \p{Whitespace}
firstMatchTest(#"\s"#, input: " ", match: " ")
// FIXME: \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
xfail: true)
// \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
}

func testCanonicalEquivalenceCustomCharacterClass() throws {
Expand Down