Skip to content

Commit 102458f

Browse files
committed
syntax: fix trailing - bug
This fixes a bug in the parser where a regex like `(?x)[ / - ]` would fail to parse. In particular, since whitespace insensitive mode is enabled, this regex should be equivalent to `[/-]`, where the `-` is treated as a literal `-` instead of a range since it is the last character in the class. However, the parser did not account for whitespace insensitive mode, so it didn't see the `-` in `(?x)[ / - ]` as trailing, and therefore reported an unclosed character class (since the `]` was treated as part of the range). We fix that in this commit by accounting for whitespace insensitive mode, which we do by adding a `peek` method that skips over whitespace. Fixes #455
1 parent 3e370e4 commit 102458f

File tree

2 files changed

+62
-2
lines changed

2 files changed

+62
-2
lines changed

CHANGELOG.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
TBD
2+
===
3+
Bug fixes:
4+
5+
* [BUG #455](https://github.com/rust-lang/regex/pull/455):
6+
Fix a bug where `(?x)[ / - ]` failed to parse.
7+
8+
19
0.2.8 (2018-03-12)
210
==================
311
Bug fixes:

regex-syntax/src/ast/parse.rs

+54-2
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,32 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
587587
self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
588588
}
589589

590+
/// Like peek, but will ignore spaces when the parser is in whitespace
591+
/// insensitive mode.
592+
fn peek_space(&self) -> Option<char> {
593+
if !self.ignore_whitespace() {
594+
return self.peek();
595+
}
596+
if self.is_eof() {
597+
return None;
598+
}
599+
let mut start = self.offset() + self.char().len_utf8();
600+
let mut in_comment = false;
601+
for (i, c) in self.pattern()[start..].char_indices() {
602+
if c.is_whitespace() {
603+
continue;
604+
} else if !in_comment && c == '#' {
605+
in_comment = true;
606+
} else if in_comment && c == '\n' {
607+
in_comment = false;
608+
} else {
609+
start += i;
610+
break;
611+
}
612+
}
613+
self.pattern()[start..].chars().next()
614+
}
615+
590616
/// Returns true if the next call to `bump` would return false.
591617
fn is_eof(&self) -> bool {
592618
self.offset() == self.pattern().len()
@@ -1773,8 +1799,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
17731799
// after a `-` is a `-`, then `--` corresponds to a "difference"
17741800
// operation.
17751801
if self.char() != '-'
1776-
|| self.peek() == Some(']')
1777-
|| self.peek() == Some('-')
1802+
|| self.peek_space() == Some(']')
1803+
|| self.peek_space() == Some('-')
17781804
{
17791805
return prim1.into_class_set_item(self);
17801806
}
@@ -5297,4 +5323,30 @@ bar
52975323
"#;
52985324
assert!(parser_nest_limit(pattern, 50).parse().is_ok());
52995325
}
5326+
5327+
// Regression test for #455: with whitespace insensitive mode enabled, a `-`
// that is the last significant character of a character class must be
// parsed as a literal `-`, even when whitespace separates it from the
// closing `]`.
#[test]
fn regression_455_trailing_dash_ignore_whitespace() {
    // Keeps one assertion per pattern so a failure points at the exact case.
    let parses = |pattern: &str| parser(pattern).parse().is_ok();

    // A trailing `-` followed only by whitespace and then `]` is accepted.
    assert!(parses("(?x)[ / - ]"));
    assert!(parses("(?x)[ a - ]"));
    assert!(parses("(?x)[\na\n- ]\n"));
    assert!(parses("(?x)[\na # wat\n- ]\n"));

    // Without a closing `]`, the class is still unclosed and must be
    // rejected, whatever whitespace or comment follows the `-`.
    assert!(!parses("(?x)[ / -"));
    assert!(!parses("(?x)[ / - "));
    assert!(!parses("(?x)[\n/ -\n"));
    assert!(!parses("(?x)[\n/ - # wat\n"));
}
53005352
}

0 commit comments

Comments
 (0)