diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 6cd0bc48f4..fd3da9f667 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -156,6 +156,8 @@ pub enum ErrorKind { /// `(?i)*`. It is, however, possible to create a repetition operating on /// an empty sub-expression. For example, `()*` is still considered valid. RepetitionMissing, + /// The unicode class represented by a single character is not valid. + UnicodeCharacterClassInvalid, /// When octal support is disabled, this error is produced when an octal /// escape is used. The octal escape is assumed to be an invocation of /// a backreference, which is the common case. @@ -206,6 +208,7 @@ impl error::Error for Error { RepetitionCountInvalid => "invalid repetition count range", RepetitionCountUnclosed => "unclosed counted repetition", RepetitionMissing => "repetition operator missing expression", + UnicodeCharacterClassInvalid => "invalid unicode character class", UnsupportedBackreference => "backreferences are not supported", UnsupportedLookAround => "look-around is not supported", _ => unreachable!(), @@ -293,6 +296,9 @@ impl fmt::Display for ErrorKind { RepetitionMissing => { write!(f, "repetition operator missing expression") } + UnicodeCharacterClassInvalid => { + write!(f, "invalid unicode character class") + } UnsupportedBackreference => { write!(f, "backreferences are not supported") } diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index c063ea9dc2..9224f9117b 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -2095,6 +2095,12 @@ impl<'s, P: Borrow> ParserI<'s, P> { } else { let start = self.pos(); let c = self.char(); + if c == '\\' { + return Err(self.error( + self.span_char(), + ast::ErrorKind::UnicodeCharacterClassInvalid, + )); + } self.bump_and_bump_space(); let kind = ast::ClassUnicodeKind::OneLetter(c); (start, kind) @@ -5713,6 +5719,20 @@ bar ], })) ); + assert_eq!( + parser(r"\p\{").parse().unwrap_err(), + TestError { + span: span(2..3), + kind: ast::ErrorKind::UnicodeCharacterClassInvalid, + } + ); + assert_eq!( + parser(r"\P\{").parse().unwrap_err(), + TestError { + span: span(2..3), + kind: ast::ErrorKind::UnicodeCharacterClassInvalid, + } + ); } #[test]