Skip to content

Commit 649762d

Browse files
committed
regex: add nest_limit
This commit exposes the `nest_limit` option that regex-syntax provides. The nest limit controls how deeply nested a regex is allowed to be.
1 parent 7f23152 commit 649762d

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

src/exec.rs

+1
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@ impl ExecBuilder {
230230
.ignore_whitespace(self.options.ignore_whitespace)
231231
.unicode(self.options.unicode)
232232
.allow_invalid_utf8(!self.only_utf8)
233+
.nest_limit(self.options.nest_limit)
233234
.build();
234235
let expr = try!(parser.parse(pat));
235236
bytes = bytes || !expr.is_always_utf8();

src/re_builder.rs

+63
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ pub struct RegexOptions {
1515
pub pats: Vec<String>,
1616
pub size_limit: usize,
1717
pub dfa_size_limit: usize,
18+
pub nest_limit: u32,
1819
pub case_insensitive: bool,
1920
pub multi_line: bool,
2021
pub dot_matches_new_line: bool,
@@ -29,6 +30,7 @@ impl Default for RegexOptions {
2930
pats: vec![],
3031
size_limit: 10 * (1<<20),
3132
dfa_size_limit: 2 * (1<<20),
33+
nest_limit: 250,
3234
case_insensitive: false,
3335
multi_line: false,
3436
dot_matches_new_line: false,
@@ -163,6 +165,36 @@ impl RegexBuilder {
163165
self.0.dfa_size_limit = limit;
164166
self
165167
}
168+
169+
/// Set the nesting limit for this parser.
170+
///
171+
/// The nesting limit controls how deep the abstract syntax tree is allowed
172+
/// to be. If the AST exceeds the given limit (e.g., with too many nested
173+
/// groups), then an error is returned by the parser.
174+
///
175+
/// The purpose of this limit is to act as a heuristic to prevent stack
176+
/// overflow for consumers that do structural induction on an `Ast` using
177+
/// explicit recursion. While this crate never does this (instead using
178+
/// constant stack space and moving the call stack to the heap), other
179+
/// crates may.
180+
///
181+
/// This limit is not checked until the entire `Ast` is parsed. Therefore,
182+
/// if callers want to put a limit on the amount of heap space used, then
183+
/// they should impose a limit on the length, in bytes, of the concrete
184+
/// pattern string. In particular, this is viable since this parser
185+
/// implementation will limit itself to heap space proportional to the
186+
/// length of the pattern string.
187+
///
188+
/// Note that a nest limit of `0` will return a nest limit error for most
189+
/// patterns but not all. For example, a nest limit of `0` permits `a` but
190+
/// not `ab`, since `ab` requires a concatenation, which results in a nest
191+
/// depth of `1`. In general, a nest limit is not something that manifests
192+
/// in an obvious way in the concrete syntax; therefore, it should not be
193+
/// used in a granular way.
194+
pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
195+
self.0.nest_limit = limit;
196+
self
197+
}
166198
}
167199
}
168200
}
@@ -274,6 +306,37 @@ impl RegexSetBuilder {
274306
self.0.dfa_size_limit = limit;
275307
self
276308
}
309+
310+
/// Set the nesting limit for this parser.
311+
///
312+
/// The nesting limit controls how deep the abstract syntax tree is allowed
313+
/// to be. If the AST exceeds the given limit (e.g., with too many nested
314+
/// groups), then an error is returned by the parser.
315+
///
316+
/// The purpose of this limit is to act as a heuristic to prevent stack
317+
/// overflow for consumers that do structural induction on an `Ast` using
318+
/// explicit recursion. While this crate never does this (instead using
319+
/// constant stack space and moving the call stack to the heap), other
320+
/// crates may.
321+
///
322+
/// This limit is not checked until the entire `Ast` is parsed. Therefore,
323+
/// if callers want to put a limit on the amount of heap space used, then
324+
/// they should impose a limit on the length, in bytes, of the concrete
325+
/// pattern string. In particular, this is viable since this parser
326+
/// implementation will limit itself to heap space proportional to the
327+
/// length of the pattern string.
328+
///
329+
/// Note that a nest limit of `0` will return a nest limit error for most
330+
/// patterns but not all. For example, a nest limit of `0` permits `a` but
331+
/// not `ab`, since `ab` requires a concatenation, which results in a nest
332+
/// depth of `1`. In general, a nest limit is not something that manifests
333+
/// in an obvious way in the concrete syntax; therefore, it should not be
334+
/// used in a granular way.
335+
pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder {
336+
self.0.nest_limit = limit;
337+
self
338+
}
339+
277340
}
278341
}
279342
}

0 commit comments

Comments
 (0)