Skip to content

Commit 17d9c1c

Browse files
committed
syntax: unbox Ast and remove AstKind
The AstKind experiment proved unfruitful. I think the issue here is that the savings on Vec<Ast> didn't prove to be enough to offset the extra heap allocation that resulted from the indirection. This seems to be a sweet spot. It would be nice to get Ast down below 16 bytes, but it's not clear how to do that (without much larger changes that I don't feel inclined to pursue). Fixes #1090
1 parent 31b4398 commit 17d9c1c

File tree

7 files changed

+161
-190
lines changed

7 files changed

+161
-190
lines changed

fuzz/fuzz_targets/ast_roundtrip.rs

+11-10
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use {
44
libfuzzer_sys::{fuzz_target, Corpus},
55
regex_syntax::ast::{
6-
parse::Parser, visit, Ast, Flag, Group, GroupKind, SetFlags, Visitor,
6+
parse::Parser, visit, Ast, Flag, Flags, GroupKind, Visitor,
77
},
88
};
99

@@ -32,16 +32,17 @@ impl Visitor for VerboseVisitor {
3232
}
3333

3434
fn visit_pre(&mut self, ast: &Ast) -> Result<Self::Output, Self::Err> {
35+
let reject_flags = |flags: &Flags| {
36+
flags.flag_state(Flag::IgnoreWhitespace).unwrap_or(false)
37+
};
3538
match ast {
36-
Ast::Flags(SetFlags { flags, .. })
37-
| Ast::Group(Group {
38-
kind: GroupKind::NonCapturing(flags), ..
39-
}) if flags
40-
.flag_state(Flag::IgnoreWhitespace)
41-
.unwrap_or(false) =>
42-
{
43-
Err(())
44-
}
39+
Ast::Flags(x) if reject_flags(&x.flags) => return Err(()),
40+
Ast::Group(x) => match x.kind {
41+
GroupKind::NonCapturing(ref flags) if reject_flags(flags) => {
42+
return Err(())
43+
}
44+
_ => Ok(()),
45+
},
4546
_ => Ok(()),
4647
}
4748
}

regex-cli/cmd/generate/fowler.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,9 @@ fn count_capturing_groups_ast(ast: &regex_syntax::ast::Ast) -> usize {
404404
| Ast::Literal(_)
405405
| Ast::Dot(_)
406406
| Ast::Assertion(_)
407-
| Ast::Class(_) => 0,
407+
| Ast::ClassUnicode(_)
408+
| Ast::ClassPerl(_)
409+
| Ast::ClassBracketed(_) => 0,
408410
Ast::Repetition(ref rep) => count_capturing_groups_ast(&*rep.ast),
409411
Ast::Group(ref group) => {
410412
let this = if group.is_capturing() { 1 } else { 0 };

regex-syntax/src/ast/mod.rs

+68-100
Original file line numberDiff line numberDiff line change
@@ -429,19 +429,9 @@ pub struct Comment {
429429
///
430430
/// This type defines its own destructor that uses constant stack space and
431431
/// heap space proportional to the size of the `Ast`.
432-
///
433-
/// This type boxes the actual kind of the AST element so that an `Ast` value
434-
/// itself has a very small size. This in turn makes things like `Vec<Ast>` use
435-
/// a lot less memory than it might otherwise, which is particularly beneficial
436-
/// for representing long concatenations or alternations.
437-
#[derive(Clone, Debug, Eq, PartialEq)]
438-
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
439-
pub struct Ast(pub Box<AstKind>);
440-
441-
/// The kind of an abstract syntax element.
442432
#[derive(Clone, Debug, Eq, PartialEq)]
443433
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
444-
pub enum AstKind {
434+
pub enum Ast {
445435
/// An empty regex that matches everything.
446436
Empty(Box<Span>),
447437
/// A set of flags, e.g., `(?is)`.
@@ -473,106 +463,106 @@ pub enum AstKind {
473463
impl Ast {
474464
/// Create an "empty" AST item.
475465
pub fn empty(span: Span) -> Ast {
476-
Ast(Box::new(AstKind::Empty(Box::new(span))))
466+
Ast::Empty(Box::new(span))
477467
}
478468

479469
/// Create a "flags" AST item.
480470
pub fn flags(e: SetFlags) -> Ast {
481-
Ast(Box::new(AstKind::Flags(Box::new(e))))
471+
Ast::Flags(Box::new(e))
482472
}
483473

484474
/// Create a "literal" AST item.
485475
pub fn literal(e: Literal) -> Ast {
486-
Ast(Box::new(AstKind::Literal(Box::new(e))))
476+
Ast::Literal(Box::new(e))
487477
}
488478

489479
/// Create a "dot" AST item.
490480
pub fn dot(span: Span) -> Ast {
491-
Ast(Box::new(AstKind::Dot(Box::new(span))))
481+
Ast::Dot(Box::new(span))
492482
}
493483

494484
/// Create a "assertion" AST item.
495485
pub fn assertion(e: Assertion) -> Ast {
496-
Ast(Box::new(AstKind::Assertion(Box::new(e))))
486+
Ast::Assertion(Box::new(e))
497487
}
498488

499489
/// Create a "Unicode class" AST item.
500490
pub fn class_unicode(e: ClassUnicode) -> Ast {
501-
Ast(Box::new(AstKind::ClassUnicode(Box::new(e))))
491+
Ast::ClassUnicode(Box::new(e))
502492
}
503493

504494
/// Create a "Perl class" AST item.
505495
pub fn class_perl(e: ClassPerl) -> Ast {
506-
Ast(Box::new(AstKind::ClassPerl(Box::new(e))))
496+
Ast::ClassPerl(Box::new(e))
507497
}
508498

509499
/// Create a "bracketed class" AST item.
510500
pub fn class_bracketed(e: ClassBracketed) -> Ast {
511-
Ast(Box::new(AstKind::ClassBracketed(Box::new(e))))
501+
Ast::ClassBracketed(Box::new(e))
512502
}
513503

514504
/// Create a "repetition" AST item.
515505
pub fn repetition(e: Repetition) -> Ast {
516-
Ast(Box::new(AstKind::Repetition(Box::new(e))))
506+
Ast::Repetition(Box::new(e))
517507
}
518508

519509
/// Create a "group" AST item.
520510
pub fn group(e: Group) -> Ast {
521-
Ast(Box::new(AstKind::Group(Box::new(e))))
511+
Ast::Group(Box::new(e))
522512
}
523513

524514
/// Create a "alternation" AST item.
525515
pub fn alternation(e: Alternation) -> Ast {
526-
Ast(Box::new(AstKind::Alternation(Box::new(e))))
516+
Ast::Alternation(Box::new(e))
527517
}
528518

529519
/// Create a "concat" AST item.
530520
pub fn concat(e: Concat) -> Ast {
531-
Ast(Box::new(AstKind::Concat(Box::new(e))))
521+
Ast::Concat(Box::new(e))
532522
}
533523

534524
/// Return the span of this abstract syntax tree.
535525
pub fn span(&self) -> &Span {
536-
match *self.0 {
537-
AstKind::Empty(ref span) => span,
538-
AstKind::Flags(ref x) => &x.span,
539-
AstKind::Literal(ref x) => &x.span,
540-
AstKind::Dot(ref span) => span,
541-
AstKind::Assertion(ref x) => &x.span,
542-
AstKind::ClassUnicode(ref x) => &x.span,
543-
AstKind::ClassPerl(ref x) => &x.span,
544-
AstKind::ClassBracketed(ref x) => &x.span,
545-
AstKind::Repetition(ref x) => &x.span,
546-
AstKind::Group(ref x) => &x.span,
547-
AstKind::Alternation(ref x) => &x.span,
548-
AstKind::Concat(ref x) => &x.span,
526+
match *self {
527+
Ast::Empty(ref span) => span,
528+
Ast::Flags(ref x) => &x.span,
529+
Ast::Literal(ref x) => &x.span,
530+
Ast::Dot(ref span) => span,
531+
Ast::Assertion(ref x) => &x.span,
532+
Ast::ClassUnicode(ref x) => &x.span,
533+
Ast::ClassPerl(ref x) => &x.span,
534+
Ast::ClassBracketed(ref x) => &x.span,
535+
Ast::Repetition(ref x) => &x.span,
536+
Ast::Group(ref x) => &x.span,
537+
Ast::Alternation(ref x) => &x.span,
538+
Ast::Concat(ref x) => &x.span,
549539
}
550540
}
551541

552542
/// Return true if and only if this Ast is empty.
553543
pub fn is_empty(&self) -> bool {
554-
match *self.0 {
555-
AstKind::Empty(_) => true,
544+
match *self {
545+
Ast::Empty(_) => true,
556546
_ => false,
557547
}
558548
}
559549

560550
/// Returns true if and only if this AST has any (including possibly empty)
561551
/// subexpressions.
562552
fn has_subexprs(&self) -> bool {
563-
match *self.0 {
564-
AstKind::Empty(_)
565-
| AstKind::Flags(_)
566-
| AstKind::Literal(_)
567-
| AstKind::Dot(_)
568-
| AstKind::Assertion(_)
569-
| AstKind::ClassUnicode(_)
570-
| AstKind::ClassPerl(_) => false,
571-
AstKind::ClassBracketed(_)
572-
| AstKind::Repetition(_)
573-
| AstKind::Group(_)
574-
| AstKind::Alternation(_)
575-
| AstKind::Concat(_) => true,
553+
match *self {
554+
Ast::Empty(_)
555+
| Ast::Flags(_)
556+
| Ast::Literal(_)
557+
| Ast::Dot(_)
558+
| Ast::Assertion(_)
559+
| Ast::ClassUnicode(_)
560+
| Ast::ClassPerl(_) => false,
561+
Ast::ClassBracketed(_)
562+
| Ast::Repetition(_)
563+
| Ast::Group(_)
564+
| Ast::Alternation(_)
565+
| Ast::Concat(_) => true,
576566
}
577567
}
578568
}
@@ -1598,48 +1588,48 @@ impl Drop for Ast {
15981588
fn drop(&mut self) {
15991589
use core::mem;
16001590

1601-
match *self.0 {
1602-
AstKind::Empty(_)
1603-
| AstKind::Flags(_)
1604-
| AstKind::Literal(_)
1605-
| AstKind::Dot(_)
1606-
| AstKind::Assertion(_)
1607-
| AstKind::ClassUnicode(_)
1608-
| AstKind::ClassPerl(_)
1591+
match *self {
1592+
Ast::Empty(_)
1593+
| Ast::Flags(_)
1594+
| Ast::Literal(_)
1595+
| Ast::Dot(_)
1596+
| Ast::Assertion(_)
1597+
| Ast::ClassUnicode(_)
1598+
| Ast::ClassPerl(_)
16091599
// Bracketed classes are recursive, they get their own Drop impl.
1610-
| AstKind::ClassBracketed(_) => return,
1611-
AstKind::Repetition(ref x) if !x.ast.has_subexprs() => return,
1612-
AstKind::Group(ref x) if !x.ast.has_subexprs() => return,
1613-
AstKind::Alternation(ref x) if x.asts.is_empty() => return,
1614-
AstKind::Concat(ref x) if x.asts.is_empty() => return,
1600+
| Ast::ClassBracketed(_) => return,
1601+
Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1602+
Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1603+
Ast::Alternation(ref x) if x.asts.is_empty() => return,
1604+
Ast::Concat(ref x) if x.asts.is_empty() => return,
16151605
_ => {}
16161606
}
16171607

16181608
let empty_span = || Span::splat(Position::new(0, 0, 0));
16191609
let empty_ast = || Ast::empty(empty_span());
16201610
let mut stack = vec![mem::replace(self, empty_ast())];
16211611
while let Some(mut ast) = stack.pop() {
1622-
match *ast.0 {
1623-
AstKind::Empty(_)
1624-
| AstKind::Flags(_)
1625-
| AstKind::Literal(_)
1626-
| AstKind::Dot(_)
1627-
| AstKind::Assertion(_)
1628-
| AstKind::ClassUnicode(_)
1629-
| AstKind::ClassPerl(_)
1612+
match ast {
1613+
Ast::Empty(_)
1614+
| Ast::Flags(_)
1615+
| Ast::Literal(_)
1616+
| Ast::Dot(_)
1617+
| Ast::Assertion(_)
1618+
| Ast::ClassUnicode(_)
1619+
| Ast::ClassPerl(_)
16301620
// Bracketed classes are recursive, so they get their own Drop
16311621
// impl.
1632-
| AstKind::ClassBracketed(_) => {}
1633-
AstKind::Repetition(ref mut x) => {
1622+
| Ast::ClassBracketed(_) => {}
1623+
Ast::Repetition(ref mut x) => {
16341624
stack.push(mem::replace(&mut x.ast, empty_ast()));
16351625
}
1636-
AstKind::Group(ref mut x) => {
1626+
Ast::Group(ref mut x) => {
16371627
stack.push(mem::replace(&mut x.ast, empty_ast()));
16381628
}
1639-
AstKind::Alternation(ref mut x) => {
1629+
Ast::Alternation(ref mut x) => {
16401630
stack.extend(x.asts.drain(..));
16411631
}
1642-
AstKind::Concat(ref mut x) => {
1632+
Ast::Concat(ref mut x) => {
16431633
stack.extend(x.asts.drain(..));
16441634
}
16451635
}
@@ -1760,35 +1750,13 @@ mod tests {
17601750
// 64-bit target. Wow.
17611751
#[test]
17621752
fn ast_size() {
1763-
std::dbg!(core::mem::size_of::<Span>());
1764-
std::dbg!(core::mem::size_of::<SetFlags>());
1765-
std::dbg!(core::mem::size_of::<Literal>());
1766-
std::dbg!(core::mem::size_of::<Span>());
1767-
std::dbg!(core::mem::size_of::<Assertion>());
1768-
std::dbg!(core::mem::size_of::<ClassUnicode>());
1769-
std::dbg!(core::mem::size_of::<ClassPerl>());
1770-
std::dbg!(core::mem::size_of::<ClassBracketed>());
1771-
std::dbg!(core::mem::size_of::<Repetition>());
1772-
std::dbg!(core::mem::size_of::<Group>());
1773-
std::dbg!(core::mem::size_of::<Alternation>());
1774-
std::dbg!(core::mem::size_of::<Concat>());
1775-
1776-
let max = core::mem::size_of::<usize>();
1753+
let max = 2 * core::mem::size_of::<usize>();
17771754
let size = core::mem::size_of::<Ast>();
17781755
assert!(
17791756
size <= max,
17801757
"Ast size of {} bytes is bigger than suggested max {}",
17811758
size,
17821759
max
17831760
);
1784-
1785-
let max = 2 * core::mem::size_of::<usize>();
1786-
let size = core::mem::size_of::<AstKind>();
1787-
assert!(
1788-
size <= max,
1789-
"AstKind size of {} bytes is bigger than suggested max {}",
1790-
size,
1791-
max
1792-
);
17931761
}
17941762
}

0 commit comments

Comments
 (0)