Skip to content

Commit 363ffc3

Browse files
committed
syntax: unbox Ast and remove AstKind
The AstKind experiment proved unfruitful. I think the issue here is that the savings on Vec<Ast> didn't prove to be enough to offset the extra heap allocation that resulted from the indirection. This seems to be a sweet spot. It would be nice to get Ast down below 16 bytes, but it's not clear how to do that (without much larger changes that I don't feel inclined to pursue). Fixes #1090
1 parent 8b0b0b0 commit 363ffc3

File tree

6 files changed

+150
-180
lines changed

6 files changed

+150
-180
lines changed

regex-cli/cmd/generate/fowler.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,9 @@ fn count_capturing_groups_ast(ast: &regex_syntax::ast::Ast) -> usize {
404404
| Ast::Literal(_)
405405
| Ast::Dot(_)
406406
| Ast::Assertion(_)
407-
| Ast::Class(_) => 0,
407+
| Ast::ClassUnicode(_)
408+
| Ast::ClassPerl(_)
409+
| Ast::ClassBracketed(_) => 0,
408410
Ast::Repetition(ref rep) => count_capturing_groups_ast(&*rep.ast),
409411
Ast::Group(ref group) => {
410412
let this = if group.is_capturing() { 1 } else { 0 };

regex-syntax/src/ast/mod.rs

+68-100
Original file line numberDiff line numberDiff line change
@@ -429,19 +429,9 @@ pub struct Comment {
429429
///
430430
/// This type defines its own destructor that uses constant stack space and
431431
/// heap space proportional to the size of the `Ast`.
432-
///
433-
/// This type boxes the actual kind of the AST element so that an `Ast` value
434-
/// itself has a very small size. This in turn makes things like `Vec<Ast>` use
435-
/// a lot less memory than it might otherwise, which is particularly beneficial
436-
/// for representing long concatenations or alternations.
437-
#[derive(Clone, Debug, Eq, PartialEq)]
438-
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
439-
pub struct Ast(pub Box<AstKind>);
440-
441-
/// The kind of an abstract syntax element.
442432
#[derive(Clone, Debug, Eq, PartialEq)]
443433
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
444-
pub enum AstKind {
434+
pub enum Ast {
445435
/// An empty regex that matches everything.
446436
Empty(Box<Span>),
447437
/// A set of flags, e.g., `(?is)`.
@@ -473,106 +463,106 @@ pub enum AstKind {
473463
impl Ast {
474464
/// Create an "empty" AST item.
475465
pub fn empty(span: Span) -> Ast {
476-
Ast(Box::new(AstKind::Empty(Box::new(span))))
466+
Ast::Empty(Box::new(span))
477467
}
478468

479469
/// Create a "flags" AST item.
480470
pub fn flags(e: SetFlags) -> Ast {
481-
Ast(Box::new(AstKind::Flags(Box::new(e))))
471+
Ast::Flags(Box::new(e))
482472
}
483473

484474
/// Create a "literal" AST item.
485475
pub fn literal(e: Literal) -> Ast {
486-
Ast(Box::new(AstKind::Literal(Box::new(e))))
476+
Ast::Literal(Box::new(e))
487477
}
488478

489479
/// Create a "dot" AST item.
490480
pub fn dot(span: Span) -> Ast {
491-
Ast(Box::new(AstKind::Dot(Box::new(span))))
481+
Ast::Dot(Box::new(span))
492482
}
493483

494484
/// Create a "assertion" AST item.
495485
pub fn assertion(e: Assertion) -> Ast {
496-
Ast(Box::new(AstKind::Assertion(Box::new(e))))
486+
Ast::Assertion(Box::new(e))
497487
}
498488

499489
/// Create a "Unicode class" AST item.
500490
pub fn class_unicode(e: ClassUnicode) -> Ast {
501-
Ast(Box::new(AstKind::ClassUnicode(Box::new(e))))
491+
Ast::ClassUnicode(Box::new(e))
502492
}
503493

504494
/// Create a "Perl class" AST item.
505495
pub fn class_perl(e: ClassPerl) -> Ast {
506-
Ast(Box::new(AstKind::ClassPerl(Box::new(e))))
496+
Ast::ClassPerl(Box::new(e))
507497
}
508498

509499
/// Create a "bracketed class" AST item.
510500
pub fn class_bracketed(e: ClassBracketed) -> Ast {
511-
Ast(Box::new(AstKind::ClassBracketed(Box::new(e))))
501+
Ast::ClassBracketed(Box::new(e))
512502
}
513503

514504
/// Create a "repetition" AST item.
515505
pub fn repetition(e: Repetition) -> Ast {
516-
Ast(Box::new(AstKind::Repetition(Box::new(e))))
506+
Ast::Repetition(Box::new(e))
517507
}
518508

519509
/// Create a "group" AST item.
520510
pub fn group(e: Group) -> Ast {
521-
Ast(Box::new(AstKind::Group(Box::new(e))))
511+
Ast::Group(Box::new(e))
522512
}
523513

524514
/// Create a "alternation" AST item.
525515
pub fn alternation(e: Alternation) -> Ast {
526-
Ast(Box::new(AstKind::Alternation(Box::new(e))))
516+
Ast::Alternation(Box::new(e))
527517
}
528518

529519
/// Create a "concat" AST item.
530520
pub fn concat(e: Concat) -> Ast {
531-
Ast(Box::new(AstKind::Concat(Box::new(e))))
521+
Ast::Concat(Box::new(e))
532522
}
533523

534524
/// Return the span of this abstract syntax tree.
535525
pub fn span(&self) -> &Span {
536-
match *self.0 {
537-
AstKind::Empty(ref span) => span,
538-
AstKind::Flags(ref x) => &x.span,
539-
AstKind::Literal(ref x) => &x.span,
540-
AstKind::Dot(ref span) => span,
541-
AstKind::Assertion(ref x) => &x.span,
542-
AstKind::ClassUnicode(ref x) => &x.span,
543-
AstKind::ClassPerl(ref x) => &x.span,
544-
AstKind::ClassBracketed(ref x) => &x.span,
545-
AstKind::Repetition(ref x) => &x.span,
546-
AstKind::Group(ref x) => &x.span,
547-
AstKind::Alternation(ref x) => &x.span,
548-
AstKind::Concat(ref x) => &x.span,
526+
match *self {
527+
Ast::Empty(ref span) => span,
528+
Ast::Flags(ref x) => &x.span,
529+
Ast::Literal(ref x) => &x.span,
530+
Ast::Dot(ref span) => span,
531+
Ast::Assertion(ref x) => &x.span,
532+
Ast::ClassUnicode(ref x) => &x.span,
533+
Ast::ClassPerl(ref x) => &x.span,
534+
Ast::ClassBracketed(ref x) => &x.span,
535+
Ast::Repetition(ref x) => &x.span,
536+
Ast::Group(ref x) => &x.span,
537+
Ast::Alternation(ref x) => &x.span,
538+
Ast::Concat(ref x) => &x.span,
549539
}
550540
}
551541

552542
/// Return true if and only if this Ast is empty.
553543
pub fn is_empty(&self) -> bool {
554-
match *self.0 {
555-
AstKind::Empty(_) => true,
544+
match *self {
545+
Ast::Empty(_) => true,
556546
_ => false,
557547
}
558548
}
559549

560550
/// Returns true if and only if this AST has any (including possibly empty)
561551
/// subexpressions.
562552
fn has_subexprs(&self) -> bool {
563-
match *self.0 {
564-
AstKind::Empty(_)
565-
| AstKind::Flags(_)
566-
| AstKind::Literal(_)
567-
| AstKind::Dot(_)
568-
| AstKind::Assertion(_)
569-
| AstKind::ClassUnicode(_)
570-
| AstKind::ClassPerl(_) => false,
571-
AstKind::ClassBracketed(_)
572-
| AstKind::Repetition(_)
573-
| AstKind::Group(_)
574-
| AstKind::Alternation(_)
575-
| AstKind::Concat(_) => true,
553+
match *self {
554+
Ast::Empty(_)
555+
| Ast::Flags(_)
556+
| Ast::Literal(_)
557+
| Ast::Dot(_)
558+
| Ast::Assertion(_)
559+
| Ast::ClassUnicode(_)
560+
| Ast::ClassPerl(_) => false,
561+
Ast::ClassBracketed(_)
562+
| Ast::Repetition(_)
563+
| Ast::Group(_)
564+
| Ast::Alternation(_)
565+
| Ast::Concat(_) => true,
576566
}
577567
}
578568
}
@@ -1598,48 +1588,48 @@ impl Drop for Ast {
15981588
fn drop(&mut self) {
15991589
use core::mem;
16001590

1601-
match *self.0 {
1602-
AstKind::Empty(_)
1603-
| AstKind::Flags(_)
1604-
| AstKind::Literal(_)
1605-
| AstKind::Dot(_)
1606-
| AstKind::Assertion(_)
1607-
| AstKind::ClassUnicode(_)
1608-
| AstKind::ClassPerl(_)
1591+
match *self {
1592+
Ast::Empty(_)
1593+
| Ast::Flags(_)
1594+
| Ast::Literal(_)
1595+
| Ast::Dot(_)
1596+
| Ast::Assertion(_)
1597+
| Ast::ClassUnicode(_)
1598+
| Ast::ClassPerl(_)
16091599
// Bracketed classes are recursive, they get their own Drop impl.
1610-
| AstKind::ClassBracketed(_) => return,
1611-
AstKind::Repetition(ref x) if !x.ast.has_subexprs() => return,
1612-
AstKind::Group(ref x) if !x.ast.has_subexprs() => return,
1613-
AstKind::Alternation(ref x) if x.asts.is_empty() => return,
1614-
AstKind::Concat(ref x) if x.asts.is_empty() => return,
1600+
| Ast::ClassBracketed(_) => return,
1601+
Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1602+
Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1603+
Ast::Alternation(ref x) if x.asts.is_empty() => return,
1604+
Ast::Concat(ref x) if x.asts.is_empty() => return,
16151605
_ => {}
16161606
}
16171607

16181608
let empty_span = || Span::splat(Position::new(0, 0, 0));
16191609
let empty_ast = || Ast::empty(empty_span());
16201610
let mut stack = vec![mem::replace(self, empty_ast())];
16211611
while let Some(mut ast) = stack.pop() {
1622-
match *ast.0 {
1623-
AstKind::Empty(_)
1624-
| AstKind::Flags(_)
1625-
| AstKind::Literal(_)
1626-
| AstKind::Dot(_)
1627-
| AstKind::Assertion(_)
1628-
| AstKind::ClassUnicode(_)
1629-
| AstKind::ClassPerl(_)
1612+
match ast {
1613+
Ast::Empty(_)
1614+
| Ast::Flags(_)
1615+
| Ast::Literal(_)
1616+
| Ast::Dot(_)
1617+
| Ast::Assertion(_)
1618+
| Ast::ClassUnicode(_)
1619+
| Ast::ClassPerl(_)
16301620
// Bracketed classes are recursive, so they get their own Drop
16311621
// impl.
1632-
| AstKind::ClassBracketed(_) => {}
1633-
AstKind::Repetition(ref mut x) => {
1622+
| Ast::ClassBracketed(_) => {}
1623+
Ast::Repetition(ref mut x) => {
16341624
stack.push(mem::replace(&mut x.ast, empty_ast()));
16351625
}
1636-
AstKind::Group(ref mut x) => {
1626+
Ast::Group(ref mut x) => {
16371627
stack.push(mem::replace(&mut x.ast, empty_ast()));
16381628
}
1639-
AstKind::Alternation(ref mut x) => {
1629+
Ast::Alternation(ref mut x) => {
16401630
stack.extend(x.asts.drain(..));
16411631
}
1642-
AstKind::Concat(ref mut x) => {
1632+
Ast::Concat(ref mut x) => {
16431633
stack.extend(x.asts.drain(..));
16441634
}
16451635
}
@@ -1760,35 +1750,13 @@ mod tests {
17601750
// 64-bit target. Wow.
17611751
#[test]
17621752
fn ast_size() {
1763-
std::dbg!(core::mem::size_of::<Span>());
1764-
std::dbg!(core::mem::size_of::<SetFlags>());
1765-
std::dbg!(core::mem::size_of::<Literal>());
1766-
std::dbg!(core::mem::size_of::<Span>());
1767-
std::dbg!(core::mem::size_of::<Assertion>());
1768-
std::dbg!(core::mem::size_of::<ClassUnicode>());
1769-
std::dbg!(core::mem::size_of::<ClassPerl>());
1770-
std::dbg!(core::mem::size_of::<ClassBracketed>());
1771-
std::dbg!(core::mem::size_of::<Repetition>());
1772-
std::dbg!(core::mem::size_of::<Group>());
1773-
std::dbg!(core::mem::size_of::<Alternation>());
1774-
std::dbg!(core::mem::size_of::<Concat>());
1775-
1776-
let max = core::mem::size_of::<usize>();
1753+
let max = 2 * core::mem::size_of::<usize>();
17771754
let size = core::mem::size_of::<Ast>();
17781755
assert!(
17791756
size <= max,
17801757
"Ast size of {} bytes is bigger than suggested max {}",
17811758
size,
17821759
max
17831760
);
1784-
1785-
let max = 2 * core::mem::size_of::<usize>();
1786-
let size = core::mem::size_of::<AstKind>();
1787-
assert!(
1788-
size <= max,
1789-
"AstKind size of {} bytes is bigger than suggested max {}",
1790-
size,
1791-
max
1792-
);
17931761
}
17941762
}

regex-syntax/src/ast/parse.rs

+31-31
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use alloc::{
1616
};
1717

1818
use crate::{
19-
ast::{self, Ast, AstKind, Position, Span},
19+
ast::{self, Ast, Position, Span},
2020
either::Either,
2121
is_escapeable_character, is_meta_character,
2222
};
@@ -1044,8 +1044,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
10441044
)
10451045
}
10461046
};
1047-
match *ast.0 {
1048-
AstKind::Empty(_) | AstKind::Flags(_) => {
1047+
match ast {
1048+
Ast::Empty(_) | Ast::Flags(_) => {
10491049
return Err(
10501050
self.error(self.span(), ast::ErrorKind::RepetitionMissing)
10511051
)
@@ -1096,8 +1096,8 @@ impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
10961096
)
10971097
}
10981098
};
1099-
match *ast.0 {
1100-
AstKind::Empty(_) | AstKind::Flags(_) => {
1099+
match ast {
1100+
Ast::Empty(_) | Ast::Flags(_) => {
11011101
return Err(
11021102
self.error(self.span(), ast::ErrorKind::RepetitionMissing)
11031103
)
@@ -2183,43 +2183,43 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
21832183
}
21842184

21852185
fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2186-
let span = match *ast.0 {
2187-
AstKind::Empty(_)
2188-
| AstKind::Flags(_)
2189-
| AstKind::Literal(_)
2190-
| AstKind::Dot(_)
2191-
| AstKind::Assertion(_)
2192-
| AstKind::ClassUnicode(_)
2193-
| AstKind::ClassPerl(_) => {
2186+
let span = match *ast {
2187+
Ast::Empty(_)
2188+
| Ast::Flags(_)
2189+
| Ast::Literal(_)
2190+
| Ast::Dot(_)
2191+
| Ast::Assertion(_)
2192+
| Ast::ClassUnicode(_)
2193+
| Ast::ClassPerl(_) => {
21942194
// These are all base cases, so we don't increment depth.
21952195
return Ok(());
21962196
}
2197-
AstKind::ClassBracketed(ref x) => &x.span,
2198-
AstKind::Repetition(ref x) => &x.span,
2199-
AstKind::Group(ref x) => &x.span,
2200-
AstKind::Alternation(ref x) => &x.span,
2201-
AstKind::Concat(ref x) => &x.span,
2197+
Ast::ClassBracketed(ref x) => &x.span,
2198+
Ast::Repetition(ref x) => &x.span,
2199+
Ast::Group(ref x) => &x.span,
2200+
Ast::Alternation(ref x) => &x.span,
2201+
Ast::Concat(ref x) => &x.span,
22022202
};
22032203
self.increment_depth(span)
22042204
}
22052205

22062206
fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2207-
match *ast.0 {
2208-
AstKind::Empty(_)
2209-
| AstKind::Flags(_)
2210-
| AstKind::Literal(_)
2211-
| AstKind::Dot(_)
2212-
| AstKind::Assertion(_)
2213-
| AstKind::ClassUnicode(_)
2214-
| AstKind::ClassPerl(_) => {
2207+
match *ast {
2208+
Ast::Empty(_)
2209+
| Ast::Flags(_)
2210+
| Ast::Literal(_)
2211+
| Ast::Dot(_)
2212+
| Ast::Assertion(_)
2213+
| Ast::ClassUnicode(_)
2214+
| Ast::ClassPerl(_) => {
22152215
// These are all base cases, so we don't decrement depth.
22162216
Ok(())
22172217
}
2218-
AstKind::ClassBracketed(_)
2219-
| AstKind::Repetition(_)
2220-
| AstKind::Group(_)
2221-
| AstKind::Alternation(_)
2222-
| AstKind::Concat(_) => {
2218+
Ast::ClassBracketed(_)
2219+
| Ast::Repetition(_)
2220+
| Ast::Group(_)
2221+
| Ast::Alternation(_)
2222+
| Ast::Concat(_) => {
22232223
self.decrement_depth();
22242224
Ok(())
22252225
}

0 commit comments

Comments
 (0)