Skip to content

Commit db2ddb1

Browse files
committed
auto merge of #14423 : Kimundi/rust/static_regex, r=alexcrichton
This patch changes the internals of `Regex` and `regex!()` so that `static RE: Regex = regex!(...);` is valid. It doesn't change anything about the actual regex implementation; it only changes the type to something that can be constructed as a const expression.
2 parents 0fca6c6 + b997de5 commit db2ddb1

File tree

5 files changed

+125
-44
lines changed

5 files changed

+125
-44
lines changed

src/libregex/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ pub mod native {
401401
// undesirable consequences (such as requiring a dependency on
402402
// `libsyntax`).
403403
//
404-
// Secondly, the code generated generated by `regex!` must *also* be able
404+
// Secondly, the code generated by `regex!` must *also* be able
405405
// to access various functions in this crate to reduce code duplication
406406
// and to provide a value with precisely the same `Regex` type in this
407407
// crate. This, AFAIK, is impossible to mitigate.

src/libregex/re.rs

+83-34
Original file line numberDiff line numberDiff line change
@@ -100,38 +100,45 @@ pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
100100
/// documentation.
101101
#[deriving(Clone)]
102102
#[allow(visible_private_types)]
103-
pub struct Regex {
104-
/// The representation of `Regex` is exported to support the `regex!`
105-
/// syntax extension. Do not rely on it.
106-
///
107-
/// See the comments for the `program` module in `lib.rs` for a more
108-
/// detailed explanation for what `regex!` requires.
103+
pub enum Regex {
104+
// The representation of `Regex` is exported to support the `regex!`
105+
// syntax extension. Do not rely on it.
106+
//
107+
// See the comments for the `program` module in `lib.rs` for a more
108+
// detailed explanation for what `regex!` requires.
109109
#[doc(hidden)]
110-
pub original: String,
110+
Dynamic(Dynamic),
111111
#[doc(hidden)]
112-
pub names: Vec<Option<String>>,
112+
Native(Native),
113+
}
114+
115+
#[deriving(Clone)]
116+
#[doc(hidden)]
117+
pub struct Dynamic {
118+
original: String,
119+
names: Vec<Option<String>>,
113120
#[doc(hidden)]
114-
pub p: MaybeNative,
121+
pub prog: Program
115122
}
116123

117-
impl fmt::Show for Regex {
118-
/// Shows the original regular expression.
119-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
120-
write!(f, "{}", self.original)
121-
}
124+
#[doc(hidden)]
125+
pub struct Native {
126+
#[doc(hidden)]
127+
pub original: &'static str,
128+
#[doc(hidden)]
129+
pub names: &'static [Option<&'static str>],
130+
#[doc(hidden)]
131+
pub prog: fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>
122132
}
123133

124-
pub enum MaybeNative {
125-
Dynamic(Program),
126-
Native(fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>),
134+
impl Clone for Native {
135+
fn clone(&self) -> Native { *self }
127136
}
128137

129-
impl Clone for MaybeNative {
130-
fn clone(&self) -> MaybeNative {
131-
match *self {
132-
Dynamic(ref p) => Dynamic(p.clone()),
133-
Native(fp) => Native(fp),
134-
}
138+
impl fmt::Show for Regex {
139+
/// Shows the original regular expression.
140+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
141+
write!(f, "{}", self.as_str())
135142
}
136143
}
137144

@@ -146,10 +153,11 @@ impl Regex {
146153
pub fn new(re: &str) -> Result<Regex, parse::Error> {
147154
let ast = try!(parse::parse(re));
148155
let (prog, names) = Program::new(ast);
149-
Ok(Regex {
156+
Ok(Dynamic(Dynamic {
150157
original: re.to_strbuf(),
151-
names: names, p: Dynamic(prog),
152-
})
158+
names: names,
159+
prog: prog,
160+
}))
153161
}
154162

155163
/// Returns true if and only if the regex matches the string given.
@@ -495,6 +503,46 @@ impl Regex {
495503
}
496504
new.append(text.slice(last_match, text.len()))
497505
}
506+
507+
/// Returns the original string of this regex.
508+
pub fn as_str<'a>(&'a self) -> &'a str {
509+
match *self {
510+
Dynamic(Dynamic { ref original, .. }) => original.as_slice(),
511+
Native(Native { ref original, .. }) => original.as_slice(),
512+
}
513+
}
514+
515+
#[doc(hidden)]
516+
#[allow(visible_private_types)]
517+
#[experimental]
518+
pub fn names_iter<'a>(&'a self) -> NamesIter<'a> {
519+
match *self {
520+
Native(ref n) => NamesIterNative(n.names.iter()),
521+
Dynamic(ref d) => NamesIterDynamic(d.names.iter())
522+
}
523+
}
524+
525+
fn names_len(&self) -> uint {
526+
match *self {
527+
Native(ref n) => n.names.len(),
528+
Dynamic(ref d) => d.names.len()
529+
}
530+
}
531+
532+
}
533+
534+
enum NamesIter<'a> {
535+
NamesIterNative(::std::slice::Items<'a, Option<&'static str>>),
536+
NamesIterDynamic(::std::slice::Items<'a, Option<String>>)
537+
}
538+
539+
impl<'a> Iterator<Option<String>> for NamesIter<'a> {
540+
fn next(&mut self) -> Option<Option<String>> {
541+
match *self {
542+
NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_strbuf())),
543+
NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_strbuf())),
544+
}
545+
}
498546
}
499547

500548
/// NoExpand indicates literal string replacement.
@@ -612,22 +660,23 @@ pub struct Captures<'t> {
612660
}
613661

614662
impl<'t> Captures<'t> {
663+
#[allow(experimental)]
615664
fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
616665
-> Option<Captures<'t>> {
617666
if !has_match(&locs) {
618667
return None
619668
}
620669

621670
let named =
622-
if re.names.len() == 0 {
671+
if re.names_len() == 0 {
623672
None
624673
} else {
625674
let mut named = HashMap::new();
626-
for (i, name) in re.names.iter().enumerate() {
675+
for (i, name) in re.names_iter().enumerate() {
627676
match name {
628-
&None => {},
629-
&Some(ref name) => {
630-
named.insert(name.to_strbuf(), i);
677+
None => {},
678+
Some(name) => {
679+
named.insert(name, i);
631680
}
632681
}
633682
}
@@ -862,9 +911,9 @@ fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs {
862911

863912
fn exec_slice(re: &Regex, which: MatchKind,
864913
input: &str, s: uint, e: uint) -> CaptureLocs {
865-
match re.p {
866-
Dynamic(ref prog) => vm::run(which, prog, input, s, e),
867-
Native(exec) => exec(which, input, s, e),
914+
match *re {
915+
Dynamic(Dynamic { ref prog, .. }) => vm::run(which, prog, input, s, e),
916+
Native(Native { prog, .. }) => prog(which, input, s, e),
868917
}
869918
}
870919

src/libregex/test/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ mod native_bench;
2020
#[path = "tests.rs"]
2121
mod native_tests;
2222

23+
#[cfg(not(stage1))]
24+
mod native_static;
25+
2326
// Due to macro scoping rules, this definition only applies for the modules
2427
// defined below. Effectively, it allows us to use the same tests for both
2528
// native and dynamic regexes.

src/libregex/test/native_static.rs

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use regex::Regex;
12+
static RE: Regex = regex!(r"\d+");
13+
14+
#[test]
15+
fn static_splitn() {
16+
let text = "cauchy123plato456tyler789binx";
17+
let subs: Vec<&str> = RE.splitn(text, 2).collect();
18+
assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
19+
}
20+
21+
#[test]
22+
fn static_split() {
23+
let text = "cauchy123plato456tyler789binx";
24+
let subs: Vec<&str> = RE.split(text).collect();
25+
assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
26+
}

src/libregex_macros/lib.rs

+12-9
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ pub fn macro_registrar(register: |ast::Name, SyntaxExtension|) {
7575
/// It is strongly recommended to read the dynamic implementation in vm.rs
7676
/// first before trying to understand the code generator. The implementation
7777
/// strategy is identical and vm.rs has comments and will be easier to follow.
78+
#[allow(experimental)]
7879
fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
7980
-> Box<MacResult> {
8081
let regex = match parse(cx, tts) {
@@ -89,14 +90,14 @@ fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
8990
return DummyResult::any(sp)
9091
}
9192
};
92-
let prog = match re.p {
93-
Dynamic(ref prog) => prog.clone(),
93+
let prog = match re {
94+
Dynamic(Dynamic { ref prog, .. }) => prog.clone(),
9495
Native(_) => unreachable!(),
9596
};
9697

9798
let mut gen = NfaGen {
9899
cx: &*cx, sp: sp, prog: prog,
99-
names: re.names.clone(), original: re.original.clone(),
100+
names: re.names_iter().collect(), original: re.as_str().to_strbuf(),
100101
};
101102
MacExpr::new(gen.code())
102103
}
@@ -119,7 +120,7 @@ impl<'a> NfaGen<'a> {
119120
|cx, name| match *name {
120121
Some(ref name) => {
121122
let name = name.as_slice();
122-
quote_expr!(cx, Some($name.to_strbuf()))
123+
quote_expr!(cx, Some($name))
123124
}
124125
None => cx.expr_none(self.sp),
125126
}
@@ -141,9 +142,11 @@ impl<'a> NfaGen<'a> {
141142
let regex = self.original.as_slice();
142143

143144
quote_expr!(self.cx, {
145+
static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names;
144146
fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
145147
start: uint, end: uint) -> Vec<Option<uint>> {
146148
#![allow(unused_imports)]
149+
#![allow(unused_mut)]
147150
use regex::native::{
148151
MatchKind, Exists, Location, Submatches,
149152
StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
@@ -310,11 +313,11 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
310313
}
311314
}
312315

313-
::regex::Regex {
314-
original: $regex.to_strbuf(),
315-
names: vec!$cap_names,
316-
p: ::regex::native::Native(exec),
317-
}
316+
::regex::native::Native(::regex::native::Native {
317+
original: $regex,
318+
names: CAP_NAMES,
319+
prog: exec,
320+
})
318321
})
319322
}
320323

0 commit comments

Comments
 (0)