Skip to content

Commit 1f5a950

Browse files
committed
tackler-rs: full haystack regex matcher
GH-31 Signed-off-by: 35V LG84 <[email protected]>
1 parent a0d4954 commit 1f5a950

File tree

5 files changed

+364
-0
lines changed

5 files changed

+364
-0
lines changed

tackler-rs/CRATES.md

+24
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,30 @@ The Rusty Services are assorted bits and pieces which are needed for
99
Tackler, but won't fit into the domain of plain text accounting.
1010

1111

12+
## Full haystack regex matchers
13+
14+
By default, Rust's `regex::Regex::is_match` tests whether the regex matches [anywhere in the given haystack](https://docs.rs/regex/latest/regex/struct.Regex.html#method.is_match).
15+
16+
These constructors create a regex which will try to match against the full haystack by default. This logic is similar to [java.util.regex.Matcher.matches()](https://docs.oracle.com/en/java/javase/21/docs/api/java.base/java/util/regex/Matcher.html#matches()).
17+
18+
```rust
19+
tackler_rs::regex::{
20+
new_full_haystack_regex,
21+
new_full_haystack_regex_set,
22+
peeled_pattern,
23+
peeled_patterns
24+
}
25+
```
26+
27+
### Serializers and Deserializers for full haystack matchers
28+
29+
This is a Serde serializer and deserializer implementation for full haystack matchers.
30+
31+
```rust
32+
tackler_rs::regex::serde::full_haystack_matcher
33+
```
34+
35+
1236
## Tackler components on Crates.io
1337

1438
* Tackler CLI application: [tackler](https://crates.io/crates/tackler)

tackler-rs/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ use std::io::BufWriter;
3030
use std::path::{Path, PathBuf};
3131
use walkdir::{DirEntry, WalkDir};
3232

33+
/// Regex helpers providing full haystack matching (similar to the JDK's `Matcher.matches()`)
34+
pub mod regex;
35+
3336
///
3437
/// Get full path based on
3538
/// directory, filename prefix, filename and extension

tackler-rs/src/regex.rs

+202
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,205 @@
4848
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
4949
* OR OTHER DEALINGS IN THE SOFTWARE.
5050
*/
51+
52+
/// Serialization and Deserialization for full haystack regex matchers
53+
pub mod serde;
54+
55+
use regex::{Regex, RegexSet};
56+
57+
/// Wraps `re` as `^(?:re)$` so that the resulting pattern is anchored to both
/// ends of the haystack.
///
/// The original pattern is placed inside a non-capturing group, so the anchors
/// apply to the pattern as a whole and no additional capture group is added.
// NOTE(review): a pattern ending in a verbose-mode (`(?x)`) line comment would
// presumably swallow the closing `)$` -- confirm whether that case matters.
fn into_full_haystack_pattern<S>(re: S) -> String
where
    S: AsRef<str>,
{
    let pattern = re.as_ref();
    ["^(?:", pattern, ")$"].concat()
}
63+
64+
/// Removes one layer of the `^(?:` ... `)$` wrapper from `re`.
///
/// The wrapper is removed only when both the prefix and the suffix are
/// present; in every other case `re` is returned unchanged.
fn peel_full_haystack_pattern(re: &str) -> &str {
    re.strip_prefix("^(?:")
        .and_then(|inner| inner.strip_suffix(")$"))
        .unwrap_or(re)
}
70+
71+
/// Compiles a full haystack regular expression
72+
///
73+
/// This will augment (anchor) the given re so that it will match against
74+
/// full haystack.
75+
///
76+
/// See `Regex::Regex::new` for actual documentation of this method.
77+
///
78+
/// See `peeled_pattern_as_str` how to get back the original string
79+
///
80+
/// # Examples
81+
/// ```rust
82+
/// # use std::error::Error;
83+
/// use tackler_rs::regex::new_full_haystack_regex;
84+
///
85+
/// let re_foo = new_full_haystack_regex("foo")?;
86+
/// let re_bar = new_full_haystack_regex("bar")?;
87+
///
88+
/// assert!(re_foo.is_match("foo"));
89+
/// assert!(re_bar.is_match("bar"));
90+
///
91+
/// assert!(!re_foo.is_match("foobar"));
92+
/// assert!(!re_bar.is_match("foobar"));
93+
/// # Ok::<(), Box<dyn Error>>(())
94+
/// ```
95+
pub fn new_full_haystack_regex(re: &str) -> Result<Regex, regex::Error> {
96+
Regex::new(into_full_haystack_pattern(re).as_str())
97+
}
98+
99+
/// Returns the original string of this regex.
100+
/// # Examples
101+
/// ```rust
102+
/// # use std::error::Error;
103+
/// use tackler_rs::regex::new_full_haystack_regex;
104+
/// use tackler_rs::regex::peeled_pattern;
105+
///
106+
/// let re_foo = new_full_haystack_regex(r"foo.*")?;
107+
///
108+
/// assert_eq!(peeled_pattern(&re_foo), r"foo.*");
109+
/// # Ok::<(), Box<dyn Error>>(())
110+
/// ```
111+
pub fn peeled_pattern(regex: &Regex) -> &str {
112+
peel_full_haystack_pattern(regex.as_str())
113+
}
114+
115+
/// Compiles a set of full haystack regular expressions
116+
///
117+
/// This will augment (anchor) the given expressions so
118+
/// that each of those will match against full haystack.
119+
///
120+
/// See `Regex::RegexSet::new` for actual documentation of this method.
121+
///
122+
/// See `peeled_pattern` how to get back the original string
123+
///
124+
/// # Examples
125+
/// ```rust
126+
/// # use std::error::Error;
127+
/// use tackler_rs::regex::new_full_haystack_regex_set;
128+
///
129+
/// let re_set = new_full_haystack_regex_set(["foo", "bar"])?;
130+
///
131+
/// assert!(re_set.is_match("foo"));
132+
/// assert!(re_set.is_match("bar"));
133+
///
134+
/// assert!(!re_set.is_match("foobar"));
135+
/// assert!(!re_set.is_match("foobar"));
136+
/// # Ok::<(), Box<dyn Error>>(())
137+
/// ```
138+
pub fn new_full_haystack_regex_set<I, S>(exprs: I) -> Result<RegexSet, regex::Error>
139+
where
140+
S: AsRef<str>,
141+
I: IntoIterator<Item = S>,
142+
{
143+
RegexSet::new(exprs.into_iter().map(|re| into_full_haystack_pattern(re)))
144+
}
145+
146+
/// Returns the peeled regex patterns that this regex set was constructed from.
147+
///
148+
/// # Examples
149+
/// ```rust
150+
/// # use std::error::Error;
151+
/// use tackler_rs::regex::new_full_haystack_regex_set;
152+
/// use tackler_rs::regex::peeled_patterns;
153+
///
154+
/// let re_set = new_full_haystack_regex_set(["foo", "bar"])?;
155+
///
156+
/// assert_eq!(peeled_patterns(&re_set), vec!["foo", "bar"]);
157+
/// # Ok::<(), Box<dyn Error>>(())
158+
/// ```
159+
pub fn peeled_patterns(regex_set: &RegexSet) -> Vec<String> {
160+
regex_set
161+
.patterns()
162+
.iter()
163+
.map(|re| peel_full_haystack_pattern(re).to_string())
164+
.collect::<Vec<_>>()
165+
}
166+
167+
#[cfg(test)]
mod tests {
    use super::*;

    /// Haystacks which contain a match for `o.a`, but are longer than the match.
    const PARTIAL_HAYSTACKS: [&str; 3] = ["foobar", "ooba", "obar"];

    /// Checks that `re` rejects all partial haystacks and accepts `oba`.
    fn assert_matches_only_oba(re: &Regex) {
        for haystack in PARTIAL_HAYSTACKS.iter() {
            assert!(!re.is_match(haystack), "unexpected match: {}", haystack);
        }
        assert!(re.is_match("oba"));
    }

    /// Checks that `re_set` rejects `foobar` and accepts `foo` and `bar`.
    fn assert_matches_foo_or_bar(re_set: &RegexSet) {
        assert!(!re_set.is_match("foobar"));
        for haystack in ["foo", "bar"].iter() {
            assert!(re_set.is_match(haystack), "missing match: {}", haystack);
        }
    }

    #[test]
    fn test_peel_full_haystack_pattern() {
        // (input, expected peeled pattern)
        let cases = [
            ("abc", "abc"),
            (".*", ".*"),
            ("(.*)", "(.*)"),
            ("^(?:.*)", "^(?:.*)"),
            ("(.*)$", "(.*)$"),
            ("^(?:.*)$", ".*"),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(peel_full_haystack_pattern(input), *expected);
        }
    }

    #[test]
    fn test_full_haystack_pattern() {
        let re = new_full_haystack_regex(r"o.a").unwrap(/*:test:*/);
        assert_eq!(re.as_str(), r"^(?:o.a)$");

        assert_matches_only_oba(&re);
    }

    #[test]
    fn test_full_haystack_pattern_anchored() {
        let re = new_full_haystack_regex(r"^o.a$").unwrap(/*:test:*/);
        assert_eq!(re.as_str(), r"^(?:^o.a$)$");

        assert_matches_only_oba(&re);
    }

    #[test]
    fn test_full_haystack_pattern_peeled() {
        // The input already looks like an anchored pattern; only the layer
        // added by the constructor must be peeled away.
        let re_str = r"^(?:o.a)$";
        let re = new_full_haystack_regex(re_str).unwrap(/*:test:*/);
        assert_eq!(re.as_str(), r"^(?:^(?:o.a)$)$");

        assert_matches_only_oba(&re);

        assert_eq!(peeled_pattern(&re), re_str);
    }

    #[test]
    fn test_full_haystack_patterns() {
        let re_set = new_full_haystack_regex_set([r".*foo", r"bar.*"]).unwrap(/*:test:*/);
        assert_eq!(re_set.patterns(), [r"^(?:.*foo)$", r"^(?:bar.*)$"]);

        assert_matches_foo_or_bar(&re_set);
    }

    #[test]
    fn test_full_haystack_patterns_anchored() {
        let re_set = new_full_haystack_regex_set([r"^.*foo$", r"^bar.*$"]).unwrap(/*:test:*/);
        assert_eq!(re_set.patterns(), [r"^(?:^.*foo$)$", r"^(?:^bar.*$)$"]);

        assert_matches_foo_or_bar(&re_set);
    }

    #[test]
    fn test_full_haystack_patterns_peeled() {
        // The inputs already look like anchored patterns; only the layer
        // added by the constructor must be peeled away.
        let re_set_str = [r"^(?:.*foo)$", r"^(?:bar.*)$"];
        let re_set = new_full_haystack_regex_set(re_set_str).unwrap(/*:test:*/);
        assert_eq!(
            re_set.patterns(),
            [r"^(?:^(?:.*foo)$)$", r"^(?:^(?:bar.*)$)$"]
        );

        assert_matches_foo_or_bar(&re_set);

        assert_eq!(peeled_patterns(&re_set), re_set_str);
    }
}

tackler-rs/src/regex/serde.rs

+19
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,22 @@
4949
* OR OTHER DEALINGS IN THE SOFTWARE.
5050
*/
5151

52+
/// Full Haystack matcher serializer and deserializer
53+
///
54+
/// # Example
55+
///
56+
/// ```rust
57+
/// use regex::Regex;
58+
/// use serde::{Deserialize, Serialize};
59+
/// use tackler_rs::regex::serde::full_haystack_matcher;
60+
///
61+
/// #[derive(Serialize, Deserialize)]
62+
/// struct Account {
63+
/// #[serde(with = "full_haystack_matcher")]
64+
/// regex: Regex,
65+
/// }
66+
///
67+
/// #
68+
/// # fn main() {}
69+
/// ```
70+
pub mod full_haystack_matcher;

tackler-rs/src/regex/serde/full_haystack_matcher.rs

+116
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,119 @@
4949
* OR OTHER DEALINGS IN THE SOFTWARE.
5050
*/
5151

52+
//
53+
// This code is based on: https://github.com/tailhook/serde-regex,
54+
// which is licensed as Apache-2.0 OR MIT
55+
//
56+
57+
use regex::Regex;
58+
use std::{
59+
borrow::Cow,
60+
hash::Hash,
61+
ops::{Deref, DerefMut},
62+
};
63+
64+
use crate::regex::{new_full_haystack_regex, peeled_pattern};
65+
use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer};
66+
67+
/// Newtype wrapper which carries the `Serialize` and `Deserialize`
/// implementations for full haystack `Regex` values.
///
/// The wrapped value is public and can be accessed as field `0`.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Serde<T>(pub T);
71+
72+
impl<'de> Deserialize<'de> for Serde<Regex> {
73+
fn deserialize<D>(d: D) -> Result<Serde<Regex>, D::Error>
74+
where
75+
D: Deserializer<'de>,
76+
{
77+
let s = <Cow<str>>::deserialize(d)?;
78+
79+
match new_full_haystack_regex(s.as_ref()) {
80+
Ok(regex) => Ok(Serde(regex)),
81+
Err(err) => Err(D::Error::custom(err)),
82+
}
83+
}
84+
}
85+
86+
/// Deserialize function, see crate docs to see how to use it
87+
pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
88+
where
89+
D: Deserializer<'de>,
90+
Serde<T>: Deserialize<'de>,
91+
{
92+
Serde::deserialize(deserializer).map(|x| x.0)
93+
}
94+
95+
/// Serialize function, see crate docs to see how to use it
96+
pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
97+
where
98+
S: Serializer,
99+
for<'a> Serde<&'a T>: Serialize,
100+
{
101+
Serde(value).serialize(serializer)
102+
}
103+
104+
impl<T> Deref for Serde<T> {
105+
type Target = T;
106+
107+
fn deref(&self) -> &T {
108+
&self.0
109+
}
110+
}
111+
112+
impl<T> DerefMut for Serde<T> {
113+
fn deref_mut(&mut self) -> &mut T {
114+
&mut self.0
115+
}
116+
}
117+
118+
impl<T> Serde<T> {
119+
/// Consumes the `Serde`, returning the inner value.
120+
pub fn into_inner(self) -> T {
121+
self.0
122+
}
123+
}
124+
125+
impl<T> From<T> for Serde<T> {
126+
fn from(val: T) -> Serde<T> {
127+
Serde(val)
128+
}
129+
}
130+
131+
impl Serialize for Serde<&Regex> {
132+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
133+
where
134+
S: Serializer,
135+
{
136+
peeled_pattern(self.0).serialize(serializer)
137+
}
138+
}
139+
140+
impl Serialize for Serde<Regex> {
141+
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
142+
where
143+
S: Serializer,
144+
{
145+
peeled_pattern(&self.0).serialize(serializer)
146+
}
147+
}
148+
149+
#[cfg(test)]
mod test {
    use super::*;

    use crate::regex::into_full_haystack_pattern;
    use regex::Regex;
    use serde_json::{from_str, to_string};

    // The same pattern as a plain string and as a JSON string literal.
    const SAMPLE: &str = r#"[a-z"\]]+\d{1,10}""#;
    const SAMPLE_JSON: &str = r#""[a-z\"\\]]+\\d{1,10}\"""#;

    #[test]
    fn test_regex() {
        let decoded = from_str::<Serde<Regex>>(SAMPLE_JSON).unwrap();

        // Deserialization anchors the pattern ...
        let anchored = into_full_haystack_pattern(SAMPLE);
        assert_eq!(decoded.as_str(), anchored);

        // ... and serialization peels the anchoring off again.
        let encoded = to_string(&decoded).unwrap();
        assert_eq!(encoded, SAMPLE_JSON);
    }
}

0 commit comments

Comments
 (0)