|
11 | 11 | use std::cmp;
|
12 | 12 | use symbol::Symbol;
|
13 | 13 |
|
14 |
| -/// To find the Levenshtein distance between two strings |
| 14 | +/// Find the Levenshtein distance between two strings |
15 | 15 | pub fn lev_distance(a: &str, b: &str) -> usize {
|
16 | 16 | // cases which don't require further computation
|
17 | 17 | if a.is_empty() {
|
@@ -41,10 +41,12 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
|
41 | 41 | } dcol[t_last + 1]
|
42 | 42 | }
|
43 | 43 |
|
44 |
| -/// To find the best match for a given string from an iterator of names |
| 44 | +/// Find the best match for a given word in the given iterator |
| 45 | +/// |
45 | 46 | /// As a loose rule to avoid the obviously incorrect suggestions, it takes
|
46 | 47 | /// an optional limit for the maximum allowable edit distance, which defaults
|
47 | 48 | /// to one-third of the given word.
|
| 49 | +/// |
48 | 50 | /// Besides Levenshtein, we use case-insensitive comparison to improve accuracy on an edge case with
|
49 | 51 | /// a lower(upper)case letter mismatch.
|
50 | 52 | pub fn find_best_match_for_name<'a, T>(iter_names: T,
|
@@ -105,3 +107,39 @@ fn test_lev_distance() {
|
105 | 107 | assert_eq!(lev_distance(b, c), 1);
|
106 | 108 | assert_eq!(lev_distance(c, b), 1);
|
107 | 109 | }
|
| 110 | + |
| 111 | +#[test] |
| 112 | +fn test_find_best_match_for_name() { |
| 113 | + use with_globals; |
| 114 | + with_globals(|| { |
| 115 | + let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")]; |
| 116 | + assert_eq!( |
| 117 | + find_best_match_for_name(input.iter(), "aaaa", None), |
| 118 | + Some(Symbol::intern("aaab")) |
| 119 | + ); |
| 120 | + |
| 121 | + assert_eq!( |
| 122 | + find_best_match_for_name(input.iter(), "1111111111", None), |
| 123 | + None |
| 124 | + ); |
| 125 | + |
| 126 | + let input = vec![Symbol::intern("aAAA")]; |
| 127 | + assert_eq!( |
| 128 | + find_best_match_for_name(input.iter(), "AAAA", None), |
| 129 | + Some(Symbol::intern("aAAA")) |
| 130 | + ); |
| 131 | + |
| 132 | + let input = vec![Symbol::intern("AAAA")]; |
| 133 | + // Returns None because the edit distance exceeds the default limit (one-third of the word length) |
| 134 | + assert_eq!( |
| 135 | + find_best_match_for_name(input.iter(), "aaaa", None), |
| 136 | + None |
| 137 | + ); |
| 138 | + |
| 139 | + let input = vec![Symbol::intern("AAAA")]; |
| 140 | + assert_eq!( |
| 141 | + find_best_match_for_name(input.iter(), "aaaa", Some(4)), |
| 142 | + Some(Symbol::intern("AAAA")) |
| 143 | + ); |
| 144 | + }) |
| 145 | +} |
0 commit comments