23
23
*/
24
24
25
25
using System ;
26
+ using System . Data . SqlTypes ;
26
27
using System . Linq ;
27
28
using F23 . StringSimilarity . Interfaces ;
28
29
// ReSharper disable SuggestVarOrType_Elsewhere
@@ -38,7 +39,7 @@ namespace F23.StringSimilarity
38
39
/// Jaro-Winkler was developed in the area of record linkage (duplicate
39
40
/// detection) (Winkler, 1990). It returns a value in the interval [0.0, 1.0].
40
41
/// The distance is computed as 1 - Jaro-Winkler similarity.
41
- public class JaroWinkler : INormalizedStringSimilarity , INormalizedStringDistance
42
+ public class JaroWinkler : INormalizedStringSimilarity , INormalizedStringDistance , INormalizedSpanSimilarity , INormalizedSpanDistance
42
43
{
43
44
private const double DEFAULT_THRESHOLD = 0.7 ;
44
45
private const int THREE = 3 ;
@@ -75,6 +76,10 @@ public JaroWinkler(double threshold)
75
76
/// <returns>The Jaro-Winkler similarity in the range [0, 1]</returns>
76
77
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
77
78
public double Similarity ( string s1 , string s2 )
79
// Forwards to the span-based overload, passing each string as a span via AsSpan().
// NOTE(review): the explicit null checks live in the span overload; confirm that a
// null string still surfaces as ArgumentNullException, as the XML doc above promises.
+ => Similarity ( s1 . AsSpan ( ) , s2 . AsSpan ( ) ) ;
80
+
81
+ public double Similarity < T > ( ReadOnlySpan < T > s1 , ReadOnlySpan < T > s2 )
82
+ where T : IEquatable < T >
78
83
{
79
84
if ( s1 == null )
80
85
{
@@ -86,7 +91,7 @@ public double Similarity(string s1, string s2)
86
91
throw new ArgumentNullException ( nameof ( s2 ) ) ;
87
92
}
88
93
89
- if ( s1 . Equals ( s2 ) )
94
+ if ( s1 . SequenceEqual ( s2 ) )
90
95
{
91
96
return 1f ;
92
97
}
@@ -117,10 +122,15 @@ public double Similarity(string s1, string s2)
117
122
/// <exception cref="ArgumentNullException">If s1 or s2 is null.</exception>
118
123
public double Distance ( string s1 , string s2 )
119
124
// The distance is the complement of the similarity: 1.0 minus Similarity(s1, s2).
=> 1.0 - Similarity ( s1 , s2 ) ;
125
+
126
/// <summary>
/// Span-based distance: returns 1.0 minus the result of
/// Similarity(s1, s2) for the same two spans.
/// </summary>
/// <typeparam name="T">Element type of the spans; must implement IEquatable&lt;T&gt;.</typeparam>
/// <param name="s1">The first sequence to compare.</param>
/// <param name="s2">The second sequence to compare.</param>
/// <returns>1.0 - Similarity(s1, s2).</returns>
+ public double Distance < T > ( ReadOnlySpan < T > s1 , ReadOnlySpan < T > s2 )
127
+ where T : IEquatable < T >
128
+ => 1.0 - Similarity ( s1 , s2 ) ;
120
129
121
- private static int [ ] Matches ( string s1 , string s2 )
130
+ private static int [ ] Matches < T > ( ReadOnlySpan < T > s1 , ReadOnlySpan < T > s2 )
131
+ where T : IEquatable < T >
122
132
{
123
- string max , min ;
133
+ ReadOnlySpan < T > max , min ;
124
134
if ( s1 . Length > s2 . Length )
125
135
{
126
136
max = s1 ;
@@ -141,11 +151,11 @@ private static int[] Matches(string s1, string s2)
141
151
int matches = 0 ;
142
152
for ( int mi = 0 ; mi < min . Length ; mi ++ )
143
153
{
144
- char c1 = min [ mi ] ;
154
+ var c1 = min [ mi ] ;
145
155
for ( int xi = Math . Max ( mi - range , 0 ) ,
146
156
xn = Math . Min ( mi + range + 1 , max . Length ) ; xi < xn ; xi ++ )
147
157
{
148
- if ( ! match_flags [ xi ] && c1 == max [ xi ] )
158
+ if ( ! match_flags [ xi ] && c1 . Equals ( max [ xi ] ) )
149
159
{
150
160
match_indexes [ mi ] = xi ;
151
161
match_flags [ xi ] = true ;
@@ -154,8 +164,8 @@ private static int[] Matches(string s1, string s2)
154
164
}
155
165
}
156
166
}
157
- char [ ] ms1 = new char [ matches ] ;
158
- char [ ] ms2 = new char [ matches ] ;
167
+ T [ ] ms1 = new T [ matches ] ;
168
+ T [ ] ms2 = new T [ matches ] ;
159
169
for ( int i = 0 , si = 0 ; i < min . Length ; i ++ )
160
170
{
161
171
if ( match_indexes [ i ] != - 1 )
@@ -175,15 +185,15 @@ private static int[] Matches(string s1, string s2)
175
185
int transpositions = 0 ;
176
186
for ( int mi = 0 ; mi < ms1 . Length ; mi ++ )
177
187
{
178
- if ( ms1 [ mi ] != ms2 [ mi ] )
188
+ if ( ! ms1 [ mi ] . Equals ( ms2 [ mi ] ) )
179
189
{
180
190
transpositions ++ ;
181
191
}
182
192
}
183
193
int prefix = 0 ;
184
194
for ( int mi = 0 ; mi < min . Length ; mi ++ )
185
195
{
186
- if ( s1 [ mi ] == s2 [ mi ] )
196
+ if ( s1 [ mi ] . Equals ( s2 [ mi ] ) )
187
197
{
188
198
prefix ++ ;
189
199
}
0 commit comments