@@ -22,18 +22,10 @@ public class EdgeNGramTests : TokenizerAssertionBase<EdgeNGramTests>
	public override ITokenizer Initializer => new EdgeNGramTokenizer
	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
	};

-	public override object Json => new
-	{
-		min_gram = 1,
-		max_gram = 2,
-		token_chars = new[] { "digit", "letter" },
-		type = "edge_ngram"
-	};
+	public override object Json => new { min_gram = 1, max_gram = 2, token_chars = new[] { "digit", "letter" }, type = "edge_ngram" };

	public override string Name => "endgen";
}
@@ -50,10 +42,7 @@ public class EdgeNGramCustomTokenCharsTests : TokenizerAssertionBase<EdgeNGramCu
	public override ITokenizer Initializer => new EdgeNGramTokenizer
	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Custom },
-		CustomTokenChars = "+-_"
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Custom }, CustomTokenChars = "+-_"
	};

	public override object Json => new
@@ -62,7 +51,7 @@ public class EdgeNGramCustomTokenCharsTests : TokenizerAssertionBase<EdgeNGramCu
		max_gram = 2,
		token_chars = new[] { "custom" },
		custom_token_chars = "+-_",
-		type = "edge_ngram"
+		type = "edge_ngram"
	};

	public override string Name => "endgen_custom";
@@ -78,18 +67,10 @@ public class NGramTests : TokenizerAssertionBase<NGramTests>
	public override ITokenizer Initializer => new NGramTokenizer
	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Digit, TokenChar.Letter }
	};

-	public override object Json => new
-	{
-		min_gram = 1,
-		max_gram = 2,
-		token_chars = new[] { "digit", "letter" },
-		type = "ngram"
-	};
+	public override object Json => new { min_gram = 1, max_gram = 2, token_chars = new[] { "digit", "letter" }, type = "ngram" };

	public override string Name => "ng";
}
@@ -106,10 +87,7 @@ public class NGramCustomTokenCharsTests : TokenizerAssertionBase<NGramCustomToke
	public override ITokenizer Initializer => new NGramTokenizer
	{
-		MaxGram = 2,
-		MinGram = 1,
-		TokenChars = new[] { TokenChar.Custom },
-		CustomTokenChars = "+-_"
+		MaxGram = 2, MinGram = 1, TokenChars = new[] { TokenChar.Custom }, CustomTokenChars = "+-_"
	};

	public override object Json => new
@@ -164,16 +142,9 @@ public class IcuTests : TokenizerAssertionBase<IcuTests>
		.RuleFiles(RuleFiles)
	);

-	public override ITokenizer Initializer => new IcuTokenizer
-	{
-		RuleFiles = RuleFiles,
-	};
+	public override ITokenizer Initializer => new IcuTokenizer { RuleFiles = RuleFiles, };

-	public override object Json => new
-	{
-		rule_files = RuleFiles,
-		type = "icu_tokenizer"
-	};
+	public override object Json => new { rule_files = RuleFiles, type = "icu_tokenizer" };

	public override string Name => "icu";
}
@@ -198,7 +169,7 @@ public class KuromojiTests : TokenizerAssertionBase<KuromojiTests>
		DiscardPunctuation = true,
		NBestExamples = Example,
		NBestCost = 1000,
-		UserDictionaryRules = new[] { Inline }
+		UserDictionaryRules = new[] { Inline }
	};

	public override object Json => new
@@ -208,7 +179,7 @@ public class KuromojiTests : TokenizerAssertionBase<KuromojiTests>
		nbest_cost = 1000,
		nbest_examples = Example,
		type = "kuromoji_tokenizer",
-		user_dictionary_rules = new[] { Inline }
+		user_dictionary_rules = new[] { Inline }
	};

	public override string Name => "kuro";
@@ -228,18 +199,9 @@ public class KuromojiDiscardCompoundTokenTests : TokenizerAssertionBase<Kuromoji
		.DiscardCompoundToken()
	);

-	public override ITokenizer Initializer => new KuromojiTokenizer
-	{
-		Mode = KuromojiTokenizationMode.Search,
-		DiscardCompoundToken = true,
-	};
+	public override ITokenizer Initializer => new KuromojiTokenizer { Mode = KuromojiTokenizationMode.Search, DiscardCompoundToken = true, };

-	public override object Json => new
-	{
-		discard_compound_token = true,
-		mode = "search",
-		type = "kuromoji_tokenizer",
-	};
+	public override object Json => new { discard_compound_token = true, mode = "search", type = "kuromoji_tokenizer", };

	public override string Name => "kuro_discard_compound_token";
}
@@ -252,11 +214,7 @@ public class UaxTests : TokenizerAssertionBase<UaxTests>
	public override ITokenizer Initializer => new UaxEmailUrlTokenizer { MaxTokenLength = 12 };

-	public override object Json => new
-	{
-		max_token_length = 12,
-		type = "uax_url_email"
-	};
+	public override object Json => new { max_token_length = 12, type = "uax_url_email" };

	public override string Name => "uax";
}
@@ -269,20 +227,9 @@ public class PatternTests : TokenizerAssertionBase<PatternTests>
		.Pattern(@"\W+")
	);

-	public override ITokenizer Initializer => new PatternTokenizer
-	{
-		Flags = "CASE_INSENSITIVE",
-		Group = 1,
-		Pattern = @"\W+"
-	};
+	public override ITokenizer Initializer => new PatternTokenizer { Flags = "CASE_INSENSITIVE", Group = 1, Pattern = @"\W+" };

-	public override object Json => new
-	{
-		pattern = @"\W+",
-		flags = "CASE_INSENSITIVE",
-		group = 1,
-		type = "pattern"
-	};
+	public override object Json => new { pattern = @"\W+", flags = "CASE_INSENSITIVE", group = 1, type = "pattern" };

	public override string Name => "pat";
}
@@ -312,10 +259,7 @@ public class NoriTests : TokenizerAssertionBase<NoriTests>
		.DecompoundMode(NoriDecompoundMode.Mixed)
	);

-	public override ITokenizer Initializer => new NoriTokenizer
-	{
-		DecompoundMode = NoriDecompoundMode.Mixed
-	};
+	public override ITokenizer Initializer => new NoriTokenizer { DecompoundMode = NoriDecompoundMode.Mixed };

	public override object Json => new { type = "nori_tokenizer", decompound_mode = "mixed" };
	public override string Name => "nori";
@@ -331,16 +275,14 @@ public class NoriWithUserDictionaryTests : TokenizerAssertionBase<NoriWithUserDi
	public override ITokenizer Initializer => new NoriTokenizer
	{
-		DecompoundMode = NoriDecompoundMode.Mixed,
-		UserDictionaryRules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
+		DecompoundMode = NoriDecompoundMode.Mixed, UserDictionaryRules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
	};

	public override object Json => new
	{
-		type = "nori_tokenizer",
-		decompound_mode = "mixed",
-		user_dictionary_rules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
+		type = "nori_tokenizer", decompound_mode = "mixed", user_dictionary_rules = new[] { "c++", "C샤프", "세종", "세종시 세종 시" }
	};
+
	public override string Name => "nori_userdictionary";
}
@@ -353,16 +295,9 @@ public class CharGroupTests : TokenizerAssertionBase<CharGroupTests>
		.TokenizeOnCharacters(_chars)
	);

-	public override ITokenizer Initializer => new CharGroupTokenizer
-	{
-		TokenizeOnCharacters = _chars
-	};
+	public override ITokenizer Initializer => new CharGroupTokenizer { TokenizeOnCharacters = _chars };

-	public override object Json => new
-	{
-		tokenize_on_chars = _chars,
-		type = "char_group"
-	};
+	public override object Json => new { tokenize_on_chars = _chars, type = "char_group" };

	public override string Name => "char_group";
}
@@ -377,18 +312,9 @@ public class CharGroupMaxTokenLengthTests : TokenizerAssertionBase<CharGroupMaxT
		.MaxTokenLength(255)
	);

-	public override ITokenizer Initializer => new CharGroupTokenizer
-	{
-		TokenizeOnCharacters = _chars,
-		MaxTokenLength = 255
-	};
+	public override ITokenizer Initializer => new CharGroupTokenizer { TokenizeOnCharacters = _chars, MaxTokenLength = 255 };

-	public override object Json => new
-	{
-		tokenize_on_chars = _chars,
-		type = "char_group",
-		max_token_length = 255
-	};
+	public override object Json => new { tokenize_on_chars = _chars, type = "char_group", max_token_length = 255 };

	public override string Name => "char_group_max_token_length";
}
@@ -400,13 +326,38 @@ public class DiscardPunctuationTests : TokenizerAssertionBase<DiscardPunctuation
		.DiscardPunctuation()
	);

-	public override ITokenizer Initializer => new NoriTokenizer
-	{
-		DiscardPunctuation = true
-	};
+	public override ITokenizer Initializer => new NoriTokenizer { DiscardPunctuation = true };

	public override object Json => new { type = "nori_tokenizer", discard_punctuation = true };
	public override string Name => "nori-discard";
}
+
+[SkipVersion("<7.7.0", "simple_pattern experimental until 7.7.0")]
+public class SimplePatternTests : TokenizerAssertionBase<SimplePatternTests>
+{
+	public override FuncTokenizer Fluent => (n, t) => t.SimplePattern(n, e => e
+		.Pattern(@"\W+")
+	);
+
+	public override ITokenizer Initializer => new SimplePatternTokenizer { Pattern = @"\W+" };
+
+	public override object Json => new { pattern = @"\W+", type = "simple_pattern" };
+
+	public override string Name => "simple-pattern";
+}
+
+[SkipVersion("<7.7.0", "simple_pattern_split experimental until 7.7.0")]
+public class SimplePatternSplitTests : TokenizerAssertionBase<SimplePatternSplitTests>
+{
+	public override FuncTokenizer Fluent => (n, t) => t.SimplePatternSplit(n, e => e
+		.Pattern(@"\W+")
+	);
+
+	public override ITokenizer Initializer => new SimplePatternSplitTokenizer { Pattern = @"\W+" };
+
+	public override object Json => new { pattern = @"\W+", type = "simple_pattern_split" };
+
+	public override string Name => "simple-pattern-split";
+}
}
}