@@ -92,31 +92,35 @@ def testx_sent_tokenize(self):
92
92
# Use default engine (crfcut)
93
93
self .assertEqual (sent_tokenize (None ), [])
94
94
self .assertEqual (sent_tokenize ("" ), [])
95
-
96
95
self .assertEqual (
97
- sent_tokenize (SENT_1 , engine = "crfcut" ),
96
+ sent_tokenize (SENT_1 ),
98
97
SENT_1_TOKS ,
99
98
)
100
99
self .assertEqual (
101
- sent_tokenize (SENT_2 , engine = "crfcut" ),
100
+ sent_tokenize (SENT_2 ),
102
101
SENT_2_TOKS ,
103
102
)
104
103
self .assertEqual (
105
- sent_tokenize (SENT_3 , engine = "crfcut" ),
104
+ sent_tokenize (SENT_3 ),
106
105
SENT_3_TOKS ,
107
106
)
107
+
108
108
self .assertEqual (
109
- sent_tokenize (SENT_1 ),
109
+ sent_tokenize (SENT_1 , engine = "crfcut" ),
110
110
SENT_1_TOKS ,
111
111
)
112
112
self .assertEqual (
113
- sent_tokenize (SENT_2 ),
113
+ sent_tokenize (SENT_2 , engine = "crfcut" ),
114
114
SENT_2_TOKS ,
115
115
)
116
116
self .assertEqual (
117
- sent_tokenize (SENT_3 ),
117
+ sent_tokenize (SENT_3 , engine = "crfcut" ),
118
118
SENT_3_TOKS ,
119
119
)
120
+ self .assertEqual (
121
+ sent_tokenize (SENT_4 , engine = "crfcut" ),
122
+ [["ผม" , "กิน" , "ข้าว" , " " , "\n " , "เธอ" , "เล่น" , "เกม" ]],
123
+ )
120
124
self .assertIsNotNone (
121
125
sent_tokenize (
122
126
SENT_1 ,
@@ -135,6 +139,7 @@ def testx_sent_tokenize(self):
135
139
engine = "tltk" ,
136
140
),
137
141
)
142
+
138
143
self .assertIsNotNone (
139
144
sent_tokenize (
140
145
SENT_1 ,
@@ -153,6 +158,11 @@ def testx_sent_tokenize(self):
153
158
engine = "thaisum" ,
154
159
),
155
160
)
161
+ self .assertEqual (
162
+ sent_tokenize (SENT_4 , engine = "thaisum" ),
163
+ [["ผม" , "กิน" , "ข้าว" , " " , "เธอ" , "เล่น" , "เกม" ]],
164
+ )
165
+
156
166
self .assertIsNotNone (
157
167
sent_tokenize (
158
168
SENT_3 ,
@@ -177,14 +187,6 @@ def testx_sent_tokenize(self):
177
187
# engine="wtp-large",
178
188
# ),
179
189
# )
180
- self .assertEqual (
181
- sent_tokenize (SENT_4 , engine = "crfcut" ),
182
- [["ผม" , "กิน" , "ข้าว" , " " , "\n " , "เธอ" , "เล่น" , "เกม" ]],
183
- )
184
- self .assertEqual (
185
- sent_tokenize (SENT_4 , engine = "thaisum" ),
186
- [["ผม" , "กิน" , "ข้าว" , " " , "เธอ" , "เล่น" , "เกม" ]],
187
- )
188
190
189
191
def testx_word_tokenize (self ):
190
192
self .assertIsNotNone (word_tokenize (TEXT_1 , engine = "nlpo3" ))
0 commit comments