
Commit 588bd52

Merge pull request #960 from bact/mv-sent-tokenize-test
Move more sent_tokenize test
2 parents: 057de9b + 3d1bd1a

2 files changed: +17 −27 lines

tests/test_tokenize.py (−12 lines)
@@ -225,18 +225,6 @@ def test_sent_tokenize(self):
             sent_tokenize("รักน้ำ รักปลา ", engine="whitespace+newline"),
             ["รักน้ำ", "รักปลา"],
         )
-        self.assertEqual(
-            sent_tokenize(SENT_1),
-            SENT_1_TOKS,
-        )
-        self.assertEqual(
-            sent_tokenize(SENT_2),
-            SENT_2_TOKS,
-        )
-        self.assertEqual(
-            sent_tokenize(SENT_3),
-            SENT_3_TOKS,
-        )
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
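Side note (not part of the commit): a minimal sketch of the API these tests exercise. pythainlp's sent_tokenize falls back to its default engine, crfcut (per the "# Use default engine (crfcut)" comment in the second file below), when no engine is named, which is what makes the removed assertions above redundant with the explicit crfcut ones now kept in testx_tokenize.py. The sample string is hypothetical; SENT_1 through SENT_4 are fixtures defined elsewhere in the test modules.

    # Sketch only -- assumes pythainlp is installed.
    from pythainlp.tokenize import sent_tokenize

    text = "ผมกินข้าว \nเธอเล่นเกม"  # hypothetical sample, in the spirit of SENT_4
    # Omitting engine= should be equivalent to naming the default explicitly.
    assert sent_tokenize(text) == sent_tokenize(text, engine="crfcut")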

tests/testx_tokenize.py (+17 −15 lines)
@@ -92,31 +92,35 @@ def testx_sent_tokenize(self):
         # Use default engine (crfcut)
         self.assertEqual(sent_tokenize(None), [])
         self.assertEqual(sent_tokenize(""), [])
-
         self.assertEqual(
-            sent_tokenize(SENT_1, engine="crfcut"),
+            sent_tokenize(SENT_1),
             SENT_1_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_2, engine="crfcut"),
+            sent_tokenize(SENT_2),
             SENT_2_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_3, engine="crfcut"),
+            sent_tokenize(SENT_3),
             SENT_3_TOKS,
         )
+
         self.assertEqual(
-            sent_tokenize(SENT_1),
+            sent_tokenize(SENT_1, engine="crfcut"),
             SENT_1_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_2),
+            sent_tokenize(SENT_2, engine="crfcut"),
             SENT_2_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_3),
+            sent_tokenize(SENT_3, engine="crfcut"),
             SENT_3_TOKS,
         )
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="crfcut"),
+            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
+        )
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
@@ -135,6 +139,7 @@ def testx_sent_tokenize(self):
                 engine="tltk",
             ),
         )
+
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
@@ -153,6 +158,11 @@ def testx_sent_tokenize(self):
                 engine="thaisum",
             ),
         )
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="thaisum"),
+            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
+        )
+
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_3,
@@ -177,14 +187,6 @@ def testx_sent_tokenize(self):
         # engine="wtp-large",
         # ),
         # )
-        self.assertEqual(
-            sent_tokenize(SENT_4, engine="crfcut"),
-            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
-        )
-        self.assertEqual(
-            sent_tokenize(SENT_4, engine="thaisum"),
-            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
-        )
 
     def testx_word_tokenize(self):
         self.assertIsNotNone(word_tokenize(TEXT_1, engine="nlpo3"))
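Side note (not part of the commit): the same API with an engine named explicitly. The expectation below is copied verbatim from the context line in tests/test_tokenize.py above; unlike crfcut, thaisum, or tltk, the whitespace+newline engine needs no trained model, so it can be asserted safely.

    from pythainlp.tokenize import sent_tokenize

    # Expectation taken from the test_sent_tokenize context above:
    # whitespace+newline splits on spaces/newlines, dropping empty pieces.
    assert sent_tokenize("รักน้ำ รักปลา ", engine="whitespace+newline") == [
        "รักน้ำ",
        "รักปลา",
    ]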
