diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index d12eadbf7..6cc30fe10 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -225,18 +225,6 @@ def test_sent_tokenize(self):
             sent_tokenize("รักน้ำ รักปลา ", engine="whitespace+newline"),
             ["รักน้ำ", "รักปลา"],
         )
-        self.assertEqual(
-            sent_tokenize(SENT_1),
-            SENT_1_TOKS,
-        )
-        self.assertEqual(
-            sent_tokenize(SENT_2),
-            SENT_2_TOKS,
-        )
-        self.assertEqual(
-            sent_tokenize(SENT_3),
-            SENT_3_TOKS,
-        )
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
diff --git a/tests/testx_tokenize.py b/tests/testx_tokenize.py
index 72103515c..c5fddafa0 100644
--- a/tests/testx_tokenize.py
+++ b/tests/testx_tokenize.py
@@ -92,31 +92,35 @@ def testx_sent_tokenize(self):
         # Use default engine (crfcut)
         self.assertEqual(sent_tokenize(None), [])
         self.assertEqual(sent_tokenize(""), [])
-
         self.assertEqual(
-            sent_tokenize(SENT_1, engine="crfcut"),
+            sent_tokenize(SENT_1),
             SENT_1_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_2, engine="crfcut"),
+            sent_tokenize(SENT_2),
             SENT_2_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_3, engine="crfcut"),
+            sent_tokenize(SENT_3),
             SENT_3_TOKS,
         )
+
         self.assertEqual(
-            sent_tokenize(SENT_1),
+            sent_tokenize(SENT_1, engine="crfcut"),
             SENT_1_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_2),
+            sent_tokenize(SENT_2, engine="crfcut"),
             SENT_2_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_3),
+            sent_tokenize(SENT_3, engine="crfcut"),
             SENT_3_TOKS,
         )
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="crfcut"),
+            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
+        )
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
@@ -135,6 +139,7 @@ def testx_sent_tokenize(self):
                 engine="tltk",
             ),
         )
+
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
@@ -153,6 +158,11 @@ def testx_sent_tokenize(self):
                 engine="thaisum",
             ),
         )
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="thaisum"),
+            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
+        )
+
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_3,
@@ -177,14 +187,6 @@ def testx_sent_tokenize(self):
         #         engine="wtp-large",
         #     ),
         # )
-        self.assertEqual(
-            sent_tokenize(SENT_4, engine="crfcut"),
-            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
-        )
-        self.assertEqual(
-            sent_tokenize(SENT_4, engine="thaisum"),
-            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
-        )
 
     def testx_word_tokenize(self):
         self.assertIsNotNone(word_tokenize(TEXT_1, engine="nlpo3"))