Skip to content

Commit 34e2e01

Browse files
authored
Merge pull request #671 from PyThaiNLP/fixed-#666-2
Fixed #666 again
2 parents bcc596e + 5138b75 commit 34e2e01

File tree

5 files changed

+16
-14
lines changed

5 files changed

+16
-14
lines changed

pythainlp/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
__version__ = "3.0.7"
2+
__version__ = "3.0.8"
33

44
thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars
55

pythainlp/tokenize/nercut.py

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -65,18 +65,15 @@ def segment(
6565
words.append(combining_word)
6666
combining_word = ""
6767
words.append(curr_word)
68-
else:
68+
else: # if tag is O
6969
combining_word = ""
7070
words.append(curr_word)
7171
if idx + 1 == len(tagged_words):
72-
if (
73-
curr_tag.startswith("B-") or curr_tag == "O"
74-
) and combining_word != "":
72+
if curr_tag.startswith("B-") and combining_word != "":
7573
words.append(combining_word)
76-
combining_word = ""
77-
words.append(curr_word)
78-
else: # if tag is O
79-
combining_word += curr_word
74+
elif curr_tag.startswith("I-") and combining_word != "":
8075
words.append(combining_word)
76+
else:
77+
pass
8178

8279
return words

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 3.0.7
2+
current_version = 3.0.8
33
commit = True
44
tag = True
55
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@
105105

106106
setup(
107107
name="pythainlp",
108-
version="3.0.7",
108+
version="3.0.8",
109109
description="Thai Natural Language Processing library",
110110
long_description=readme,
111111
long_description_content_type="text/markdown",

tests/test_tokenize.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -610,9 +610,14 @@ def test_nercut(self):
610610
self.assertEqual(nercut.segment(None), [])
611611
self.assertEqual(nercut.segment(""), [])
612612
self.assertIsNotNone(nercut.segment("ทดสอบ"))
613-
self.assertIsNotNone(nercut.segment("ทุ๊กกโคนน"))
614-
self.assertIsNotNone(nercut.segment("อือหือ"))
615-
self.assertIsNotNone(nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ"))
613+
self.assertEqual(nercut.segment("ทันแน่ๆ"), ['ทัน', 'แน่ๆ'])
614+
self.assertEqual(nercut.segment("%1ครั้ง"), ['%', '1', 'ครั้ง'])
615+
self.assertEqual(nercut.segment("ทุ๊กกโคนน"), ['ทุ๊กกโคนน'])
616+
self.assertEqual(nercut.segment("อือหือ"), ['อือหือ'])
617+
self.assertEqual(
618+
nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ"),
619+
['อย่าลืมอัพการ์ดนะจ๊ะ']
620+
)
616621
self.assertIsNotNone(word_tokenize("ทดสอบ", engine="nercut"))
617622

618623
def test_ssg(self):

0 commit comments

Comments
 (0)