Skip to content

Commit 836e7f1

Browse files
authored
Merge pull request #926 from WTFPUn/tone_collate
Tone collate fixed
2 parents 3fea2f7 + aa66f24 commit 836e7f1

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

pythainlp/util/collate.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,9 @@
1515
def _thkey(word: str) -> str:
1616
cv = _RE_TONE.sub("", word) # remove tone
1717
cv = _RE_LV_C.sub("\\2\\1", cv) # switch lead vowel
18-
tone = _RE_TONE.sub(" ", word) # just tone
18+
19+
tone_match = _RE_TONE.search(word)
20+
tone = tone_match.group() if tone_match else ""
1921
return cv + tone
2022

2123

tests/test_util.py

+8
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,14 @@ def test_collate(self):
7979
collate(["ไก่", "เป็ด", "หมู", "วัว"]),
8080
["ไก่", "เป็ด", "วัว", "หมู"],
8181
)
82+
self.assertEqual(
83+
collate(["ก้วย", "ก๋วย", "กวย", "ก่วย", "ก๊วย"]),
84+
collate(["ก๋วย", "ก่วย", "ก้วย", "ก๊วย", "กวย"]),
85+
) # should guarantee same order
86+
self.assertEqual(
87+
collate(["ก้วย", "ก๋วย", "ก่วย", "กวย", "ก้วย", "ก่วย", "ก๊วย"]),
88+
["กวย", "ก่วย", "ก่วย", "ก้วย", "ก้วย", "ก๊วย", "ก๋วย"],
89+
)
8290

8391
# ### pythainlp.util.numtoword
8492

0 commit comments

Comments
 (0)