Skip to content

Commit 5145d37

Browse files
committed
- more test cases for bahttext, num_to_thaiword, thaiword_to_num
- handle 0 and None
1 parent 48d307f commit 5145d37

File tree

3 files changed

+49
-11
lines changed

3 files changed

+49
-11
lines changed

pythainlp/number/thainum.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ def bahttext(amount_number):
9292
9393
Similar to the BAHTTEXT function in Excel
9494
"""
95+
if amount_number is None:
96+
return ""
97+
98+
if amount_number == 0:
99+
return "ศูนย์บาทถ้วน"
100+
95101
amount_number = number_format(amount_number, 2).replace(" ", "")
96102
pt = amount_number.find(".")
97103
number, fraction = "", ""
@@ -106,11 +112,13 @@ def bahttext(amount_number):
106112

107113
ret = ""
108114
number = ast.literal_eval(number.replace(",", ""))
115+
109116
baht = num_to_thaiword(number)
110117
if baht != "":
111118
ret = "".join([ret, baht, "บาท"])
119+
112120
satang = num_to_thaiword(fraction)
113-
if satang != "":
121+
if satang != "" and satang != "ศูนย์":
114122
ret = "".join([ret, satang, "สตางค์"])
115123
else:
116124
ret = "".join([ret, "ถ้วน"])
@@ -123,12 +131,16 @@ def num_to_thaiword(number):
123131
:param float number: a float number (with decimals) indicating a quantity
124132
:return: a text that indicates the full amount in word form, properly ending each digit with the right term.
125133
"""
134+
if number is None:
135+
return ""
136+
137+
if number == 0:
138+
return "ศูนย์"
139+
126140
position_call = ["แสน", "หมื่น", "พัน", "ร้อย", "สิบ", ""]
127141
number_call = ["", "หนึ่ง", "สอง", "สาม", "สี่", "ห้า", "หก", "เจ็ด", "แปด", "เก้า"]
128142

129143
ret = ""
130-
if number == 0:
131-
return ret
132144
if number > 1000000:
133145
ret += num_to_thaiword(int(number / 1000000)) + "ล้าน"
134146
number = int(math.fmod(number, 1000000))
@@ -137,6 +149,7 @@ def num_to_thaiword(number):
137149
pos = 0
138150
while number > 0:
139151
d = int(number / divider)
152+
140153
if (divider == 10) and (d == 2):
141154
ret += "ยี่"
142155
elif (divider == 10) and (d == 1):
@@ -145,10 +158,12 @@ def num_to_thaiword(number):
145158
ret += "เอ็ด"
146159
else:
147160
ret += number_call[d]
161+
148162
if d:
149163
ret += position_call[pos]
150164
else:
151165
ret += ""
166+
152167
number = number % divider
153168
divider = divider / 10
154169
pos += 1
@@ -157,4 +172,4 @@ def num_to_thaiword(number):
157172

158173

159174
if __name__ == "__main__":
160-
print(bahtext(4000.0))
175+
print(bahttext(4000.0))

pythainlp/number/wordtonum.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
"""
88
import re
99

10+
from pythainlp.tokenize import Tokenizer
11+
1012
_THAIWORD_NUMS = set("ศูนย์ หนึ่ง เอ็ด สอง ยี่ สาม สี่ ห้า หก เจ็ด แปด เก้า".split())
1113
_THAIWORD_UNITS = set("สิบ ร้อย พัน หมื่น แสน ล้าน".split())
1214
_THAIWORD_NUMS_UNITS = _THAIWORD_NUMS | _THAIWORD_UNITS
@@ -34,12 +36,14 @@
3436
_NU_PAT = re.compile("(.+)?(สิบ|ร้อย|พัน|หมื่น|แสน|ล้าน)(.+)?") # หกสิบ, ร้อยเอ็ด
3537
# assuming that the units are separated already
3638

39+
_TOKENIZER = Tokenizer(custom_dict=_THAIWORD_NUMS_UNITS)
40+
3741

3842
def _thaiword_to_num(tokens):
3943
len_tokens = len(tokens)
4044

4145
if len_tokens == 0:
42-
return 0
46+
return None
4347

4448
if len_tokens == 1:
4549
return _THAI_INT_MAP[tokens[0]]
@@ -61,7 +65,17 @@ def _thaiword_to_num(tokens):
6165
return _THAI_INT_MAP[a] * _THAI_INT_MAP[b] + _thaiword_to_num(tokens[2:])
6266

6367

64-
def thaiword_to_num(tokens):
68+
def thaiword_to_num(thaiword):
69+
if not thaiword:
70+
return None
71+
72+
tokens = []
73+
if type(thaiword) == str:
74+
tokens = _TOKENIZER.word_tokenize(thaiword)
75+
elif type(thaiword) in (list, tuple, set, frozenset):
76+
for w in thaiword:
77+
tokens.extend(_TOKENIZER.word_tokenize(w))
78+
6579
res = []
6680
for tok in tokens:
6781
if tok in _THAIWORD_NUMS_UNITS:
@@ -72,4 +86,5 @@ def thaiword_to_num(tokens):
7286
res.extend([t for t in m.groups() if t]) # ตัด None ทิ้ง
7387
else:
7488
pass # should not be here
89+
7590
return _thaiword_to_num(res)

tests/__init__.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -152,17 +152,25 @@ def test_number(self):
152152
bahttext(5611116.50),
153153
"ห้าล้านหกแสนหนึ่งหมื่นหนึ่งพันหนึ่งร้อยสิบหกบาทห้าสิบสตางค์",
154154
)
155-
self.assertEqual(
156-
bahttext(116),
157-
"หนึ่งร้อยสิบหกบาทถ้วน",
158-
)
155+
self.assertEqual(bahttext(116), "หนึ่งร้อยสิบหกบาทถ้วน")
156+
self.assertEqual(bahttext(0), "ศูนย์บาทถ้วน")
157+
self.assertEqual(bahttext(None), "")
158+
159159
self.assertEqual(num_to_thaiword(112), "หนึ่งร้อยสิบสอง")
160+
self.assertEqual(num_to_thaiword(0), "ศูนย์")
161+
self.assertEqual(num_to_thaiword(None), "")
162+
163+
self.assertEqual(thaiword_to_num("ร้อยสิบสอง"), 112)
160164
self.assertEqual(
161165
thaiword_to_num(
162-
["หก", "ล้าน", "หกแสน", "หกหมื่น", "หกพัน", "หกร้อย", "หกสิบ", "หก"]
166+
["หก", "ล้าน", "หก", "แสน", "หกหมื่น", "หกพัน", "หกร้อย", "หกสิบ", "หก"]
163167
),
164168
6666666,
165169
)
170+
self.assertEqual(thaiword_to_num("ยี่สิบ"), 20)
171+
self.assertEqual(thaiword_to_num("ศูนย์"), 0)
172+
self.assertEqual(thaiword_to_num(""), None)
173+
self.assertEqual(thaiword_to_num(None), None)
166174

167175
# ### pythainlp.rank
168176

0 commit comments

Comments
 (0)