@@ -14,17 +14,19 @@ def romanize(
14
14
fallback_engine : str = DEFAULT_ROMANIZE_ENGINE ,
15
15
) -> str :
16
16
"""
17
- This function renders Thai words in the Latin alphabet or "romanization",
17
+ This function renders a Thai word in the Latin alphabet or "romanization",
18
18
using the Royal Thai General System of Transcription (RTGS)
19
19
[#rtgs_transcription]_. RTGS is the official system published
20
20
by the Royal Institute of Thailand. (Thai: ถอดเสียงภาษาไทยเป็นอักษรละติน)
21
21
22
- :param str text: Thai text to be romanized
22
+ :param str text: A Thai word to be romanized. \
23
+ The input should not include whitespace because \
24
+ the function supports subwords by splitting on whitespace.
23
25
:param str engine: One of 'royin' (default), 'thai2rom', 'thai2rom_onnx', 'tltk', and 'lookup'. See more in options for engine section.
24
26
:param str fallback_engine: If engine equals 'lookup', use `fallback_engine` for words that are not in the transliteration dict.
25
27
No effect on other engines. Default to 'royin'.
26
28
27
- :return: A string of Thai words rendered in the Latin alphabet.
29
+ :return: A string of a Thai word rendered in the Latin alphabet.
28
30
:rtype: str
29
31
30
32
:Options for engines:
@@ -53,6 +55,9 @@ def romanize(
53
55
romanize("ภาพยนตร์", engine="royin")
54
56
# output: 'phapn'
55
57
58
+ romanize("รส ดี", engine="royin") # subwords
59
+ # output: 'rot di'
60
+
56
61
romanize("ภาพยนตร์", engine="thai2rom")
57
62
# output: 'phapphayon'
58
63
@@ -87,9 +92,9 @@ def select_romanize_engine(engine: str):
87
92
else :
88
93
rom_engine = select_romanize_engine (engine )
89
94
trans_word = []
90
- for word in text .split (' ' ):
91
- trans_word .append (rom_engine (word ))
92
- new_word = '' .join (trans_word )
95
+ for subword in text .split (' ' ):
96
+ trans_word .append (rom_engine (subword ))
97
+ new_word = ' ' .join (trans_word )
93
98
return new_word
94
99
95
100
0 commit comments