Skip to content

Commit 372ced4

Browse files
authored
Merge pull request #1110 from PyThaiNLP/update-docs
Update romanize docs and keep space
2 parents aad134d + 4cb0c32 commit 372ced4

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

pythainlp/transliterate/core.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,19 @@ def romanize(
1414
fallback_engine: str = DEFAULT_ROMANIZE_ENGINE,
1515
) -> str:
1616
"""
17-
This function renders Thai words in the Latin alphabet or "romanization",
17+
This function renders a Thai word in the Latin alphabet or "romanization",
1818
using the Royal Thai General System of Transcription (RTGS)
1919
[#rtgs_transcription]_. RTGS is the official system published
2020
by the Royal Institute of Thailand. (Thai: ถอดเสียงภาษาไทยเป็นอักษรละติน)
2121
22-
:param str text: Thai text to be romanized
22+
:param str text: A Thai word to be romanized. \
23+
The input should not include whitespace because \
24+
the function supports subwords by splitting on whitespace.
2325
:param str engine: One of 'royin' (default), 'thai2rom', 'thai2rom_onnx', 'tltk', and 'lookup'. See more in options for engine section.
2426
:param str fallback_engine: If engine equals 'lookup', use `fallback_engine` for words that are not in the transliteration dict.
2527
No effect on other engines. Default to 'royin'.
2628
27-
:return: A string of Thai words rendered in the Latin alphabet.
29+
:return: A string of a Thai word rendered in the Latin alphabet.
2830
:rtype: str
2931
3032
:Options for engines:
@@ -53,6 +55,9 @@ def romanize(
5355
romanize("ภาพยนตร์", engine="royin")
5456
# output: 'phapn'
5557
58+
romanize("รส ดี", engine="royin") # subwords
59+
# output: 'rot di'
60+
5661
romanize("ภาพยนตร์", engine="thai2rom")
5762
# output: 'phapphayon'
5863
@@ -87,9 +92,9 @@ def select_romanize_engine(engine: str):
8792
else:
8893
rom_engine = select_romanize_engine(engine)
8994
trans_word = []
90-
for word in text.split(' '):
91-
trans_word.append(rom_engine(word))
92-
new_word = ''.join(trans_word)
95+
for subword in text.split(' '):
96+
trans_word.append(rom_engine(subword))
97+
new_word = ' '.join(trans_word)
9398
return new_word
9499

95100

0 commit comments

Comments
 (0)