1
1
# -*- coding: utf-8 -*-
2
2
"""
3
- Transliterating Japanese/Korean/Vietnamese romanization text to Thai text
3
+ Transliterating Japanese/Korean/Mandarin/Vietnamese romanization text
4
+ to Thai text
4
5
By Wunsen
5
6
6
7
:See Also:
12
13
13
14
class WunsenTransliterate:
    """
    Transliterating Japanese/Korean/Mandarin/Vietnamese romanization text
    to Thai text
    by Wunsen

    The underlying ``ThapSap`` object is cached on the instance and is only
    rebuilt when the effective settings (language plus the options that apply
    to that language) change between calls.

    :See Also:
        * `GitHub <https://github.com/cakimpei/wunsen>`_
    """

    def __init__(self) -> None:
        # Cached ThapSap instance; None until the first successful call.
        self.thap_value = None
        # Effective settings the cached instance was built with.
        self.lang = None
        self.jp_input = None
        self.zh_sandhi = None
        self.system = None

    def transliterate(
        self,
        text: str,
        lang: str,
        jp_input: str = None,
        zh_sandhi: bool = None,
        system: str = None,
    ) -> str:
        """
        Use Wunsen for transliteration

        :param str text: text wants transliterated to Thai text.
        :param str lang: source language
        :param str jp_input: japanese input method (for japanese only)
        :param bool zh_sandhi: mandarin third tone sandhi option
            (for mandarin only)
        :param str system: transliteration system (for japanese and
            mandarin only)

        :return: Thai text
        :rtype: str
        :raises NotImplementedError: if *lang* is not one of the supported
            language codes

        :Options for lang:
            * *jp* - Japanese (from Hepburn romanization)
            * *ko* - Korean (from Revised Romanization)
            * *vi* - Vietnamese (Latin script)
            * *zh* - Mandarin (from Hanyu Pinyin)
        :Options for jp_input:
            * *Hepburn-no diacritic* - Hepburn-no diacritic (without macron)
        :Options for zh_sandhi:
            * *True* - apply third tone sandhi rule
            * *False* - do not apply third tone sandhi rule
        :Options for system:
            * *ORS61* - for Japanese: Japanese transliteration rules
              (Office of the Royal Society, B.E. 2561)
            * *RI35* - for Japanese: Japanese transliteration rules
              (Royal Institute, B.E. 2535)
            * *RI49* - for Mandarin: Chinese transliteration rules
              (Royal Institute, B.E. 2549)
            * *THC43* - for Mandarin: rules for rendering Mandarin Chinese
              sounds in Thai orthography (committee for researching Thai
              history in Chinese documents, B.E. 2543)

        Options that do not apply to the chosen language are ignored.

        :Example:
        ::

            wt = WunsenTransliterate()

            wt.transliterate("ohayō", lang="jp")
            # output: 'โอฮาโย'

            wt.transliterate(
                "ohayou",
                lang="jp",
                jp_input="Hepburn-no diacritic"
            )
            # output: 'โอฮาโย'

            wt.transliterate("ohayō", lang="jp", system="RI35")
            # output: 'โอะฮะโย'

            wt.transliterate("annyeonghaseyo", lang="ko")
            # output: 'อันนย็องฮาเซโย'

            wt.transliterate("xin chào", lang="vi")
            # output: 'ซีน จ่าว'

            wt.transliterate("ni3 hao3", lang="zh")
            # output: 'หนี เห่า'

            wt.transliterate("ni3 hao3", lang="zh", zh_sandhi=False)
            # output: 'หนี่ เห่า'

            wt.transliterate("ni3 hao3", lang="zh", system="RI49")
            # output: 'หนี ห่าว'
        """
        # Validate the language first and drop the options that do not apply
        # to it. Doing this before the cache comparison means (a) an invalid
        # language always raises NotImplementedError, even on a fresh
        # instance whose cached state is all None, and (b) an irrelevant
        # option does not force a rebuild on every call.
        if lang == "jp":
            zh_sandhi = None
        elif lang == "zh":
            jp_input = None
        elif lang == "ko" or lang == "vi":
            jp_input = None
            zh_sandhi = None
            system = None
        else:
            raise NotImplementedError(
                "The %s language is not implemented." % lang
            )

        if (
            self.thap_value is None
            or self.lang != lang
            or self.jp_input != jp_input
            or self.zh_sandhi != zh_sandhi
            or self.system != system
        ):
            setting = {}
            if jp_input is not None:
                setting["input"] = jp_input
            if zh_sandhi is not None:
                setting["option"] = {"sandhi": zh_sandhi}
            if system is not None:
                setting["system"] = system

            # Wunsen names Japanese "ja"; this wrapper exposes it as "jp".
            input_lang = "ja" if lang == "jp" else lang

            # Build first, then record the settings: if construction raises,
            # the cached state still describes the previous (valid)
            # transliterator instead of pairing new settings with a stale one.
            self.thap_value = ThapSap(input_lang, **setting)
            self.lang = lang
            self.jp_input = jp_input
            self.zh_sandhi = zh_sandhi
            self.system = system

        return self.thap_value.thap(text)
0 commit comments