Skip to content

Commit c5f7c9d

Browse files
committed
Add small100
1 parent 2e7dc23 commit c5f7c9d

File tree

2 files changed

+422
-0
lines changed

2 files changed

+422
-0
lines changed

pythainlp/translate/small100.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
from transformers import M2M100ForConditionalGeneration
2+
from .tokenization_small100 import SMALL100Tokenizer
3+
4+
class Small100Translator:
    """
    Machine Translation with small100 model

    - Huggingface https://huggingface.co/alirezamsh/small100

    :param bool use_gpu: load model to gpu (Default is False)
    :param str pretrained: model name or path handed to ``from_pretrained``
        for both the model and the tokenizer
        (Default is "alirezamsh/small100")
    """

    def __init__(
        self,
        use_gpu: bool = False,
        pretrained: str = "alirezamsh/small100",
    ) -> None:
        self.pretrained = pretrained
        self.model = M2M100ForConditionalGeneration.from_pretrained(
            self.pretrained
        )
        # The tokenizer is created for one target language at a time, so it
        # is built lazily by translate() and rebuilt when the target changes.
        self.tokenizer = None
        self.tgt_lang = None
        if use_gpu:
            self.model = self.model.cuda()

    def translate(self, text: str, tgt_lang: str = "en") -> str:
        """
        Translate text into the target language

        :param str text: input text in source language
        :param str tgt_lang: target language
        :return: translated text in target language
        :rtype: str

        :Example:

        ::

            from pythainlp.translate.small100 import Small100Translator

            mt = Small100Translator()

            # Translate text from Thai to English
            mt.translate("ทดสอบระบบ", tgt_lang="en")
            # output: 'Testing system'

            # Translate text from Thai to Chinese
            mt.translate("ทดสอบระบบ", tgt_lang="zh")
            # output: '系统测试'

            # Translate text from Thai to French
            mt.translate("ทดสอบระบบ", tgt_lang="fr")
            # output: 'Test du système'

        """
        # Reuse the cached tokenizer while the target language is unchanged;
        # also build it when none exists yet, whatever tgt_lang is.
        if self.tokenizer is None or tgt_lang != self.tgt_lang:
            self.tokenizer = SMALL100Tokenizer.from_pretrained(
                self.pretrained, tgt_lang=tgt_lang
            )
            self.tgt_lang = tgt_lang

        # Put the input tensors on the same device as the model; otherwise
        # generate() fails with a device mismatch when use_gpu=True.
        encoded = self.tokenizer(text, return_tensors="pt").to(
            self.model.device
        )

        # Kept as an instance attribute for callers that read the raw
        # generated ids after translating.
        self.translated = self.model.generate(**encoded)
        return self.tokenizer.batch_decode(
            self.translated, skip_special_tokens=True
        )[0]

0 commit comments

Comments
 (0)