Skip to content

Commit 6abcc3e

Browse files
committed
add code using nltk
Add code using nltk: the postaggers module now uses nltk, and nltk is added to the requirements in setup.py.
1 parent ecf185f commit 6abcc3e

File tree

3 files changed

+8
-11
lines changed

3 files changed

+8
-11
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ b = segment(a)
4949
print(b) # ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คนไทย']
5050
# Postaggers ภาษาไทย
5151
from pythainlp.postaggers import tag
52-
print(tag('คุณกำลังประชุม')) #คุณ/PPRS กำลัง/XVBM ประชุม/VACT
52+
print(tag('คุณกำลังประชุม')) # [('คุณ', 'PPRS'), ('กำลัง', 'XVBM'), ('ประชุม', 'VACT')]
5353
# หาคำที่มีจำนวนการใช้งานมากที่สุด
5454
from pythainlp.rank import rank
5555
aa = rank(b)

pythainlp/postaggers/text.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,16 @@
33
import pythainlp
44
import os
55
import pickle
6+
import nltk.tag, nltk.data
67
templates_dir = os.path.join(os.path.dirname(pythainlp.__file__), 'postaggers')
78
template_file = os.path.join(templates_dir, 'thaipos.pickle')
9+
#default_tagger = nltk.data.load(nltk.tag._POS_TAGGER)
810
def data():
911
with open(template_file, 'rb') as handle:
10-
data = pickle.load(handle)
11-
return data
12+
model = pickle.load(handle)
13+
return model
1214
data1 =data()
1315
def tag(text):
1416
text= segment(text)
15-
a=''
16-
for b in text:
17-
try:
18-
a+=b+"/"+data1[b]
19-
except KeyError:
20-
a+=b
21-
a+=' '
22-
return a
17+
tagger = nltk.tag.UnigramTagger(model=data1)# backoff=default_tagger)
18+
return tagger.tag(text)

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
requirements = [
1414
'pyicu>=1.9.3',
15+
'nltk',
1516
# TODO: put package requirements here
1617
]
1718

0 commit comments

Comments
 (0)