Merge pull request #95 from PyThaiNLP/pythainlp1.6

wannaphong · web-flow · commit 007e644daab4 · 2018-06-22T15:15:52.000+07:00
PyThaiNLP 1.6.0.7
diff --git a/build_pypi.bat b/build_pypi.bat
@@ -1,2 +1 @@
-python setup.py sdist
 python setup.py bdist_wheel
diff --git a/docs/pythainlp-1-6-thai.md b/docs/pythainlp-1-6-thai.md
@@ -75,7 +75,7 @@ engine คือ ระบบตัดคำไทย ปัจจุบัน
 เช่น text=u'ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
 ```
 
-การใช้งาน
+**การใช้งาน**
 
 ```python
 from pythainlp.tokenize import word_tokenize
@@ -201,6 +201,54 @@ grammar : คุณ Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/
 '/คืน/ความสุข'
 ```
 
+### summarize
+
+เป็นระบบสรุปเอกสารภาษาไทยแบบง่าย ๆ
+
+summarize_text(text,n,engine='frequency')
+
+    text เป็นข้อความ
+    n คือ จำนวนประโยคสรุป
+    engine ที่รองรับ
+    - frequency
+**การใช้งาน**
+
+```python
+>>> from pythainlp.summarize import summarize_text
+>>> summarize_text(text="อาหาร หมายถึง ของแข็งหรือของเหลว ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว จะทำให้เกิดพลังงานและความร้อนแก่ร่างกาย ทำให้ร่างกายเจริญเติบโต ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย",n=1,engine='frequency')
+['อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย']
+```
+
+### word_vector
+
+```python
+from pythainlp.word_vector import thai2vec
+```
+
+word_vector เป็นระบบ word vector ใน PyThaiNLP
+
+ปัจจุบันนี้รองรับเฉพาะ thai2vec (https://github.com/cstorm125/thai2vec)
+
+thai2vec พัฒนาโดยคุณ Charin Polpanumas
+
+#### thai2vec
+
+ความต้องการโมดูล
+
+- gensim
+- numpy
+
+##### API
+
+- get_model() - รับข้อมูล model ในรูปแบบของ gensim
+- most_similar_cosmul(positive,negative)
+- doesnt_match(listdata)
+- similarity(word1,word2) - หาค่าความคล้ายกันระหว่าง 2 คำ โดยทั้งคู่เป็น str
+- sentence_vectorizer(ss,dim=300,use_mean=False)
+- about() - รายละเอียด thai2vec
+
+
+
 ### keywords
 
 ใช้หา keywords จากข้อความภาษาไทย
diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py
@@ -1,6 +1,6 @@
 ﻿# -*- coding: utf-8 -*-
 from __future__ import absolute_import
-__version__ = 1.5
+__version__ = 1.6
 import six
 if six.PY3:
 	"""
diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py
@@ -26,7 +26,6 @@
 เcctาะ
 เccีtยะ
 เccีtย(?=[เ-ไก-ฮ]|$)
-เccอะ
 เcc็c
 เcิc์c
 เcิtc
diff --git a/pythainlp/tokenize/tcc.py b/pythainlp/tokenize/tcc.py
@@ -13,7 +13,6 @@
 เcctาะ
 เccีtยะ
 เccีtย(?=[เ-ไก-ฮ]|$)
-เccอะ
 เcc็c
 เcิc์c
 เcิtc
diff --git a/pythainlp/word_vector/__init__ b/pythainlp/word_vector/__init__
@@ -1,2 +1,3 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import,unicode_literals
+from __future__ import absolute_import,unicode_literals
+from .thai2vec import *
diff --git a/pythainlp/word_vector/thai2vec.py b/pythainlp/word_vector/thai2vec.py
@@ -31,7 +31,7 @@ def download():
 	if not os.path.exists(path):
 		print("Download models...")
 		from urllib import request
-		request.urlretrieve("https://github.com/cstorm125/thai2vec/raw/master/data/thaiwiki/models/thai2vec.vec",path)
+		request.urlretrieve("https://www.dropbox.com/s/upnbmiebkfma7oy/thai2vec.vec?dl=1",path)
 		print("OK.")
 	return path
 def get_model():
@@ -62,4 +62,4 @@ def about():
 	
 	Development : Charin Polpanumas
 	GitHub : https://github.com/cstorm125/thai2vec
-	'''
+	'''
diff --git a/setup.py b/setup.py
@@ -8,7 +8,7 @@
 	'nltk>=3.2.2',
 	'future>=0.16.0',
 	'six',
-	'marisa_trie',
+	'marisa_trie<=0.7.4',
 	'requests',
 	'dill',
 	'pytz'
@@ -19,7 +19,7 @@
 
 setup(
     name='pythainlp',
-    version='1.6.0.2',
+    version='1.6.0.7',
     description="Thai natural language processing in Python package.",
     long_description=readme,
     author='PyThaiNLP',

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1 @@`
`1`		`-python setup.py sdist`
`2`	`1`	`python setup.py bdist_wheel`