Skip to content

Commit 0c20956

Browse files
authored
Merge pull request #1027 from PyThaiNLP/release-5.0.5
PyThaiNLP v5.0.5
2 parents 79e5d58 + b804d41 commit 0c20956

File tree

16 files changed

+175
-70
lines changed

16 files changed

+175
-70
lines changed

Diff for: CITATION.cff

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,6 @@ authors:
2020
given-names: "Pattarawat"
2121
orcid: "https://orcid.org/0000-0000-0000-0000"
2222
title: "PyThaiNLP: Thai Natural Language Processing in Python"
23-
version: v5.0.4
23+
version: v5.0.5
2424
license: Apache-2.0
2525
date-released: 2024-06-02

Diff for: README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร
2323
2424
| Version | Description | Status |
2525
|:------:|:--:|:------:|
26-
| [5.0.4](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
26+
| [5.0.5](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
2727
| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) |
2828

2929
## Getting Started

Diff for: README_TH.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร
2020
2121
| รุ่น | คำอธิบาย | สถานะ |
2222
|:------:|:--:|:------:|
23-
| [5.0.4](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
23+
| [5.0.5](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
2424
| [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) |
2525

2626
ติดตามพวกเราบน [PyThaiNLP Facebook page](https://www.facebook.com/pythainlp/) เพื่อรับข่าวสารเพิ่มเติม

Diff for: pythainlp/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
4-
__version__ = "5.0.4"
4+
__version__ = "5.0.5"
55

66
thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars
77

Diff for: pythainlp/cls/__init__.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,10 @@
55
pythainlp.cls
66
Deprecated. Use pythainlp.classify instead.
77
"""
8-
import warnings
98

109
__all__ = ["GzipModel"]
1110

1211
from pythainlp.classify.param_free import GzipModel
12+
from pythainlp.tools import warn_deprecation
1313

14-
warnings.warn(
15-
"Deprecated: Use pythainlp.classify instead.", DeprecationWarning
16-
)
14+
warn_deprecation("pythainlp.cls", "pythainlp.classify", "5.1", "5.2")

Diff for: pythainlp/corpus/common.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
]
2525

2626
from typing import FrozenSet, List, Union
27-
import warnings
2827

2928
from pythainlp.corpus import get_corpus, get_corpus_as_is, get_corpus_path
29+
from pythainlp.tools import warn_deprecation
3030

3131
_THAI_COUNTRIES: FrozenSet[str] = frozenset()
3232
_THAI_COUNTRIES_FILENAME = "countries_th.txt"
@@ -56,9 +56,9 @@
5656

5757
_THAI_ORST_WORDS: FrozenSet[str] = frozenset()
5858

59-
_THAI_DICT = {}
60-
_THAI_WSD_DICT = {}
61-
_THAI_SYNONYMS = {}
59+
_THAI_DICT: dict[str, list] = {}
60+
_THAI_WSD_DICT: dict[str, list] = {}
61+
_THAI_SYNONYMS: dict[str, list] = {}
6262

6363

6464
def countries() -> FrozenSet[str]:
@@ -336,7 +336,12 @@ def thai_synonyms() -> dict:
336336

337337

338338
def thai_synonym() -> dict:
339-
warnings.warn("Deprecated: Use thai_synonyms() instead.", DeprecationWarning)
339+
warn_deprecation(
340+
"pythainlp.corpus.thai_synonym",
341+
"pythainlp.corpus.thai_synonyms",
342+
"5.1",
343+
"5.2",
344+
)
340345
return thai_synonyms()
341346

342347

Diff for: pythainlp/phayathaibert/core.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ def get_ner(
394394
if pos:
395395
warnings.warn(
396396
"This model doesn't support output \
397-
postag and It doesn't output the postag."
397+
postag and it doesn't output the postag."
398398
)
399399

400400
sample_output = []

Diff for: pythainlp/tokenize/core.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
"""
55
Generic functions of tokenizers
66
"""
7+
78
import re
8-
import warnings
99
from typing import Iterable, List, Union
1010

1111
from pythainlp.tokenize import (
@@ -21,6 +21,7 @@
2121
rejoin_formatted_num,
2222
strip_whitespace,
2323
)
24+
from pythainlp.tools import warn_deprecation
2425
from pythainlp.util.trie import Trie, dict_trie
2526

2627

@@ -45,13 +46,9 @@ def clause_tokenize(doc: List[str]) -> List[List[str]]:
4546
# ['และ', 'คุณ', 'เล่น', 'มือถือ'],
4647
# ['ส่วน', 'น้อง', 'เขียน', 'โปรแกรม']]
4748
"""
49+
warn_deprecation("pythainlp.util.clause_tokenize", "", "5.0.5", "5.1")
4850
from pythainlp.tokenize.crfcls import segment
4951

50-
warnings.warn(
51-
"""
52-
clause_tokenize is no longer supported \
53-
and will be removed in version 5.1.
54-
""", DeprecationWarning)
5552
return segment(doc)
5653

5754

@@ -71,6 +68,7 @@ def word_detokenize(
7168
::
7269
7370
from pythainlp.tokenize import word_detokenize
71+
7472
print(word_detokenize(["เรา", "เล่น"]))
7573
# output: เราเล่น
7674
"""
@@ -299,18 +297,19 @@ def word_tokenize(
299297
segments = segment(text)
300298
elif engine == "nlpo3":
301299
from pythainlp.tokenize.nlpo3 import segment
300+
302301
# Currently cannot handle custom_dict from inside word_tokenize(),
303302
# due to difference in type.
304-
#if isinstance(custom_dict, str):
303+
# if isinstance(custom_dict, str):
305304
# segments = segment(text, custom_dict=custom_dict)
306-
#elif not isinstance(custom_dict, str) and not custom_dict:
305+
# elif not isinstance(custom_dict, str) and not custom_dict:
307306
# raise ValueError(
308307
# f"""Tokenizer \"{engine}\":
309308
# custom_dict must be a str.
310309
# It is a dictionary name as assigned with load_dict().
311310
# See pythainlp.tokenize.nlpo3.load_dict()"""
312311
# )
313-
#else:
312+
# else:
314313
# segments = segment(text)
315314
segments = segment(text)
316315
else:

Diff for: pythainlp/tools/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
"get_full_data_path",
77
"get_pythainlp_data_path",
88
"get_pythainlp_path",
9+
"safe_print",
10+
"warn_deprecation",
911
]
1012

13+
from pythainlp.tools.core import safe_print, warn_deprecation
14+
1115
from pythainlp.tools.path import (
1216
PYTHAINLP_DEFAULT_DATA_DIR,
1317
get_full_data_path,

Diff for: pythainlp/tools/core.py

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# -*- coding: utf-8 -*-
2+
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""
5+
Generic support functions for PyThaiNLP.
6+
"""
7+
8+
import sys
9+
import warnings
10+
11+
12+
def warn_deprecation(
    deprecated_func: str,
    replacing_func: str = "",
    deprecated_version: str = "",
    removal_version: str = "",
) -> None:
    """Warn about the deprecation of a function.

    Builds a human-readable message from the arguments and emits it as a
    ``DeprecationWarning``. This helper is meant to be called from inside
    the deprecated function (or module) itself.

    :param str deprecated_func: Name of the deprecated function.
    :param str replacing_func: Name of the function to use instead (optional).
    :param str deprecated_version: Version since which the function is
        deprecated (optional).
    :param str removal_version: Version in which the function will be
        removed (optional). "a future release" is used when empty.
    """
    message = f"The '{deprecated_func}' function is deprecated"
    if deprecated_version:
        message += f" since {deprecated_version}"
    if not removal_version:
        removal_version = "a future release"
    message += f" and will be removed in {removal_version}."
    if replacing_func:
        message += f" Please use '{replacing_func}' instead."
    # Frame 1 is this helper and frame 2 is the deprecated function that
    # called it; stacklevel=3 attributes the warning to the code that called
    # the deprecated function, which is the line the user needs to change.
    warnings.warn(message, DeprecationWarning, stacklevel=3)
34+
35+
36+
def safe_print(text: str) -> None:
    """Print text to console, handling UnicodeEncodeError.

    The text is first printed as-is. If the output stream cannot encode it,
    the text is re-encoded with the stream's encoding using the "replace"
    error handler, so unencodable characters are printed as "?" instead of
    raising.

    :param text: Text to print.
    :type text: str
    """
    try:
        print(text)
    except UnicodeEncodeError:
        # The stream may lack an ``encoding`` attribute or report ``None``
        # (e.g. a replaced stdout). Fall back to ASCII in that case so the
        # handler itself cannot fail with AttributeError/TypeError.
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        print(text.encode(encoding, errors="replace").decode(encoding))

Diff for: pythainlp/util/__init__.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919
"display_thai_char",
2020
"emoji_to_thai",
2121
"eng_to_thai",
22+
"expand_maiyamok",
2223
"find_keyword",
2324
"ipa_to_rtgs",
2425
"is_native_thai",
2526
"isthai",
2627
"isthaichar",
28+
"maiyamok",
2729
"nectec_to_ipa",
2830
"normalize",
2931
"now_reign_year",
@@ -85,8 +87,9 @@
8587
from pythainlp.util.emojiconv import emoji_to_thai
8688
from pythainlp.util.keywords import find_keyword, rank
8789
from pythainlp.util.normalize import (
88-
normalize,
90+
expand_maiyamok,
8991
maiyamok,
92+
normalize,
9093
remove_dangling,
9194
remove_dup_spaces,
9295
remove_repeat_vowels,

0 commit comments

Comments
 (0)