Skip to content

Use common warn_deprecation #956

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions pythainlp/cls/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@
pythainlp.cls
Deprecated. Use pythainlp.classify instead.
"""
import warnings

__all__ = ["GzipModel"]

from pythainlp.classify.param_free import GzipModel
from pythainlp.tools import warn_deprecation

warnings.warn(
"Deprecated: Use pythainlp.classify instead.", DeprecationWarning
)
warn_deprecation("pythainlp.cls", "pythainlp.classify", "5.1")
8 changes: 6 additions & 2 deletions pythainlp/corpus/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
"thai_wsd_dict",
]

import warnings
from typing import FrozenSet, List, Union

from pythainlp.corpus import get_corpus, get_corpus_as_is, get_corpus_path
from pythainlp.tools import warn_deprecation

_THAI_COUNTRIES: FrozenSet[str] = frozenset()
_THAI_COUNTRIES_FILENAME = "countries_th.txt"
Expand Down Expand Up @@ -336,7 +336,11 @@ def thai_synonyms() -> dict:


def thai_synonym() -> dict:
warnings.warn("Deprecated: Use thai_synonyms() instead.", DeprecationWarning)
warn_deprecation(
"pythainlp.corpus.thai_synonym",
"pythainlp.corpus.thai_synonyms",
"5.1",
)
return thai_synonyms()


Expand Down
2 changes: 2 additions & 0 deletions pythainlp/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
"get_full_data_path",
"get_pythainlp_data_path",
"get_pythainlp_path",
"warn_deprecation",
]

from pythainlp.tools.core import warn_deprecation
from pythainlp.tools.path import (
PYTHAINLP_DEFAULT_DATA_DIR,
get_full_data_path,
Expand Down
30 changes: 30 additions & 0 deletions pythainlp/tools/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
"""
Generic support functions for PyThaiNLP.
"""

import warnings


def warn_deprecation(
    deprecated_func: str,
    replacing_func: str = "",
    version: str = "",
):
    """
    Emit a DeprecationWarning announcing that a function is deprecated.

    The message always names the deprecated function and states when it
    will be removed; a pointer to the replacement is appended only when
    one is given.

    :param str deprecated_func: Name of the deprecated function.
    :param str replacing_func: Name of the function to use instead (optional).
    :param str version: PyThaiNLP version in which the function will be
        removed (optional). When empty, the message says
        "a future release" instead of a version number.
    """
    removal_target = f"PyThaiNLP {version}" if version else "a future release"

    parts = [
        f"The '{deprecated_func}' function is deprecated and will be removed in {removal_target}."
    ]
    if replacing_func:
        parts.append(f" Please use '{replacing_func}' instead.")

    # stacklevel=2 attributes the warning to the code that called this
    # helper rather than to this helper itself.
    warnings.warn("".join(parts), DeprecationWarning, stacklevel=2)
4 changes: 3 additions & 1 deletion pythainlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"display_thai_char",
"emoji_to_thai",
"eng_to_thai",
"expand_maiyamok",
"find_keyword",
"ipa_to_rtgs",
"is_native_thai",
Expand Down Expand Up @@ -97,6 +98,7 @@
remove_tonemark,
remove_zw,
reorder_vowels,
expand_maiyamok,
)
from pythainlp.util.numtoword import bahttext, num_to_thaiword
from pythainlp.util.phoneme import ipa_to_rtgs, nectec_to_ipa, remove_tone_ipa
Expand All @@ -117,7 +119,7 @@
from pythainlp.util.trie import Trie, dict_trie
from pythainlp.util.wordtonum import text_to_num, thaiword_to_num, words_to_num

# syllable and pronounce have to be imported last
# sound_syllable and pronounce have to be imported last
from pythainlp.util.syllable import (
sound_syllable,
syllable_length,
Expand Down
44 changes: 34 additions & 10 deletions pythainlp/util/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pythainlp import thai_lead_vowels as lead_v
from pythainlp import thai_tonemarks as tonemarks
from pythainlp.tokenize import word_tokenize
from pythainlp.tools import warn_deprecation

_DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
_RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
Expand Down Expand Up @@ -249,12 +250,13 @@ def normalize(text: str) -> str:
return text


def maiyamok(sent: Union[str, List[str]]) -> List[str]:
def expand_maiyamok(sent: Union[str, List[str]]) -> List[str]:
"""
Thai MaiYaMok
Expand Maiyamok.

Maiyamok (ๆ) (Unicode U+0E46) is a Thai character indicating word
repetition. This function preprocesses Thai text by expanding Maiyamok
into the repeated words.

MaiYaMok (ๆ) is the mark of duplicate word in Thai language.
This function is preprocessing MaiYaMok in Thai sentence.

:param Union[str, List[str]] sent: input sentence (list or str)
:return: list of words
Expand All @@ -265,15 +267,12 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:

from pythainlp.util import maiyamok

maiyamok("เด็กๆชอบไปโรงเรียน")
# output: ['เด็ก', 'เด็ก', 'ชอบ', 'ไป', 'โรงเรียน']

maiyamok(["ทำไม", "คน", "ดี", " ", "ๆ", "ๆ", " ", "ถึง", "ทำ", "ไม่ได้"])
# output: ['ทำไม', 'คน', 'ดี', 'ดี', 'ดี', ' ', 'ถึง', 'ทำ', 'ไม่ได้']
maiyamok("เด็กๆกิน")
# output: ['เด็ก', 'เด็ก', 'กิน']
"""
if isinstance(sent, str):
sent = word_tokenize(sent)
_list_word = []
_list_word: list[str] = []
i = 0
for j, text in enumerate(sent):
if text.isspace() and "ๆ" in sent[j + 1]:
Expand All @@ -292,3 +291,28 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
_list_word.append(text)
i += 1
return _list_word


def maiyamok(sent: Union[str, List[str]]) -> List[str]:
    """
    Expand Maiyamok (deprecated; use expand_maiyamok instead).

    Maiyamok (ๆ) (Unicode U+0E46) is a Thai character indicating word
    repetition. This function emits a deprecation warning and then
    returns the result of expand_maiyamok() for the same input.

    :param Union[str, List[str]] sent: input sentence (list or str)
    :return: list of words
    :rtype: List[str]

    :Example:
    ::

        from pythainlp.util import maiyamok

        maiyamok("เด็กๆกิน")
        # output: ['เด็ก', 'เด็ก', 'กิน']
    """
    # No version is passed, so the warning refers to "a future release".
    warn_deprecation(
        deprecated_func="pythainlp.util.maiyamok",
        replacing_func="pythainlp.util.expand_maiyamok",
    )
    expanded = expand_maiyamok(sent)
    return expanded
14 changes: 6 additions & 8 deletions pythainlp/util/thaiwordcheck.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
import warnings
from pythainlp.tools import warn_deprecation


def is_native_thai(word: str) -> bool:
warnings.warn(
"""
pythainlp.util.is_native_thai has been renamed to \
pythainlp.morpheme.is_native_thai.
This function will be removed in PyThaiNLP 5.1.
""",
DeprecationWarning,
warn_deprecation(
"pythainlp.util.is_native_thai",
"pythainlp.morpheme.is_native_thai",
"5.1",
)

from pythainlp.morpheme import is_native_thai as check

return check(word)
2 changes: 1 addition & 1 deletion tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def test_normalize(self):
self.assertEqual(remove_zw("\u200bกา"), "กา")
self.assertEqual(remove_zw("กา\u200b\u200c\u200b"), "กา")

# maiyamok
# expand maiyamok
self.assertEqual(
maiyamok("เด็กๆชอบไปโรงเรียน"),
["เด็ก", "เด็ก", "ชอบ", "ไป", "โรงเรียน"],
Expand Down
Loading