Skip to content

Commit 2283d9d

Browse files
authored
Merge pull request #956 from bact/fix-maiyamok
Use common warn_deprecation
2 parents 6452578 + 13d3ab3 commit 2283d9d

File tree

8 files changed

+84
-26
lines changed

8 files changed

+84
-26
lines changed

pythainlp/cls/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,10 @@
55
pythainlp.cls
66
Deprecated. Use pythainlp.classify instead.
77
"""
8-
import warnings
98

109
__all__ = ["GzipModel"]
1110

1211
from pythainlp.classify.param_free import GzipModel
12+
from pythainlp.tools import warn_deprecation
1313

14-
warnings.warn(
15-
"Deprecated: Use pythainlp.classify instead.", DeprecationWarning
16-
)
14+
warn_deprecation("pythainlp.cls", "pythainlp.classify", "5.1")

pythainlp/corpus/common.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
"thai_wsd_dict",
2424
]
2525

26-
import warnings
2726
from typing import FrozenSet, List, Union
2827

2928
from pythainlp.corpus import get_corpus, get_corpus_as_is, get_corpus_path
29+
from pythainlp.tools import warn_deprecation
3030

3131
_THAI_COUNTRIES: FrozenSet[str] = frozenset()
3232
_THAI_COUNTRIES_FILENAME = "countries_th.txt"
@@ -336,7 +336,11 @@ def thai_synonyms() -> dict:
336336

337337

338338
def thai_synonym() -> dict:
339-
warnings.warn("Deprecated: Use thai_synonyms() instead.", DeprecationWarning)
339+
warn_deprecation(
340+
"pythainlp.corpus.thai_synonym",
341+
"pythainlp.corpus.thai_synonyms",
342+
"5.1",
343+
)
340344
return thai_synonyms()
341345

342346

pythainlp/tools/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
"get_full_data_path",
77
"get_pythainlp_data_path",
88
"get_pythainlp_path",
9+
"warn_deprecation",
910
]
1011

12+
from pythainlp.tools.core import warn_deprecation
1113
from pythainlp.tools.path import (
1214
PYTHAINLP_DEFAULT_DATA_DIR,
1315
get_full_data_path,

pythainlp/tools/core.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# -*- coding: utf-8 -*-
2+
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""
5+
Generic support functions for PyThaiNLP.
6+
"""
7+
8+
import warnings
9+
10+
11+
def warn_deprecation(
12+
deprecated_func: str,
13+
replacing_func: str = "",
14+
version: str = "",
15+
):
16+
"""
17+
Warn about the deprecation of a function.
18+
19+
:param str deprecated_func: Name of the deprecated function.
20+
:param str replacing_func: Name of the function to use instead (optional).
21+
:param str version: PyThaiNLP version in which the function will be removed (optional).
22+
"""
23+
if version:
24+
version = f"PyThaiNLP {version}"
25+
else:
26+
version = "a future release"
27+
message = f"The '{deprecated_func}' function is deprecated and will be removed in {version}."
28+
if replacing_func:
29+
message += f" Please use '{replacing_func}' instead."
30+
warnings.warn(message, DeprecationWarning, stacklevel=2)

pythainlp/util/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"display_thai_char",
2020
"emoji_to_thai",
2121
"eng_to_thai",
22+
"expand_maiyamok",
2223
"find_keyword",
2324
"ipa_to_rtgs",
2425
"is_native_thai",
@@ -97,6 +98,7 @@
9798
remove_tonemark,
9899
remove_zw,
99100
reorder_vowels,
101+
expand_maiyamok,
100102
)
101103
from pythainlp.util.numtoword import bahttext, num_to_thaiword
102104
from pythainlp.util.phoneme import ipa_to_rtgs, nectec_to_ipa, remove_tone_ipa
@@ -117,7 +119,7 @@
117119
from pythainlp.util.trie import Trie, dict_trie
118120
from pythainlp.util.wordtonum import text_to_num, thaiword_to_num, words_to_num
119121

120-
# syllable and pronounce have to be imported last
122+
# sound_syllable and pronounce have to be imported last
121123
from pythainlp.util.syllable import (
122124
sound_syllable,
123125
syllable_length,

pythainlp/util/normalize.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pythainlp import thai_lead_vowels as lead_v
1515
from pythainlp import thai_tonemarks as tonemarks
1616
from pythainlp.tokenize import word_tokenize
17+
from pythainlp.tools import warn_deprecation
1718

1819
_DANGLING_CHARS = f"{above_v}{below_v}{tonemarks}\u0e3a\u0e4c\u0e4d\u0e4e"
1920
_RE_REMOVE_DANGLINGS = re.compile(f"^[{_DANGLING_CHARS}]+")
@@ -249,12 +250,13 @@ def normalize(text: str) -> str:
249250
return text
250251

251252

252-
def maiyamok(sent: Union[str, List[str]]) -> List[str]:
253+
def expand_maiyamok(sent: Union[str, List[str]]) -> List[str]:
253254
"""
254-
Thai MaiYaMok
255+
Expand Maiyamok.
256+
257+
Maiyamok (ๆ) (Unicode U+0E46) is a Thai character indicating word
258+
repetition. This function preprocesses Thai text by expanding Maiyamok into the repeated word.
255259
256-
MaiYaMok (ๆ) is the mark of duplicate word in Thai language.
257-
This function is preprocessing MaiYaMok in Thai sentence.
258260
259261
:param Union[str, List[str]] sent: input sentence (list or str)
260262
:return: list of words
@@ -265,15 +267,12 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
265267
266268
from pythainlp.util import expand_maiyamok
267269

268-
maiyamok("เด็กๆชอบไปโรงเรียน")
269-
# output: ['เด็ก', 'เด็ก', 'ชอบ', 'ไป', 'โรงเรียน']
270-
271-
maiyamok(["ทำไม", "คน", "ดี", " ", "ๆ", "ๆ", " ", "ถึง", "ทำ", "ไม่ได้"])
272-
# output: ['ทำไม', 'คน', 'ดี', 'ดี', 'ดี', ' ', 'ถึง', 'ทำ', 'ไม่ได้']
270+
expand_maiyamok("เด็กๆกิน")
271+
# output: ['เด็ก', 'เด็ก', 'กิน']
273272
"""
274273
if isinstance(sent, str):
275274
sent = word_tokenize(sent)
276-
_list_word = []
275+
_list_word: list[str] = []
277276
i = 0
278277
for j, text in enumerate(sent):
279278
if text.isspace() and "ๆ" in sent[j + 1]:
@@ -292,3 +291,28 @@ def maiyamok(sent: Union[str, List[str]]) -> List[str]:
292291
_list_word.append(text)
293292
i += 1
294293
return _list_word
294+
295+
296+
def maiyamok(sent: Union[str, List[str]]) -> List[str]:
297+
"""
298+
Expand Maiyamok.
299+
300+
Maiyamok (ๆ) (Unicode U+0E46) is a Thai character indicating word
301+
repetition. This function preprocesses Thai text by expanding Maiyamok into the repeated word.
302+
303+
:param Union[str, List[str]] sent: input sentence (list or str)
304+
:return: list of words
305+
:rtype: List[str]
306+
307+
:Example:
308+
::
309+
310+
from pythainlp.util import maiyamok
311+
312+
maiyamok("เด็กๆกิน")
313+
# output: ['เด็ก', 'เด็ก', 'กิน']
314+
"""
315+
warn_deprecation(
316+
"pythainlp.util.maiyamok", "pythainlp.util.expand_maiyamok"
317+
)
318+
return expand_maiyamok(sent)

pythainlp/util/thaiwordcheck.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
11
# -*- coding: utf-8 -*-
22
# SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
4-
import warnings
4+
from pythainlp.tools import warn_deprecation
55

66

77
def is_native_thai(word: str) -> bool:
8-
warnings.warn(
9-
"""
10-
pythainlp.util.is_native_thai has been renamed to \
11-
pythainlp.morpheme.is_native_thai.
12-
This function will be removed in PyThaiNLP 5.1.
13-
""",
14-
DeprecationWarning,
8+
warn_deprecation(
9+
"pythainlp.util.is_native_thai",
10+
"pythainlp.morpheme.is_native_thai",
11+
"5.1",
1512
)
13+
1614
from pythainlp.morpheme import is_native_thai as check
1715

1816
return check(word)

tests/test_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ def test_normalize(self):
531531
self.assertEqual(remove_zw("\u200bกา"), "กา")
532532
self.assertEqual(remove_zw("กา\u200b\u200c\u200b"), "กา")
533533

534-
# maiyamok
534+
# expand maiyamok
535535
self.assertEqual(
536536
maiyamok("เด็กๆชอบไปโรงเรียน"),
537537
["เด็ก", "เด็ก", "ชอบ", "ไป", "โรงเรียน"],

0 commit comments

Comments
 (0)