Skip to content

Commit fcef21c

Browse files
authored
Merge pull request #890 from PyThaiNLP/add-find_synonym
Add pythainlp.corpus.find_synonyms
2 parents 3b6daf0 + d9aa851 commit fcef21c

File tree

4 files changed

+52
-0
lines changed

4 files changed

+52
-0
lines changed

docs/api/corpus.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ countries
1212
.. autofunction:: countries
1313
:noindex:
1414

15+
find_synonyms
16+
~~~~~~~~~~~~~
17+
.. autofunction:: find_synonyms
18+
:noindex:
19+
1520
get_corpus
1621
~~~~~~~~~~
1722
.. autofunction:: get_corpus

pythainlp/corpus/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"corpus_path",
1515
"countries",
1616
"download",
17+
"find_synonyms",
1718
"get_corpus",
1819
"get_corpus_as_is",
1920
"get_corpus_db",
@@ -101,6 +102,7 @@ def corpus_db_path() -> str:
101102
) # these imports must come before other pythainlp.corpus.* imports
102103
from pythainlp.corpus.common import (
103104
countries,
105+
find_synonyms,
104106
provinces,
105107
thai_dict,
106108
thai_family_names,

pythainlp/corpus/common.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
# -*- coding: utf-8 -*-
22
# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
33
# SPDX-License-Identifier: Apache-2.0
4+
45
"""
56
Common lists of words.
67
"""
78

89
__all__ = [
910
"countries",
11+
"find_synonyms",
1012
"provinces",
1113
"thai_family_names",
1214
"thai_female_names",
@@ -336,3 +338,38 @@ def thai_synonyms() -> dict:
336338
def thai_synonym() -> dict:
    """
    Deprecated alias of :func:`thai_synonyms`.

    Emits a :class:`DeprecationWarning` and then returns the result of
    ``thai_synonyms()`` unchanged.

    :return: whatever ``thai_synonyms()`` returns
    :rtype: dict
    """
    warnings.warn(
        "Deprecated: Use thai_synonyms() instead.",
        DeprecationWarning,
        # Attribute the warning to the caller's line rather than to this
        # wrapper, so users can locate the deprecated call site.
        stacklevel=2,
    )
    return thai_synonyms()
341+
342+
343+
def find_synonyms(word: str) -> List[str]:
    """
    Find synonyms of a word.

    The word is matched both as a head word and as a member of any
    entry's synonym list; the synonyms (and head words) of every matching
    entry are merged.

    :param str word: Thai word
    :return: Sorted list of synonyms of the input word, or an empty list
             if the word is not found. The input word itself is never
             included in the result.
    :rtype: List[str]

    :Example:
    ::

        from pythainlp.corpus import find_synonyms

        print(find_synonyms("หมู"))
        # output: ['จรุก', 'วราหะ', 'วราห์', 'ศูกร', 'สุกร']
    """
    # thai_synonyms() is used here as two parallel lists: entry i has head
    # word synonyms["word"][i] and synonym group synonyms["synonym"][i].
    synonyms = thai_synonyms()
    found = set()

    # Scan every entry instead of using list.index(), which would stop at
    # the first occurrence and miss later entries with the same head word.
    for head, group in zip(synonyms["word"], synonyms["synonym"]):
        if head == word or word in group:
            found.update(group)
            found.add(head)

    found.discard(word)  # a word is not its own synonym
    return sorted(found)

tests/test_corpus.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
conceptnet,
1414
countries,
1515
download,
16+
find_synonyms,
1617
get_corpus_db,
1718
get_corpus_db_detail,
1819
get_corpus_default_db,
@@ -204,3 +205,10 @@ def test_zip(self):
204205
p = get_corpus_path("test_zip")
205206
self.assertEqual(os.path.isdir(p), True)
206207
self.assertEqual(remove("test_zip"), True)
208+
209+
def test_find_synonyms(self):
    # A word with matches is expected to yield this sorted synonym list.
    expected = ['จรุก', 'วราหะ', 'วราห์', 'ศูกร', 'สุกร']
    self.assertEqual(find_synonyms("หมู"), expected)

    # An input with no match is expected to yield an empty list.
    self.assertEqual(find_synonyms("1"), [])

0 commit comments

Comments
 (0)