12
12
"thai_female_names" ,
13
13
"thai_male_names" ,
14
14
"thai_negations" ,
15
+ "thai_dict" ,
15
16
"thai_stopwords" ,
16
17
"thai_syllables" ,
18
+ "thai_synonym" ,
19
+ "thai_synonyms" ,
17
20
"thai_words" ,
18
- "thai_dict" ,
19
21
"thai_wsd_dict" ,
20
- "thai_synonym" ,
21
22
]
22
23
23
24
from typing import FrozenSet , List , Union
25
+ import warnings
24
26
25
- from pythainlp .corpus import get_corpus , get_corpus_path
27
+ from pythainlp .corpus import get_corpus , get_corpus_as_is , get_corpus_path
26
28
27
# Module-level caches for corpus data, each paired with the corpus file name
# it is associated with. Every cache starts out empty; the accessor functions
# visible in this file (for example thai_orst_words(), thai_dict() and
# thai_synonyms()) fill their cache on the first call that finds it empty.
_THAI_COUNTRIES: FrozenSet[str] = frozenset()
_THAI_COUNTRIES_FILENAME = "countries_th.txt"

_THAI_THAILAND_PROVINCES: FrozenSet[str] = frozenset()
_THAI_THAILAND_PROVINCES_DETAILS: List[dict] = []
_THAI_THAILAND_PROVINCES_FILENAME = "thailand_provinces_th.csv"

_THAI_SYLLABLES: FrozenSet[str] = frozenset()
_THAI_SYLLABLES_FILENAME = "syllables_th.txt"

_THAI_WORDS: FrozenSet[str] = frozenset()
_THAI_WORDS_FILENAME = "words_th.txt"

_THAI_STOPWORDS: FrozenSet[str] = frozenset()
_THAI_STOPWORDS_FILENAME = "stopwords_th.txt"

_THAI_NEGATIONS: FrozenSet[str] = frozenset()
_THAI_NEGATIONS_FILENAME = "negations_th.txt"

# NOTE(review): "FAMLIY" is a misspelling of "FAMILY". The names are kept
# as-is because code outside this view may reference them.
_THAI_FAMLIY_NAMES: FrozenSet[str] = frozenset()
_THAI_FAMLIY_NAMES_FILENAME = "family_names_th.txt"
_THAI_FEMALE_NAMES: FrozenSet[str] = frozenset()
_THAI_FEMALE_NAMES_FILENAME = "person_names_female_th.txt"
_THAI_MALE_NAMES: FrozenSet[str] = frozenset()
_THAI_MALE_NAMES_FILENAME = "person_names_male_th.txt"

_THAI_ORST_WORDS: FrozenSet[str] = frozenset()

# Dictionary-shaped caches. An empty dict means "not loaded yet"; the
# accessors test these with ``if not ...``.
_THAI_DICT: dict = {}
_THAI_WSD_DICT: dict = {}
_THAI_SYNONYMS: dict = {}
58
60
59
61
60
62
def countries () -> FrozenSet [str ]:
@@ -74,7 +76,7 @@ def countries() -> FrozenSet[str]:
74
76
return _THAI_COUNTRIES
75
77
76
78
77
- def provinces (details : bool = False ) -> Union [FrozenSet [str ], List [str ]]:
79
+ def provinces (details : bool = False ) -> Union [FrozenSet [str ], List [dict ]]:
78
80
"""
79
81
Return a frozenset of Thailand province names in Thai such as "กระบี่",
80
82
"กรุงเทพมหานคร", "กาญจนบุรี", and "อุบลราชธานี".
@@ -96,7 +98,7 @@ def provinces(details: bool = False) -> Union[FrozenSet[str], List[str]]:
96
98
provs = set ()
97
99
prov_details = []
98
100
99
- for line in get_corpus (_THAI_THAILAND_PROVINCES_FILENAME , as_is = True ):
101
+ for line in get_corpus_as_is (_THAI_THAILAND_PROVINCES_FILENAME ):
100
102
p = line .split ("," )
101
103
102
104
prov = {}
@@ -155,14 +157,14 @@ def thai_orst_words() -> FrozenSet[str]:
155
157
"""
156
158
Return a frozenset of Thai words from Royal Society of Thailand
157
159
\n (See: `dev/pythainlp/corpus/thai_orst_words.txt\
158
- <https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/thai_orst_words >`_)
160
+ <https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/orst_words_th.txt >`_)
159
161
160
162
:return: :class:`frozenset` containing words in the Thai language.
161
163
:rtype: :class:`frozenset`
162
164
"""
163
165
global _THAI_ORST_WORDS
164
166
if not _THAI_ORST_WORDS :
165
- _THAI_ORST_WORDS = get_corpus ("thai_orst_words .txt" )
167
+ _THAI_ORST_WORDS = get_corpus ("orst_words_th .txt" )
166
168
167
169
return _THAI_ORST_WORDS
168
170
@@ -266,8 +268,11 @@ def thai_dict() -> dict:
266
268
global _THAI_DICT
267
269
if not _THAI_DICT :
268
270
import csv
269
- _THAI_DICT = {"word" :[], "meaning" :[]}
270
- with open (get_corpus_path ("thai_dict" ), newline = "\n " , encoding = "utf-8" ) as csvfile :
271
+
272
+ _THAI_DICT = {"word" : [], "meaning" : []}
273
+ with open (
274
+ get_corpus_path ("thai_dict" ), newline = "\n " , encoding = "utf-8"
275
+ ) as csvfile :
271
276
reader = csv .DictReader (csvfile , delimiter = "," )
272
277
for row in reader :
273
278
_THAI_DICT ["word" ].append (row ["word" ])
@@ -288,38 +293,46 @@ def thai_wsd_dict() -> dict:
288
293
global _THAI_WSD_DICT
289
294
if not _THAI_WSD_DICT :
290
295
_thai_wsd = thai_dict ()
291
- _THAI_WSD_DICT = {"word" :[],"meaning" :[]}
292
- for i ,j in zip (_thai_wsd ["word" ],_thai_wsd ["meaning" ]):
296
+ _THAI_WSD_DICT = {"word" : [], "meaning" : []}
297
+ for i , j in zip (_thai_wsd ["word" ], _thai_wsd ["meaning" ]):
293
298
_all_value = list (eval (j ).values ())
294
299
_use = []
295
300
for k in _all_value :
296
301
_use .extend (k )
297
- _use = list (set (_use ))
298
- if len (_use )> 1 :
302
+ _use = list (set (_use ))
303
+ if len (_use ) > 1 :
299
304
_THAI_WSD_DICT ["word" ].append (i )
300
305
_THAI_WSD_DICT ["meaning" ].append (_use )
301
306
302
307
return _THAI_WSD_DICT
303
308
304
309
305
def thai_synonyms() -> dict:
    """
    Return Thai synonyms.
    \n(See: `thai_synonym\
    <https://pythainlp.github.io/pythainlp-corpus/thai_synonym.html>`_)

    The corpus file is read on the first successful call and the result is
    stored in the module-level ``_THAI_SYNONYMS`` cache; later calls return
    that same dictionary object without reading the file again.

    :return: Thai words with part-of-speech type and synonym, as a dict of
             three parallel lists under the keys ``"word"``, ``"pos"`` and
             ``"synonym"`` (each ``"synonym"`` entry is the CSV field split
             on ``"|"``)
    :rtype: dict
    """
    global _THAI_SYNONYMS
    if not _THAI_SYNONYMS:
        import csv

        # Collect into locals and publish to the global only after the whole
        # file has been read. Assigning the (truthy) skeleton dict up front
        # would make a failed open/parse look like a loaded cache, so every
        # later call would silently return empty lists instead of retrying.
        words = []
        pos_tags = []
        synonym_lists = []
        with open(
            get_corpus_path("thai_synonym"), newline="\n", encoding="utf-8"
        ) as csvfile:
            for row in csv.DictReader(csvfile, delimiter=","):
                words.append(row["word"])
                pos_tags.append(row["pos"])
                synonym_lists.append(row["synonym"].split("|"))

        _THAI_SYNONYMS = {
            "word": words,
            "pos": pos_tags,
            "synonym": synonym_lists,
        }

    return _THAI_SYNONYMS
335
+
336
def thai_synonym() -> dict:
    """
    Deprecated alias of :func:`thai_synonyms`.

    Emits a :class:`DeprecationWarning` and then returns whatever
    ``thai_synonyms()`` returns.

    :return: the result of ``thai_synonyms()``
    :rtype: dict
    """
    warnings.warn(
        "Deprecated: Use thai_synonyms() instead.",
        DeprecationWarning,
        # Attribute the warning to the caller's line rather than to this
        # wrapper, so it identifies the code that needs to be migrated.
        stacklevel=2,
    )
    return thai_synonyms()
0 commit comments