Skip to content

Commit d1b2b5c

Browse files
authored
Merge pull request #833 from PyThaiNLP/add-ancient
Add pythainlp.ancient
2 parents c7470dc + 47d5189 commit d1b2b5c

File tree

15 files changed

+531
-90
lines changed

15 files changed

+531
-90
lines changed

.github/workflows/macos-test.yml

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,10 @@ jobs:
5151
python -m pip install --upgrade pip
5252
pip uninstall --y pythainlp
5353
pip install --no-deps fastai==1.0.61
54-
pip install PyYAML attacut emoji epitran gensim nltk numpy pandas sacremoses sentencepiece ssg bpemb transformers sefr_cut phunspell spylls symspellpy tltk oskut nlpo3 onnxruntime thai_nner wunsen spacy_thai ufal.chu-liu-edmonds
55-
pip install -e .
54+
conda install -c conda-forge icu
55+
conda install -c conda-forge pyicu
56+
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt
57+
pip install .[full]
5658
python -m nltk.downloader omw-1.4
5759
python -m unittest discover
5860
if: matrix.os == 'self-hosted'
@@ -73,10 +75,11 @@ jobs:
7375
pip install pytest coverage coveralls
7476
conda install -c conda-forge icu
7577
conda install -c conda-forge pyicu
76-
if [ -f docker_requirements.txt ]; then SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt; fi
78+
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt
7779
pip install deepcut tltk
7880
pip install .[full]
7981
python -m nltk.downloader omw-1.4
82+
python -m pip cache purge
8083
if: matrix.os != 'self-hosted'
8184
- name: Test
8285
shell: bash -l {0}

.github/workflows/test.yml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,11 +29,14 @@ jobs:
2929
- name: Install dependencies
3030
run: |
3131
python -m pip install --upgrade pip
32+
python -m pip install backports.zoneinfo[tzdata]
3233
pip install pytest coverage coveralls
33-
if [ -f docker_requirements.txt ]; then SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt; fi
34+
SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True pip install -r docker_requirements.txt
3435
pip install deepcut tltk
3536
pip install .[full]
3637
python -m nltk.downloader omw-1.4
38+
python -m pip install spacy deepcut tltk
39+
python -m pip cache purge
3740
- name: Test
3841
env:
3942
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

docker_requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ fairseq==0.10.2
2222
pyicu==2.8
2323
deepcut==0.7.0.0
2424
h5py==3.1.0
25-
tensorflow==2.9.3
25+
tensorflow==2.11.1
2626
pandas==1.4.*
2727
tltk==1.6.8
2828
OSKut==1.3
@@ -37,4 +37,4 @@ ufal.chu-liu-edmonds==1.0.2
3737
wtpsplit==1.0.1
3838
fastcoref==2.1.6
3939
panphon==0.20.0
40-
sentence-transformers==2.2.2
40+
sentence-transformers==2.2.2

docs/api/ancient.rst

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
.. currentmodule:: pythainlp.ancient
2+
3+
pythainlp.ancient
4+
=================
5+
6+
Modules
7+
-------
8+
9+
.. autofunction:: aksonhan_to_current

notebooks/test-aksonhan.ipynb

Lines changed: 322 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,322 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from pythainlp.ancient import aksonhan_to_current"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": 2,
15+
"metadata": {},
16+
"outputs": [
17+
{
18+
"data": {
19+
"text/plain": [
20+
"'จัก'"
21+
]
22+
},
23+
"execution_count": 2,
24+
"metadata": {},
25+
"output_type": "execute_result"
26+
}
27+
],
28+
"source": [
29+
"aksonhan_to_current(\"จกก\")"
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": 3,
35+
"metadata": {},
36+
"outputs": [
37+
{
38+
"data": {
39+
"text/plain": [
40+
"'บรร'"
41+
]
42+
},
43+
"execution_count": 3,
44+
"metadata": {},
45+
"output_type": "execute_result"
46+
}
47+
],
48+
"source": [
49+
"aksonhan_to_current(\"บรร\")"
50+
]
51+
},
52+
{
53+
"cell_type": "code",
54+
"execution_count": 4,
55+
"metadata": {},
56+
"outputs": [
57+
{
58+
"data": {
59+
"text/plain": [
60+
"'ดั่ง'"
61+
]
62+
},
63+
"execution_count": 4,
64+
"metadata": {},
65+
"output_type": "execute_result"
66+
}
67+
],
68+
"source": [
69+
"aksonhan_to_current(\"ดง่ง\")"
70+
]
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": 5,
75+
"metadata": {},
76+
"outputs": [
77+
{
78+
"data": {
79+
"text/plain": [
80+
"'นั้น'"
81+
]
82+
},
83+
"execution_count": 5,
84+
"metadata": {},
85+
"output_type": "execute_result"
86+
}
87+
],
88+
"source": [
89+
"aksonhan_to_current(\"นน้น\")"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": 6,
95+
"metadata": {},
96+
"outputs": [
97+
{
98+
"data": {
99+
"text/plain": [
100+
"'ขัด'"
101+
]
102+
},
103+
"execution_count": 6,
104+
"metadata": {},
105+
"output_type": "execute_result"
106+
}
107+
],
108+
"source": [
109+
"aksonhan_to_current(\"ขดด\")"
110+
]
111+
},
112+
{
113+
"cell_type": "code",
114+
"execution_count": 7,
115+
"metadata": {},
116+
"outputs": [
117+
{
118+
"data": {
119+
"text/plain": [
120+
"'ตรัส'"
121+
]
122+
},
123+
"execution_count": 7,
124+
"metadata": {},
125+
"output_type": "execute_result"
126+
}
127+
],
128+
"source": [
129+
"aksonhan_to_current(\"ตรสส\")"
130+
]
131+
},
132+
{
133+
"cell_type": "code",
134+
"execution_count": 8,
135+
"metadata": {},
136+
"outputs": [
137+
{
138+
"data": {
139+
"text/plain": [
140+
"'ขับ'"
141+
]
142+
},
143+
"execution_count": 8,
144+
"metadata": {},
145+
"output_type": "execute_result"
146+
}
147+
],
148+
"source": [
149+
"aksonhan_to_current(\"ขบบ\")"
150+
]
151+
},
152+
{
153+
"cell_type": "code",
154+
"execution_count": 9,
155+
"metadata": {},
156+
"outputs": [
157+
{
158+
"data": {
159+
"text/plain": [
160+
"'วัน'"
161+
]
162+
},
163+
"execution_count": 9,
164+
"metadata": {},
165+
"output_type": "execute_result"
166+
}
167+
],
168+
"source": [
169+
"aksonhan_to_current(\"วนน\")"
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": 10,
175+
"metadata": {},
176+
"outputs": [
177+
{
178+
"data": {
179+
"text/plain": [
180+
"'หลัง'"
181+
]
182+
},
183+
"execution_count": 10,
184+
"metadata": {},
185+
"output_type": "execute_result"
186+
}
187+
],
188+
"source": [
189+
"aksonhan_to_current(\"หลงง\")"
190+
]
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": 11,
195+
"metadata": {},
196+
"outputs": [
197+
{
198+
"data": {
199+
"text/plain": [
200+
"'บังคับ'"
201+
]
202+
},
203+
"execution_count": 11,
204+
"metadata": {},
205+
"output_type": "execute_result"
206+
}
207+
],
208+
"source": [
209+
"aksonhan_to_current(\"บงงคบบ\")"
210+
]
211+
},
212+
{
213+
"cell_type": "code",
214+
"execution_count": 12,
215+
"metadata": {},
216+
"outputs": [
217+
{
218+
"data": {
219+
"text/plain": [
220+
"'สรรเพชญ'"
221+
]
222+
},
223+
"execution_count": 12,
224+
"metadata": {},
225+
"output_type": "execute_result"
226+
}
227+
],
228+
"source": [
229+
"aksonhan_to_current(\"สรรเพชญ\")"
230+
]
231+
},
232+
{
233+
"cell_type": "code",
234+
"execution_count": 13,
235+
"metadata": {},
236+
"outputs": [
237+
{
238+
"data": {
239+
"text/plain": [
240+
"'กก'"
241+
]
242+
},
243+
"execution_count": 13,
244+
"metadata": {},
245+
"output_type": "execute_result"
246+
}
247+
],
248+
"source": [
249+
"aksonhan_to_current(\"กก\")"
250+
]
251+
},
252+
{
253+
"cell_type": "code",
254+
"execution_count": 14,
255+
"metadata": {},
256+
"outputs": [
257+
{
258+
"data": {
259+
"text/plain": [
260+
"'ก'"
261+
]
262+
},
263+
"execution_count": 14,
264+
"metadata": {},
265+
"output_type": "execute_result"
266+
}
267+
],
268+
"source": [
269+
"aksonhan_to_current(\"ก\")"
270+
]
271+
},
272+
{
273+
"cell_type": "code",
274+
"execution_count": 15,
275+
"metadata": {},
276+
"outputs": [
277+
{
278+
"data": {
279+
"text/plain": [
280+
"'ถนน'"
281+
]
282+
},
283+
"execution_count": 15,
284+
"metadata": {},
285+
"output_type": "execute_result"
286+
}
287+
],
288+
"source": [
289+
"aksonhan_to_current(\"ถนน\")"
290+
]
291+
},
292+
{
293+
"cell_type": "code",
294+
"execution_count": null,
295+
"metadata": {},
296+
"outputs": [],
297+
"source": []
298+
}
299+
],
300+
"metadata": {
301+
"kernelspec": {
302+
"display_name": "Python 3",
303+
"language": "python",
304+
"name": "python3"
305+
},
306+
"language_info": {
307+
"codemirror_mode": {
308+
"name": "ipython",
309+
"version": 3
310+
},
311+
"file_extension": ".py",
312+
"mimetype": "text/x-python",
313+
"name": "python",
314+
"nbconvert_exporter": "python",
315+
"pygments_lexer": "ipython3",
316+
"version": "3.10.6"
317+
},
318+
"orig_nbformat": 4
319+
},
320+
"nbformat": 4,
321+
"nbformat_minor": 2
322+
}

0 commit comments

Comments
 (0)