Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/deploy_docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ on:
jobs:
release:
name: Build
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v1
- name: Set up Python 3.7
- name: Set up Python 3.8
uses: actions/setup-python@v1
with:
python-version: 3.7
python-version: 3.8
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.7]
python-version: [3.8]

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/macos-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
fail-fast: false
matrix:
os: [macos-latest]
python-version: [3.7]
python-version: [3.8]

steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pypi-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ on:
jobs:
deploy:

runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.7]
python-version: [3.8]

steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pypi-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ on:
jobs:
build:

runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.7]
python-version: [3.8]

steps:
- uses: actions/checkout@v2
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ on:
jobs:
build:

runs-on: ubuntu-latest
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.7]
python-version: [3.8]

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.7-slim-buster
FROM python:3.8-slim-buster

COPY . .

Expand Down
3 changes: 2 additions & 1 deletion docker_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ pyicu==2.8
deepcut==0.7.0.0
h5py==3.1.0
tensorflow==2.7.2
pandas==0.24
pandas==1.4.*
tltk==1.3.8
OSKut==1.3
nlpo3==1.2.2
thai-nner==0.3
spacy==2.3.*
1 change: 1 addition & 0 deletions docs/api/util.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Modules
.. autofunction:: remove_tonemark
.. autofunction:: remove_zw
.. autofunction:: reorder_vowels
.. autofunction:: sound_syllable
.. autofunction:: text_to_arabic_digit
.. autofunction:: text_to_num
.. autofunction:: text_to_thai_digit
Expand Down
2 changes: 2 additions & 0 deletions pythainlp/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"time_to_thaiword",
"text_to_num",
"words_to_num",
"sound_syllable",
]

from pythainlp.util.collate import collate
Expand Down Expand Up @@ -89,3 +90,4 @@
from pythainlp.util.time import thai_time, thaiword_to_time, time_to_thaiword
from pythainlp.util.trie import Trie, dict_trie
from pythainlp.util.wordtonum import thaiword_to_num, text_to_num, words_to_num
from pythainlp.util.syllable import sound_syllable
113 changes: 113 additions & 0 deletions pythainlp/util/syllable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
"""
Syllable tools
"""
import re
from pythainlp import thai_consonants

# Final ("spelling") consonant groups, keyed by the Thai name of the group.
# Each value is the list of consonants that belong to that group.
spelling_class = {
    group: list(members)
    for group, members in (
        ("กง", "ง"),
        ("กม", "ม"),
        ("เกย", "ย"),
        ("เกอว", "ว"),
        ("กน", "นญณรลฬ"),
        ("กก", "กขคฆ"),
        ("กด", "ดจชซฎฏฐฑฒตถทธศษส"),
        ("กบ", "บปภพฟ"),
    )
}

# Every Thai consonant except "อ".
thai_consonants_all = list(thai_consonants)
thai_consonants_all.remove("อ")

# Flat list of every consonant that belongs to some group above.
_temp = [
    letter for members in spelling_class.values() for letter in members
]
# Consonants that belong to none of the groups.
not_spelling_class = [
    letter for letter in thai_consonants_all if letter not in _temp
]

# Characters that mark a short vowel sound.
short = "ะัิึุ"
# Short vowel forms that are written around the initial consonant.
re_short = re.compile("เ(.*)ะ|แ(.*)ะ|เ(.*)อะ|โ(.*)ะ|เ(.*)าะ", re.U)
# เ-า is treated as a live syllable.
pattern = re.compile("เ(.*)า", re.U)

# Final consonants that make a syllable live.
_check_1 = []
for i in ("กง", "กน", "กม", "เกย", "เกอว"):
    _check_1 += spelling_class[i]
# Final consonants that make a syllable dead.
_check_2 = [
    *spelling_class["กก"],
    *spelling_class["กบ"],
    *spelling_class["กด"],
]


def sound_syllable(syllable: str) -> str:
    """
    Sound syllable classification

    Classify a Thai syllable as a live syllable or a dead syllable,
    based on its final (spelling) consonant and the vowel characters
    it contains.

    Input shorter than two characters (including the empty string) is
    classified as dead. A syllable that contains none of the consonants
    in ``thai_consonants_all`` is classified from its vowel characters
    alone instead of raising an error.

    :param str syllable: Thai syllable
    :return: syllable's type ("live" or "dead")
    :rtype: str

    :Example:
    ::

        from pythainlp.util import sound_syllable

        print(sound_syllable("มา"))
        # output: live

        print(sound_syllable("เลข"))
        # output: dead
    """
    # Guard first: this must run before the consonant lookup below so that
    # empty or one-character input never reaches the indexing step.
    if len(syllable) < 2:
        return "dead"

    consonants = [c for c in syllable if c in thai_consonants_all]
    # The last consonant found is treated as the final (spelling) consonant.
    # When there is none, use an empty marker: it belongs to neither
    # final-consonant group, so only the vowel rules below apply.
    spelling_consonant = consonants[-1] if consonants else ""

    # Each "c" is a single character, so membership in the string is the
    # same test as membership in a set of its characters.
    has_long_vowel = any(c in "าีืแูาโ" for c in syllable)
    has_special_vowel = any(c in "ำใไ" for c in syllable)
    has_short_vowel = bool(re_short.search(syllable)) or any(
        c in short for c in syllable
    )

    # Dead final consonant with none of the long/special vowel characters.
    # The เ-า pattern needs no separate check here: any syllable that
    # matches it contains "เ" and is already excluded by the first test.
    if (
        spelling_consonant in _check_2
        and not any(c in "าีืแูาเโ" for c in syllable)
        and not has_special_vowel
    ):
        return "dead"

    if has_long_vowel:
        # The syllable does not end on its last consonant.
        if spelling_consonant != syllable[-1]:
            return "live"
        if spelling_consonant in _check_1:
            return "live"
        if spelling_consonant in _check_2:
            return "dead"
        # Ends on a consonant that belongs to neither group.
        return "dead" if has_short_vowel else "live"

    if has_special_vowel:
        return "live"  # syllables with ำ, ใ or ไ are live

    if pattern.search(syllable):  # เ-า
        return "live"

    if spelling_consonant in _check_1:
        # A short vowel with a single consonant means that consonant is
        # the initial one, not a final one.
        if has_short_vowel and len(consonants) < 2:
            return "dead"
        return "live"

    # Everything that reaches this point is dead, with or without a
    # short vowel.
    return "dead"
39 changes: 39 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
thai_keyboard_dist,
text_to_num,
words_to_num,
sound_syllable,
)


Expand Down Expand Up @@ -661,3 +662,41 @@ def test_emoji_to_thai(self):
emoji_to_thai("🇹🇭 นี่คือธงประเทศไทย"),
":ธง_ไทย: นี่คือธงประเทศไทย",
)

def test_sound_syllable(self):
    # Syllables expected to be classified as "live".
    live_syllables = (
        "มา",
        "ดู",
        "ปู",
        "เวลา",
        "ปี",
        "จำ",
        "น้ำ",
        "ใช่",
        "เผ่า",
        "เสา",
        "ไป",
        "จริง",
        "กิน",
        "กำ",
        "มา",
        "สาว",
        "ฉุย",
    )
    # Syllables expected to be classified as "dead".
    dead_syllables = (
        "ธุ",
        "ระ",
        "กะ",
        "ทิ",
        "เกะ",
        "กะ",
        "บท",
        "บาท",
        "ลาภ",
        "เมฆ",
        "เลข",
        "ธูป",
        "บ",
        "บ่",
        "ก็",
    )
    expectations = [(text, "live") for text in live_syllables]
    expectations += [(text, "dead") for text in dead_syllables]
    for text, expected in expectations:
        self.assertEqual(sound_syllable(text), expected)