Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 57 additions & 28 deletions pythainlp/util/syllable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
Syllable tools
"""

import re

from pythainlp import thai_consonants, thai_tonemarks
Expand All @@ -23,9 +24,7 @@
thai_consonants_all = list(thai_consonants)
thai_consonants_all.remove("อ")

_temp = list(
"".join(["".join(v) for v in spelling_class.values()])
)
_temp = list("".join(["".join(v) for v in spelling_class.values()]))
not_spelling_class = [j for j in thai_consonants_all if j not in _temp]

# vowel's short sound
Expand All @@ -37,6 +36,7 @@
# These spelling consonants are live syllables.
for i in ["กง", "กน", "กม", "เกย", "เกอว"]:
_check_1.extend(spelling_class[i])

# These spelling consonants are dead syllables.
_check_2 = spelling_class["กก"] + spelling_class["กบ"] + spelling_class["กด"]

Expand All @@ -54,6 +54,7 @@
"high": thai_high_aspirates + thai_high_irregular,
}
thai_initial_consonant_to_type = {}

for k, v in thai_initial_consonant_type.items():
for i in v:
thai_initial_consonant_to_type[i] = k
Expand All @@ -67,7 +68,7 @@ def sound_syllable(syllable: str) -> str:
The syllable is a live syllable or dead syllable.

:param str syllable: Thai syllable
:return: syllable's type (live or dead)
:return: syllable's type ("live" or "dead")
:rtype: str

:Example:
Expand All @@ -81,56 +82,78 @@ def sound_syllable(syllable: str) -> str:
print(sound_syllable("เลข"))
# output: dead
"""
# if len of syllable < 2
if len(syllable) < 2:
return "dead"

# get consonants
consonants = [i for i in syllable if i in list(thai_consonants_all)]
if (
(len(consonants) == 0)
and ("อ" in syllable)
and any((c in set("เ")) for c in syllable)
and (len(syllable) == 2)
):
return "live"

# get spelling consonants
spelling_consonant = consonants[-1]
# if len of syllable < 2
if len(syllable) < 2:
return "dead"
elif (spelling_consonant in _check_2) and (
if (spelling_consonant in _check_2) and (
any((c in set("าีืแูาเโ")) for c in syllable) is False
and any((c in set("ำใไ")) for c in syllable) is False
and bool(pattern.search(syllable)) is not True
):
return "dead"
elif any((c in set("าีืแูาโ")) for c in syllable): # in syllable:

if any((c in set("าีืแูาโ")) for c in syllable): # in syllable:
if (
spelling_consonant in _check_1
and bool(re_short.search(syllable)) is not True
):
return "live"
elif (

if (
spelling_consonant != syllable[-1]
and bool(re_short.search(syllable)) is not True
):
return "live"
elif spelling_consonant in _check_2:

if spelling_consonant in _check_2:
return "dead"
elif bool(re_short.search(syllable)) or any(

if bool(re_short.search(syllable)) or any(
(c in set(short)) for c in syllable
):
return "dead"

return "live"
elif any((c in set("ำใไ")) for c in syllable):

if any((c in set("ำใไ")) for c in syllable):
return "live" # if these vowel's long sounds are live syllables
elif bool(pattern.search(syllable)): # if it is เ-า

if bool(pattern.search(syllable)): # if it is เ-า
return "live"
elif spelling_consonant in _check_1:

if spelling_consonant in _check_1:
if (
bool(re_short.search(syllable))
or any((c in set(short)) for c in syllable)
) and len(consonants) < 2:
return "dead"

if syllable[-1] in set(short):
return "dead"

return "live"
elif bool(

if bool(
re_short.search(syllable)
) or any( # if vowel's short sound is found
(c in set(short)) for c in syllable
): # consonant in short
return "dead"
else:
return "dead"

return "dead"


def syllable_open_close_detector(syllable: str) -> str:
Expand All @@ -155,10 +178,13 @@ def syllable_open_close_detector(syllable: str) -> str:
# output: open
"""
consonants = [i for i in syllable if i in list(thai_consonants)]

if len(consonants) < 2:
return "open"
elif len(consonants) == 2 and consonants[-1] == "อ":

if len(consonants) == 2 and consonants[-1] == "อ":
return "open"

return "close"


Expand Down Expand Up @@ -186,27 +212,31 @@ def syllable_length(syllable: str) -> str:
consonants = [i for i in syllable if i in list(thai_consonants)]
if len(consonants) <= 3 and any((c in set(short)) for c in syllable):
return "short"
elif bool(re_short.search(syllable)):

if bool(re_short.search(syllable)):
return "short"
else:
return "long"

return "long"


def _tone_mark_detector(syllable: str) -> str:
    """
    Return the first tone mark found in a syllable.

    :param str syllable: Thai syllable
    :return: the first character of the syllable that is contained in
        ``thai_tonemarks``, or an empty string if there is none
    :rtype: str
    """
    # Only the first tone mark is ever used, so stop scanning at the
    # first match instead of collecting every match into a list.
    return next((ch for ch in syllable if ch in thai_tonemarks), "")


def _check_sonorant_syllable(syllable: str) -> bool:
    """
    Check whether the last low sonorant of a syllable is one of its
    last two consonants.

    :param str syllable: Thai syllable
    :return: True if the last character of the syllable found in
        ``thai_low_sonorants`` equals the last or the second-to-last
        character found in ``thai_consonants``; False otherwise,
        including when the syllable contains no low sonorant
    :rtype: bool
    """
    sonorants = [ch for ch in syllable if ch in thai_low_sonorants]
    if not sonorants:
        # Without this guard, sonorants[-1] raises IndexError for
        # syllables that have no low sonorant at all.
        return False

    consonants = [ch for ch in syllable if ch in thai_consonants]
    # Slicing (instead of indexing [-2] and [-1]) stays safe when fewer
    # than two consonants are present.
    return sonorants[-1] in consonants[-2:]


Expand Down Expand Up @@ -248,9 +278,7 @@ def tone_detector(syllable: str) -> str:
initial_consonant_type = thai_initial_consonant_to_type[initial_consonant]
# r for store value
r = ""
if len(consonants) > 1 and (
initial_consonant in ("อ", "ห")
):
if len(consonants) > 1 and (initial_consonant in ("อ", "ห")):
consonant_ending = _check_sonorant_syllable(syllable)
if (
initial_consonant == "อ"
Expand Down Expand Up @@ -325,4 +353,5 @@ def tone_detector(syllable: str) -> str:
r = "m"
elif initial_consonant_type == "high" and s == "live":
r = "r"

return r
14 changes: 12 additions & 2 deletions tests/core/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,9 +680,14 @@ def test_sound_syllable(self):
("เพราะ", "dead"),
("เกาะ", "dead"),
("แคะ", "dead"),
("ประ", "dead"),
]
for i, j in test:
self.assertEqual(sound_syllable(i), j)
self.assertEqual(
sound_syllable(i),
j,
f"{i} should be determined to be a '{j}' syllable."
)

def test_tone_detector(self):
data = [
Expand Down Expand Up @@ -710,9 +715,14 @@ def test_tone_detector(self):
("f", "ผู้"),
("h", "ครับ"),
("f", "ค่ะ"),
("m", "เอ"),
]
for i, j in data:
self.assertEqual(tone_detector(j), i)
self.assertEqual(
tone_detector(j),
i,
f"{j} should be determined to be a '{i}' tone."
)

def test_syllable_length(self):
self.assertEqual(syllable_length("มาก"), "long")
Expand Down
Loading