Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions pythainlp/transliterate/iso_11940.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,18 @@
}

_punctuation_and_digits = {
# ฯ can have two meanings in ISO 11940.
# If it is used for abbreviation, it is paiyan noi.
# If it is used for sentence termination, it is angkhan diao.
# Without semantic analysis, the two cannot be distinguished from each other.
# In this simple implementation, we decided to always treat ฯ as paiyan noi.
# We commented out the angkhan diao line to remove it from the dictionary
# and avoid having duplicate keys.
"ๆ": "«",
"ฯ": "ǂ",
"ฯ": "ǂ", # paiyan noi: U+01C2 ǂ Alveolar Click; ICU uses ‡ (double dagger)
"๏": "§",
"ฯ": "ǀ",
"๚": "ǁ",
# "ฯ": "ǀ", # angkhan diao: U+01C0 ǀ Dental Click; ICU uses | (vertical bar)
"๚": "ǁ", # angkhan khu: U+01C1 ǁ Lateral Click; ICU uses || (two vertical bars)
"๛": "»",
"๐": "0",
"๑": "1",
Expand All @@ -130,19 +137,19 @@
**_tone_marks,
**_punctuation_and_digits,
}
_list_k = _all_dict.keys()
_keys_set = _all_dict.keys()


def transliterate(word: str) -> str:
"""
Use ISO 11940 for transliteration
:param str word: Thai text to be transliterated.
:return: A string of IPA indicating how the text should be pronounced.
:return: A string indicating how the text should be pronounced, according to ISO 11940.
"""
_new = ""
_str = ""
for i in word:
if i in _list_k:
_new += _all_dict[i]
if i in _keys_set:
_str += _all_dict[i]
else:
_new += i
return _new
_str += i
return _str
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ search = __version__ = "{current_version}"
replace = __version__ = "{new_version}"

[metadata]
description-file = README.md
description_file = README.md

[coverage:run]
source = pythainlp