diff --git a/CHANGELOG.md b/CHANGELOG.md index 346690f46..0bdcc1871 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,12 +11,16 @@ Notable changes between versions. - For full release notes, see: - For detailed commit changes, see: - (select tags to compare) + (select tags to compare) -## Version 5.1.1 -> Dev +## Version 5.1.2 -> Dev [WIP] +## Version 5.1.1 -> 5.1.2 + +- Update romanize docs and keep space #1110 + ## Version 5.1.0 -> 5.1.1 - PR Description: Refactor thai_consonants_all to Use set in syllable.py #1087 diff --git a/CITATION.cff b/CITATION.cff index 58a55563f..1c5d3dee0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -45,5 +45,5 @@ keywords: - "Thai language" - "Thai NLP" license: Apache-2.0 -version: 5.1.1 -date-released: "2025-03-31" +version: 5.1.2 +date-released: "2025-05-09" diff --git a/README.md b/README.md index cebd95f59..f26f1d0cf 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ pip install pythainlp | Version | Description | Status | |:------:|:--:|:------:| -| [5.1.1](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) | +| [5.1.2](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) | | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.2 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/1080) | ## Getting Started diff --git a/README_TH.md b/README_TH.md index 0e155a549..46e047cad 100644 --- a/README_TH.md +++ b/README_TH.md @@ -26,7 +26,7 @@ pip install pythainlp | รุ่น | คำอธิบาย | สถานะ | |:------:|:--:|:------:| -| [5.1.1](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) | +| [5.1.2](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) | | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | 
Release Candidate for 5.2 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/1080) | ติดตามพวกเราบน [PyThaiNLP Facebook page](https://www.facebook.com/pythainlp/) เพื่อรับข่าวสารเพิ่มเติม diff --git a/codemeta.json b/codemeta.json index f27f9bd22..5c30f119f 100644 --- a/codemeta.json +++ b/codemeta.json @@ -3,7 +3,7 @@ "@type": "SoftwareSourceCode", "name": "PyThaiNLP", "description": "Thai Natural Language Processing in Python", - "version": "5.1.1", + "version": "5.1.2", "author": [ { "@type": "Person", diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py index 808f3fd2d..b127f7541 100644 --- a/pythainlp/__init__.py +++ b/pythainlp/__init__.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: 2016-2025 PyThaiNLP Project # SPDX-FileType: SOURCE # SPDX-License-Identifier: Apache-2.0 -__version__ = "5.1.1" +__version__ = "5.1.2" thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars diff --git a/pythainlp/transliterate/core.py b/pythainlp/transliterate/core.py index d5b656ff2..315343661 100644 --- a/pythainlp/transliterate/core.py +++ b/pythainlp/transliterate/core.py @@ -14,17 +14,19 @@ def romanize( fallback_engine: str = DEFAULT_ROMANIZE_ENGINE, ) -> str: """ - This function renders Thai words in the Latin alphabet or "romanization", + This function renders a Thai word in the Latin alphabet or "romanization", using the Royal Thai General System of Transcription (RTGS) [#rtgs_transcription]_. RTGS is the official system published by the Royal Institute of Thailand. (Thai: ถอดเสียงภาษาไทยเป็นอักษรละติน) - :param str text: Thai text to be romanized + :param str text: A Thai word to be romanized. \ + The input should not include whitespace because \ + the function supports subwords by splitting on whitespace. :param str engine: One of 'royin' (default), 'thai2rom', 'thai2rom_onnx, 'tltk', and 'lookup'. See more in options for engine section. 
:param str fallback_engine: If engine equals 'lookup', use `fallback_engine` for words that are not in the transliteration dict. No effect on other engines. Default to 'royin'. - :return: A string of Thai words rendered in the Latin alphabet. + :return: A string of a Thai word rendered in the Latin alphabet. :rtype: str :Options for engines: @@ -53,6 +55,9 @@ def romanize( romanize("ภาพยนตร์", engine="royin") # output: 'phapn' + romanize("รส ดี", engine="royin") # subwords + # output: 'rot di' + romanize("ภาพยนตร์", engine="thai2rom") # output: 'phapphayon' @@ -87,9 +92,9 @@ def select_romanize_engine(engine: str): else: rom_engine = select_romanize_engine(engine) trans_word = [] - for word in text.split(' '): - trans_word.append(rom_engine(word)) - new_word = ''.join(trans_word) + for subword in text.split(' '): + trans_word.append(rom_engine(subword)) + new_word = ' '.join(trans_word) return new_word diff --git a/setup.cfg b/setup.cfg index de9810524..213d01c9a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.1.1 +current_version = 5.1.2 commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))? diff --git a/setup.py b/setup.py index 42c6bddb7..1002c40ec 100644 --- a/setup.py +++ b/setup.py @@ -150,7 +150,7 @@ setup( name="pythainlp", - version="5.1.1", + version="5.1.2", description="Thai Natural Language Processing library", long_description=LONG_DESC, long_description_content_type="text/markdown",