From 9c3f09fd385c21af142b371a736760b4d340c92d Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 13:56:46 +0700
Subject: [PATCH 01/21] Add tltk

---
 docker_requirements.txt         |  3 ++-
 docs/notes/installation.rst     |  1 +
 pythainlp/spell/core.py         |  4 ++++
 pythainlp/spell/tltk.py         |  5 +++++
 pythainlp/tag/pos_tag.py        | 23 +++++++++++++++++++++--
 pythainlp/tag/tltk.py           |  9 +++++++++
 pythainlp/tokenize/core.py      |  9 +++++++++
 pythainlp/tokenize/tltk.py      | 18 ++++++++++++++++++
 pythainlp/transliterate/core.py | 23 +++++++++++++++++++++--
 pythainlp/transliterate/tltk.py | 17 +++++++++++++++++
 setup.py                        |  4 +++-
 tests/test_spell.py             |  8 ++++++++
 tests/test_tokenize.py          | 12 ++++++++++++
 tests/test_transliterate.py     |  5 +++++
 14 files changed, 135 insertions(+), 6 deletions(-)
 create mode 100644 pythainlp/spell/tltk.py
 create mode 100644 pythainlp/tag/tltk.py
 create mode 100644 pythainlp/tokenize/tltk.py
 create mode 100644 pythainlp/transliterate/tltk.py

diff --git a/docker_requirements.txt b/docker_requirements.txt
index 29b2cab01..7dcc9ca87 100644
--- a/docker_requirements.txt
+++ b/docker_requirements.txt
@@ -24,4 +24,5 @@ pyicu==2.6
 deepcut==0.7.0.0
 h5py==2.10.0
 tensorflow==2.4.0
-pandas==0.24
\ No newline at end of file
+pandas==0.24
+tltk==1.3.8
\ No newline at end of file
diff --git a/docs/notes/installation.rst b/docs/notes/installation.rst
index d48354d6e..9b5669ae8 100644
--- a/docs/notes/installation.rst
+++ b/docs/notes/installation.rst
@@ -27,6 +27,7 @@ where ``extras`` can be
   - ``mt5`` (to mt5 models for Thai text summarizer)
   - ``wordnet`` (to support wordnet)
   - ``spell`` (to support phunspell & symspellpy)
+  - ``tltk`` (to support tltk)
   - ``full`` (install everything)
 
 For dependency details, look at `extras` variable in `setup.py `_.
diff --git a/pythainlp/spell/core.py b/pythainlp/spell/core.py
index a749fa61c..ac515b576 100644
--- a/pythainlp/spell/core.py
+++ b/pythainlp/spell/core.py
@@ -22,6 +22,7 @@ def spell(word: str, engine: str = "pn") -> List[str]:
         * *pn* - Peter Norvig's algorithm [#norvig_spellchecker]_ (default)
         * *phunspell* - A spell checker utilizing spylls a port of Hunspell.
         * *symspellpy* - symspellpy is a Python port of SymSpell v6.5.
+        * *tltk* - wrapper for `TLTK `_.,
 
     :return: list of possible correct words within 1 or 2 edit distance and
              sorted by frequency of word occurrences in the spelling dictionary
@@ -39,6 +40,9 @@ def spell(word: str, engine: str = "pn") -> List[str]:
         spell("เส้นตรบ")
         # output: ['เส้นตรง']
 
+        spell("เส้นตรบ", engine="tltk")
+        # output: ['เส้นตรง']
+
         spell("ครัช")
         # output: ['ครับ', 'ครัว', 'รัช', 'ครัม', 'ครัน', 'วรัช', 'ครัส',
         #  'ปรัช', 'บรัช', 'ครัง', 'คัช', 'คลัช', 'ครัย', 'ครัด']
diff --git a/pythainlp/spell/tltk.py b/pythainlp/spell/tltk.py
new file mode 100644
index 000000000..1aabc3f55
--- /dev/null
+++ b/pythainlp/spell/tltk.py
@@ -0,0 +1,5 @@
+from tltk.nlp import spell_candidates
+from typing import List
+
+def spell(text: str) -> List[str]:
+    return spell_candidates(text)
diff --git a/pythainlp/tag/pos_tag.py b/pythainlp/tag/pos_tag.py
index 97f1a6d70..ce2338481 100644
--- a/pythainlp/tag/pos_tag.py
+++ b/pythainlp/tag/pos_tag.py
@@ -15,6 +15,8 @@ def pos_tag(
         * *wangchanberta* - wangchanberta model (support lst20 corpus only \
             and it supports a string only. if you input a list of word, \
             it will convert list word to a string.
+        * *tltk* - TLTK: Thai Language Toolkit (supports the TNC corpus only. \
+            If another corpus is chosen, it will fall back to the TNC corpus.)
     :param str corpus: the corpus that used to create the language model for tagger
         * *lst20* - `LST20 `_ corpus \
            by National Electronics and Computer Technology Center, Thailand
         * *lst20_ud* - LST20 text, with tags mapped to Universal POS tags
         * *orchid* - `ORCHID `_ corpus, text from Thai academic articles \
            (default)
         * *orchid_ud* - ORCHID text, with tags mapped to Universal POS tags
         * *pud* - `Parallel Universal Dependencies (PUD)\
            `_ \
            treebanks, natively use Universal POS tags
+        * *tnc* - Thai National Corpus (supports the tltk engine only)
     :return: a list of tuples (word, POS tag)
     :rtype: list[tuple[str, str]]
@@ -89,13 +92,25 @@ def pos_tag(
     if not words:
         return []
 
-    if engine == "perceptron":
+    _support_corpus = ["lst20", "lst20_ud", "orchid", "orchid_ud", "pud"]
+
+    if engine == "perceptron" and corpus in _support_corpus:
         from pythainlp.tag.perceptron import tag as tag_
     elif engine == "wangchanberta" and corpus == "lst20":
         from pythainlp.wangchanberta.postag import pos_tag as tag_
         words = ''.join(words)
-    else:  # default, use "unigram" ("old") engine
+    elif engine == "tltk":
+        from pythainlp.tag.tltk import pos_tag as tag_
+        corpus = "tnc"
+    elif engine == "unigram" and corpus in _support_corpus:  # default
         from pythainlp.tag.unigram import tag as tag_
+    else:
+        raise NotImplemented(
+            "pos_tag does not support {0} engine or {1} corpus.".format(
+                engine,
+                corpus
+            )
+        )
 
     word_tags = tag_(words, corpus=corpus)
 
@@ -114,6 +129,9 @@ def pos_tag_sents(
     :param str engine:
         * *perceptron* - perceptron tagger (default)
         * *unigram* - unigram tagger
+        * *wangchanberta* - wangchanberta model (supports the lst20 corpus only)
+        * *tltk* - TLTK: Thai Language Toolkit (supports the TNC corpus only. \
+            If another corpus is chosen, it will fall back to the TNC corpus.)
     :param str corpus: the corpus that used to create the language model for tagger
         * *lst20* - `LST20 `_ corpus \
            by National Electronics and Computer Technology Center, Thailand
         * *lst20_ud* - LST20 text, with tags mapped to Universal POS tags
         * *orchid* - `ORCHID `_ corpus, text from Thai academic articles \
            (default)
         * *orchid_ud* - ORCHID text, with tags mapped to Universal POS tags
         * *pud* - `Parallel Universal Dependencies (PUD)\
            `_ \
            treebanks, natively use Universal POS tags
+        * *tnc* - Thai National Corpus (supports the tltk engine only)
     :return: a list of lists of tuples (word, POS tag)
     :rtype: list[list[tuple[str, str]]]
diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
new file mode 100644
index 000000000..371075e56
--- /dev/null
+++ b/pythainlp/tag/tltk.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+from tltk.nlp import pos_tag_wordlist
+from typing import List, Tuple
+
+
+def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]:
+    if corpus != "tnc":
+        raise NotImplemented("tltk not support {0} corpus.".format(0))
+    return pos_tag_wordlist(words)
diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py
index c5a501230..1cb2f43b3 100644
--- a/pythainlp/tokenize/core.py
+++ b/pythainlp/tokenize/core.py
@@ -86,6 +86,8 @@ def word_tokenize(
         and combining tokens that are parts of the same named-entity.
         * *sefr_cut* - wrapper for `SEFR CUT `_.,
+        * *tltk* - wrapper for
+          `TLTK `_.,
 
     :Note:
         - The parameter **custom_dict** can be provided as an argument \
@@ -182,6 +184,10 @@ def word_tokenize(
     elif engine == "sefr_cut":
         from pythainlp.tokenize.sefr_cut import segment
 
+        segments = segment(text)
+    elif engine == "tltk":
+        from pythainlp.tokenize.tltk import segment
+
         segments = segment(text)
     else:
         raise ValueError(
@@ -314,6 +320,7 @@ def subword_tokenize(
         * *wangchanberta* - SentencePiece from wangchanberta model.
         * *dict* - newmm word tokenizer with a syllable dictionary
         * *ssg* - CRF syllable segmenter for Thai
+        * *tltk* - syllable tokenizer from tltk
 
     :Example:
 
@@ -376,6 +383,8 @@ def subword_tokenize(
         )
     elif engine == "ssg":
         from pythainlp.tokenize.ssg import segment
+    elif engine == "tltk":
+        from pythainlp.tokenize.tltk import syllable_tokenize as segment
     else:
         raise ValueError(
             f"""Tokenizer \"{engine}\" not found.
diff --git a/pythainlp/tokenize/tltk.py b/pythainlp/tokenize/tltk.py
new file mode 100644
index 000000000..e5f8da8d1
--- /dev/null
+++ b/pythainlp/tokenize/tltk.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+from typing import List
+from tltk.nlp import word_segment as tltk_segment
+from tltk.nlp import syl_segment
+
+
+def segment(text: str) -> List[str]:
+    if not text or not isinstance(text, str):
+        return []
+    _temp = tltk_segment(text).replace("<s/>","").replace("<u/>"," ")
+    return _temp.split('|')
+
+
+def syllable_tokenize(text: str) -> List[str]:
+    if not text or not isinstance(text, str):
+        return []
+    _temp = syl_segment(text)
+    return _temp.split('~')
diff --git a/pythainlp/transliterate/core.py b/pythainlp/transliterate/core.py
index 5460eadd7..02c59ead7 100644
--- a/pythainlp/transliterate/core.py
+++ b/pythainlp/transliterate/core.py
@@ -23,6 +23,7 @@ def romanize(text: str, engine: str = DEFAULT_ROMANIZE_ENGINE) -> str:
         Transcription issued by Royal Institute of Thailand.
         * *thai2rom* - a deep learning-based Thai romanization engine
         (require PyTorch).
+        * *tltk* - TLTK: Thai Language Toolkit
 
     :Example:
     ::
         romanize("สามารถ")
         # output: 'samat'
 
         romanize("สามารถ", engine="thai2rom")
         # output: 'samat'
 
+        romanize("สามารถ", engine="tltk")
+        # output: 'samat'
+
         romanize("ภาพยนตร์", engine="royin")
         # output: 'phapn'
 
@@ -47,6 +51,8 @@ def romanize(text: str, engine: str = DEFAULT_ROMANIZE_ENGINE) -> str:
     if engine == "thai2rom":
         from pythainlp.transliterate.thai2rom import romanize
+    elif engine == "tltk":
+        from pythainlp.transliterate.tltk import romanize
     else:  # use default engine "royin"
         from pythainlp.transliterate.royin import romanize
 
@@ -67,10 +73,13 @@ def transliterate(
     :rtype: str
 
     :Options for engines:
-        * *icu* - pyicu, based on International Components for Unicode (ICU)
-        * *ipa* - epitran, output is International Phonetic Alphabet (IPA)
         * *thaig2p* - (default) Thai Grapheme-to-Phoneme,
         output is IPA (require PyTorch)
+        * *icu* - pyicu, based on International Components for Unicode (ICU)
+        * *ipa* - epitran, output is International Phonetic Alphabet (IPA)
+        * *tltk_g2p* - Thai Grapheme-to-Phoneme from\
+          `TLTK `_.,
+        * *tltk_ipa* - tltk, output is International Phonetic Alphabet (IPA)
 
     :Example:
     ::
         transliterate("สามารถ")
         # output: 'saːmaːt'
 
        transliterate("สามารถ", engine="thaig2p")
        # output: 's aː ˩˩˦ . m aː t̚ ˥˩'
 
+        transliterate("สามารถ", engine="tltk_ipa")
+        # output: 'saː5.maːt3'
+
+        transliterate("สามารถ", engine="tltk_g2p")
+        # output: 'saa4~maat2'
+
         transliterate("ภาพยนตร์", engine="icu")
         # output: 'p̣hāphyntr̒'
 
@@ -103,6 +118,10 @@ def transliterate(
         from pythainlp.transliterate.pyicu import transliterate
     elif engine == "ipa":
         from pythainlp.transliterate.ipa import transliterate
+    elif engine == "tltk_g2p":
+        from pythainlp.transliterate.tltk import tltk_g2p as transliterate
+    elif engine == "tltk_ipa":
+        from pythainlp.transliterate.tltk import tltk_ipa as transliterate
     else:  # use default engine: "thaig2p"
         from pythainlp.transliterate.thaig2p import transliterate
 
diff --git a/pythainlp/transliterate/tltk.py b/pythainlp/transliterate/tltk.py
new file mode 100644
index 000000000..4e4999320
--- /dev/null
+++ b/pythainlp/transliterate/tltk.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+from tltk.nlp import g2p, th2ipa, th2roman
+
+
+def romanize(text: str) -> str:
+    _temp = th2roman(text)
+    return _temp[:_temp.rfind(" ")].replace("<s/>", "")
+
+
+def tltk_g2p(text: str) -> str:
+    _temp = g2p(text).split("<tr/>")[1].replace("|<s/>","").replace("|", " ")
+    return _temp.replace("<s/>", "")
+
+
+def tltk_ipa(text: str) -> str:
+    _temp = th2ipa(text)
+    return _temp[:_temp.rfind(" ")].replace("<s/>", "")
diff --git a/setup.py b/setup.py
index e0597f58b..e7c4fb5ab 100644
--- a/setup.py
+++ b/setup.py
@@ -73,6 +73,7 @@
         "spylls>=0.1.5",
         "symspellpy>=6.7.0"
     ],
+    "tltk": ["tltk>=1.3.8"],
     "full": [
         "PyYAML>=5.3.1",
         "attacut>=1.0.4",
@@ -94,7 +95,8 @@
         "sefr_cut>=1.1",
         "phunspell>=0.1.6",
         "spylls>=0.1.5",
-        "symspellpy>=6.7.0"
+        "symspellpy>=6.7.0",
+        "tltk>=1.3.8",
     ],
 }
diff --git a/tests/test_spell.py b/tests/test_spell.py
index 2183f5594..e59474a06 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -40,6 +40,14 @@ def test_spell(self):
         self.assertIsInstance(result, list)
         self.assertGreater(len(result), 0)
 
+        result = spell("เน้ร", engine="tltk")
+        self.assertIsInstance(result, list)
+        self.assertGreater(len(result), 0)
+
+        result = spell("เกสมร์", engine="tltk")
+        self.assertIsInstance(result, list)
+        self.assertGreater(len(result), 0)
+
     def test_word_correct(self):
         self.assertEqual(correct(None), "")
         self.assertEqual(correct(""), "")
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index cc90634fc..fd6cc22ae 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -319,6 +319,17 @@ def test_subword_tokenize(self):
         self.assertFalse(
             " " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
         )
+        self.assertEqual(subword_tokenize(None, engine="tltk"), [])
+        self.assertEqual(subword_tokenize("", engine="tltk"), [])
+        self.assertIsInstance(
+            subword_tokenize("สวัสดิีดาวอังคาร", engine="tltk"), list
+        )
+        self.assertFalse(
+            "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="tltk")
+        )
+        self.assertIsInstance(
+            subword_tokenize("โควิด19", engine="tltk"), list
+        )
         with self.assertRaises(ValueError):
             subword_tokenize("นกแก้ว", engine="XX")  # engine does not exist
 
@@ -360,6 +371,7 @@ def test_word_tokenize(self):
         self.assertIsNotNone(word_tokenize(self.text_1, engine="nercut"))
         self.assertIsNotNone(word_tokenize(self.text_1, engine="newmm"))
         self.assertIsNotNone(word_tokenize(self.text_1, engine="sefr_cut"))
+        self.assertIsNotNone(word_tokenize(self.text_1, engine="tltk"))
         with self.assertRaises(ValueError):
             word_tokenize("หมอนทอง", engine="XX")  # engine does not exist
 
diff --git a/tests/test_transliterate.py b/tests/test_transliterate.py
index 2d1ca7a91..4a99b1676 100644
--- a/tests/test_transliterate.py
+++ b/tests/test_transliterate.py
@@ -57,6 +57,7 @@ def test_romanize(self):
         self.assertEqual(romanize(None), "")
         self.assertEqual(romanize(""), "")
         self.assertEqual(romanize("แมว"), "maeo")
+        self.assertEqual(romanize("แมว", engine="tltk"), "maeo")
 
     def test_romanize_royin_basic(self):
         for word in _BASIC_TESTS:
@@ -136,6 +137,10 @@ def test_transliterate(self):
         self.assertEqual(transliterate("คน", engine="ipa"), "kʰon")
         self.assertIsNotNone(transliterate("คน", engine="thaig2p"))
         self.assertIsNotNone(transliterate("แมว", engine="thaig2p"))
+        self.assertIsNotNone(transliterate("คน", engine="tltk_g2p"))
+        self.assertIsNotNone(transliterate("แมว", engine="tltk_g2p"))
+        self.assertIsNotNone(transliterate("คน", engine="tltk_ipa"))
+        self.assertIsNotNone(transliterate("แมว", engine="tltk_ipa"))
         self.assertIsNotNone(trans_list("คน"))
         self.assertIsNotNone(xsampa_list("คน"))
 

From 7374cbbe727a6d6045073a1d76e699a092b2dd3f Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 14:39:19 +0700
Subject: [PATCH 02/21] Update tltk.py

---
 pythainlp/tag/tltk.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
index 371075e56..e8f68bada 100644
--- a/pythainlp/tag/tltk.py
+++ b/pythainlp/tag/tltk.py
@@ -1,9 +1,10 @@
 # -*- coding: utf-8 -*-
-from tltk.nlp import pos_tag_wordlist
+from tltk import nlp
 from typing import List, Tuple
 
+nlp.pos_load()
 
 def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]:
     if corpus != "tnc":
         raise NotImplemented("tltk not support {0} corpus.".format(0))
-    return pos_tag_wordlist(words)
+    return nlp.pos_tag_wordlist(words)

From 45bb596fd88a5b52ab5aae6c4c7621d2e0d3c348 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 14:41:35 +0700
Subject: [PATCH 03/21] Fixed PEP8

---
 pythainlp/spell/tltk.py         | 1 +
 pythainlp/tag/tltk.py           | 1 +
 pythainlp/tokenize/tltk.py      | 2 +-
 pythainlp/transliterate/tltk.py | 2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pythainlp/spell/tltk.py b/pythainlp/spell/tltk.py
index 1aabc3f55..6a739b837 100644
--- a/pythainlp/spell/tltk.py
+++ b/pythainlp/spell/tltk.py
@@ -1,5 +1,6 @@
 from tltk.nlp import spell_candidates
 from typing import List
 
+
 def spell(text: str) -> List[str]:
     return spell_candidates(text)
diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
index e8f68bada..c8243fc76 100644
--- a/pythainlp/tag/tltk.py
+++ b/pythainlp/tag/tltk.py
@@ -4,6 +4,7 @@
 
 nlp.pos_load()
 
+
 def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]:
     if corpus != "tnc":
         raise NotImplemented("tltk not support {0} corpus.".format(0))
diff --git a/pythainlp/tokenize/tltk.py b/pythainlp/tokenize/tltk.py
index e5f8da8d1..1ecd9c238 100644
--- a/pythainlp/tokenize/tltk.py
+++ b/pythainlp/tokenize/tltk.py
@@ -7,7 +7,7 @@
 def segment(text: str) -> List[str]:
     if not text or not isinstance(text, str):
         return []
-    _temp = tltk_segment(text).replace("<s/>","").replace("<u/>"," ")
+    _temp = tltk_segment(text).replace("<s/>", "").replace("<u/>", " ")
     return _temp.split('|')
 
 
diff --git a/pythainlp/transliterate/tltk.py b/pythainlp/transliterate/tltk.py
index 4e4999320..8795ce756 100644
--- a/pythainlp/transliterate/tltk.py
+++ b/pythainlp/transliterate/tltk.py
@@ -8,7 +8,7 @@
 
 
 def tltk_g2p(text: str) -> str:
-    _temp = g2p(text).split("<tr/>")[1].replace("|<s/>","").replace("|", " ")
+    _temp = g2p(text).split("<tr/>")[1].replace("|<s/>", "").replace("|", " ")
     return _temp.replace("<s/>", "")
 

From 42002c7f67c75f97092d96c18b6e03ac3b0ddf47 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 15:00:37 +0700
Subject: [PATCH 04/21] Update pos_tag.py

---
 pythainlp/tag/pos_tag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pythainlp/tag/pos_tag.py b/pythainlp/tag/pos_tag.py
index ce2338481..8cd007987 100644
--- a/pythainlp/tag/pos_tag.py
+++ b/pythainlp/tag/pos_tag.py
@@ -105,7 +105,7 @@ def pos_tag(
     elif engine == "unigram" and corpus in _support_corpus:  # default
         from pythainlp.tag.unigram import tag as tag_
     else:
-        raise NotImplemented(
+        raise ValueError(
             "pos_tag does not support {0} engine or {1} corpus.".format(
                 engine,
                 corpus

From 7c4dedf3cb80f090ecc0cca14f006179eb1965ee Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 15:34:06 +0700
Subject: [PATCH 05/21] Add pythainlp.tag.tltk.get_ner

---
 docs/api/tag.rst      |  1 +
 pythainlp/tag/tltk.py | 78 ++++++++++++++++++++++++++++++++++++++++++-
 tests/test_tag.py     | 24 +++++++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/docs/api/tag.rst b/docs/api/tag.rst
index 87cf0a766..2ab526c60 100644
--- a/docs/api/tag.rst
+++ b/docs/api/tag.rst
@@ -232,6 +232,7 @@ Modules
 .. autofunction:: chunk_parse
 .. autoclass:: pythainlp.tag.named_entity.ThaiNameTagger
    :members: get_ner
+.. autofunction:: pythainlp.tag.tltk.get_ner
 
 Tagger Engines
 --------------
diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
index c8243fc76..aaff76206 100644
--- a/pythainlp/tag/tltk.py
+++ b/pythainlp/tag/tltk.py
@@ -1,11 +1,87 @@
 # -*- coding: utf-8 -*-
+from typing import List, Tuple, Union
 from tltk import nlp
-from typing import List, Tuple
+from pythainlp.tokenize import word_tokenize
 
 nlp.pos_load()
+nlp.ner_load()
 
 
 def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]:
     if corpus != "tnc":
         raise NotImplemented("tltk not support {0} corpus.".format(0))
     return nlp.pos_tag_wordlist(words)
+
+
+def get_ner(
+    text: str, pos: bool = True, tag: bool = False
+    ) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]], str]:
+    """
+    Named-entity recognizer from **TLTK**
+
+    This function tags named entities in the text in the IOB format.
+
+    :param str text: text in Thai to be tagged
+    :param bool pos: To include POS tags in the results (`True`) or
+                     exclude (`False`). The default value is `True`.
+    :param bool tag: output the result as HTML-like tags.
+    :return: a list of tuples associated with tokenized word, POS tag,
+             and NER tag (if the parameter `pos` is specified as `True`),
+             or an HTML-like tag string (if the parameter `tag` is
+             specified as `True`).
+             Otherwise, return a list of tuples associated with tokenized
+             word and NER tag
+    :rtype: Union[list[tuple[str, str]], list[tuple[str, str, str]], str]
+
+    :Example:
+
+        >>> from pythainlp.tag.tltk import get_ner
+        >>> get_ner("เขาเรียนที่โรงเรียนนางรอง")
+        [('เขา', 'PRON', 'O'),
+        ('เรียน', 'VERB', 'O'),
+        ('ที่', 'SCONJ', 'O'),
+        ('โรงเรียน', 'NOUN', 'B-L'),
+        ('นางรอง', 'VERB', 'I-L')]
+        >>> get_ner("เขาเรียนที่โรงเรียนนางรอง", pos=False)
+        [('เขา', 'O'),
+        ('เรียน', 'O'),
+        ('ที่', 'O'),
+        ('โรงเรียน', 'B-L'),
+        ('นางรอง', 'I-L')]
+        >>> get_ner("เขาเรียนที่โรงเรียนนางรอง", tag=True)
+        'เขาเรียนที่<L>โรงเรียนนางรอง</L>'
+    """
+    if not text:
+        return []
+    list_word = []
+    for i in word_tokenize(text, engine="tltk"):
+        if i == " ":
+            i = "<u/>"
+        list_word.append(i)
+    _pos = nlp.pos_tag_wordlist(list_word)
+    sent_ner = nlp.ner(_pos)
+    if sent_ner[-1][0] == '<s/>':
+        del sent_ner[-1]
+    if tag:
+        temp = ""
+        sent = ""
+        for idx, (word, pos, ner) in enumerate(sent_ner):
+            if ner.startswith("B-") and temp != "":
+                sent += "</" + temp + ">"
+                temp = ner[2:]
+                sent += "<" + temp + ">"
+            elif ner.startswith("B-"):
+                temp = ner[2:]
+                sent += "<" + temp + ">"
+            elif ner == "O" and temp != "":
+                sent += "</" + temp + ">"
+                temp = ""
+            sent += word
+
+            if idx == len(sent_ner) - 1 and temp != "":
+                sent += "</" + temp + ">"
+
+        return sent
+    if pos == False:
+        return [(word, ner) for word, pos, ner in sent_ner]
+    return sent_ner
diff --git a/tests/test_tag.py b/tests/test_tag.py
index 854c559e8..e9b838d27 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -10,6 +10,7 @@
     pos_tag,
     pos_tag_sents,
     unigram,
+    tltk,
 )
 from pythainlp.tag.locations import tag_provinces
 from pythainlp.tag.named_entity import ThaiNameTagger
@@ -102,6 +103,9 @@ def test_pos_tag(self):
         self.assertIsNotNone(
             pos_tag(tokens, engine="wangchanberta", corpus="lst20_ud")
         )
+        self.assertIsNotNone(
+            pos_tag(tokens, engine="tltk")
+        )
 
         self.assertEqual(pos_tag_sents(None), [])
         self.assertEqual(pos_tag_sents([]), [])
@@ -355,3 +359,23 @@ def test_ner(self):
         #         ("เช้า", "I-TIME"),
         #     ],
         # )
+
+    def test_tltk_ner(self):
+        self.assertEqual(tltk.get_ner(""), [])
+        self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า"))
+        self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า", pos=False))
+        self.assertIsNotNone(
+            tltk.get_ner(
+                """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น
+                วิทยาเขตหนองคาย 112 หมู่ 7 บ้านหนองเดิ่น ตำบลหนองกอมเกาะ อำเภอเมือง
+                จังหวัดหนองคาย 43000"""
+            )
+        )
+        self.assertIsNotNone(
+            tltk.get_ner(
+                """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น
+                วิทยาเขตหนองคาย 112 หมู่ 7 บ้านหนองเดิ่น ตำบลหนองกอมเกาะ อำเภอเมือง
+                จังหวัดหนองคาย 43000""",
+                tag=True,
+            )
+        )

From e757235618e630c7bd37e89078f891a5eef1d7e1 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 15:36:38 +0700
Subject: [PATCH 06/21] Update test_tag.py

---
 tests/test_tag.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/test_tag.py b/tests/test_tag.py
index e9b838d27..bbbe10d16 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -100,9 +100,6 @@ def test_pos_tag(self):
         self.assertIsNotNone(
             pos_tag(tokens, engine="wangchanberta", corpus="lst20")
         )
-        self.assertIsNotNone(
-            pos_tag(tokens, engine="wangchanberta", corpus="lst20_ud")
-        )
         self.assertIsNotNone(
             pos_tag(tokens, engine="tltk")
         )
@@ -116,6 +113,10 @@ def test_pos_tag(self):
                 [("แมว", "NCMN"), ("วิ่ง", "VACT")],
             ],
         )
+        with self.assertRaises(ValueError):
+            self.assertIsNotNone(
+                pos_tag(tokens, engine="wangchanberta", corpus="lst20_ud")
+            )
 
 # ### 
pythainlp.tag.PerceptronTagger From f13f0c3d1cef934e6f803c3b2694cc538d696beb Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Thu, 29 Jul 2021 15:38:42 +0700 Subject: [PATCH 07/21] Fixed PEP8 --- pythainlp/tag/tltk.py | 6 ++++-- tests/test_tag.py | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py index aaff76206..a5b5be1ce 100644 --- a/pythainlp/tag/tltk.py +++ b/pythainlp/tag/tltk.py @@ -15,7 +15,9 @@ def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]: def get_ner( text: str, pos: bool = True, tag: bool = False - ) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]], str]: + ) -> Union[ + List[Tuple[str, str]], List[Tuple[str, str, str]], str + ]: """ Named-entity recognizer from **TLTK** @@ -82,6 +84,6 @@ def get_ner( sent += "" return sent - if pos == False: + if pos is False: return [(word, ner) for word, pos, ner in sent_ner] return sent_ner diff --git a/tests/test_tag.py b/tests/test_tag.py index bbbe10d16..d84e8b1f2 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -368,14 +368,12 @@ def test_tltk_ner(self): self.assertIsNotNone( tltk.get_ner( """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น - วิทยาเขตหนองคาย 112 หมู่ 7 บ้านหนองเดิ่น ตำบลหนองกอมเกาะ อำเภอเมือง จังหวัดหนองคาย 43000""" ) ) self.assertIsNotNone( tltk.get_ner( """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น - วิทยาเขตหนองคาย 112 หมู่ 7 บ้านหนองเดิ่น ตำบลหนองกอมเกาะ อำเภอเมือง จังหวัดหนองคาย 43000""", tag=True, ) From e46ef880b4bae14e570ff52d5c667fb44923446b Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Thu, 29 Jul 2021 15:39:30 +0700 Subject: [PATCH 08/21] Update tltk.py --- pythainlp/tag/tltk.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py index a5b5be1ce..b3c534b70 100644 --- a/pythainlp/tag/tltk.py +++ b/pythainlp/tag/tltk.py @@ -15,9 +15,7 @@ def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]: def get_ner( text: str, pos: bool = True, tag: bool = False - ) -> Union[ - List[Tuple[str, str]], List[Tuple[str, str, str]], str - ]: + ) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]]]: """ Named-entity recognizer from **TLTK** From 462ad5274ab0d01413379f695c3b599f0c637676 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Thu, 29 Jul 2021 15:40:24 +0700 Subject: [PATCH 09/21] Update tltk.py --- pythainlp/tag/tltk.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py index b3c534b70..92e6a0bba 100644 --- a/pythainlp/tag/tltk.py +++ b/pythainlp/tag/tltk.py @@ -14,8 +14,10 @@ def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]: def get_ner( - text: str, pos: bool = True, tag: bool = False - ) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]]]: + text: str, + pos: bool = True, + tag: bool = False +) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]], str]: """ Named-entity recognizer from **TLTK** From 2b229823653511911b7c1aff8fe8d175867573ca Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Thu, 29 Jul 2021 16:08:55 +0700 Subject: [PATCH 10/21] Update core.py --- pythainlp/spell/core.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pythainlp/spell/core.py b/pythainlp/spell/core.py index ac515b576..65d6ca54c 100644 --- a/pythainlp/spell/core.py +++ b/pythainlp/spell/core.py @@ -62,6 +62,9 @@ def spell(word: str, 
engine: str = "pn") -> List[str]:
     elif engine == "symspellpy":
         from pythainlp.spell.symspellpy import spell as SPELL_CHECKER
         text_correct = SPELL_CHECKER(word)
+    elif engine == "tltk":
+        from pythainlp.spell.tltk import spell as SPELL_CHECKER
+        text_correct = SPELL_CHECKER(word)
     else:
         text_correct = DEFAULT_SPELL_CHECKER.spell(word)
 

From cab2b4b28fb2bbb933145ab8544511580dd76ca6 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 16:33:10 +0700
Subject: [PATCH 11/21] Add tltk sent_tokenize

---
 pythainlp/tokenize/core.py |  5 +++++
 pythainlp/tokenize/tltk.py | 22 +++++++++++++++++++---
 tests/test_tokenize.py     | 20 ++++++++++++++++++++
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/pythainlp/tokenize/core.py b/pythainlp/tokenize/core.py
index 1cb2f43b3..41f03f2e6 100644
--- a/pythainlp/tokenize/core.py
+++ b/pythainlp/tokenize/core.py
@@ -221,6 +221,7 @@ def sent_tokenize(
         * *whitespace+newline* - split by whitespaces and newline.
         * *whitespace* - split by whitespaces. Specifiaclly, with \
           :class:`regex` pattern  ``r" +"``
+        * *tltk* - split by `TLTK `_.,
 
     :Example:
 
         Split the text based on *whitespace*::
@@ -277,6 +278,10 @@ def sent_tokenize(
         segments = re.split(r" +", text, re.U)
     elif engine == "whitespace+newline":
         segments = text.split()
+    elif engine == "tltk":
+        from pythainlp.tokenize.tltk import sent_tokenize as segment
+
+        segments = segment(text)
     else:
         raise ValueError(
             f"""Tokenizer \"{engine}\" not found.
diff --git a/pythainlp/tokenize/tltk.py b/pythainlp/tokenize/tltk.py
index 1ecd9c238..2199edfa8 100644
--- a/pythainlp/tokenize/tltk.py
+++ b/pythainlp/tokenize/tltk.py
@@ -7,12 +7,28 @@
 def segment(text: str) -> List[str]:
     if not text or not isinstance(text, str):
         return []
-    _temp = tltk_segment(text).replace("<s/>", "").replace("<u/>", " ")
-    return _temp.split('|')
+    text = text.replace(" ", "<u/>")
+    _temp = tltk_segment(text).replace("<u/>", " ").replace("<s/>", "")
+    _temp =_temp.split('|')
+    if _temp[-1] == "":
+        del _temp[-1]
+    return _temp
 
 
 def syllable_tokenize(text: str) -> List[str]:
     if not text or not isinstance(text, str):
         return []
     _temp = syl_segment(text)
-    return _temp.split('~')
+    _temp = _temp.split('~')
+    if _temp[-1] == "":
+        del _temp[-1]
+    return _temp
+
+
+def sent_tokenize(text: str) -> List[str]:
+    text = text.replace(" ", "<u/>")
+    _temp = tltk_segment(text).replace("<u/>", " ").replace("|", "")
+    _temp =_temp.split('<s/>')
+    if _temp[-1] == "":
+        del _temp[-1]
+    return _temp
diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index fd6cc22ae..1b5dd050d 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -21,6 +21,7 @@
     tcc,
     word_tokenize,
     sefr_cut,
+    tltk,
 )
 from pythainlp.tokenize import clause_tokenize as sent_clause_tokenize
 from pythainlp.util import dict_trie
@@ -260,6 +261,15 @@ def test_sent_tokenize(self):
         self.assertIsNotNone(
             sent_tokenize(sent_1, keep_whitespace=False, engine="whitespace",),
         )
+        self.assertIsNotNone(
+            sent_tokenize(sent_1, engine="tltk",),
+        )
+        self.assertIsNotNone(
+            sent_tokenize(sent_2, engine="tltk",),
+        )
+        self.assertIsNotNone(
+            sent_tokenize(sent_3, engine="tltk",),
+        )
         self.assertFalse(
             " "
             in sent_tokenize(
@@ -435,6 +445,16 @@ def test_icu(self):
             ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
         )
 
+    def test_tltk(self):
+        self.assertEqual(tltk.segment(None), [])
+        self.assertEqual(tltk.segment(""), [])
+        self.assertEqual(
+            syllable_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tltk"),
+            ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คน', 'ไทย'],
+        )
+        self.assertEqual(tltk.syllable_tokenize(None), [])
+        self.assertEqual(tltk.syllable_tokenize(""), [])
+
     def test_longest(self):
         self.assertEqual(longest.segment(None), [])
         self.assertEqual(longest.segment(""), [])

From ead158cab2523b5bdec82215d96907c1f57e7337 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 16:34:04 +0700
Subject: [PATCH 12/21] Fixed PEP8

---
 pythainlp/tokenize/tltk.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pythainlp/tokenize/tltk.py b/pythainlp/tokenize/tltk.py
index 63e936b7a..2199edfa8 100644
--- a/pythainlp/tokenize/tltk.py
+++ b/pythainlp/tokenize/tltk.py
@@ -9,7 +9,7 @@ def segment(text: str) -> List[str]:
         return []
     text = text.replace(" ", "<u/>")
     _temp = tltk_segment(text).replace("<u/>", " ").replace("<s/>", "")
-    _temp =_temp.split('|')
+    _temp = _temp.split('|')
     if _temp[-1] == "":
         del _temp[-1]
     return _temp
@@ -28,7 +28,7 @@ def syllable_tokenize(text: str) -> List[str]:
 def sent_tokenize(text: str) -> List[str]:
     text = text.replace(" ", "<u/>")
     _temp = tltk_segment(text).replace("<u/>", " ").replace("|", "")
-    _temp =_temp.split('<s/>')
+    _temp = _temp.split('<s/>')
     if _temp[-1] == "":
         del _temp[-1]
     return _temp

From 86c43cdc1bddc8cf869e293b1deadd5126631410 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 16:47:12 +0700
Subject: [PATCH 13/21] Update test_spell.py

---
 tests/test_spell.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_spell.py b/tests/test_spell.py
index e59474a06..9dd3a25ba 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -44,7 +44,7 @@ def test_spell(self):
         self.assertIsInstance(result, list)
         self.assertGreater(len(result), 0)
 
-        result = spell("เกสมร์", engine="tltk")
+        result = spell("เดก", engine="tltk")
         self.assertIsInstance(result, list)
         self.assertGreater(len(result), 0)
 

From 26317c63c074b7d002234ed914fc1b026676f91f Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 17:01:47 +0700
Subject: [PATCH 14/21] Update test_tokenize.py

---
 tests/test_tokenize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 1b5dd050d..4083e9174 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -449,7 +449,7 @@ def test_tltk(self):
         self.assertEqual(tltk.segment(None), [])
         self.assertEqual(tltk.segment(""), [])
         self.assertEqual(
-            syllable_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tltk"),
+            tltk.syllable_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tltk"),
             ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คน', 'ไทย'],
         )

From 44de92824501f17ec1558b275cbc8bc08477aa83 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 17:06:01 +0700
Subject: [PATCH 15/21] Update test_tokenize.py

---
 tests/test_tokenize.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 4083e9174..2a9881401 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -449,7 +449,9 @@ def test_tltk(self):
         self.assertEqual(tltk.segment(None), [])
         self.assertEqual(tltk.segment(""), [])
         self.assertEqual(
-            tltk.syllable_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tltk"),
+            tltk.syllable_tokenize(
+                "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tltk"
+            ),
             ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คน', 'ไทย'],
         )

From 9e2ea86f3c121bdf682f4811e50182764e1e4113 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 17:32:54 +0700
Subject: [PATCH 16/21] Update test_tokenize.py

---
 tests/test_tokenize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 2a9881401..6930ca704 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -450,7 +450,7 @@ def test_tltk(self):
         self.assertEqual(tltk.segment(""), [])
         self.assertEqual(
             tltk.syllable_tokenize(
-                "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="tltk"
+                "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"
             ),
             ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คน', 'ไทย'],
         )

From 7c6f823e725e795214efd9909091a566cf8d3d0d Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 18:20:44 +0700
Subject: [PATCH 17/21] Update test_tokenize.py

---
 tests/test_tokenize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 6930ca704..6e4195260 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -452,7 +452,7 @@ def test_tltk(self):
             tltk.syllable_tokenize(
                 "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"
             ),
-            ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คน', 'ไทย'],
+            ['ฉัน', 'รัก', 'ภา', 'ษา', 'ไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คน', 'ไทย'],
         )

From f1f8f517e6699716d88931091a29fbd1a6b2c9fb Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 18:22:30 +0700
Subject: [PATCH 18/21] Update test_tokenize.py

---
 tests/test_tokenize.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py
index 6e4195260..6d1f54bd0 100644
--- a/tests/test_tokenize.py
+++ b/tests/test_tokenize.py
@@ -452,7 +452,18 @@ def test_tltk(self):
             tltk.syllable_tokenize(
                 "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"
             ),
-            ['ฉัน', 'รัก', 'ภา', 'ษา', 'ไทย', 'เพราะ', 'ฉัน', 'เป็น', 'คน', 'ไทย'],
+            [
+                'ฉัน',
+                'รัก',
+                'ภา',
+                'ษา',
+                'ไทย',
+                'เพราะ',
+                'ฉัน',
+                'เป็น',
+                'คน',
+                'ไทย'
+            ],
         )

From 2c9a9ea9693882bf026e29db9a7fd3903b717f58 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 20:01:00 +0700
Subject: [PATCH 19/21] Add post_process tltk ner

---
 pythainlp/tag/tltk.py | 10 +++++++---
 tests/test_tag.py     |  5 +++++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
index 92e6a0bba..f61cbff8f 100644
--- a/pythainlp/tag/tltk.py
+++ b/pythainlp/tag/tltk.py
@@ -13,6 +13,10 @@ def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]:
     return nlp.pos_tag_wordlist(words)
 
 
+def _post_process(text: str) -> str:
+    return text.replace("<u/>", " ")
+
+
 def get_ner(
     text: str,
     pos: bool = True,
@@ -61,9 +65,9 @@ def get_ner(
             i = "<u/>"
         list_word.append(i)
     _pos = nlp.pos_tag_wordlist(list_word)
-    sent_ner = nlp.ner(_pos)
-    if sent_ner[-1][0] == '<s/>':
-        del sent_ner[-1]
+    sent_ner = [
+        (_post_process(word), pos, ner) for word, pos, ner in nlp.ner(_pos)
+    ]
diff --git a/tests/test_tag.py b/tests/test_tag.py
index d84e8b1f2..c300527da 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -365,6 +365,11 @@ def test_tltk_ner(self):
         self.assertEqual(tltk.get_ner(""), [])
         self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า"))
         self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า", pos=False))
+        self.assertIsNotNone(
+            tltk.get_ner(
+                "พลเอกประยุกธ์ จันทร์โอชา ประกาศในฐานะหัวหน้า"
+            )
+        )
         self.assertIsNotNone(
             tltk.get_ner(
                 """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น

From 17d3a76f7a519229d01e3f6ad88715be0c212430 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 20:10:50 +0700
Subject: [PATCH 20/21] Add test

---
 pythainlp/tag/tltk.py | 2 +-
 tests/test_tag.py     | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/pythainlp/tag/tltk.py b/pythainlp/tag/tltk.py
index f61cbff8f..bfb5bacb9 100644
--- a/pythainlp/tag/tltk.py
+++ b/pythainlp/tag/tltk.py
@@ -9,7 +9,7 @@ def pos_tag(words: List[str], corpus: str = "tnc") -> List[Tuple[str, str]]:
     if corpus != "tnc":
-        raise NotImplemented("tltk not support {0} corpus.".format(0))
+        raise ValueError("tltk does not support {0} corpus.".format(corpus))
     return nlp.pos_tag_wordlist(words)
diff --git a/tests/test_tag.py b/tests/test_tag.py
index c300527da..d9c4c25e5 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -117,6 +117,9 @@ def test_pos_tag(self):
         self.assertIsNotNone(
             pos_tag(tokens, engine="wangchanberta", corpus="lst20_ud")
         )
+        self.assertIsNotNone(
+            tltk.pos_tag(tokens, corpus="lst20")
+        )
 
 # ### pythainlp.tag.PerceptronTagger

From f7d99eb742b6f88422e7cdc5fe89c3d4407a1005 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Thu, 29 Jul 2021 20:33:23 +0700
Subject: [PATCH 21/21] Add test

---
 tests/test_spell.py     | 6 +++++-
 tests/test_summarize.py | 2 ++
 tests/test_tag.py       | 8 ++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/test_spell.py b/tests/test_spell.py
index 9dd3a25ba..bb273709a 100644
--- a/tests/test_spell.py
+++ b/tests/test_spell.py
@@ -7,7 +7,8 @@
     correct,
     spell,
     spell_sent,
-    correct_sent
+    correct_sent,
+    symspellpy,
 )
 
@@ -131,3 +132,6 @@ def test_correct_sent(self):
         self.assertIsNotNone(
             correct_sent(self.spell_sent, engine="symspellpy")
         )
+        self.assertIsNotNone(
+            symspellpy.correct_sent(self.spell_sent)
+        )
diff --git a/tests/test_summarize.py b/tests/test_summarize.py
index f5cb0161f..2c36ebced 100644
--- a/tests/test_summarize.py
+++ b/tests/test_summarize.py
@@ -24,3 +24,5 @@ def test_summarize(self):
         self.assertIsNotNone(summarize([]))
         self.assertIsNotNone(summarize(text, 1, engine="mt5-small"))
         self.assertIsNotNone(summarize(text, 1, engine="XX"))
+        with self.assertRaises(ValueError):
+            self.assertIsNotNone(summarize(text, 1, engine="mt5-cat"))
diff --git a/tests/test_tag.py b/tests/test_tag.py
index d9c4c25e5..68232e30c 100644
--- a/tests/test_tag.py
+++ b/tests/test_tag.py
@@ -2,6 +2,7 @@
 import unittest
 from os import path
 
+from pythainlp import tag
 from pythainlp.tag import (
     chunk_parse,
@@ -117,6 +118,7 @@ def test_pos_tag(self):
         self.assertIsNotNone(
             pos_tag(tokens, engine="wangchanberta", corpus="lst20_ud")
         )
+        with self.assertRaises(ValueError):
             self.assertIsNotNone(
                 tltk.pos_tag(tokens, corpus="lst20")
             )
@@ -373,6 +375,12 @@ def test_tltk_ner(self):
                 "พลเอกประยุกธ์ จันทร์โอชา ประกาศในฐานะหัวหน้า"
             )
         )
+        self.assertIsNotNone(
+            tltk.get_ner(
+                "พลเอกประยุกธ์ จันทร์โอชา ประกาศในฐานะหัวหน้า",
+                tag=True,
+            )
+        )
         self.assertIsNotNone(
             tltk.get_ner(
                 """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น
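
For reference, a minimal usage sketch of the engines this patch series adds. It assumes PyThaiNLP is installed with the tltk extra (tltk>=1.3.8); the exact outputs depend on the installed tltk models and may differ from the values shown in the docstrings above.

# -*- coding: utf-8 -*-
# Illustrative only; all engine names below are the ones added by this series.
from pythainlp.spell import spell
from pythainlp.tag import pos_tag
from pythainlp.tag.tltk import get_ner
from pythainlp.tokenize import sent_tokenize, word_tokenize
from pythainlp.transliterate import romanize, transliterate

words = word_tokenize("ฉันรักภาษาไทย", engine="tltk")    # word segmentation
print(words)
print(sent_tokenize("ฉันรักภาษาไทย", engine="tltk"))     # sentence segmentation
print(pos_tag(words, engine="tltk"))                     # POS tagging (TNC tagset)
print(spell("เส้นตรบ", engine="tltk"))                   # spelling suggestions
print(romanize("สามารถ", engine="tltk"))                 # romanization
print(transliterate("สามารถ", engine="tltk_ipa"))        # IPA transcription
print(get_ner("เขาเรียนที่โรงเรียนนางรอง", tag=True))    # NER with HTML-like tags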