Skip to content

Commit 9bf1842

Browse files
committed
Update test_tokenize.py
1 parent 2f39603 commit 9bf1842

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

tests/test_tokenize.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -300,6 +300,24 @@ def test_subword_tokenize(self):
300 300
self.assertFalse(
301 301
" " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
302 302
)
303+
self.assertEqual(
304+
subword_tokenize("สวัสดีชาวโลก", engine="dict"), ["สวัส", "ดี", "ชาว", "โลก"]
305+
)
306+
self.assertFalse("า" in subword_tokenize("สวัสดีชาวโลก", engine="dict"))
307+
self.assertEqual(subword_tokenize(None, engine="ssg"), [])
308+
self.assertEqual(syllable_tokenize("", engine="ssg"), [])
309+
self.assertEqual(
310+
subword_tokenize("แมวกินปลา", engine="ssg"), ["แมว", "กิน", "ปลา"]
311+
)
312+
self.assertTrue(
313+
"ดาว" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")
314+
)
315+
self.assertFalse(
316+
"า" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")
317+
)
318+
self.assertFalse(
319+
" " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
320+
)
303 321
with self.assertRaises(ValueError):
304 322
subword_tokenize("นกแก้ว", engine="XX") # engine does not exist
305 323

0 commit comments

Comments
 (0)