Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pythainlp/corpus/pos_lst20_perceptron-v0.2.3.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pythainlp/corpus/pos_orchid_perceptron.json

Large diffs are not rendered by default.

Binary file removed pythainlp/corpus/pos_orchid_perceptron.pkl
Binary file not shown.
1 change: 1 addition & 0 deletions pythainlp/corpus/pos_ud_perceptron.json

Large diffs are not rendered by default.

Binary file removed pythainlp/corpus/pos_ud_perceptron.pkl
Binary file not shown.
16 changes: 8 additions & 8 deletions pythainlp/tag/_tag_perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from __future__ import absolute_import

import os
import pickle
import json
import random
from collections import defaultdict
from typing import Dict, Iterable, List, Tuple, Union
Expand Down Expand Up @@ -193,25 +193,25 @@ def train(
data = {}
data["weights"] = self.model.weights
data["tagdict"] = self.tagdict
data["classes"] = self.classes
with open(save_loc, "wb") as f:
pickle.dump(data, f, -1)
data["classes"] = list(self.classes)
with open(save_loc, 'w', encoding='utf-8') as f:
json.dump(data, f, ensure_ascii=False)

def load(self, loc: str) -> None:
    """
    Load a model that was saved as JSON.

    The file is read as UTF-8 text and must contain a JSON object with
    the keys ``"weights"``, ``"tagdict"`` and ``"classes"``.

    :param str loc: model path
    :raises IOError: if the file at ``loc`` cannot be opened or read
    """
    try:
        with open(loc, "r", encoding="utf-8") as f:
            w_td_c = json.load(f)
    except IOError as err:
        msg = "Missing trontagger.json file."
        # Keep the original OS error attached so the real path/cause
        # is still visible in the traceback.
        raise IOError(msg) from err
    self.model.weights = w_td_c["weights"]
    self.tagdict = w_td_c["tagdict"]
    # JSON has no set type, so the classes are stored as a list when the
    # model is saved. Convert back to a set and share the same object
    # between the tagger and its model.
    self.classes = set(w_td_c["classes"])
    self.model.classes = self.classes

def _normalize(self, word: str) -> str:
"""
Expand Down
9 changes: 5 additions & 4 deletions pythainlp/tag/perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
from pythainlp.corpus import corpus_path, get_corpus_path
from pythainlp.tag import PerceptronTagger, lst20, orchid

# File names of the perceptron POS-tagger models (stored as JSON) and the
# paths built by joining them onto corpus_path().
_ORCHID_FILENAME = "pos_orchid_perceptron.json"
_ORCHID_PATH = os.path.join(corpus_path(), _ORCHID_FILENAME)

_PUD_FILENAME = "pos_ud_perceptron.json"
_PUD_PATH = os.path.join(corpus_path(), _PUD_FILENAME)

# The LST20 model file name carries its version suffix.
_LST20_TAGGER_NAME = "pos_lst20_perceptron-v0.2.3.json"
_LST20_TAGGERD_PATH = os.path.join(corpus_path(), _LST20_TAGGER_NAME)

_ORCHID_TAGGER = None
_PUD_TAGGER = None
Expand All @@ -39,7 +40,7 @@ def _pud_tagger():
def _lst20_tagger():
    """
    Return the shared LST20 perceptron tagger, creating it on first use.

    The tagger is built from the model file at ``_LST20_TAGGERD_PATH`` and
    kept in the module-level ``_LST20_TAGGER`` so later calls reuse it.
    """
    global _LST20_TAGGER
    if not _LST20_TAGGER:
        # Build the tagger once, from the local model path only.
        _LST20_TAGGER = PerceptronTagger(path=_LST20_TAGGERD_PATH)
    return _LST20_TAGGER


Expand Down
2 changes: 1 addition & 1 deletion tests/test_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_perceptron_tagger(self):
[("เม่น", "N"), ("กิน", "V")],
[("หนอน", "N"), ("กิน", "V")],
]
filename = "ptagger_temp4XcDf.pkl"
filename = "ptagger_temp4XcDf.json"
tagger.train(data, save_loc=filename)
self.assertTrue(path.exists(filename))

Expand Down