1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (C) 2016-2023 PyThaiNLP Project
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """
16
+ Thai abbreviation tools
17
+ """
18
+ from typing import List , Tuple , Union
19
+
20
+
21
def abbreviation_to_full_text(
    text: str, top_k: int = 2
) -> List[Tuple[str, Union[float, None]]]:
    """
    Convert Thai text containing abbreviations to full text.

    This function uses KhamYo to handle abbreviations.
    See more `KhamYo <https://github.com/wannaphong/KhamYo>`_.

    :param str text: Thai text
    :param int top_k: forwarded unchanged to KhamYo's ``replace`` as
        ``top_k`` (presumably the number of candidate expansions to
        return -- confirm against the KhamYo documentation)
    :return: Thai full text that handles abbreviations as full text and
        cos scores (original text - modified text).
    :rtype: List[Tuple[str, Union[float, None]]]
    :raises ImportError: if the optional ``khamyo`` package is not installed

    :Example:
    ::

        from pythainlp.util import abbreviation_to_full_text

        text = "รร.ของเราน่าอยู่"

        abbreviation_to_full_text(text)
        # output: [
        # ('โรงเรียนของเราน่าอยู่', tensor(0.3734)),
        # ('โรงแรมของเราน่าอยู่', tensor(0.2438))
        # ]
    """
    # khamyo is an optional dependency, so it is imported lazily here
    # instead of at module import time.
    try:
        from khamyo import replace as _replace
    except ImportError as err:
        # Chain the original error so the traceback shows the real cause
        # (e.g. khamyo itself failing to import one of its dependencies).
        raise ImportError(
            "This function needs khamyo. "
            "You can install it with `pip install khamyo` or "
            "`pip install pythainlp[abbreviation]`."
        ) from err
    return _replace(text, top_k=top_k)
0 commit comments