1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (C) 2016-2023 PyThaiNLP Project
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ from typing import List
16
# Lazily created model instance, shared by every call to
# coreference_resolution() so the model is only loaded once.
model = None


def coreference_resolution(
    texts: List[str], model_name: str = "han-coref-v1.0", device: str = "cpu"
) -> List[dict]:
    """
    Coreference Resolution

    The model is loaded on the first call and kept in the module-level
    ``model`` variable. While a model is cached, the ``model_name`` and
    ``device`` arguments of later calls are not used.

    :param List[str] texts: list of texts to do coreference resolution
        (a single str is also accepted and is treated as a one-item list)
    :param str model_name: coreference resolution model
    :param str device: device for running coreference resolution model (cpu, cuda, and other)
    :return: list of coreference resolution results
    :rtype: List[dict]
    :raises ValueError: if no model is loaded yet and ``model_name`` is not
        one of the supported options

    :Options for model_name:
        * *han-coref-v1.0* - (default) Han-Coref: Thai coreference resolution by PyThaiNLP v1.0

    :Example:
    ::

        from pythainlp.coref import coreference_resolution

        print(
            coreference_resolution(
                ["Bill Gates ได้รับวัคซีน COVID-19 เข็มแรกแล้ว ระบุ ผมรู้สึกสบายมาก"]
            )
        )
        # output:
        # [
        # {'text': 'Bill Gates ได้รับวัคซีน COVID-19 เข็มแรกแล้ว ระบุ ผมรู้สึกสบายมาก',
        # 'clusters_string': [['Bill Gates', 'ผม']],
        # 'clusters': [[(0, 10), (50, 52)]]}
        # ]
    """
    global model
    if isinstance(texts, str):
        texts = [texts]
    if model is None:
        if model_name != "han-coref-v1.0":
            # Fail with a clear message instead of calling .predict on None.
            raise ValueError(
                f"Coreference resolution model not supported: {model_name!r}"
            )
        # Imported lazily so the model backend is only needed when it is
        # actually loaded.
        from pythainlp.coref.han_coref import HanCoref

        model = HanCoref(device=device)
    return model.predict(texts)
0 commit comments