diff --git a/knlp/Pipeline/PipeEval.py b/knlp/Pipeline/PipeEval.py
new file mode 100644
index 0000000..d7ccdb2
--- /dev/null
+++ b/knlp/Pipeline/PipeEval.py
@@ -0,0 +1,6 @@
+class PipeEvaluator:
+    def __init__(self):
+        pass
+
+    def evaluate(self):
+        pass
diff --git a/knlp/Pipeline/PipeTrainer.py b/knlp/Pipeline/PipeTrainer.py
new file mode 100644
index 0000000..dbf5d8a
--- /dev/null
+++ b/knlp/Pipeline/PipeTrainer.py
@@ -0,0 +1,6 @@
+class PipeTrainer:
+    def __init__(self):
+        pass
+
+    def train(self):
+        pass
\ No newline at end of file
diff --git a/knlp/Pipeline/__init__.py b/knlp/Pipeline/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/knlp/Pipeline/pipeline.py b/knlp/Pipeline/pipeline.py
new file mode 100644
index 0000000..257ef28
--- /dev/null
+++ b/knlp/Pipeline/pipeline.py
@@ -0,0 +1,12 @@
+class Pipeline:
+    def __init__(self):
+        pass
+
+    def train(self, model):
+        pass
+
+    def inference(self, model, input):
+        pass
+
+    def evaluate(self, model):
+        pass
diff --git a/knlp/common/constant.py b/knlp/common/constant.py
index 4669266..c236793 100644
--- a/knlp/common/constant.py
+++ b/knlp/common/constant.py
@@ -19,4 +19,5 @@
 SEP = "[SEP]"
 CLS = "[CLS]"
 MASK = "MASK"
-model_list = ['hmm', 'crf', 'trie', 'bilstm', 'bert_mrc', 'bert_tagger']  # ner pipeline中目前支持的模型列表
+model_list = ['hmm', 'crf', 'trie', 'bilstm', 'bert_mrc', 'bert_tagger']  # models currently supported by the NER pipeline
+class_model_list = ['bert', 'textcnn', 'beyas']  # models currently supported by the classification pipeline
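Editor's note: the Pipeline, PipeTrainer and PipeEvaluator classes above are still empty stubs. A minimal sketch of how the facade could dispatch on model_list once concrete trainers exist — the trainers dict and its contents are illustrative assumptions, not part of this patch:

from knlp.common.constant import model_list

class PipelineSketch:
    def __init__(self, trainers):
        self.trainers = trainers  # caller-supplied map: model name -> trainer object

    def train(self, model):
        if model not in model_list:
            raise ValueError(f"unsupported model {model!r}; expected one of {model_list}")
        return self.trainers[model].train()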
logger.info("Training/evaluation parameters %s", args) @@ -400,7 +399,7 @@ def run(self): if not os.path.exists(self.output_dir): os.makedirs(self.output_dir) logger.info("Saving model checkpoint to %s", self.output_dir) - # Save a trained model, configuration and tokenizer using `save_pretrained()`. + # Save a trained model, configuration and tokenizer using `saseve_pretrained()`. # They can then be reloaded using `from_pretrained()` model_to_save = ( model.module if hasattr(model, "module") else model diff --git a/knlp/seq_labeling/bert/metrics/classification_metrics.py b/knlp/seq_labeling/bert/metrics/classification_metrics.py new file mode 100644 index 0000000..f3f8409 --- /dev/null +++ b/knlp/seq_labeling/bert/metrics/classification_metrics.py @@ -0,0 +1,34 @@ +import csv +import sys +import logging + +logger = logging.getLogger(__name__) + + +def simple_accuracy(preds, labels): + return (preds == labels).mean() + + +def acc_and_f1(preds, labels): + acc = simple_accuracy(preds, labels) + f1 = f1_score(y_true=labels, y_pred=preds) + return { + "acc": acc, + "f1": f1, + "acc_and_f1": (acc + f1) / 2, + } + + +def pearson_and_spearman(preds, labels): + pearson_corr = pearsonr(preds, labels)[0] + spearman_corr = spearmanr(preds, labels)[0] + return { + "pearson": pearson_corr, + "spearmanr": spearman_corr, + "corr": (pearson_corr + spearman_corr) / 2, + } + + +def compute_metrics(task_name, preds, labels): + assert len(preds) == len(labels) + return {"acc": simple_accuracy(preds, labels)} diff --git a/knlp/seq_labeling/bert/metrics/ner_metrics.py b/knlp/seq_labeling/bert/metrics/ner_metrics.py index fdef4eb..7cf03f0 100644 --- a/knlp/seq_labeling/bert/metrics/ner_metrics.py +++ b/knlp/seq_labeling/bert/metrics/ner_metrics.py @@ -3,7 +3,7 @@ from knlp.seq_labeling.bert.processors.utils_ner import get_entities class SeqEntityScore(object): - def __init__(self, id2label,markup='bios'): + def __init__(self, id2label, markup='bios'): self.id2label = id2label self.markup = markup self.reset() diff --git a/knlp/seq_labeling/bert/models/bert_for_classification.py b/knlp/seq_labeling/bert/models/bert_for_classification.py new file mode 100644 index 0000000..a13430d --- /dev/null +++ b/knlp/seq_labeling/bert/models/bert_for_classification.py @@ -0,0 +1,50 @@ +import torch +import torch.nn as nn +from transformers import BertModel, BertPreTrainedModel +from torch.nn import CrossEntropyLoss + +from knlp.common.constant import KNLP_PATH +from knlp.seq_labeling.bert.losses.focal_loss import FocalLoss +from knlp.seq_labeling.bert.losses.label_smoothing import LabelSmoothingCrossEntropy + + +class BertForTokenClassification(BertPreTrainedModel): + + def __init__(self, config): + super(BertForTokenClassification, self).__init__(config) + self.num_labels = config.num_labels + + self.bert = BertModel(config) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.classifier = nn.Linear(config.hidden_size, config.num_labels) + + self.init_weights() + + def forward(self, input_ids, attention_mask=None, token_type_ids=None, + position_ids=None, head_mask=None, labels=None): + + outputs = self.bert(input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + position_ids=position_ids, + head_mask=head_mask) + + sequence_output = outputs[0] + + sequence_output = self.dropout(sequence_output) + logits = self.classifier(sequence_output) + + outputs = (logits,) + outputs[2:] # add hidden states and attention if they are here + if labels is not None: + loss_fct = CrossEntropyLoss() + # Only 
diff --git a/knlp/seq_labeling/bert/models/bert_for_classification.py b/knlp/seq_labeling/bert/models/bert_for_classification.py
new file mode 100644
index 0000000..a13430d
--- /dev/null
+++ b/knlp/seq_labeling/bert/models/bert_for_classification.py
@@ -0,0 +1,50 @@
+import torch
+import torch.nn as nn
+from transformers import BertModel, BertPreTrainedModel
+from torch.nn import CrossEntropyLoss
+
+from knlp.common.constant import KNLP_PATH
+from knlp.seq_labeling.bert.losses.focal_loss import FocalLoss
+from knlp.seq_labeling.bert.losses.label_smoothing import LabelSmoothingCrossEntropy
+
+
+class BertForTokenClassification(BertPreTrainedModel):
+
+    def __init__(self, config):
+        super(BertForTokenClassification, self).__init__(config)
+        self.num_labels = config.num_labels
+
+        self.bert = BertModel(config)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
+
+        self.init_weights()
+
+    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
+                position_ids=None, head_mask=None, labels=None):
+
+        outputs = self.bert(input_ids,
+                            attention_mask=attention_mask,
+                            token_type_ids=token_type_ids,
+                            position_ids=position_ids,
+                            head_mask=head_mask)
+
+        sequence_output = outputs[0]
+
+        sequence_output = self.dropout(sequence_output)
+        logits = self.classifier(sequence_output)
+
+        outputs = (logits,) + outputs[2:]  # add hidden states and attention if they are here
+        if labels is not None:
+            loss_fct = CrossEntropyLoss()
+            # Only keep active parts of the loss
+            if attention_mask is not None:
+                active_loss = attention_mask.view(-1) == 1
+                active_logits = logits.view(-1, self.num_labels)[active_loss]
+                active_labels = labels.view(-1)[active_loss]
+                loss = loss_fct(active_logits, active_labels)
+            else:
+                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
+            outputs = (loss,) + outputs
+
+        return outputs  # (loss), scores, (hidden_states), (attentions)
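The masking branch in the forward pass above flattens the [batch, seq_len, num_labels] logits and keeps only the positions where attention_mask == 1, so padded tokens never contribute to the loss. The same computation in isolation, with toy shapes:

import torch
from torch.nn import CrossEntropyLoss

batch, seq_len, num_labels = 2, 4, 3
logits = torch.randn(batch, seq_len, num_labels)
labels = torch.randint(num_labels, (batch, seq_len))
attention_mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])  # 0 marks padding

active = attention_mask.view(-1) == 1                 # [batch * seq_len] bool mask
active_logits = logits.view(-1, num_labels)[active]   # padded positions dropped
active_labels = labels.view(-1)[active]
loss = CrossEntropyLoss()(active_logits, active_labels)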
+ + """ + if task is not None: + processor = clue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Using label list %s for task %s" % (label_list, task)) + if output_mode is None: + output_mode = clue_output_modes[task] + logger.info("Using output mode %s for task %s" % (output_mode, task)) + + label_map = {label: i for i, label in enumerate(label_list)} + + features = [] + for (ex_index, example) in enumerate(examples): + if ex_index % 10000 == 0: + logger.info("Writing example %d" % (ex_index)) + + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + input_len = len(input_ids) + # Zero-pad up to the sequence length. + padding_length = max_length - len(input_ids) + if pad_on_left: + input_ids = ([pad_token] * padding_length) + input_ids + attention_mask = ([0 if mask_padding_with_zero else 1] * padding_length) + attention_mask + token_type_ids = ([pad_token_segment_id] * padding_length) + token_type_ids + else: + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, "Error with input length {} vs {}".format(len(input_ids), max_length) + assert len(attention_mask) == max_length, "Error with input length {} vs {}".format(len(attention_mask), + max_length) + assert len(token_type_ids) == max_length, "Error with input length {} vs {}".format(len(token_type_ids), + max_length) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + if ex_index < 5: + logger.info("*** Example ***") + logger.info("guid: %s" % (example.guid)) + logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids])) + logger.info("attention_mask: %s" % " ".join([str(x) for x in attention_mask])) + logger.info("token_type_ids: %s" % " ".join([str(x) for x in token_type_ids])) + logger.info("label: %s (id = %d)" % (example.label, label)) + logger.info("input length: %d" % (input_len)) + + features.append( + InputFeatures(input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + input_len=input_len)) + return features + + +class TnewsProcessor(DataProcessor): + """Processor for the TNEWS data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + labels = [] + for i in range(17): + if i == 5 or i == 11: + continue + labels.append(str(100 + i)) + return labels + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + 
for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = line['sentence'] + text_b = None + label = str(line['label']) if set_type != 'test' else "100" + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class IflytekProcessor(DataProcessor): + """Processor for the IFLYTEK data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + labels = [] + for i in range(119): + labels.append(str(i)) + return labels + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = line['sentence'] + text_b = None + label = str(line['label']) if set_type != 'test' else "0" + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class AfqmcProcessor(DataProcessor): + """Processor for the AFQMC data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = line['sentence1'] + text_b = line['sentence2'] + label = str(line['label']) if set_type != 'test' else "0" + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + +class OcnliProcessor(DataProcessor): + """Processor for the CMNLI data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + return ["contradiction", "entailment", "neutral"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = line["sentence1"] + text_b = line["sentence2"] + label = str(line["label"]) if set_type != 'test' else 'neutral' + if label.strip()=='-': + continue + examples.append( + 
InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + +class CmnliProcessor(DataProcessor): + """Processor for the CMNLI data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + return ["contradiction", "entailment", "neutral"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = line["sentence1"] + text_b = line["sentence2"] + label = str(line["label"]) if set_type != 'test' else 'neutral' + if label.strip()=='-': + continue + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +def _create_examples(lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = " ".join(line['keyword']) + text_b = line['abst'] + label = str(line['label']) if set_type != 'test' else '0' + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class CslProcessor(DataProcessor): + """Processor for the CSL data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return _create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return _create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return _create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + +class WscProcessor(DataProcessor): + """Processor for the WSC data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + return ["true", "false"] + + def _create_examples(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + text_a = line['text'] + text_a_list = list(text_a) + target = line['target'] + query = target['span1_text'] + query_idx = target['span1_index'] + pronoun = target['span2_text'] + pronoun_idx = target['span2_index'] + assert text_a[pronoun_idx: (pronoun_idx + len(pronoun))] == pronoun, "pronoun: {}".format(pronoun) + assert text_a[query_idx: (query_idx + len(query))] == query, "query: {}".format(query) + if 
pronoun_idx > query_idx: + text_a_list.insert(query_idx, "_") + text_a_list.insert(query_idx + len(query) + 1, "_") + text_a_list.insert(pronoun_idx + 2, "[") + text_a_list.insert(pronoun_idx + len(pronoun) + 2 + 1, "]") + else: + text_a_list.insert(pronoun_idx, "[") + text_a_list.insert(pronoun_idx + len(pronoun) + 1, "]") + text_a_list.insert(query_idx + 2, "_") + text_a_list.insert(query_idx + len(query) + 2 + 1, "_") + text_a = "".join(text_a_list) + text_b = None + label = str(line['label']) if set_type != 'test' else 'true' + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +class CopaProcessor(DataProcessor): + """Processor for the COPA data set (CLUE version).""" + + def get_train_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "train.json")), "train") + + def get_dev_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "dev.json")), "dev") + + def get_test_examples(self, data_dir): + """See base class.""" + return self._create_examples( + self._read_json(os.path.join(data_dir, "test.json")), "test") + + def get_labels(self): + """See base class.""" + return ["0", "1"] + + def _create_examples(self, lines, set_type): + examples = [] + for (i, line) in enumerate(lines): + i = 2 * i + guid1 = "%s-%s" % (set_type, i) + guid2 = "%s-%s" % (set_type, i + 1) + premise = line['premise'] + choice0 = line['choice0'] + label = str(1 if line['label'] == 0 else 0) if set_type != 'test' else '0' + choice1 = line['choice1'] + label2 = str(0 if line['label'] == 0 else 1) if set_type != 'test' else '0' + if line['question'] == 'effect': + text_a = premise + text_b = choice0 + text_a2 = premise + text_b2 = choice1 + elif line['question'] == 'cause': + text_a = choice0 + text_b = premise + text_a2 = choice1 + text_b2 = premise + else: + raise ValueError(f'unknowed {line["question"]} type') + examples.append( + InputExample(guid=guid1, text_a=text_a, text_b=text_b, label=label)) + examples.append( + InputExample(guid=guid2, text_a=text_a2, text_b=text_b2, label=label2)) + return examples + + def _create_examples_version2(self, lines, set_type): + """Creates examples for the training and dev sets.""" + examples = [] + for (i, line) in enumerate(lines): + guid = "%s-%s" % (set_type, i) + if line['question'] == 'cause': + text_a = line['premise'] + '这是什么原因造成的?' + line['choice0'] + text_b = line['premise'] + '这是什么原因造成的?' + line['choice1'] + else: + text_a = line['premise'] + '这造成了什么影响?' + line['choice0'] + text_b = line['premise'] + '这造成了什么影响?' 
+ line['choice1'] + label = str(1 if line['label'] == 0 else 0) if set_type != 'test' else '0' + examples.append( + InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label)) + return examples + + +clue_tasks_num_labels = { + 'iflytek': 119, + 'cmnli': 3, + 'ocnli': 3, + 'afqmc': 2, + 'csl': 2, + 'wsc': 2, + 'copa': 2, + 'tnews': 15, +} + +clue_processors = { + 'tnews': TnewsProcessor, + 'iflytek': IflytekProcessor, + 'cmnli': CmnliProcessor, + 'ocnli': OcnliProcessor, + 'afqmc': AfqmcProcessor, + 'csl': CslProcessor, + 'wsc': WscProcessor, + 'copa': CopaProcessor, +} + +clue_output_modes = { + 'tnews': "classification", + 'iflytek': "classification", + 'cmnli': "classification", + 'ocnli': "classification", + 'afqmc': "classification", + 'csl': "classification", + 'wsc': "classification", + 'copa': "classification", +} diff --git a/knlp/seq_labeling/bert/processors/classification_utils.py b/knlp/seq_labeling/bert/processors/classification_utils.py new file mode 100644 index 0000000..beb52c1 --- /dev/null +++ b/knlp/seq_labeling/bert/processors/classification_utils.py @@ -0,0 +1,104 @@ +import csv +import sys +import copy +import json + +class InputExample(object): + """ + A single training/test example for simple sequence classification. + + Args: + guid: Unique id for the example. + text_a: string. The untokenized text of the first sequence. For single + sequence tasks, only this sequence must be specified. + text_b: (Optional) string. The untokenized text of the second sequence. + Only must be specified for sequence pair tasks. + label: (Optional) string. The label of the example. This should be + specified for train and dev examples, but not for test examples. + """ + def __init__(self, guid, text_a, text_b=None, label=None): + self.guid = guid + self.text_a = text_a + self.text_b = text_b + self.label = label + + def __repr__(self): + return str(self.to_json_string()) + + def to_dict(self): + """Serializes this instance to a Python dictionary.""" + output = copy.deepcopy(self.__dict__) + return output + + def to_json_string(self): + """Serializes this instance to a JSON string.""" + return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n" + + +class InputFeatures(object): + """ + A single set of features of data. + + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: + Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens. + token_type_ids: Segment token indices to indicate first and second portions of the inputs. 
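End to end, the pieces above turn CLUE JSON lines into padded tensors. A sketch for TNEWS, mirroring how the trainer below consumes them (the data dir is a placeholder, and this assumes a transformers version whose encode_plus matches the call inside clue_convert_examples_to_features):

import torch
from torch.utils.data import TensorDataset
from transformers import BertTokenizer

from knlp.seq_labeling.bert.processors.classification import TnewsProcessor, clue_convert_examples_to_features

tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
processor = TnewsProcessor()
examples = processor.get_train_examples('/path/to/tnews')  # reads train.json
features = clue_convert_examples_to_features(examples, tokenizer, max_length=128,
                                             label_list=processor.get_labels(),
                                             output_mode='classification')
dataset = TensorDataset(torch.tensor([f.input_ids for f in features]),
                        torch.tensor([f.attention_mask for f in features]),
                        torch.tensor([f.token_type_ids for f in features]),
                        torch.tensor([f.input_len for f in features]),
                        torch.tensor([f.label for f in features]))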
diff --git a/knlp/seq_labeling/bert/processors/classification_utils.py b/knlp/seq_labeling/bert/processors/classification_utils.py
new file mode 100644
index 0000000..beb52c1
--- /dev/null
+++ b/knlp/seq_labeling/bert/processors/classification_utils.py
@@ -0,0 +1,104 @@
+import copy
+import csv
+import json
+
+
+class InputExample(object):
+    """
+    A single training/test example for simple sequence classification.
+
+    Args:
+        guid: Unique id for the example.
+        text_a: string. The untokenized text of the first sequence. For single
+            sequence tasks, only this sequence must be specified.
+        text_b: (Optional) string. The untokenized text of the second sequence.
+            Only must be specified for sequence pair tasks.
+        label: (Optional) string. The label of the example. This should be
+            specified for train and dev examples, but not for test examples.
+    """
+    def __init__(self, guid, text_a, text_b=None, label=None):
+        self.guid = guid
+        self.text_a = text_a
+        self.text_b = text_b
+        self.label = label
+
+    def __repr__(self):
+        return str(self.to_json_string())
+
+    def to_dict(self):
+        """Serializes this instance to a Python dictionary."""
+        output = copy.deepcopy(self.__dict__)
+        return output
+
+    def to_json_string(self):
+        """Serializes this instance to a JSON string."""
+        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
+
+
+class InputFeatures(object):
+    """
+    A single set of features of data.
+
+    Args:
+        input_ids: Indices of input sequence tokens in the vocabulary.
+        attention_mask: Mask to avoid performing attention on padding token indices.
+            Mask values selected in ``[0, 1]``:
+            Usually ``1`` for tokens that are NOT MASKED, ``0`` for MASKED (padded) tokens.
+        token_type_ids: Segment token indices to indicate first and second portions of the inputs.
+        label: Label corresponding to the input
+    """
+
+    def __init__(self, input_ids, attention_mask, token_type_ids, label, input_len):
+        self.input_ids = input_ids
+        self.attention_mask = attention_mask
+        self.token_type_ids = token_type_ids
+        self.input_len = input_len
+        self.label = label
+
+    def __repr__(self):
+        return str(self.to_json_string())
+
+    def to_dict(self):
+        """Serializes this instance to a Python dictionary."""
+        output = copy.deepcopy(self.__dict__)
+        return output
+
+    def to_json_string(self):
+        """Serializes this instance to a JSON string."""
+        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
+
+
+class DataProcessor(object):
+    """Base class for data converters for sequence classification data sets."""
+
+    def get_train_examples(self, data_dir):
+        """Gets a collection of `InputExample`s for the train set."""
+        raise NotImplementedError()
+
+    def get_dev_examples(self, data_dir):
+        """Gets a collection of `InputExample`s for the dev set."""
+        raise NotImplementedError()
+
+    def get_labels(self):
+        """Gets the list of labels for this data set."""
+        raise NotImplementedError()
+
+    @classmethod
+    def _read_tsv(cls, input_file, quotechar=None):
+        """Reads a tab separated value file."""
+        with open(input_file, "r", encoding="utf-8-sig") as f:
+            reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
+            lines = []
+            for line in reader:
+                lines.append(line)
+            return lines
+
+    @classmethod
+    def _read_json(cls, input_file):
+        """Reads a json list file."""
+        with open(input_file, "r") as f:
+            reader = f.readlines()
+            lines = []
+            for line in reader:
+                lines.append(json.loads(line.strip()))
+            return lines
diff --git a/knlp/seq_labeling/bert/tools/collate_fn.py b/knlp/seq_labeling/bert/tools/collate_fn.py
new file mode 100644
index 0000000..65f8039
--- /dev/null
+++ b/knlp/seq_labeling/bert/tools/collate_fn.py
@@ -0,0 +1,11 @@
+import torch
+
+
+def collate_fn(batch):
+    """
+    batch is a list of (input_ids, attention_mask, token_type_ids, length, label) tuples;
+    truncates every padded tensor in the batch to the length of its longest real sequence.
+    """
+    all_input_ids, all_attention_mask, all_token_type_ids, all_lens, all_labels = map(torch.stack, zip(*batch))
+    max_len = max(all_lens).item()
+    all_input_ids = all_input_ids[:, :max_len]
+    all_attention_mask = all_attention_mask[:, :max_len]
+    all_token_type_ids = all_token_type_ids[:, :max_len]
+    return all_input_ids, all_attention_mask, all_token_type_ids, all_labels
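collate_fn depends on the dataset carrying per-example lengths (the fourth tensor), so each batch is truncated to its own longest sequence rather than the global max_length. A self-contained usage sketch:

import torch
from torch.utils.data import DataLoader, TensorDataset

from knlp.seq_labeling.bert.tools.collate_fn import collate_fn

# two examples padded to max_length = 8; real lengths are 5 and 3
input_ids = torch.tensor([[101, 7, 8, 9, 102, 0, 0, 0],
                          [101, 7, 102, 0, 0, 0, 0, 0]])
attention_mask = (input_ids != 0).long()
token_type_ids = torch.zeros_like(input_ids)
lens = torch.tensor([5, 3])
labels = torch.tensor([1, 0])
dataset = TensorDataset(input_ids, attention_mask, token_type_ids, lens, labels)
loader = DataLoader(dataset, batch_size=2, collate_fn=collate_fn)
batch = next(iter(loader))
print(batch[0].shape)  # torch.Size([2, 5]) -- truncated to the batch max, not 8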
diff --git a/knlp/seq_labeling/bert/tools/progressbar.py b/knlp/seq_labeling/bert/tools/progressbar.py
new file mode 100644
index 0000000..7df72fe
--- /dev/null
+++ b/knlp/seq_labeling/bert/tools/progressbar.py
@@ -0,0 +1,59 @@
+import time
+
+
+class ProgressBar(object):
+    '''
+    custom progress bar
+    Example:
+        >>> pbar = ProgressBar(n_total=30, desc='Training')
+        >>> step = 2
+        >>> pbar(step=step)
+    '''
+    def __init__(self, n_total, width=30, desc='Training'):
+        self.width = width
+        self.n_total = n_total
+        self.start_time = time.time()
+        self.desc = desc
+
+    def __call__(self, step, info={}):
+        now = time.time()
+        current = step + 1
+        recv_per = current / self.n_total
+        bar = f'[{self.desc}] {current}/{self.n_total} ['
+        if recv_per >= 1:
+            recv_per = 1
+        prog_width = int(self.width * recv_per)
+        if prog_width > 0:
+            bar += '=' * (prog_width - 1)
+            if current < self.n_total:
+                bar += ">"
+            else:
+                bar += '='
+        bar += '.' * (self.width - prog_width)
+        bar += ']'
+        show_bar = f"\r{bar}"
+        time_per_unit = (now - self.start_time) / current
+        if current < self.n_total:
+            eta = time_per_unit * (self.n_total - current)
+            if eta > 3600:
+                eta_format = ('%d:%02d:%02d' %
+                              (eta // 3600, (eta % 3600) // 60, eta % 60))
+            elif eta > 60:
+                eta_format = '%d:%02d' % (eta // 60, eta % 60)
+            else:
+                eta_format = '%ds' % eta
+            time_info = f' - ETA: {eta_format}'
+        else:
+            if time_per_unit >= 1:
+                time_info = f' {time_per_unit:.1f}s/step'
+            elif time_per_unit >= 1e-3:
+                time_info = f' {time_per_unit * 1e3:.1f}ms/step'
+            else:
+                time_info = f' {time_per_unit * 1e6:.1f}us/step'
+
+        show_bar += time_info
+        if len(info) != 0:
+            show_info = f'{show_bar} ' + \
+                "-".join([f' {key}: {value:.4f} ' for key, value in info.items()])
+            print(show_info, end='')
+        else:
+            print(show_bar, end='')
diff --git a/knlp/seq_labeling/classification/ModelTrainer/__init__.py b/knlp/seq_labeling/classification/ModelTrainer/__init__.py
new file mode 100644
index 0000000..e69de29
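The info dict lets callers stream metrics into the bar, which is how the trainers below report the running loss. Small usage sketch:

import time
from knlp.seq_labeling.bert.tools.progressbar import ProgressBar

pbar = ProgressBar(n_total=10, desc='Training')
for step in range(10):
    time.sleep(0.1)  # stand-in for one training step
    pbar(step, info={'loss': 1.0 / (step + 1)})
print()  # final newline after the \r-based bar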
diff --git a/knlp/seq_labeling/classification/ModelTrainer/model_train.py b/knlp/seq_labeling/classification/ModelTrainer/model_train.py
new file mode 100644
index 0000000..bc9b4be
--- /dev/null
+++ b/knlp/seq_labeling/classification/ModelTrainer/model_train.py
@@ -0,0 +1,107 @@
+import jieba
+
+from knlp.common.constant import KNLP_PATH, class_model_list
+from knlp.Pipeline.PipeTrainer import PipeTrainer
+from knlp.seq_labeling.classification.bert.trainer import BertTrain
+from knlp.nn.textcnn.train_textcnn import TrainTextCNN
+from knlp.seq_labeling.classification.beyas.beyas_train import beyas_train
+
+
+class ModelTrainer(PipeTrainer):
+    def __init__(self, data_path, vocab_path, model):
+        """
+        :param data_path: dataset path (pointing at the training data itself, as used by models such as hmm, crf and trie)
+        :param vocab_path: path to the dataset vocab
+        :param model: one model from the model list, or all of them
+        """
+        super().__init__()
+        self.training_data_path = data_path
+        self.vocab_set_path = vocab_path
+        self.clf_model_path = KNLP_PATH + "/knlp/model/beyas/classification"
+        self.tf_model_path = KNLP_PATH + "/knlp/model/beyas/classification"
+        self.model = model
+        self.model_list = class_model_list
+        if not data_path:
+            self.training_data_path = KNLP_PATH + '/knlp/data/class_clue'
+
+    def train(self):
+        if self.model not in self.model_list and self.model != 'all':
+            print(f'only support model in {self.model_list}')
+        else:
+            if self.model == 'bert':
+                self.bert_train()
+            elif self.model == 'textcnn':
+                self.textcnn_train(model_save_path=KNLP_PATH + "/knlp/model/classification/textcnn.pkl",
+                                   word2idx_path=KNLP_PATH + "/knlp/nn/textcnn/model_textcnn/weibo_word2idx.json",
+                                   label2idx_path=KNLP_PATH + "/knlp/nn/textcnn/model_textcnn/weibo_label2idx.json")
+            elif self.model == 'beyas':
+                self.beyas_train(clf_model_path=self.clf_model_path, tf_model_path=self.tf_model_path)
+            elif self.model == 'all':
+                self.bert_train()
+                self.textcnn_train(model_save_path=KNLP_PATH + "/knlp/model/classification/textcnn.pkl",
+                                   word2idx_path=KNLP_PATH + "/knlp/nn/textcnn/model_textcnn/weibo_word2idx.json",
+                                   label2idx_path=KNLP_PATH + "/knlp/nn/textcnn/model_textcnn/weibo_label2idx.json")
+                self.beyas_train(clf_model_path=self.clf_model_path, tf_model_path=self.tf_model_path)
+
+    def your_model_train(self):
+        """
+        example:
+        print('your_model_name - classification training started')
+        YourModelTrainer = YourModelTrain(**params)
+        YourModelTrainer.run(**params)
+        print('your_model_name - classification training finished')
+        """
+        pass
+
+    def bert_train(self):
+        print('Bert - text classification training started')
+        BertTrainer = BertTrain(data_path=self.training_data_path, tokenizer_vocab=self.vocab_set_path)
+        BertTrainer.run()
+        print('Bert - text classification training finished')
+
+    def textcnn_train(self, model_save_path, word2idx_path, label2idx_path):
+        kwargs = {
+            "dataset_hyperparameters": {
+                "vocab_set_path": self.vocab_set_path,
+                # "training_data_path": KNLP_PATH + "/knlp/nn/textcnn/data_textcnn/text_classification_weibo_eval_9988.txt",
+                "training_data_path": self.training_data_path,
+                # "eval_data_path": KNLP_PATH + "/knlp/nn/textcnn/data_textcnn/text_classification_weibo_eval_9988.txt",
+                "tokenizer": jieba.lcut,
+                "shuffle": True,
+                "batch_size": 64,
+                "max_length": 150
+            },
+            "optimizer_hyperparameters": {
+                "lr": 0.01,
+                "weight_decay": 1e-4
+            },
+            "model_hyperparameters": {
+                "n_filters": 100,
+                "filter_sizes": [3, 4, 5]
+            },
+            # "non_static_word2vec_path": KNLP_PATH + "/knlp/nn/textcnn/data_textcnn/text_classification_weibo_word2vec_300d_20509.txt",
+            "static_word2vec_path": KNLP_PATH + "/knlp/nn/textcnn/data_textcnn/text_classification_weibo_word2vec_100d_22770.txt",
+        }
+        save_kwargs = {
+            "model_path": model_save_path,
+            "word2idx_path": word2idx_path,
+            "label2idx_path": label2idx_path,
+        }
+        print("Textcnn - text classification training started")
+        train = TrainTextCNN(**kwargs)
+        train.train(5)
+        train.save(**save_kwargs)
+        print("Textcnn - text classification training finished")
+
+    def beyas_train(self, clf_model_path, tf_model_path):
+        print("Beyas - text classification training started")
+        beyas = beyas_train(file_path=self.training_data_path, clf_model_path=clf_model_path, tf_model_path=tf_model_path)
+        train_datas, train_labels = beyas.load_data()
+        beyas.train_model(datas=train_datas, labels=train_labels)
+        print("Beyas - text classification training finished")
+
+
+if __name__ == '__main__':
+    for model in ['bert', 'beyas', 'textcnn']:
+        test = ModelTrainer(data_path=KNLP_PATH + '/knlp/data/msra_bios/train.bios',
+                            vocab_path=KNLP_PATH + '/knlp/data/cluener_public/cluener_vocab.txt',
+                            model=model)
+        test.train()
diff --git a/knlp/seq_labeling/classification/__init__.py b/knlp/seq_labeling/classification/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/knlp/seq_labeling/classification/bert/__init__.py b/knlp/seq_labeling/classification/bert/__init__.py
new file mode 100644
index 0000000..e69de29
"https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-cased-config.json", + 'bert-large-uncased-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-config.json", + 'bert-large-cased-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-config.json", + 'bert-large-uncased-whole-word-masking-finetuned-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-whole-word-masking-finetuned-squad-config.json", + 'bert-large-cased-whole-word-masking-finetuned-squad': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-whole-word-masking-finetuned-squad-config.json", + 'bert-base-cased-finetuned-mrpc': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-config.json", + 'bert-base-german-dbmdz-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-config.json", + 'bert-base-german-dbmdz-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-config.json", +} + + +class BertConfig(PretrainedConfig): + r""" + :class:`~transformers.BertConfig` is the configuration class to store the configuration of a + `BertModel`. + + + Arguments: + vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. + hidden_size: Size of the encoder layers and the pooler layer. + num_hidden_layers: Number of hidden layers in the Transformer encoder. + num_attention_heads: Number of attention heads for each attention layer in + the Transformer encoder. + intermediate_size: The size of the "intermediate" (i.e., feed-forward) + layer in the Transformer encoder. + hidden_act: The non-linear activation function (function or string) in the + encoder and pooler. If string, "gelu", "relu", "swish" and "gelu_new" are supported. + hidden_dropout_prob: The dropout probabilitiy for all fully connected + layers in the embeddings, encoder, and pooler. + attention_probs_dropout_prob: The dropout ratio for the attention + probabilities. + max_position_embeddings: The maximum sequence length that this model might + ever be used with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + type_vocab_size: The vocabulary size of the `token_type_ids` passed into + `BertModel`. + initializer_range: The sttdev of the truncated_normal_initializer for + initializing all weight matrices. + layer_norm_eps: The epsilon used by LayerNorm. 
+ """ + pretrained_config_archive_map = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP + + def __init__(self, + vocab_size_or_config_json_file=30522, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=2, + initializer_range=0.02, + layer_norm_eps=1e-12, + **kwargs): + super(BertConfig, self).__init__(**kwargs) + if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 + and isinstance(vocab_size_or_config_json_file, unicode)): + with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: + json_config = json.loads(reader.read()) + for key, value in json_config.items(): + self.__dict__[key] = value + elif isinstance(vocab_size_or_config_json_file, int): + self.vocab_size = vocab_size_or_config_json_file + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.hidden_act = hidden_act + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_position_embeddings = max_position_embeddings + self.type_vocab_size = type_vocab_size + self.initializer_range = initializer_range + self.layer_norm_eps = layer_norm_eps + else: + raise ValueError("First argument must be either a vocabulary size (int)" + " or the path to a pretrained model config file (str)") diff --git a/knlp/seq_labeling/classification/bert/inference.py b/knlp/seq_labeling/classification/bert/inference.py new file mode 100644 index 0000000..c470b49 --- /dev/null +++ b/knlp/seq_labeling/classification/bert/inference.py @@ -0,0 +1,55 @@ +import json +import os +import numpy as np +import torch +from torch.utils.data import SequentialSampler, DataLoader, DistributedSampler +from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, BertTokenizer +import torch.nn.functional as F +from knlp.common.constant import KNLP_PATH +from knlp.seq_labeling.bert.processors.classification import TnewsProcessor as processor +from knlp.seq_labeling.classification.bert.trainer import BertTrain +from knlp.utils.tokenization import BasicTokenizer +from knlp.seq_labeling.bert.tools.progressbar import ProgressBar +from knlp.seq_labeling.bert.tools.collate_fn import collate_fn + +BERT_MODEL_PATH = KNLP_PATH + "/knlp/model/bert/output_modelbert" + +class bertinference(): + def __init__(self, task): + self.task = task + + def predict(self, model, text): + tokenizer = BasicTokenizer(vocab_file=KNLP_PATH + '/knlp/data/msra_bios/vocab.txt', do_lower_case=True) + nb_pred_steps = 0 + preds = None + pbar = ProgressBar(n_total=len(text), desc="Predicting") + input_tokens = tokenizer.tokenize(text) + for step, batch in enumerate(pred_dataloader): + model.eval() + batch = tuple(t.to(args.device) for t in batch) + with torch.no_grad(): + inputs = {'input_ids': input_tokens[0], + 'attention_mask': input_tokens[1], + 'labels': input_tokens[3]} + if args.model_type != 'distilbert': + inputs['token_type_ids'] = batch[2] + outputs = model(**inputs) + _, logits = outputs[:2] + nb_pred_steps += 1 + if preds is None: + preds = logits.detach().cpu().numpy() + else: + preds = np.append(preds, logits.detach().cpu().numpy(), axis=0) + pbar(step) + print(' ') + predict_label = np.argmax(preds, axis=1) + return predict_label + +if __name__ == '__main__': + inference = 
diff --git a/knlp/seq_labeling/classification/bert/inference.py b/knlp/seq_labeling/classification/bert/inference.py
new file mode 100644
index 0000000..c470b49
--- /dev/null
+++ b/knlp/seq_labeling/classification/bert/inference.py
@@ -0,0 +1,55 @@
+import numpy as np
+import torch
+from transformers import BertTokenizer
+
+from knlp.common.constant import KNLP_PATH
+from knlp.seq_labeling.bert.models.bert_for_classification import BertForTokenClassification
+from knlp.seq_labeling.bert.processors.classification import TnewsProcessor as processor
+
+BERT_MODEL_PATH = KNLP_PATH + "/knlp/model/bert/output_modelbert"
+
+
+class bertinference():
+    def __init__(self, task):
+        self.task = task
+
+    def predict(self, model, text, device='cpu'):
+        tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH)
+        model.eval()
+        # Encode the raw text into a single-example batch of tensors.
+        inputs = tokenizer.encode_plus(text, add_special_tokens=True, return_tensors='pt')
+        inputs = {key: value.to(device) for key, value in inputs.items()}
+        with torch.no_grad():
+            outputs = model(**inputs)
+        logits = outputs[0]
+        preds = logits.detach().cpu().numpy()
+        predict_label = np.argmax(preds, axis=-1)
+        return predict_label
+
+
+if __name__ == '__main__':
+    inference = bertinference('tnews')
+    to_be_pred = '我还行'
+    model = BertForTokenClassification.from_pretrained(KNLP_PATH + '/knlp/model/bert/output_modelbert/checkpoint-448')
+    model.to('cpu')
+    label_list = processor().get_labels()
+    pred = inference.predict(model=model, text=to_be_pred)
+    print([label_list[idx] for idx in np.ravel(pred)])
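predict() above returns only argmax label ids; when class probabilities are wanted, apply a softmax to the logits first. A minimal numpy version of that post-processing:

import numpy as np

def logits_to_probs(logits):
    # numerically stable softmax over the last axis
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / exp.sum(axis=-1, keepdims=True)

probs = logits_to_probs(np.array([[2.0, 0.5, -1.0]]))
print(probs, probs.argmax(axis=-1))  # class 0 wins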
diff --git a/knlp/seq_labeling/classification/bert/trainer.py b/knlp/seq_labeling/classification/bert/trainer.py
new file mode 100644
index 0000000..faaaa83
--- /dev/null
+++ b/knlp/seq_labeling/classification/bert/trainer.py
@@ -0,0 +1,476 @@
+from __future__ import absolute_import, division, print_function
+
+import argparse
+import os
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
+from torch.utils.data.distributed import DistributedSampler
+from transformers import (BertForSequenceClassification, BertTokenizer,
+                          XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer)
+from transformers import AdamW, WarmupLinearSchedule
+
+from knlp.common.constant import KNLP_PATH
+from knlp.nn.train_nn import TrainNN  # base trainer class; adjust this import to TrainNN's actual location in knlp
+from knlp.seq_labeling.classification.bert.bert_config import BertConfig
+from knlp.seq_labeling.bert.models.bert_for_classification import BertForTokenClassification
+from knlp.seq_labeling.bert.metrics.classification_metrics import compute_metrics
+from knlp.seq_labeling.bert.processors.classification import clue_output_modes as output_modes
+from knlp.seq_labeling.bert.processors.classification import clue_processors as processors
+from knlp.seq_labeling.bert.processors.classification import \
+    clue_convert_examples_to_features as convert_examples_to_features
+from knlp.seq_labeling.bert.processors.classification import collate_fn, xlnet_collate_fn
+from knlp.seq_labeling.bert.tools.common import seed_everything, init_logger, logger
+from knlp.seq_labeling.bert.tools.progressbar import ProgressBar
+from knlp.utils.tokenization import BasicTokenizer
+
+MODEL_CLASSES = {
+    'bert': (BertConfig, BertForSequenceClassification, BertTokenizer),
+    'xlnet': (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer),
+}
+ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf, _, _ in MODEL_CLASSES.values()), ())
+
+
+# TODO: update the training data path!
+
+def get_argparse():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--data_dir", default=KNLP_PATH + "/knlp/data/cluener_public", type=str, required=True,
+                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
+    parser.add_argument("--model_type", default='bert', type=str, required=True,
+                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
+    parser.add_argument("--model_name_or_path", default=KNLP_PATH + "/knlp/model/bert/Chinese_wwm", type=str,
+                        required=True,
+                        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(
+                            ALL_MODELS))
+    parser.add_argument("--task_name", default=None, type=str, required=True,
+                        help="The name of the task to train selected in the list: " + ", ".join(processors.keys()))
+    parser.add_argument("--output_dir", default=KNLP_PATH + "/knlp/model/bert/output_model", type=str, required=True,
+                        help="The output directory where the model predictions and checkpoints will be written.")
+
+    # Other parameters
+    parser.add_argument("--config_name", default="", type=str,
+                        help="Pretrained config name or path if not the same as model_name")
+    parser.add_argument("--tokenizer_name", default="", type=str,
+                        help="Pretrained tokenizer name or path if not the same as model_name")
+    parser.add_argument("--cache_dir", default="", type=str,
+                        help="Where do you want to store the pre-trained models downloaded from s3")
+    parser.add_argument("--max_seq_length", default=128, type=int,
+                        help="The maximum total input sequence length after tokenization. Sequences longer "
+                             "than this will be truncated, sequences shorter will be padded.")
+    parser.add_argument("--do_train", action='store_true',
+                        help="Whether to run training.")
+    parser.add_argument("--do_eval", action='store_true',
+                        help="Whether to run eval on the dev set.")
+    parser.add_argument("--do_predict", action='store_true',
+                        help="Whether to run the model in inference mode on the test set.")
+    parser.add_argument("--do_lower_case", action='store_true',
+                        help="Set this flag if you are using an uncased model.")
+
+    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
+                        help="Batch size per GPU/CPU for training.")
+    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
+                        help="Batch size per GPU/CPU for evaluation.")
+    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
+                        help="Number of updates steps to accumulate before performing a backward/update pass.")
+    parser.add_argument("--learning_rate", default=5e-5, type=float,
+                        help="The initial learning rate for Adam.")
+    parser.add_argument("--weight_decay", default=0.01, type=float,
+                        help="Weight decay if we apply some.")
+    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
+                        help="Epsilon for Adam optimizer.")
+    parser.add_argument("--max_grad_norm", default=1.0, type=float,
+                        help="Max gradient norm.")
+    parser.add_argument("--num_train_epochs", default=3.0, type=float,
+                        help="Total number of training epochs to perform.")
+    parser.add_argument("--max_steps", default=-1, type=int,
+                        help="If > 0: set total number of training steps to perform. Overrides num_train_epochs.")
+    parser.add_argument("--warmup_proportion", default=0.1, type=float,
+                        help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10% of "
+                             "training.")
+    parser.add_argument('--logging_steps', type=int, default=10,
+                        help="Log every X updates steps.")
+    parser.add_argument('--save_steps', type=int, default=1000,
+                        help="Save checkpoint every X updates steps.")
+    parser.add_argument("--eval_all_checkpoints", action='store_true',
+                        help="Evaluate all checkpoints starting with the same prefix as model_name ending and ending "
+                             "with step number")
+    parser.add_argument("--predict_checkpoints", type=int, default=0,
+                        help="predict checkpoints starting with the same prefix as model_name ending and ending with "
+                             "step number")
+    parser.add_argument("--no_cuda", action='store_true',
+                        help="Avoid using CUDA when available")
+    parser.add_argument('--overwrite_output_dir', action='store_true',
+                        help="Overwrite the content of the output directory")
+    parser.add_argument('--overwrite_cache', action='store_true',
+                        help="Overwrite the cached training and evaluation sets")
+    parser.add_argument('--seed', type=int, default=42,
+                        help="random seed for initialization")
+    parser.add_argument("--fp16", action='store_true',
+                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
+    parser.add_argument("--fp16_opt_level", type=str, default="O1",
+                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', 'O3'].")
+    parser.add_argument("--local_rank", type=int, default=-1,
+                        help="For distributed training: local_rank")
+    parser.add_argument('--server_ip', type=str, default='', help="For distant debugging.")
+    parser.add_argument('--server_port', type=str, default='', help="For distant debugging.")
+    return parser
+
+
+class BertTrain(TrainNN):
+    def __init__(self, device: str = "cuda", data_path=None, tokenizer_vocab=None, save_path=None):
+        super().__init__(device=device)
+        self.config_class = BertConfig
+        self.output_dir = save_path if save_path else KNLP_PATH + "/knlp/model/bert/output_model"
+        self.vocab = KNLP_PATH + '/knlp/model/bert/Chinese_wwm/vocab.txt' if not tokenizer_vocab else tokenizer_vocab
+        self.tokenizer = BasicTokenizer(vocab_file=self.vocab,
+                                        do_lower_case=True)
+        self.training_data_path = data_path
+
+    def train(self, args, train_dataset, model, tokenizer):
+        """ Train the model """
+        args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
+        train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 else DistributedSampler(train_dataset)
+        train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size,
+                                      collate_fn=collate_fn)
+        if args.max_steps > 0:
+            t_total = args.max_steps
+            args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
+        else:
+            t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
+        args.warmup_steps = int(t_total * args.warmup_proportion)
+        # Prepare optimizer and schedule (linear warmup and decay)
+        no_decay = ['bias', 'LayerNorm.weight']
+        optimizer_grouped_parameters = [
+            {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
+             'weight_decay': args.weight_decay},
+            {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
+        ]
+        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
+        scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
+        if args.fp16:
+            try:
+                from apex import amp
+            except ImportError:
+                raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
+            model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)
+
+        # multi-gpu training (should be after apex fp16 initialization)
+        if args.n_gpu > 1:
+            model = torch.nn.DataParallel(model)
+
+        # Distributed training (should be after apex fp16 initialization)
+        if args.local_rank != -1:
+            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
+                                                              output_device=args.local_rank,
+                                                              find_unused_parameters=True)
+
+        # Train!
+        logger.info("***** Running training *****")
+        logger.info("  Num examples = %d", len(train_dataset))
+        logger.info("  Num Epochs = %d", args.num_train_epochs)
+        logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
+        logger.info("  Total train batch size (w. parallel, distributed & accumulation) = %d",
+                    args.train_batch_size * args.gradient_accumulation_steps * (
+                        torch.distributed.get_world_size() if args.local_rank != -1 else 1))
+        logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
+        logger.info("  Total optimization steps = %d", t_total)
+
+        global_step = 0
+        tr_loss, logging_loss = 0.0, 0.0
+        model.zero_grad()
+        seed_everything(args.seed)  # Added here for reproducibility (even between python 2 and 3)
+        for _ in range(int(args.num_train_epochs)):
+            pbar = ProgressBar(n_total=len(train_dataloader), desc='Training')
+            for step, batch in enumerate(train_dataloader):
+                model.train()
+                batch = tuple(t.to(args.device) for t in batch)
+                inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[3],
+                          'token_type_ids': batch[2]}
+                outputs = model(**inputs)
+                loss = outputs[0]  # model outputs are always tuple in transformers (see doc)
+
+                if args.n_gpu > 1:
+                    loss = loss.mean()  # mean() to average on multi-gpu parallel training
+                if args.gradient_accumulation_steps > 1:
+                    loss = loss / args.gradient_accumulation_steps
+
+                if args.fp16:
+                    with amp.scale_loss(loss, optimizer) as scaled_loss:
+                        scaled_loss.backward()
+                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
+                else:
+                    loss.backward()
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
+
+                pbar(step, {'loss': loss.item()})
+                tr_loss += loss.item()
+                if (step + 1) % args.gradient_accumulation_steps == 0:
+                    optimizer.step()
+                    scheduler.step()  # Update learning rate schedule
+                    model.zero_grad()
+                    global_step += 1
+
+                    if args.local_rank in [-1, 0] and args.logging_steps > 0 and global_step % args.logging_steps == 0:
+                        print(" ")
+                        # Log metrics
+                        if args.local_rank == -1:
+                            # Only evaluate when single GPU, otherwise metrics may not average well
+                            self.evaluate(args, model, tokenizer)
+
+                    if args.local_rank in [-1, 0] and args.save_steps > 0 and global_step % args.save_steps == 0:
+                        # Save model checkpoint
+                        output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
+                        if not os.path.exists(output_dir):
+                            os.makedirs(output_dir)
+                        model_to_save = model.module if hasattr(model,
+                                                                'module') else model  # Take care of
+                        # distributed/parallel training
+                        model_to_save.save_pretrained(output_dir)
+                        torch.save(args, os.path.join(output_dir, 'training_args.bin'))
+                        logger.info("Saving model checkpoint to %s", output_dir)
+                        tokenizer.save_vocabulary(vocab_path=output_dir)
+            print(" ")
+            if 'cuda' in str(args.device):
+                torch.cuda.empty_cache()
+        return global_step, tr_loss / global_step
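The optimizer_grouped_parameters split in train() above is the usual trick of exempting biases and LayerNorm weights from weight decay. The same pattern in isolation (torch.optim.AdamW stands in for transformers' AdamW here):

import torch

class Tiny(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.dense = torch.nn.Linear(10, 10)
        self.LayerNorm = torch.nn.LayerNorm(10)

model = Tiny()
no_decay = ['bias', 'LayerNorm.weight']
grouped = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = torch.optim.AdamW(grouped, lr=5e-5)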
* max(1, args.n_gpu) + # Note that DistributedSampler samples randomly + eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 else DistributedSampler( + eval_dataset) + eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size, + collate_fn=collate_fn) + + # Eval! + logger.info("********* Running evaluation {} ********".format(prefix)) + eval_loss = 0.0 + nb_eval_steps = 0 + preds = None + out_label_ids = None + pbar = ProgressBar(n_total=len(eval_dataloader), desc="Evaluating") + for step, batch in enumerate(eval_dataloader): + model.eval() + batch = tuple(t.to(args.device) for t in batch) + with torch.no_grad(): + inputs = {'input_ids': batch[0], 'attention_mask': batch[1], 'labels': batch[3], + 'token_type_ids': batch[2]} + outputs = model(**inputs) + tmp_eval_loss, logits = outputs[:2] + eval_loss += tmp_eval_loss.mean().item() + nb_eval_steps += 1 + if preds is None: + preds = logits.detach().cpu().numpy() + out_label_ids = inputs['labels'].detach().cpu().numpy() + else: + preds = np.append(preds, logits.detach().cpu().numpy(), axis=0) + out_label_ids = np.append(out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0) + pbar(step) + print(' ') + if 'cuda' in str(args.device): + torch.cuda.empty_cache() + eval_loss = eval_loss / nb_eval_steps + if args.output_mode == "classification": + preds = np.argmax(preds, axis=1) + elif args.output_mode == "regression": + preds = np.squeeze(preds) + result = compute_metrics(eval_task, preds, out_label_ids) + results.update(result) + logger.info(" Num examples = %d", len(eval_dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) + logger.info("******** Eval results {} ********".format(prefix)) + for key in sorted(result.keys()): + logger.info(" dev: %s = %s", key, str(result[key])) + return results + + def load_and_cache_examples(args, task, tokenizer, data_type='train'): + global all_labels + if args.local_rank not in [-1, 0] and not evaluate: + torch.distributed.barrier() # Make sure only the first process in distributed training process the + # dataset, and the others will use the cache + + processor = processors[task]() + output_mode = output_modes[task] + # Load data features from cache or dataset file + cached_features_file = os.path.join(args.data_dir, 'cached_{}_{}_{}_{}'.format( + data_type, + list(filter(None, args.model_name_or_path.split('/'))).pop(), + str(args.max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Loading features from cached file %s", cached_features_file) + features = torch.load(cached_features_file) + else: + logger.info("Creating features from dataset file at %s", args.data_dir) + label_list = processor.get_labels() + if data_type == 'train': + examples = processor.get_train_examples(args.data_dir) + elif data_type == 'dev': + examples = processor.get_dev_examples(args.data_dir) + else: + examples = processor.get_test_examples(args.data_dir) + + features = convert_examples_to_features(examples, + tokenizer, + label_list=label_list, + max_length=args.max_seq_length, + output_mode=output_mode, + pad_on_left=False, + pad_token=tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0], + pad_token_segment_id=0, + ) + if args.local_rank in [-1, 0]: + logger.info("Saving features into cached file %s", cached_features_file) + torch.save(features, cached_features_file) + + if args.local_rank == 0 and not evaluate: + torch.distributed.barrier() # Make sure only the first process in distributed training process 
+            # dataset; the others will use the cache
+            torch.distributed.barrier()
+
+        # Convert to Tensors and build dataset
+        all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
+        all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
+        all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long)
+        all_lens = torch.tensor([f.input_len for f in features], dtype=torch.long)
+        if output_mode == "classification":
+            all_labels = torch.tensor([f.label for f in features], dtype=torch.long)
+        elif output_mode == "regression":
+            all_labels = torch.tensor([f.label for f in features], dtype=torch.float)
+        dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, all_lens, all_labels)
+        return dataset
+
+    def run(self):
+        parser = argparse.ArgumentParser()
+        ## Required parameters
+        args = parser.parse_args()
+        if self.training_data_path:
+            args.data_dir = self.training_data_path
+        tokenizer = self.tokenizer
+        if not os.path.exists(args.output_dir):
+            os.mkdir(args.output_dir)
+        init_logger(log_file=args.output_dir)
+        if os.path.exists(args.output_dir) and os.listdir(
+                args.output_dir) and args.do_train and not args.overwrite_output_dir:
+            raise ValueError(
+                "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
+                    args.output_dir))
+
+        # Setup distant debugging if needed
+        if args.server_ip and args.server_port:
+            # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
+            import ptvsd
+            print("Waiting for debugger attach")
+            ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
+            ptvsd.wait_for_attach()
+
+        # Setup CUDA, GPU & distributed training
+        if args.local_rank == -1 or args.no_cuda:
+            device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+            args.n_gpu = torch.cuda.device_count()
+        else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
+            torch.cuda.set_device(args.local_rank)
+            device = torch.device("cuda", args.local_rank)
+            torch.distributed.init_process_group(backend='nccl')
+            args.n_gpu = 1
+        args.device = device
+
+        # Setup logging
+        logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
+                       args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16)
+
+        # Set seed
+        seed_everything(args.seed)
+        # Prepare CLUE task
+        args.task_name = args.task_name.lower()
+        if args.task_name not in processors:
+            raise ValueError("Task not found: %s" % (args.task_name))
+        processor = processors[args.task_name]()
+        args.output_mode = output_modes[args.task_name]
+        label_list = processor.get_labels()
+        num_labels = len(label_list)
+
+        # Load pretrained model and tokenizer
+        if args.local_rank not in [-1, 0]:
+            torch.distributed.barrier()  # Make sure only the first process in distributed training will download
+            # model & vocab
+
+        config_class = self.config_class
+        config = config_class.from_pretrained(args.config_name if args.config_name else args.model_name_or_path,
+                                              num_labels=num_labels, finetuning_task=args.task_name)
+        model = BertForTokenClassification.from_pretrained(args.model_name_or_path,
+                                                           from_tf=bool('.ckpt' in args.model_name_or_path),
+                                                           config=config)
+
+        if args.local_rank == 0:
+            torch.distributed.barrier()  # Make sure only the first process in distributed training will download
+            # model & vocab
+        model.to(args.device)
+        logger.info("Training/evaluation parameters %s", args)
+        # Training
+        if args.do_train:
+            train_dataset = self.load_and_cache_examples(args, args.task_name, tokenizer, data_type='train')
+            global_step, tr_loss = self.train(args, train_dataset, model, tokenizer)
+            logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)
+
+        # Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
+        if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
+            # Create output directory if needed
+            if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
+                os.makedirs(args.output_dir)
+
+            logger.info("Saving model checkpoint to %s", args.output_dir)
+            # Save a trained model, configuration and tokenizer using `save_pretrained()`.
+            # They can then be reloaded using `from_pretrained()`
+            model_to_save = model.module if hasattr(model,
+                                                    'module') else model  # Take care of distributed/parallel training
+            model_to_save.save_pretrained(args.output_dir)
+            tokenizer.save_pretrained(args.output_dir)
+
+            # Good practice: save your training arguments together with the trained model
+            torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
+
+            # Load a trained model and vocabulary that you have fine-tuned
+            model = BertForTokenClassification.from_pretrained(args.output_dir)
+            model.to(args.device)
+        # Evaluation
+        results = {}
+        if args.do_eval and args.local_rank in [-1, 0]:
+            checkpoints = [args.output_dir]
+            if args.eval_all_checkpoints:
+                checkpoints = list(
+                    os.path.dirname(c) for c in
+                    sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
+                logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
+            logger.info("Evaluate the following checkpoints: %s", checkpoints)
+            for checkpoint in checkpoints:
+                global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
+                prefix = checkpoint.split('/')[-1] if checkpoint.find('checkpoint') != -1 else ""
+                model = BertForTokenClassification.from_pretrained(checkpoint)
+                model.to(args.device)
+                result = self.evaluate(args, model, tokenizer, prefix=prefix)
+                result = dict((k + '_{}'.format(global_step), v) for k, v in result.items())
+                results.update(result)
+            output_eval_file = os.path.join(args.output_dir, "checkpoint_eval_results.txt")
+            with open(output_eval_file, "w") as writer:
+                for key in sorted(results.keys()):
+                    writer.write("%s = %s\n" % (key, str(results[key])))
+
+        if args.do_predict and args.local_rank in [-1, 0]:
+            checkpoints = [args.output_dir]
+            if args.predict_checkpoints > 0:
+                checkpoints = list(
+                    os.path.dirname(c) for c in
+                    sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
+                logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
+                checkpoints = [x for x in checkpoints if x.split('-')[-1] == str(args.predict_checkpoints)]
+            logger.info("Predict the following checkpoints: %s", checkpoints)
+            for checkpoint in checkpoints:
+                prefix = checkpoint.split('/')[-1] if checkpoint.find('checkpoint') != -1 else ""
+                model = BertForTokenClassification.from_pretrained(checkpoint)
+                model.to(args.device)
+                self.predict(args, model, tokenizer, label_list, prefix=prefix)
+
+
+if __name__ == "__main__":
+    print('Bert text classification training started')
+    trainer = BertTrain()
+    trainer.run()
+    print('Bert text classification training finished')
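A caveat on run() above: it calls parser.parse_args() without registering any arguments, so every args.* attribute read afterwards would fail at runtime. Below is a minimal sketch of the argument definitions the body implies; the flag names are inferred from how args is used in this file, while the types, defaults and required markers are placeholder assumptions, not values from the knlp project.

import argparse

parser = argparse.ArgumentParser()
# Names inferred from the args.* attributes accessed in train(), evaluate(),
# load_and_cache_examples() and run(); types and defaults are assumptions.
parser.add_argument("--task_name", type=str, required=True)
parser.add_argument("--data_dir", type=str, required=True)
parser.add_argument("--output_dir", type=str, required=True)
parser.add_argument("--model_name_or_path", type=str, required=True)
parser.add_argument("--config_name", type=str, default="")
parser.add_argument("--max_seq_length", type=int, default=128)
parser.add_argument("--per_gpu_train_batch_size", type=int, default=8)
parser.add_argument("--per_gpu_eval_batch_size", type=int, default=8)
parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
parser.add_argument("--num_train_epochs", type=float, default=3.0)
parser.add_argument("--max_grad_norm", type=float, default=1.0)
parser.add_argument("--logging_steps", type=int, default=50)
parser.add_argument("--save_steps", type=int, default=50)
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--do_train", action="store_true")
parser.add_argument("--do_eval", action="store_true")
parser.add_argument("--do_predict", action="store_true")
parser.add_argument("--eval_all_checkpoints", action="store_true")
parser.add_argument("--predict_checkpoints", type=int, default=0)
parser.add_argument("--overwrite_output_dir", action="store_true")
parser.add_argument("--no_cuda", action="store_true")
parser.add_argument("--fp16", action="store_true")
parser.add_argument("--local_rank", type=int, default=-1)
parser.add_argument("--server_ip", type=str, default="")
parser.add_argument("--server_port", type=str, default="")
args = parser.parse_args()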
diff --git a/knlp/seq_labeling/classification/beyas/beyas_inference.py b/knlp/seq_labeling/classification/beyas/beyas_inference.py
new file mode 100644
index 0000000..92d5e33
--- /dev/null
+++ b/knlp/seq_labeling/classification/beyas/beyas_inference.py
@@ -0,0 +1,24 @@
+import jieba
+import joblib
+
+
+class beyas_inference():
+
+    def __init__(self, model_path, tf_path):
+        self.model_path = model_path
+        self.tf_path = tf_path
+
+    def load_model(self):
+        # Load the persisted naive Bayes classifier and the fitted TF-IDF vectorizer
+        MODEL = joblib.load(self.model_path)
+        TF = joblib.load(self.tf_path)
+        return MODEL, TF
+
+    def predict(self, sentence, MODEL, TF):
+        assert MODEL is not None and TF is not None
+        words = jieba.cut(sentence)
+        s = ' '.join(words)
+        test_features = TF.transform([s])
+        predicted_labels = MODEL.predict(test_features)
+        return predicted_labels[0]
diff --git a/knlp/seq_labeling/classification/beyas/beyas_train.py b/knlp/seq_labeling/classification/beyas/beyas_train.py
new file mode 100644
index 0000000..7265db3
--- /dev/null
+++ b/knlp/seq_labeling/classification/beyas/beyas_train.py
@@ -0,0 +1,46 @@
+import jieba
+import joblib
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.naive_bayes import MultinomialNB
+
+from knlp.common.constant import KNLP_PATH
+
+
+class beyas_train():
+    def __init__(self, file_path, clf_model_path, tf_model_path):
+        self.file_path = file_path
+        self.clf_model_path = clf_model_path
+        self.tf_model_path = tf_model_path
+
+    def load_data(self):
+        with open(self.file_path) as f:
+            lines = f.readlines()
+        data = []
+        label = []
+        for line in lines:
+            line = eval(line)  # each line is a dict literal with 'query' and 'label' keys
+            words = jieba.cut(line['query'])
+            data.append(' '.join(words))
+            label.append(line['label'])
+        return data, label
+
+    def train(self, datas, labels):
+        # Fit the TF-IDF vectorizer and the multinomial naive Bayes classifier, then persist both
+        tf = TfidfVectorizer(max_df=0.5)
+        train_features = tf.fit_transform(datas)
+        clf = MultinomialNB(alpha=0.001).fit(train_features, labels)
+        joblib.dump(clf, self.clf_model_path)
+        joblib.dump(tf, self.tf_model_path)
+
+
+if __name__ == '__main__':
+    test = beyas_train(KNLP_PATH + '/knlp/seq_labeling/classification/bert/dataset/data_train.json',
+                       KNLP_PATH + '/knlp/seq_labeling/classification/beyas/model/nb.pkl',
+                       KNLP_PATH + '/knlp/seq_labeling/classification/beyas/model/tf.pkl')
+    train_datas, train_labels = test.load_data()
+    test.train(train_datas, train_labels)
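For reference, a short end-to-end sketch of the two beyas classes above; the file and model paths reuse the ones from beyas_train's __main__ block, and the example sentence is an arbitrary placeholder.

from knlp.common.constant import KNLP_PATH
from knlp.seq_labeling.classification.beyas.beyas_train import beyas_train
from knlp.seq_labeling.classification.beyas.beyas_inference import beyas_inference

nb_path = KNLP_PATH + '/knlp/seq_labeling/classification/beyas/model/nb.pkl'
tf_path = KNLP_PATH + '/knlp/seq_labeling/classification/beyas/model/tf.pkl'

# Fit TF-IDF + MultinomialNB on the JSON-lines training file and persist both artifacts
trainer = beyas_train(KNLP_PATH + '/knlp/seq_labeling/classification/bert/dataset/data_train.json',
                      nb_path, tf_path)
datas, labels = trainer.load_data()
trainer.train(datas, labels)

# Reload the persisted classifier and vectorizer, then classify one sentence
infer = beyas_inference(nb_path, tf_path)
MODEL, TF = infer.load_model()
print(infer.predict('我很开心', MODEL, TF))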
diff --git a/knlp/seq_labeling/classification/pipeline.py b/knlp/seq_labeling/classification/pipeline.py
new file mode 100644
index 0000000..b9bbe08
--- /dev/null
+++ b/knlp/seq_labeling/classification/pipeline.py
@@ -0,0 +1,124 @@
+import jieba
+
+from knlp.common.constant import KNLP_PATH, class_model_list
+from knlp.nn.textcnn.inference_textcnn import InferenceTextCNN
+from knlp.Pipeline.pipeline import Pipeline
+from knlp.seq_labeling.classification.ModelTrainer.model_train import ModelTrainer
+from knlp.seq_labeling.classification.bert.inference import bertinference
+from knlp.seq_labeling.classification.beyas.beyas_inference import beyas_inference
+from knlp.seq_labeling.bert.models.bert_for_classification import BertForTokenClassification
+
+
+class ClassificationPipeline(Pipeline):
+
+    def __init__(self, type, data_path=KNLP_PATH + '/knlp/data/bios_clue/train.txt',
+                 dev_path=KNLP_PATH + '/knlp/data/clue/val.txt',
+                 vocab_path=KNLP_PATH + '/knlp/data/clue/vocab.txt',
+                 word2idx_path=KNLP_PATH + "/knlp/nn/textcnn/model_textcnn/weibo_word2idx.json",
+                 label2idx_path=KNLP_PATH + "/knlp/nn/textcnn/model_textcnn/weibo_label2idx.json",
+                 max_length=150):
+        """
+        Args:
+            type: choice between 'train' and 'inference'
+            data_path: path to the dataset (points at the training file itself, used for model training)
+            dev_path: path to the validation data
+            vocab_path: path to the vocabulary file
+            word2idx_path: path to the word2idx mapping used by textcnn
+            label2idx_path: path to the label2idx mapping used by textcnn
+            max_length: maximum truncation length
+        """
+        super().__init__()
+        if data_path:
+            self.training_data_path = data_path
+        if dev_path:
+            self.dev_path = dev_path
+        if vocab_path:
+            self.vocab_set_path = vocab_path
+        if word2idx_path:
+            self.word2idx_path = word2idx_path
+        if label2idx_path:
+            self.label2idx_path = label2idx_path
+        self.type = type
+        self.max_length = max_length
+        self.model_list = class_model_list
+        # where the bert model is stored
+        self.model_path_bert = KNLP_PATH + "/knlp/model/bert/output_model"
+        # where the beyas (naive Bayes) classifier and TF-IDF vectorizer are stored
+        self.model_path_clf = KNLP_PATH + "/knlp/model/beyas/classification"
+        self.model_path_tf = KNLP_PATH + "/knlp/model/beyas/classification"
+        # where the textcnn model is stored
+        self.model_path_textcnn = KNLP_PATH + "/knlp/model/classification/textcnn.pkl"
+
+    def train(self, model):
+        if model not in self.model_list and model != 'all':
+            print(f'only support model in {self.model_list}')
+            return
+        trainer = ModelTrainer(data_path=self.training_data_path,
+                               vocab_path=self.vocab_set_path,
+                               model=model)
+        if model == 'beyas':
+            trainer.beyas_train(clf_model_path=self.model_path_clf, tf_model_path=self.model_path_tf)
+        elif model == 'bert':
+            trainer.bert_train()
+        elif model == 'textcnn':
+            trainer.textcnn_train(model_save_path=self.model_path_textcnn,
+                                  word2idx_path=self.word2idx_path,
+                                  label2idx_path=self.label2idx_path)
+        elif model == 'all':
+            trainer.beyas_train(clf_model_path=self.model_path_clf, tf_model_path=self.model_path_tf)
+            trainer.bert_train()
+            trainer.textcnn_train(model_save_path=self.model_path_textcnn,
+                                  word2idx_path=self.word2idx_path,
+                                  label2idx_path=self.label2idx_path)
+
+    def inference(self, model, input, model_path_textcnn=None, model_path_bert=None, model_path_clf=None,
+                  model_path_tf=None):
+        words = input
+        model_bert = model_path_bert if model_path_bert else self.model_path_bert
+        model_textcnn = model_path_textcnn if model_path_textcnn else self.model_path_textcnn
+        model_clf = model_path_clf if model_path_clf else self.model_path_clf
+        model_tf = model_path_tf if model_path_tf else self.model_path_tf
+        if model not in self.model_list and model != 'all':
+            print(f'only support model in {self.model_list}')
+        elif model == 'bert':
+            self.bert_inference(words, model_bert)
+        elif model == 'textcnn':
+            self.textcnn_inference(words, self.max_length, model_textcnn, self.word2idx_path, self.label2idx_path)
+        elif model == 'beyas':
+            self.beyas_inference(words, model_clf, model_tf)
+        elif model == 'all':
+            self.bert_inference(words, model_bert)
+            self.textcnn_inference(words, self.max_length, model_textcnn, self.word2idx_path, self.label2idx_path)
+            self.beyas_inference(words, model_clf, model_tf)
+
+    def bert_inference(self, words, model_path):
+        print("******** bert_result ********")
+        inference = bertinference('cluener')
+        model = BertForTokenClassification.from_pretrained(model_path)
+        model.to('cpu')
+        result = inference.predict(model=model, text=words)
+        print(result)
+
+    def textcnn_inference(self, words, max_length, model_path, word2idx_path, label2idx_path):
+        print("******** textcnn_result ********")
+        tokenizer = jieba.lcut
+        inference = InferenceTextCNN(model_path=model_path, word2idx_path=word2idx_path,
+                                     label2idx_path=label2idx_path, max_length=max_length, tokenizer=tokenizer)
+        print(inference([words], return_label=True))
+
+    def beyas_inference(self, words, clf_model, tf_model):
+        print("******** beyas_result ********")
+        # the bare name resolves to the imported beyas_inference class, not this method
+        inference = beyas_inference(clf_model, tf_model)
+        MODEL, TF = inference.load_model()
+        result = inference.predict(words, MODEL, TF)
+        print(result)
+
+
+if __name__ == '__main__':
+    sentence = '我很开心'
+    pipe = ClassificationPipeline(type='inference')
+    pipe.inference(model='all', input=sentence)
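Finally, a usage sketch for ClassificationPipeline, mirroring its __main__ block; the model names come from class_model_list, and the 'train' pass assumes the default data paths set in __init__ exist locally.

from knlp.seq_labeling.classification.pipeline import ClassificationPipeline

# Train a single backend (or pass model='all' to train every supported classifier)
pipe = ClassificationPipeline(type='train')
pipe.train(model='beyas')

# Run every supported classifier on one sentence; each branch prints its own prediction
pipe = ClassificationPipeline(type='inference')
pipe.inference(model='all', input='我很开心')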