diff --git a/.gitignore b/.gitignore
index 491f881f32..756b66b727 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,10 @@ FETCH_HEAD
 
 # auto generated version file by setuptools_scm
 ppsci/_version.py
+
+
+# Ignore entire output_laplace2d output directory
+/examples/laplace/output_laplace2d/
+/examples/extformer_moe/data
+/examples/extformer_moe/pretrained
+/examples/test
\ No newline at end of file
diff --git a/README.md b/README.md
index 8c1d5b9200..879317550a 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # PaddleScience
 
+TecOrigin学习专用
+
 <!-- --8<-- [start:status] -->
 ![paddlescience_icon](https://paddle-org.bj.bcebos.com/paddlescience%2Fdocs%2Fpaddlescience_icon.png)
 > *Developed with [PaddlePaddle](https://www.paddlepaddle.org.cn/)*
diff --git a/examples/extformer_moe/conf/extformer_moe_enso_inference.yaml b/examples/extformer_moe/conf/extformer_moe_enso_inference.yaml
new file mode 100644
index 0000000000..f21e6b6267
--- /dev/null
+++ b/examples/extformer_moe/conf/extformer_moe_enso_inference.yaml
@@ -0,0 +1,155 @@
+defaults:
+  - ppsci_default
+  - TRAIN: train_default
+  - TRAIN/ema: ema_default
+  - TRAIN/swa: swa_default
+  - EVAL: eval_default
+  - INFER: infer_default
+  - hydra/job/config/override_dirname/exclude_keys: exclude_keys_default
+  - _self_
+
+hydra:
+  run:
+    # static output directory (resolved relative to the current working directory)
+    dir: outputs_extformer_moe_pretrain
+  job:
+    name: ${mode} # name of logfile
+    chdir: false # keep current working directory unchanged
+  callbacks:
+    init_callback:
+      _target_: ppsci.utils.callbacks.InitCallback
+  sweep:
+    # output directory for multirun
+    dir: ${hydra.run.dir}
+    subdir: ./
+
+# general settings
+mode: inference # running mode: inference only
+seed: 0
+output_dir: ${hydra:run.dir}
+log_freq: 20
+
+# set train and evaluate data path
+# FILE_PATH: /hpc2hdd/home/hni017/Workplace/data/weather_data/icar_enso_2021/enso_round1_train_20210201
+FILE_PATH: /localnvme/application/duff/workspace/PaddleScience/examples/extformer_moe/data/weather_data/icar_enso_2021/enso_round1_train_20210201/
+
+# dataset setting
+DATASET:
+  label_keys: ["sst_target","nino_target"]
+  in_len: 12
+  out_len: 14
+  nino_window_t: 3
+  in_stride: 1
+  out_stride: 1
+  train_samples_gap: 1
+  eval_samples_gap: 1
+  normalize_sst: true
+
+# model settings
+MODEL:
+  input_keys: ["sst_data"]
+  output_keys: ["sst_target","nino_target","aux_loss","rank_loss"]
+  input_shape: [12, 24, 48, 1]
+  target_shape: [14, 24, 48, 1]
+  base_units: 64
+  scale_alpha: 1.0
+
+  enc_depth: [1, 1]
+  dec_depth: [1, 1]
+  enc_use_inter_ffn: true
+  dec_use_inter_ffn: true
+  dec_hierarchical_pos_embed: false
+
+  downsample: 2
+  downsample_type: "patch_merge"
+  upsample_type: "upsample"
+
+  num_global_vectors: 0
+  use_dec_self_global: false
+  dec_self_update_global: true
+  use_dec_cross_global: false
+  use_global_vector_ffn: false
+  use_global_self_attn: false
+  separate_global_qkv: false
+  global_dim_ratio: 1
+
+  self_pattern: "axial"
+  cross_self_pattern: "axial"
+  cross_pattern: "cross_1x1"
+  dec_cross_last_n_frames: null
+
+  attn_drop: 0.1
+  proj_drop: 0.1
+  ffn_drop: 0.1
+  num_heads: 4
+
+  ffn_activation: "gelu"
+  gated_ffn: false
+  norm_layer: "layer_norm"
+  padding_type: "zeros"
+  pos_embed_type: "t+h+w"
+  use_relative_pos: true
+  self_attn_use_final_proj: true
+  dec_use_first_self_attn: false
+
+  z_init_method: "zeros"
+  initial_downsample_type: "conv"
+  initial_downsample_activation: "leaky_relu"
+  initial_downsample_scale: [1, 1, 2]
+  initial_downsample_conv_layers: 2
+  final_upsample_conv_layers: 1
+  checkpoint_level: 0
+
+  attn_linear_init_mode: "0"
+  ffn_linear_init_mode: "0"
+  conv_init_mode: "0"
+  down_up_linear_init_mode: "0"
+  norm_init_mode: "0"
+
+# moe settings
+MOE:
+  use_linear_moe: false
+  use_ffn_moe: true
+  use_attn_moe: false
+  num_experts: 10
+  out_planes: 4
+  importance_weight: 0.0
+  load_weight: 0.0
+  gate_style: "cuboid-latent" # linear, spatial-latent, cuboid-latent, spatial-latent-linear, cuboid-latent-linear
+  dispatch_style: "dense" # sparse, dense
+  aux_loss_style: "all" # all, cell
+
+# rnc settings
+RNC:
+  use_rnc: false
+  rank_imbalance_style: "batch+T+H+W"
+  feature_similarity_style: "l2"
+  rank_imbalance_temp: 2
+  label_difference_style: "l1"
+  rank_reg_coeff: 0.01
+  loss_cal_style: "computation-efficient" # computation-efficient, memory-efficient
+
+# training settings
+TRAIN:
+  epochs: 100
+  save_freq: 20
+  eval_during_train: true
+  eval_freq: 10
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 0.0002
+    by_epoch: true
+  min_lr_ratio: 1.0e-3
+  wd: 1.0e-5
+  batch_size: 16
+  pretrained_model_path: null
+  checkpoint_path: null
+  update_freq: 1
+
+# evaluation settings
+EVAL:
+  # pretrained_model_path: ./checkpoint/enso/extformer_moe_enso.pdparams
+  pretrained_model_path: ./pretrained/extformer_moe_pretrained.pdparams
+  compute_metric_by_batch: false
+  eval_with_no_grad: true
+  batch_size: 1
diff --git a/examples/extformer_moe/conf/extformer_moe_enso_pretrain.yaml b/examples/extformer_moe/conf/extformer_moe_enso_pretrain.yaml
index 450ffd21ab..b584bc2b37 100644
--- a/examples/extformer_moe/conf/extformer_moe_enso_pretrain.yaml
+++ b/examples/extformer_moe/conf/extformer_moe_enso_pretrain.yaml
@@ -30,7 +30,8 @@ output_dir: ${hydra:run.dir}
 log_freq: 20
 
 # set train and evaluate data path
-FILE_PATH: /hpc2hdd/home/hni017/Workplace/data/weather_data/icar_enso_2021/enso_round1_train_20210201
+# FILE_PATH: /hpc2hdd/home/hni017/Workplace/data/weather_data/icar_enso_2021/enso_round1_train_20210201
+FILE_PATH: /root/workspace/PaddleScience/examples/extformer_moe/data/weather_data/icar_enso_2021/enso_round1_train_20210201
 
 # dataset setting
 DATASET:
diff --git a/examples/extformer_moe/coverage.py b/examples/extformer_moe/coverage.py
new file mode 100644
index 0000000000..aeff95cb26
--- /dev/null
+++ b/examples/extformer_moe/coverage.py
@@ -0,0 +1,29 @@
+import argparse
+import re
+
+def get_api_info(path):
+    """Return (all_apis, fallback_apis): sorted names parsed from the log at `path`."""
+    # Raw strings: '\s' and '\w' are invalid escapes in ordinary string literals.
+    pattern_fallback = re.compile(r'(?<=missing sdaa kernel: )\s*\w+')
+    pattern_api = re.compile(r'(?<=Finish AD API: )\s*\w+')
+    all_api_set = set()
+    fallback_api_set = set()
+    with open(path, 'r', encoding='utf-8') as f:
+        for line in f:  # a marker with no name after it gives no match and is skipped
+            api_match = pattern_api.search(line)
+            if api_match:
+                all_api_set.add(api_match.group())
+            fallback_match = pattern_fallback.search(line)
+            if fallback_match:
+                fallback_api_set.add(fallback_match.group())
+    return sorted(all_api_set), sorted(fallback_api_set)
+    
+
+if __name__ == '__main__':  # CLI: print operator coverage parsed from the log given by --path
+    parser = argparse.ArgumentParser(description='算子覆盖率统计')
+    parser.add_argument('--path', type=str, help='日志文件路径', required=True)
+    all_api_set, fallback_api_set = get_api_info(parser.parse_args().path)
+    print(f"all api: {all_api_set}, total: {len(all_api_set)}\n")
+    print(f"fallback op: {fallback_api_set}, total: {len(fallback_api_set)}\n")
+    rate = (1 - len(fallback_api_set) / len(all_api_set)) * 100 if all_api_set else 0.0  # no API lines: 0%, not ZeroDivisionError
+    print(f"coverage rate: {rate:.2f}%")
\ No newline at end of file
diff --git a/examples/extformer_moe/extformer_moe_enso_inference.py b/examples/extformer_moe/extformer_moe_enso_inference.py
new file mode 100644
index 0000000000..18818f812a
--- /dev/null
+++ b/examples/extformer_moe/extformer_moe_enso_inference.py
@@ -0,0 +1,77 @@
+# Requires this environment variable to be set: export CUSTOM_DEVICE_BLACK_LIST=top_k_v2,top_k,mask_select
+import os
+import numpy as np
+import paddle
+from omegaconf import DictConfig
+from omegaconf import OmegaConf
+import  ppsci
+from ppsci.utils import save_load
+
+CMIP6_SST_MAX = 10.198975563049316
+CMIP6_SST_MIN = -16.549121856689453
+CMIP5_SST_MAX = 8.991744995117188
+CMIP5_SST_MIN = -9.33076286315918
+CMIP6_NINO_MAX = 4.138188362121582
+CMIP6_NINO_MIN = -3.5832221508026123
+CMIP5_NINO_MAX = 3.8253555297851562
+CMIP5_NINO_MIN = -2.691682815551758
+
+SST_MAX = max(CMIP6_SST_MAX, CMIP5_SST_MAX)
+SST_MIN = min(CMIP6_SST_MIN, CMIP5_SST_MIN)
+def scale_sst(sst):
+    """Min-max scale `sst` so that SST_MIN maps to 0 and SST_MAX maps to 1."""
+    sst_span = SST_MAX - SST_MIN
+    return (sst - SST_MIN) / sst_span
+
+
+def inference(cfg: DictConfig):
+    """Run one forward pass of ExtFormerMoECuboid on a single ENSO test sample and print the output shapes."""
+    normalize_sst = cfg.DATASET.normalize_sst
+    print('normalize_sst:', normalize_sst)
+    in_len = cfg.DATASET.in_len
+    print('in_len:', in_len)
+    input_keys = cfg.MODEL.input_keys
+
+    # Load the test sample (the path is relative to the current working directory).
+    test_00001_06_05 = np.load('./data/weather_data/icar_enso_2021/enso_final_test_data_B/test_00001_06_05.npy')
+
+    # The last axis holds 4 predictors stored in the order SST, T300, Ua, Va; keep only SST.
+    test_sst = paddle.to_tensor(test_00001_06_05[...,0], dtype='float32')
+    test_sst = test_sst[...,np.newaxis]
+    # Indices 19:67 are kept to match the longitude range used in training (95E-330E).
+    test_sst = test_sst[ :, :, 19:67, :]
+
+    test_sst_in_tar = np.concatenate([test_sst, test_sst, test_sst], axis=0)[:26]  # repeat the sample 3x along axis 0, keep the first 26 frames
+    print('test_sst:', test_sst_in_tar.shape)
+    if normalize_sst:
+        test_sst_in_tar = scale_sst(test_sst_in_tar)
+
+    # Build the model input: the first in_len frames are the input, the remaining frames the target.
+    in_seq = paddle.unsqueeze(paddle.to_tensor(test_sst_in_tar[: in_len, ...], dtype='float32'), axis=0) # leading axis added; presumably (1, in_len, lat, lon, 1) - TODO confirm
+    target_seq = paddle.unsqueeze(paddle.to_tensor(test_sst_in_tar[in_len :, ...], dtype='float32'), axis=0)  # frames after in_len, with the same leading axis added
+    input_item = {input_keys[0]: in_seq, "sst_target": target_seq}
+
+    # Build the model and load the pretrained weights (hard-coded relative path).
+    moe_config = OmegaConf.to_object(cfg.MOE)
+    rnc_config = OmegaConf.to_object(cfg.RNC)
+    model = ppsci.arch.ExtFormerMoECuboid(**cfg.MODEL, moe_config=moe_config, rnc_config=rnc_config)
+    save_load.load_pretrain(model, "./pretrained/extformer_moe_pretrained.pdparams")
+
+    # Predict.
+    # model.eval()  # NOTE(review): left disabled, so the model runs in its default mode - confirm whether dropout should be off here
+    pred = model(input_item)
+    print({k: (None if v is None else v.shape) for k, v in pred.items()})
+
+
+
+def main(cfg: DictConfig):
+    """Run inference; raise ValueError when cfg.mode is anything other than 'inference'."""
+    if cfg.mode != 'inference':
+        raise ValueError("Invalid mode: for inference only, but got {}".format(cfg.mode))
+    inference(cfg)
+
+    
+
+if __name__ == '__main__':  # script entry point
+    cfg = OmegaConf.load("./conf/extformer_moe_enso_inference.yaml")  # relative path: run from examples/extformer_moe
+    main(cfg)
\ No newline at end of file
diff --git a/examples/extformer_moe/extformer_moe_enso_train.py b/examples/extformer_moe/extformer_moe_enso_train.py
index e0e570fb95..6df79c21e9 100644
--- a/examples/extformer_moe/extformer_moe_enso_train.py
+++ b/examples/extformer_moe/extformer_moe_enso_train.py
@@ -1,3 +1,4 @@
+# Requires this environment variable to be set: export CUSTOM_DEVICE_BLACK_LIST=top_k_v2,top_k
 import enso_metric
 import hydra
 import paddle
@@ -7,6 +8,7 @@
 
 import ppsci
 
+paddle.set_device("sdaa")
 
 def get_parameter_names(model, forbidden_layer_types):
     result = []
@@ -43,7 +45,7 @@ def train(cfg: DictConfig):
             "shuffle": True,
         },
         "batch_size": cfg.TRAIN.batch_size,
-        "num_workers": 8,
+        "num_workers": 0,
     }
 
     # set constraint
diff --git a/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/config.yaml b/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/config.yaml
new file mode 100644
index 0000000000..cbe35ff0c2
--- /dev/null
+++ b/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/config.yaml
@@ -0,0 +1,164 @@
+mode: eval
+output_dir: ${hydra:run.dir}
+log_freq: 20
+seed: 0
+use_vdl: false
+use_tbd: false
+wandb_config: {}
+use_wandb: false
+device: null
+use_amp: false
+amp_level: O1
+to_static: false
+prim: false
+log_level: info
+trace: false
+TRAIN:
+  epochs: 100
+  iters_per_epoch: 20
+  update_freq: 1
+  save_freq: 20
+  eval_during_train: true
+  start_eval_epoch: 1
+  eval_freq: 10
+  checkpoint_path: null
+  pretrained_model_path: null
+  ema:
+    use_ema: false
+    decay: 0.9
+    avg_freq: 1
+  swa:
+    use_swa: false
+    avg_freq: 1
+    avg_range: null
+  lr_scheduler:
+    epochs: ${TRAIN.epochs}
+    learning_rate: 0.0002
+    by_epoch: true
+  min_lr_ratio: 0.001
+  wd: 1.0e-05
+  batch_size: 16
+EVAL:
+  pretrained_model_path: https://paddle-org.bj.bcebos.com/paddlescience/models/extformer-moe/extformer_moe_pretrained.pdparams
+  eval_with_no_grad: true
+  compute_metric_by_batch: false
+  batch_size: 1
+INFER:
+  pretrained_model_path: null
+  export_path: ./inference
+  pdmodel_path: null
+  pdiparams_path: null
+  onnx_path: null
+  device: cpu
+  engine: native
+  precision: fp32
+  ir_optim: true
+  min_subgraph_size: 30
+  gpu_mem: 2000
+  gpu_id: 0
+  max_batch_size: 1024
+  num_cpu_threads: 10
+  batch_size: 256
+FILE_PATH: /root/workspace/PaddleScience/examples/extformer_moe/data/weather_data/icar_enso_2021/enso_round1_train_20210201
+DATASET:
+  label_keys:
+  - sst_target
+  - nino_target
+  in_len: 12
+  out_len: 14
+  nino_window_t: 3
+  in_stride: 1
+  out_stride: 1
+  train_samples_gap: 1
+  eval_samples_gap: 1
+  normalize_sst: true
+MODEL:
+  input_keys:
+  - sst_data
+  output_keys:
+  - sst_target
+  - nino_target
+  - aux_loss
+  - rank_loss
+  input_shape:
+  - 12
+  - 24
+  - 48
+  - 1
+  target_shape:
+  - 14
+  - 24
+  - 48
+  - 1
+  base_units: 64
+  scale_alpha: 1.0
+  enc_depth:
+  - 1
+  - 1
+  dec_depth:
+  - 1
+  - 1
+  enc_use_inter_ffn: true
+  dec_use_inter_ffn: true
+  dec_hierarchical_pos_embed: false
+  downsample: 2
+  downsample_type: patch_merge
+  upsample_type: upsample
+  num_global_vectors: 0
+  use_dec_self_global: false
+  dec_self_update_global: true
+  use_dec_cross_global: false
+  use_global_vector_ffn: false
+  use_global_self_attn: false
+  separate_global_qkv: false
+  global_dim_ratio: 1
+  self_pattern: axial
+  cross_self_pattern: axial
+  cross_pattern: cross_1x1
+  dec_cross_last_n_frames: null
+  attn_drop: 0.1
+  proj_drop: 0.1
+  ffn_drop: 0.1
+  num_heads: 4
+  ffn_activation: gelu
+  gated_ffn: false
+  norm_layer: layer_norm
+  padding_type: zeros
+  pos_embed_type: t+h+w
+  use_relative_pos: true
+  self_attn_use_final_proj: true
+  dec_use_first_self_attn: false
+  z_init_method: zeros
+  initial_downsample_type: conv
+  initial_downsample_activation: leaky_relu
+  initial_downsample_scale:
+  - 1
+  - 1
+  - 2
+  initial_downsample_conv_layers: 2
+  final_upsample_conv_layers: 1
+  checkpoint_level: 0
+  attn_linear_init_mode: '0'
+  ffn_linear_init_mode: '0'
+  conv_init_mode: '0'
+  down_up_linear_init_mode: '0'
+  norm_init_mode: '0'
+MOE:
+  use_linear_moe: false
+  use_ffn_moe: true
+  use_attn_moe: false
+  num_experts: 10
+  out_planes: 4
+  importance_weight: 0.0
+  load_weight: 0.0
+  gate_style: cuboid-latent
+  dispatch_style: dense
+  aux_loss_style: all
+RNC:
+  use_rnc: true
+  rank_imbalance_style: batch+T+H+W
+  feature_similarity_style: l2
+  rank_imbalance_temp: 2
+  label_difference_style: l1
+  rank_reg_coeff: 0.01
+  loss_cal_style: computation-efficient
diff --git a/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/hydra.yaml b/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/hydra.yaml
new file mode 100644
index 0000000000..048bedaf4c
--- /dev/null
+++ b/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/hydra.yaml
@@ -0,0 +1,204 @@
+hydra:
+  run:
+    dir: outputs_extformer_moe_pretrain
+  sweep:
+    dir: ${hydra.run.dir}
+    subdir: ./
+  launcher:
+    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+
+      Use --hydra-help to view Hydra specific help
+
+      '
+    template: '${hydra.help.header}
+
+      == Configuration groups ==
+
+      Compose your configuration from those groups (group=option)
+
+
+      $APP_CONFIG_GROUPS
+
+
+      == Config ==
+
+      Override anything in the config (foo.bar=value)
+
+
+      $CONFIG
+
+
+      ${hydra.help.footer}
+
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+
+      See https://hydra.cc for more info.
+
+
+      == Flags ==
+
+      $FLAGS_HELP
+
+
+      == Configuration groups ==
+
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+
+
+      $HYDRA_CONFIG_GROUPS
+
+
+      Use ''--cfg hydra'' to Show the Hydra config.
+
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: RUN
+  searchpath: []
+  callbacks:
+    init_callback:
+      _target_: ppsci.utils.callbacks.InitCallback
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=RUN
+    task:
+    - mode=eval
+    - EVAL.pretrained_model_path=https://paddle-org.bj.bcebos.com/paddlescience/models/extformer-moe/extformer_moe_pretrained.pdparams
+  job:
+    name: ${mode}
+    chdir: false
+    override_dirname: ''
+    id: ???
+    num: ???
+    config_name: extformer_moe_enso_pretrain.yaml
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys:
+        - mode
+        - output_dir
+        - log_freq
+        - seed
+        - use_vdl
+        - use_tbd
+        - wandb_config
+        - use_wandb
+        - device
+        - use_amp
+        - amp_level
+        - to_static
+        - prim
+        - log_level
+        - trace
+        - TRAIN.save_freq
+        - TRAIN.eval_during_train
+        - TRAIN.start_eval_epoch
+        - TRAIN.eval_freq
+        - TRAIN.checkpoint_path
+        - TRAIN.pretrained_model_path
+        - EVAL.pretrained_model_path
+        - EVAL.eval_with_no_grad
+        - EVAL.compute_metric_by_batch
+        - EVAL.batch_size
+        - INFER.pretrained_model_path
+        - INFER.export_path
+        - INFER.pdmodel_path
+        - INFER.pdiparams_path
+        - INFER.onnx_path
+        - INFER.device
+        - INFER.engine
+        - INFER.precision
+        - INFER.ir_optim
+        - INFER.min_subgraph_size
+        - INFER.gpu_mem
+        - INFER.gpu_id
+        - INFER.max_batch_size
+        - INFER.num_cpu_threads
+        - INFER.batch_size
+  runtime:
+    version: 1.3.2
+    version_base: '1.3'
+    cwd: /root/workspace/PaddleScience/examples/extformer_moe
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /root/workspace/PaddleScience/examples/extformer_moe/conf
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /root/workspace/PaddleScience/examples/extformer_moe/outputs_extformer_moe_pretrain
+    choices:
+      hydra/job/config/override_dirname/exclude_keys: exclude_keys_default
+      INFER: infer_default
+      EVAL: eval_default
+      TRAIN/swa: swa_default
+      TRAIN/ema: ema_default
+      TRAIN: train_default
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: basic
+      hydra/output: default
+  verbose: false
diff --git a/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/overrides.yaml b/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/overrides.yaml
new file mode 100644
index 0000000000..92912728af
--- /dev/null
+++ b/examples/extformer_moe/outputs_extformer_moe_pretrain/.hydra/overrides.yaml
@@ -0,0 +1,2 @@
+- mode=eval
+- EVAL.pretrained_model_path=https://paddle-org.bj.bcebos.com/paddlescience/models/extformer-moe/extformer_moe_pretrained.pdparams
diff --git a/examples/laplace/conf/laplace2d.yaml b/examples/laplace/conf/laplace2d.yaml
index 20591a6f36..803ae3fd45 100644
--- a/examples/laplace/conf/laplace2d.yaml
+++ b/examples/laplace/conf/laplace2d.yaml
@@ -45,7 +45,8 @@ MODEL:
 
 # training settings
 TRAIN:
-  epochs: 20000
+  # epochs: 20000
+  epochs: 1000
   iters_per_epoch: 1
   eval_during_train: true
   eval_freq: 200
@@ -61,7 +62,7 @@ INFER:
   export_path: ./inference/laplace2d
   pdmodel_path: ${INFER.export_path}.pdmodel
   pdiparams_path: ${INFER.export_path}.pdiparams
-  device: gpu
+  device: sdaa
   engine: native
   precision: fp32
   onnx_path: ${INFER.export_path}.onnx
diff --git a/ppsci/arch/extformer_moe_utils.py b/ppsci/arch/extformer_moe_utils.py
index 3332b356c8..96d1d09d73 100644
--- a/ppsci/arch/extformer_moe_utils.py
+++ b/ppsci/arch/extformer_moe_utils.py
@@ -125,9 +125,10 @@ def forward(self, x, t_map=None, eps=1e-25, dense_routing=False):
 
         assert logits.shape[-1] == self.num_experts
         logits = self.softmax(logits)  # [B, T, H, W, E]
-        top_logits, top_indices = logits.topk(
-            min(self.out_planes + 1, self.num_experts), axis=-1
-        )
+        with paddle.amp.auto_cast(custom_black_list={"top_k_v2", "top_k_v2_grad"}):
+            top_logits, top_indices = logits.topk(
+                min(self.out_planes + 1, self.num_experts), axis=-1
+            )
         top_k_logits = top_logits[:, :, :, :, : self.out_planes]
         top_k_indices = top_indices[:, :, :, :, : self.out_planes]
         top_k_gates = top_k_logits / (
@@ -486,6 +487,11 @@ def cal_loss(self, features, labels):
         exp_logits = logits.exp()
         n = logits.shape[1]
 
+        # Guard against trivial case where n <= 1 (no negatives) to avoid empty tensors
+        # if n <= 1:
+        #     # return a zero tensor on the same device/dtype as logits
+        #     return logits.sum() * 0.0
+
         # remove diagonal
         logits = logits.masked_select(
             (1 - paddle.eye(n)).astype("bool").unsqueeze(0).tile([B, 1, 1])
@@ -511,6 +517,7 @@ def cal_loss(self, features, labels):
                 loss += -pos_log_probs.sum()
             loss /= B * n * (n - 1)
         elif self.loss_cal_mode == "computation-efficient":
+            # print('label_diffs.shape:', label_diffs.shape)
             neg_mask = (label_diffs.unsqueeze(-2) >= label_diffs.unsqueeze(-1)).astype(
                 "float32"
             )  # [B, n, n - 1, n - 1]