Commit b68f190

KKZ20 and littsk authored
[hotfix] fixing policies of sequence parallel (#4922)
* Add layer norm gradients all-reduce for sequence parallel
* Modify docs and polish code
* Polish code
* Skip pipeline inference test
* Fix parameter passing when calling get_autopolicy

Co-authored-by: littsk <[email protected]>
1 parent 3978b37 commit b68f190
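The first bullet in the commit message, the layer norm gradients all-reduce for sequence parallel, follows the usual pattern: when the sequence dimension is sharded, each rank's LayerNorm only sees its slice of the activations, so the LayerNorm weight and bias gradients must be summed across the sequence-parallel group before the optimizer step. Below is a minimal, hypothetical sketch of that pattern in plain PyTorch; `allreduce_layernorm_grads` and the single-process `gloo` group are illustrative stand-ins, not ColossalAI's actual implementation.

import os

import torch
import torch.distributed as dist
import torch.nn as nn


def allreduce_layernorm_grads(model: nn.Module, process_group=None) -> None:
    # Under sequence parallelism, LayerNorm runs on a shard of the sequence,
    # so its weight/bias gradients must be summed across the sequence-parallel ranks.
    for module in model.modules():
        if isinstance(module, nn.LayerNorm):
            for p in module.parameters():
                if p.grad is not None:
                    dist.all_reduce(p.grad, op=dist.ReduceOp.SUM, group=process_group)


if __name__ == "__main__":
    # Single-process gloo group just so the example runs end to end.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=0, world_size=1)

    model = nn.Sequential(nn.Linear(4, 4), nn.LayerNorm(4))
    model(torch.randn(2, 4)).sum().backward()
    allreduce_layernorm_grads(model)
    dist.destroy_process_group()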

File tree

  • colossalai/inference/tensor_parallel/engine.py

1 file changed: 1 addition, 1 deletion


colossalai/inference/tensor_parallel/engine.py

Lines changed: 1 addition & 1 deletion
@@ -213,7 +213,7 @@ def _shard_model_by(self, shardformer: ShardFormer, model: nn.Module) -> None:
         ), "Discrepancy between the tp size of TPInferEngine and the tp size of shard config"
         model_name = model.__class__.__name__
         assert model_name in self.supported_models, f"Unsupported model cls {model_name} for TP inference."
-        policy = get_autopolicy(model, inference_only=True)
+        policy = get_autopolicy(model, shard_config=self.shard_config)
         self.model, _ = shardformer.optimize(model, policy)
 
         if self.shard_config.inference_gptq:
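The one-line change passes the engine's full `shard_config` to the auto-policy lookup instead of the old `inference_only=True` flag, so the selected policy can react to settings such as sequence parallelism rather than a single boolean. The following self-contained sketch illustrates that shape of API; `ShardConfig`, `DummyPolicy`, and `get_autopolicy_sketch` are hypothetical stand-ins, not ColossalAI's real classes.

from dataclasses import dataclass

import torch.nn as nn


@dataclass
class ShardConfig:
    # Hypothetical subset of fields; a real shard config carries more options.
    tensor_parallel_size: int = 1
    enable_sequence_parallelism: bool = False
    inference_only: bool = True


class DummyPolicy:
    """Stand-in for a Shardformer-style policy object."""

    def __init__(self, model_cls: str, shard_config: ShardConfig):
        self.model_cls = model_cls
        self.shard_config = shard_config

    def describe(self) -> str:
        sp = "on" if self.shard_config.enable_sequence_parallelism else "off"
        return f"{self.model_cls} policy (sequence parallel: {sp})"


def get_autopolicy_sketch(model: nn.Module, shard_config: ShardConfig) -> DummyPolicy:
    # Select a policy by model class name and hand it the whole shard config,
    # mirroring the shape of the fixed call `get_autopolicy(model, shard_config=...)`.
    return DummyPolicy(model.__class__.__name__, shard_config)


if __name__ == "__main__":
    cfg = ShardConfig(tensor_parallel_size=2, enable_sequence_parallelism=True)
    policy = get_autopolicy_sketch(nn.Linear(8, 8), cfg)
    print(policy.describe())  # Linear policy (sequence parallel: on)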
