
Commit e19c6ae

[CI] run pre-commit (#5577)

Authored by flybird11111, ver217, digger-yu, and binmakeswell
* fix
* [release] update version (#5411)
* [hotfix] fix typo s/keywrods/keywords etc. (#5429)
* [devops] fix compatibility (#5444)
* [hotfix] update compatibility test on pr
* [devops] record duration during comp test
* [test] decrease test duration
* fix falcon
* [shardformer] fix gathering output when using tensor parallelism (#5431)
* padding vocab_size when using pipeline parallelism
* fix gather output
* fix resize embedding
* revert
* [doc] release Open-Sora 1.0 with model weights (#5468)
* [doc] update open-sora demo (#5479)
* [example] add grok-1 inference (#5485)
* [misc] add submodule
* remove submodule
* [example] support grok-1 tp inference
* [example] add grok-1 inference script
* [example] refactor code
* [example] add grok-1 readme
* [example] add test ci
* [example] update readme
* run pre-commit

---------

Co-authored-by: Hongxin Liu <[email protected]>
Co-authored-by: digger yu <[email protected]>
Co-authored-by: binmakeswell <[email protected]>
1 parent 179e73e commit e19c6ae
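The file changes below are formatting-only fixes produced by the pre-commit hooks: long calls wrapped onto shorter lines, imports reordered, spacing around operators normalized, stray blank lines adjusted, and missing end-of-file newlines added. Assuming the repository keeps a .pre-commit-config.yaml at its root, as the commit title implies, the same cleanup can typically be reproduced locally by installing the tool (pip install pre-commit) and running "pre-commit run --all-files" before committing.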

82 files changed: +894 additions, -887 deletions


.github/workflows/build_on_pr.yml

Lines changed: 1 addition & 1 deletion
@@ -201,4 +201,4 @@ jobs:
         uses: actions/upload-artifact@v3
         with:
           name: report
-          path: report/
\ No newline at end of file
+          path: report/

LICENSE

Lines changed: 1 addition & 1 deletion
@@ -551,4 +551,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
\ No newline at end of file
+THE SOFTWARE.

applications/Chat/benchmarks/benchmark_opt_lora_dummy.py

Lines changed: 4 additions & 2 deletions
@@ -76,9 +76,11 @@ def main(args):
     if args.strategy == "ddp":
         strategy = DDPStrategy()
     elif args.strategy == "colossalai_gemini":
-        strategy = GeminiStrategy(placement_policy="static",initial_scale=2**5)
+        strategy = GeminiStrategy(placement_policy="static", initial_scale=2**5)
     elif args.strategy == "colossalai_gemini_cpu":
-        strategy = GeminiStrategy(placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5)
+        strategy = GeminiStrategy(
+            placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5
+        )
     elif args.strategy == "colossalai_zero2":
         strategy = LowLevelZeroStrategy(stage=2, placement_policy="cuda")
     elif args.strategy == "colossalai_zero2_cpu":

applications/Chat/coati/dataset/sft_dataset.py

Lines changed: 13 additions & 3 deletions
@@ -51,11 +51,21 @@ def _preprocess(
     """Preprocess the data by tokenizing."""
     sequences = [s + t + tokenizer.eos_token for s, t in zip(sources, targets)]
     sequences_token = tokenizer(
-        sequences, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt", add_special_tokens=False
+        sequences,
+        max_length=max_length,
+        padding="max_length",
+        truncation=True,
+        return_tensors="pt",
+        add_special_tokens=False,
     )

     sources_token = tokenizer(
-        sources, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt", add_special_tokens=False
+        sources,
+        max_length=max_length,
+        padding="max_length",
+        truncation=True,
+        return_tensors="pt",
+        add_special_tokens=False,
     )

     assert sequences_token["attention_mask"].dim() == 2, "seq2seq model should be preprocessed differently"
@@ -66,7 +76,7 @@ def _preprocess(
         if tokenizer.padding_side == "right":
             # |prompt|completion|eos|pad|
            labels[i][:source_len] = IGNORE_INDEX
-            if pad_len>0:
+            if pad_len > 0:
                 labels[i][-pad_len:] = IGNORE_INDEX
         elif tokenizer.padding_side == "left":
             # |pad|prompt|completion|eos|
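A note on the pad_len > 0 guard in the hunk above: when pad_len is 0, the slice labels[i][-pad_len:] covers the entire row rather than an empty tail, so without the guard every label in an unpadded row would be masked. A minimal sketch of that slicing behaviour, using hypothetical tensors rather than the repository's code and assuming the common IGNORE_INDEX value of -100:

    import torch

    IGNORE_INDEX = -100            # assumed sentinel for tokens excluded from the loss
    labels = torch.arange(6)       # stand-in for one tokenized label row
    pad_len = 0                    # this row has no padding

    unguarded = labels.clone()
    unguarded[-pad_len:] = IGNORE_INDEX   # [-0:] is the whole row, so every label gets masked

    guarded = labels.clone()
    if pad_len > 0:                # the guard leaves unpadded rows untouched
        guarded[-pad_len:] = IGNORE_INDEX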

applications/Chat/coati/models/base/actor.py

Lines changed: 0 additions & 1 deletion
@@ -30,4 +30,3 @@ def forward(
         """Returns model output."""
         output = self.model(input_ids, attention_mask=attention_mask, **model_kwargs)
         return output
-

applications/Chat/coati/ray/utils.py

Lines changed: 3 additions & 1 deletion
@@ -75,7 +75,9 @@ def get_strategy_from_args(strategy: str):
     elif strategy == "colossalai_zero2":
         strategy_ = LowLevelZeroStrategy(stage=2, placement_policy="cuda")
     elif strategy == "colossalai_gemini_cpu":
-        strategy_ = GeminiStrategy(placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5)
+        strategy_ = GeminiStrategy(
+            placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5
+        )
     elif strategy == "colossalai_zero2_cpu":
         strategy_ = LowLevelZeroStrategy(stage=2, placement_policy="cpu")
     else:

applications/Chat/coati/trainer/strategies/ddp.py

Lines changed: 2 additions & 1 deletion
@@ -101,16 +101,17 @@ def save_pretrained(

         model_path = os.path.join(path, "pytorch_model.bin")
         self.save_model(model, model_path, shard=shard)
+
         def _replace_keys(model_path: str, replace_fn: Callable):
             state_dict = torch.load(model_path, map_location="cpu")
             state_dict = {replace_fn(k): v for k, v in state_dict.items()}
             torch.save(state_dict, model_path)
+
         # FIXME: save_model would add "model." prefix to keys of pytorch_model.bin
         # HACK: rename keys of pytorch_model.bin
         if dist.get_rank() == 0:
             _replace_keys(model_path, lambda k: k.replace("model.", "", 1))

-
     def get_model_state_dict_shard(self, model: nn.Module, **config):
         # TODO: implement sharding on naive strategy
         model = self.unwrap_model(model)

applications/Chat/examples/community/peft/train_peft_prompts.py

Lines changed: 3 additions & 1 deletion
@@ -24,7 +24,9 @@ def main(args):
     if args.strategy == "ddp":
         strategy = DDPStrategy()
     elif args.strategy == "colossalai_gemini":
-        strategy = GeminiStrategy(placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5)
+        strategy = GeminiStrategy(
+            placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5
+        )
     elif args.strategy == "colossalai_zero2":
         strategy = LowLevelZeroStrategy(stage=2, placement_policy="cpu")
     else:

applications/Chat/examples/train_sft.sh

Lines changed: 1 addition & 1 deletion
@@ -25,4 +25,4 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
     --accumulation_steps 8 \
     --lr 2e-5 \
     --max_datasets_size 512 \
-    --max_epochs 1
\ No newline at end of file
+    --max_epochs 1

applications/Colossal-LLaMA-2/colossal_llama2/model/init_model.py

Lines changed: 1 addition & 2 deletions
@@ -8,11 +8,10 @@

 import numpy as np
 import torch
-from transformers import LlamaTokenizer, LlamaForCausalLM
+from transformers import LlamaForCausalLM, LlamaTokenizer

 from colossalai.logging import get_dist_logger

-
 logger = get_dist_logger()
