From f94f5b088d15c2d5fca179d25041741b6bb5eb1b Mon Sep 17 00:00:00 2001 From: lvkaokao <641553140@qq.com> Date: Tue, 2 Jan 2024 01:58:36 -0500 Subject: [PATCH 1/8] remove SharedDDP as it is deprecated ([#25702](https://github.com/huggingface/transformers/pull/25702)) --- .../finetune_on_Intel_Xeon_CPU.ipynb | 2 - .../instruction/finetune_on_Nvidia_GPU.ipynb | 4 -- .../finetuning/multi_modal/llava_trainer.py | 72 ++++++++----------- .../transformers/trainer.py | 4 +- 4 files changed, 30 insertions(+), 52 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Intel_Xeon_CPU.ipynb b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Intel_Xeon_CPU.ipynb index 0c14cf3d936..f125b2ede1c 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Intel_Xeon_CPU.ipynb +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Intel_Xeon_CPU.ipynb @@ -384,7 +384,6 @@ "save_strategy=no,\n", "save_total_limit=2,\n", "seed=42,\n", - "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torch_compile=False,\n", @@ -1526,7 +1525,6 @@ "save_strategy=no,\n", "save_total_limit=2,\n", "seed=42,\n", - "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torch_compile=False,\n", diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Nvidia_GPU.ipynb b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Nvidia_GPU.ipynb index 13c64d58d91..515ff2e6c5d 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Nvidia_GPU.ipynb +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/instruction/finetune_on_Nvidia_GPU.ipynb @@ -251,7 +251,6 @@ "save_strategy=no,\n", "save_total_limit=2,\n", "seed=42,\n", - "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torch_compile=False,\n", @@ -740,7 +739,6 @@ "save_strategy=no,\n", "save_total_limit=2,\n", "seed=42,\n", - "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torch_compile=False,\n", @@ -1322,7 +1320,6 @@ "save_strategy=no,\n", "save_total_limit=2,\n", "seed=42,\n", - "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torch_compile=False,\n", @@ -1807,7 +1804,6 @@ "save_strategy=no,\n", "save_total_limit=2,\n", "seed=42,\n", - "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torch_compile=False,\n", diff --git a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_trainer.py b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_trainer.py index a5bcc53ab65..c163af45f53 100644 --- a/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_trainer.py +++ b/intel_extension_for_transformers/neural_chat/examples/finetuning/multi_modal/llava_trainer.py @@ -26,7 +26,6 @@ get_parameter_names, has_length, ALL_LAYERNORM_LAYERS, - ShardedDDPOption, logger, ) from typing import List, Optional @@ -176,7 +175,7 @@ def create_optimizer(self): """ if is_sagemaker_mp_enabled(): return super().create_optimizer() - if self.sharded_ddp == ShardedDDPOption.SIMPLE: + if self.is_fsdp_enabled: return super().create_optimizer() opt_model = self.model @@ -237,27 +236,20 @@ def create_optimizer(self): optimizer_cls, optimizer_kwargs = 
Trainer.get_optimizer_cls_and_kwargs(self.args) - if self.sharded_ddp == ShardedDDPOption.SIMPLE: - self.optimizer = OSS( - params=optimizer_grouped_parameters, - optim=optimizer_cls, - **optimizer_kwargs, - ) - else: - self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) - if optimizer_cls.__name__ == "Adam8bit": - import bitsandbytes + self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) + if optimizer_cls.__name__ == "Adam8bit": + import bitsandbytes - manager = bitsandbytes.optim.GlobalOptimManager.get_instance() + manager = bitsandbytes.optim.GlobalOptimManager.get_instance() - skipped = 0 - for module in opt_model.modules(): - if isinstance(module, nn.Embedding): - skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values()) - logger.info(f"skipped {module}: {skipped/2**20}M params") - manager.register_module_override(module, "weight", {"optim_bits": 32}) - logger.debug(f"bitsandbytes: will optimize {module} in fp32") - logger.info(f"skipped: {skipped/2**20}M params") + skipped = 0 + for module in opt_model.modules(): + if isinstance(module, nn.Embedding): + skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values()) + logger.info(f"skipped {module}: {skipped/2**20}M params") + manager.register_module_override(module, "weight", {"optim_bits": 32}) + logger.debug(f"bitsandbytes: will optimize {module} in fp32") + logger.info(f"skipped: {skipped/2**20}M params") return self.optimizer @@ -297,7 +289,6 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None): get_parameter_names, has_length, ALL_LAYERNORM_LAYERS, - ShardedDDPOption, logger, ) from typing import List, Optional @@ -328,7 +319,7 @@ def create_optimizer(self): """ if is_sagemaker_mp_enabled(): return super().create_optimizer() - if self.sharded_ddp == ShardedDDPOption.SIMPLE: + if self.is_fsdp_enabled: return super().create_optimizer() opt_model = self.model @@ -401,27 +392,20 @@ def create_optimizer(self): # optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(self.args) - if self.sharded_ddp == ShardedDDPOption.SIMPLE: - self.optimizer = OSS( - params=optimizer_grouped_parameters, - optim=optimizer_cls, - **optimizer_kwargs, - ) - else: - self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) - if optimizer_cls.__name__ == "Adam8bit": - import bitsandbytes - - manager = bitsandbytes.optim.GlobalOptimManager.get_instance() - - skipped = 0 - for module in opt_model.modules(): - if isinstance(module, nn.Embedding): - skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values()) - logger.info(f"skipped {module}: {skipped/2**20}M params") - manager.register_module_override(module, "weight", {"optim_bits": 32}) - logger.debug(f"bitsandbytes: will optimize {module} in fp32") - logger.info(f"skipped: {skipped/2**20}M params") + self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) + if optimizer_cls.__name__ == "Adam8bit": + import bitsandbytes + + manager = bitsandbytes.optim.GlobalOptimManager.get_instance() + + skipped = 0 + for module in opt_model.modules(): + if isinstance(module, nn.Embedding): + skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values()) + logger.info(f"skipped {module}: {skipped/2**20}M params") + manager.register_module_override(module, "weight", {"optim_bits": 32}) + logger.debug(f"bitsandbytes: will optimize {module} in fp32") + logger.info(f"skipped: {skipped/2**20}M params") return 
self.optimizer diff --git a/intel_extension_for_transformers/transformers/trainer.py b/intel_extension_for_transformers/transformers/trainer.py index c61ccfe4c6d..7be8b6ca400 100644 --- a/intel_extension_for_transformers/transformers/trainer.py +++ b/intel_extension_for_transformers/transformers/trainer.py @@ -67,7 +67,6 @@ ) from transformers.trainer_utils import ( HPSearchBackend, - ShardedDDPOption, TrainOutput, EvalLoopOutput, EvalPrediction, @@ -762,7 +761,8 @@ def train( else: debug_overflow = DebugUnderflowOverflow(self.model) # noqa - delay_optimizer_creation = self.sharded_ddp is not None and self.sharded_ddp != ShardedDDPOption.SIMPLE + # delay_optimizer_creation = is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled or self.is_fsdp_enabled + delay_optimizer_creation = is_sagemaker_mp_enabled() if not delay_optimizer_creation: self.create_optimizer_and_scheduler(num_training_steps=max_steps) From 9dd4ea0e95e701240cde962d6f0ed75b81957fb0 Mon Sep 17 00:00:00 2001 From: lvkaokao <641553140@qq.com> Date: Tue, 2 Jan 2024 03:57:47 -0500 Subject: [PATCH 2/8] remove SharedDDP as it is deprecated --- .github/workflows/script/unitTest/run_unit_test_optimize.sh | 2 +- intel_extension_for_transformers/transformers/trainer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/script/unitTest/run_unit_test_optimize.sh b/.github/workflows/script/unitTest/run_unit_test_optimize.sh index 276ccb0a67e..70772e569f7 100644 --- a/.github/workflows/script/unitTest/run_unit_test_optimize.sh +++ b/.github/workflows/script/unitTest/run_unit_test_optimize.sh @@ -21,7 +21,7 @@ function pytest() { mkdir -p ${coverage_log_dir} pip install --no-cache-dir protobuf==3.20.0 ## install transformers==4.34.1, to work with SharedDPO API - pip install transformers==4.34.1 + pip install transformers cd /intel-extension-for-transformers/tests/CI || exit 1 JOB_NAME=unit_test ut_log_name=${LOG_DIR}/${JOB_NAME}.log diff --git a/intel_extension_for_transformers/transformers/trainer.py b/intel_extension_for_transformers/transformers/trainer.py index 7be8b6ca400..3aee52f5fdf 100644 --- a/intel_extension_for_transformers/transformers/trainer.py +++ b/intel_extension_for_transformers/transformers/trainer.py @@ -49,7 +49,7 @@ from transformers import __version__, Seq2SeqTrainer, Trainer, PreTrainedModel from transformers.configuration_utils import PretrainedConfig from transformers.debug_utils import DebugOption, DebugUnderflowOverflow -from transformers.file_utils import ( +from transformers.utils import ( CONFIG_NAME, WEIGHTS_NAME, is_torch_tpu_available, From 856670e6e910760e1c3d69f4e8722305100b1221 Mon Sep 17 00:00:00 2001 From: lvkaokao <641553140@qq.com> Date: Thu, 4 Jan 2024 05:14:21 -0500 Subject: [PATCH 3/8] fix save issue. new transformers version model.save_pretrained api adds safe_serialization arg and default True. 
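
transformers now defaults save_pretrained() to safe_serialization=True, writing
model.safetensors instead of pytorch_model.bin; the quantized model in this test
apparently does not round-trip through that path, so the test pins
safe_serialization=False. A minimal sketch of the default change (the base model
and output directories below are placeholders, not the test's fixtures):

```python
from transformers import AutoModelForCausalLM

# Placeholder model, used only to show which file each call produces.
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

model.save_pretrained("ckpt_safetensors")                    # writes model.safetensors (new default)
model.save_pretrained("ckpt_bin", safe_serialization=False)  # writes pytorch_model.bin (legacy path)
```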
--- tests/CI/test_weight_only.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CI/test_weight_only.py b/tests/CI/test_weight_only.py index 0b0c884396e..048d7f1b7a6 100644 --- a/tests/CI/test_weight_only.py +++ b/tests/CI/test_weight_only.py @@ -153,7 +153,7 @@ def test_auto_model_saving_loading(self): if isinstance(module, QuantizedLinearQBits): module_list.append(name) self.assertTrue(len(module_list) > 0) - model.save_pretrained(self.workspace) + model.save_pretrained(self.workspace, safe_serialization=False) loaded_model = AutoModelForCausalLM.from_pretrained(self.workspace) for name, module in loaded_model.named_modules(): if isinstance(module, QuantizedLinearQBits): From 789b7fb0fd6551b79d04a645602caa2531e5472f Mon Sep 17 00:00:00 2001 From: "Lv, Kaokao" Date: Thu, 4 Jan 2024 18:47:53 +0800 Subject: [PATCH 4/8] support safetensors. --- .../transformers/modeling/modeling_auto.py | 8 ++++++++ .../transformers/utils/utility.py | 1 + 2 files changed, 9 insertions(+) diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py index ae3b0de2d00..b1719b73c2d 100644 --- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py +++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py @@ -53,6 +53,7 @@ QUANT_CONFIG, WEIGHTS_NAME, WEIGHTS_INDEX_NAME, + SAFE_WEIGHTS_NAME, ) from intel_extension_for_transformers.llm.quantization.utils import replace_linear from transformers.configuration_utils import PretrainedConfig @@ -727,6 +728,13 @@ def load_low_bit(cls, pretrained_model_name_or_path, subfolder, _add_variant(WEIGHTS_INDEX_NAME, variant) ) is_sharded = True + elif os.path.isfile( + os.path.join(pretrained_model_name_or_path, subfolder, _add_variant(SAFE_WEIGHTS_NAME, variant)) + ): + # Load from a safetensors checkpoint + archive_file = os.path.join( + pretrained_model_name_or_path, subfolder, _add_variant(SAFE_WEIGHTS_NAME, variant) + ) elif os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)): archive_file = pretrained_model_name_or_path is_local = True diff --git a/intel_extension_for_transformers/transformers/utils/utility.py b/intel_extension_for_transformers/transformers/utils/utility.py index d35f4330151..f55dbf98724 100644 --- a/intel_extension_for_transformers/transformers/utils/utility.py +++ b/intel_extension_for_transformers/transformers/utils/utility.py @@ -34,6 +34,7 @@ WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json" QUANT_CONFIG = "quantization_config.json" SPARSITY_CONFIG = "sparsity_config.json" +SAFE_WEIGHTS_NAME = "model.safetensors" torch = LazyImport("torch") From 2a7e636bccee98eb517c8e7e89dafacb72ce80eb Mon Sep 17 00:00:00 2001 From: "Lv, Kaokao" Date: Thu, 4 Jan 2024 19:24:48 +0800 Subject: [PATCH 5/8] fix llava conflict. 
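
Newer transformers releases ship their own "llava" model type, so keeping
model_type = "llava" here collides with the built-in AutoConfig mapping; renaming
the local type to "llava_custom" avoids the clash. A rough sketch of the failure
mode this sidesteps (assuming a transformers build that already registers a llava
config):

```python
from transformers import AutoConfig, MistralConfig


class LlavaConfig(MistralConfig):
    model_type = "llava"


# On versions where "llava" is already a built-in config, re-registering the same
# model_type raises ValueError; on older versions the call simply succeeds.
try:
    AutoConfig.register("llava", LlavaConfig)
except ValueError as err:
    print(err)  # e.g. "'llava' is already used by a Transformers config, ..."
```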
--- .../transformers/modeling/llava_models/llava_mistral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py b/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py index d5427a5a612..7125e200bed 100644 --- a/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py +++ b/intel_extension_for_transformers/transformers/modeling/llava_models/llava_mistral.py @@ -31,7 +31,7 @@ class LlavaConfig(MistralConfig): - model_type = "llava" + model_type = "llava_custom" class LlavaMistralModel(LlavaMetaModel, MistralModel): @@ -110,5 +110,5 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_ _inputs['images'] = images return _inputs -AutoConfig.register("llava", LlavaConfig) +AutoConfig.register("llava_custom", LlavaConfig) AutoModelForCausalLM.register(LlavaConfig, LlavaMistralForCausalLM) From 9f524c50da6496a9377c63da7e674007c51f4b7a Mon Sep 17 00:00:00 2001 From: "Lv, Kaokao" Date: Fri, 5 Jan 2024 17:37:04 +0800 Subject: [PATCH 6/8] remove no_init_weights to fix quantized model loading issue. --- .../transformers/modeling/modeling_base_seq2seq.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_base_seq2seq.py b/intel_extension_for_transformers/transformers/modeling/modeling_base_seq2seq.py index 75acfab6483..17ec9c7ffdb 100644 --- a/intel_extension_for_transformers/transformers/modeling/modeling_base_seq2seq.py +++ b/intel_extension_for_transformers/transformers/modeling/modeling_base_seq2seq.py @@ -25,9 +25,7 @@ from neural_compressor.utils.pytorch import load from transformers import AutoModel, PretrainedConfig from transformers.file_utils import add_start_docstrings -from transformers.modeling_utils import no_init_weights from transformers.models.auto.auto_factory import _get_model_class -from transformers.utils.generic import ContextManagers from optimum.exporters import TasksManager from optimum.intel.neural_compressor import INCConfig @@ -268,9 +266,7 @@ def _from_pretrained( decoder = model else: model_class = _get_model_class(config, cls.auto_model_class._model_mapping) - init_contexts = [no_init_weights(_enable=True)] - with ContextManagers(init_contexts): - model = model_class(config) + model = model_class(config) # Load the model from local directory if os.path.isdir(model_id): From acda11a2a1542566fbd5060e7e1c72f768a6ebb3 Mon Sep 17 00:00:00 2001 From: "Lv, Kaokao" Date: Tue, 9 Jan 2024 18:16:51 +0800 Subject: [PATCH 7/8] transformers version >= 4.36.2 remove do_grad_scaling because of removing shared_ddp. 
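
With the sharded_ddp path gone, transformers >= 4.36.2 also drops the Trainer's
do_grad_scaling / self.scaler attributes: fp16 loss scaling is handled inside
accelerate, so the overridden training steps here just call loss.backward() (or
apex's scale_loss). A minimal, self-contained sketch of the accelerate-owned
scaling (toy model and optimizer, not this repo's Trainer):

```python
import torch
from accelerate import Accelerator

# Use fp16 only when a GPU is present; on CPU this falls back to plain fp32.
accelerator = Accelerator(mixed_precision="fp16" if torch.cuda.is_available() else "no")

model = torch.nn.Linear(8, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
model, optimizer = accelerator.prepare(model, optimizer)

loss = model(torch.randn(4, 8, device=accelerator.device)).mean()
accelerator.backward(loss)  # applies the GradScaler internally when fp16 is active
optimizer.step()
optimizer.zero_grad()
```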
--- .../transformers/trainer.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/intel_extension_for_transformers/transformers/trainer.py b/intel_extension_for_transformers/transformers/trainer.py index 3aee52f5fdf..251d3cf168f 100644 --- a/intel_extension_for_transformers/transformers/trainer.py +++ b/intel_extension_for_transformers/transformers/trainer.py @@ -1176,9 +1176,7 @@ def training_step( else: loss.backward() else: - if self.do_grad_scaling: - self.scaler.scale(loss).backward() - elif self.use_apex: + if self.use_apex: with amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() elif NEW_DEEPSPEED_FLAG: @@ -1265,9 +1263,7 @@ def training_step_length_adaptive( else: loss.backward() else: - if self.do_grad_scaling: - self.scaler.scale(loss).backward() - elif self.use_apex: + if self.use_apex: with amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() elif NEW_DEEPSPEED_FLAG: @@ -1360,9 +1356,7 @@ def training_step_length_adaptive( else: loss.backward() else: - if self.do_grad_scaling: - self.scaler.scale(loss).backward() - elif self.use_apex: + if self.use_apex: with amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() elif NEW_DEEPSPEED_FLAG: From 57e08db05ec6d80a5a80570fbd5980f49510c7d2 Mon Sep 17 00:00:00 2001 From: "Lv, Kaokao" Date: Tue, 9 Jan 2024 18:20:10 +0800 Subject: [PATCH 8/8] update code because of the pr #1121. --- .../optimized_instructor_embedding.py | 12 +++++++---- .../optimized_sentence_transformers.py | 20 ++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py b/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py index 48bc64ef4a0..20078ded4f2 100644 --- a/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py +++ b/intel_extension_for_transformers/langchain/embeddings/optimized_instructor_embedding.py @@ -56,12 +56,14 @@ def __init__(self, *args, **kwargs): def _load_auto_model(self, model_name_or_path, token: Optional[Union[bool, str]], - cache_folder: Optional[str]): # pragma: no cover + cache_folder: Optional[str], + trust_remote_code: bool = False): # pragma: no cover """Creates a simple Transformer + Mean Pooling model and returns the modules.""" logger.warning("No sentence-transformers model found with name {}." 
\ "Creating a new one with MEAN pooling.".format(model_name_or_path)) transformer_model = OptimzedTransformer( - model_name_or_path, cache_dir=cache_folder, model_args={"token": token}) + model_name_or_path, cache_dir=cache_folder, model_args={"token": token, + "trust_remote_code": trust_remote_code}) pooling_model = sentence_transformers.models.Pooling( transformer_model.get_word_embedding_dimension(), 'mean') return [transformer_model, pooling_model] @@ -69,7 +71,8 @@ def _load_auto_model(self, def _load_sbert_model(self, model_name_or_path: str, token: Optional[Union[bool, str]], - cache_folder: Optional[str]): + cache_folder: Optional[str], + trust_remote_code: bool = False): """Loads a full sentence-transformers model.""" # Check if the config_sentence_transformers.json file exists (exists since v2 of the framework) config_sentence_transformers_json_path = sentence_transformers.util.load_file_path( @@ -121,8 +124,9 @@ def _load_sbert_model(self, break if "model_args" in kwargs: kwargs["model_args"]["token"] = token + kwargs["model_args"]["trust_remote_code"] = trust_remote_code else: - kwargs["model_args"] = {"token": token} + kwargs["model_args"] = {"token": token, "trust_remote_code": trust_remote_code} module = OptimizedInstructorTransformer(model_name_or_path, cache_dir=cache_folder, **kwargs) elif module_config['idx']==1: module_class = InstructorEmbedding.INSTRUCTOR_Pooling diff --git a/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py b/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py index c5f19d89074..d56edb34b48 100644 --- a/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py +++ b/intel_extension_for_transformers/langchain/embeddings/optimized_sentence_transformers.py @@ -55,20 +55,29 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def _load_auto_model( - self, model_name_or_path: str, token: Optional[Union[bool, str]], cache_folder: Optional[str]): + self, + model_name_or_path: str, + token: Optional[Union[bool, str]], + cache_folder: Optional[str], + trust_remote_code: bool = False): """ Creates a simple Transformer + Mean Pooling model and returns the modules """ logger.warning("No sentence-transformers model found with name {}." 
\ "Creating a new one with MEAN pooling.".format(model_name_or_path)) transformer_model = OptimzedTransformer( - model_name_or_path, cache_dir=cache_folder, model_args={"token": token}) + model_name_or_path, cache_dir=cache_folder, model_args={"token": token, + "trust_remote_code": trust_remote_code}) pooling_model = sentence_transformers.models.Pooling( transformer_model.get_word_embedding_dimension(), 'mean') return [transformer_model, pooling_model] def _load_sbert_model( - self, model_name_or_path: str, token: Optional[Union[bool, str]], cache_folder: Optional[str]): + self, + model_name_or_path: str, + token: Optional[Union[bool, str]], + cache_folder: Optional[str], + trust_remote_code: bool = False): """ Loads a full sentence-transformers model """ @@ -124,8 +133,9 @@ def _load_sbert_model( break if "model_args" in kwargs: kwargs["model_args"]["token"] = token + kwargs["model_args"]["trust_remote_code"] = trust_remote_code else: - kwargs["model_args"] = {"token": token} + kwargs["model_args"] = {"token": token, "trust_remote_code": trust_remote_code} module = sentence_transformers.models.Transformer( model_name_or_path, cache_dir=cache_folder, **kwargs) else: @@ -134,4 +144,4 @@ def _load_sbert_model( module = module_class.load(module_path) modules[module_config['name']] = module - return modules \ No newline at end of file + return modules
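
The trust_remote_code plumbing above mirrors the updated sentence-transformers
loader signatures, where the flag is passed from the top-level constructor into
_load_auto_model / _load_sbert_model and forwarded to the transformers model_args.
A hedged usage sketch (the model name is a placeholder, and newer
sentence-transformers releases are assumed to expose the flag on the constructor):

```python
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    trust_remote_code=False,  # enable only for repos whose custom modeling code you trust
)
print(embedder.encode(["hello world"]).shape)
```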