This repository was archived by the owner on Oct 25, 2024, and is now read-only. The pull request below was merged; the diff shows the changes from all commits.
@@ -21,7 +21,7 @@ function pytest() {
mkdir -p ${coverage_log_dir}
pip install --no-cache-dir protobuf==3.20.0
## install transformers==4.34.1, to work with SharedDPO API
pip install transformers==4.34.1
pip install transformers
cd /intel-extension-for-transformers/tests/CI || exit 1
JOB_NAME=unit_test
ut_log_name=${LOG_DIR}/${JOB_NAME}.log
@@ -56,20 +56,23 @@ def __init__(self, *args, **kwargs):
def _load_auto_model(self,
model_name_or_path,
token: Optional[Union[bool, str]],
cache_folder: Optional[str]): # pragma: no cover
cache_folder: Optional[str],
trust_remote_code: bool = False): # pragma: no cover
"""Creates a simple Transformer + Mean Pooling model and returns the modules."""
logger.warning("No sentence-transformers model found with name {}." \
"Creating a new one with MEAN pooling.".format(model_name_or_path))
transformer_model = OptimzedTransformer(
model_name_or_path, cache_dir=cache_folder, model_args={"token": token})
model_name_or_path, cache_dir=cache_folder, model_args={"token": token,
"trust_remote_code": trust_remote_code})
pooling_model = sentence_transformers.models.Pooling(
transformer_model.get_word_embedding_dimension(), 'mean')
return [transformer_model, pooling_model]

def _load_sbert_model(self,
model_name_or_path: str,
token: Optional[Union[bool, str]],
cache_folder: Optional[str]):
cache_folder: Optional[str],
trust_remote_code: bool = False):
"""Loads a full sentence-transformers model."""
# Check if the config_sentence_transformers.json file exists (exists since v2 of the framework)
config_sentence_transformers_json_path = sentence_transformers.util.load_file_path(
@@ -121,8 +124,9 @@ def _load_sbert_model(self,
break
if "model_args" in kwargs:
kwargs["model_args"]["token"] = token
kwargs["model_args"]["trust_remote_code"] = trust_remote_code
else:
kwargs["model_args"] = {"token": token}
kwargs["model_args"] = {"token": token, "trust_remote_code": trust_remote_code}
module = OptimizedInstructorTransformer(model_name_or_path, cache_dir=cache_folder, **kwargs)
elif module_config['idx']==1:
module_class = InstructorEmbedding.INSTRUCTOR_Pooling
@@ -55,20 +55,29 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def _load_auto_model(
self, model_name_or_path: str, token: Optional[Union[bool, str]], cache_folder: Optional[str]):
self,
model_name_or_path: str,
token: Optional[Union[bool, str]],
cache_folder: Optional[str],
trust_remote_code: bool = False):
"""
Creates a simple Transformer + Mean Pooling model and returns the modules
"""
logger.warning("No sentence-transformers model found with name {}." \
"Creating a new one with MEAN pooling.".format(model_name_or_path))
transformer_model = OptimzedTransformer(
model_name_or_path, cache_dir=cache_folder, model_args={"token": token})
model_name_or_path, cache_dir=cache_folder, model_args={"token": token,
"trust_remote_code": trust_remote_code})
pooling_model = sentence_transformers.models.Pooling(
transformer_model.get_word_embedding_dimension(), 'mean')
return [transformer_model, pooling_model]

def _load_sbert_model(
self, model_name_or_path: str, token: Optional[Union[bool, str]], cache_folder: Optional[str]):
self,
model_name_or_path: str,
token: Optional[Union[bool, str]],
cache_folder: Optional[str],
trust_remote_code: bool = False):
"""
Loads a full sentence-transformers model
"""
@@ -124,8 +133,9 @@ def _load_sbert_model(
break
if "model_args" in kwargs:
kwargs["model_args"]["token"] = token
kwargs["model_args"]["trust_remote_code"] = trust_remote_code
else:
kwargs["model_args"] = {"token": token}
kwargs["model_args"] = {"token": token, "trust_remote_code": trust_remote_code}
module = sentence_transformers.models.Transformer(
model_name_or_path, cache_dir=cache_folder, **kwargs)
else:
@@ -134,4 +144,4 @@ def _load_sbert_model(
module = module_class.load(module_path)
modules[module_config['name']] = module

return modules
return modules
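
Both wrapper classes above now thread a trust_remote_code flag into model_args, which sentence-transformers forwards to the underlying AutoConfig/AutoModel loaders. A minimal sketch of the equivalent call against the stock sentence_transformers.models API (the repo name is a placeholder, not taken from this patch):

from sentence_transformers import models

# Placeholder repo name; a Hub model that ships custom modeling code can only
# be loaded when trust_remote_code=True reaches the Hugging Face loaders.
transformer = models.Transformer(
    "some-org/custom-embedding-model",
    cache_dir=None,
    model_args={"token": None, "trust_remote_code": True},
)
pooling = models.Pooling(transformer.get_word_embedding_dimension(), "mean")
modules = [transformer, pooling]
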
@@ -384,7 +384,6 @@
"save_strategy=no,\n",
"save_total_limit=2,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
@@ -1526,7 +1525,6 @@
"save_strategy=no,\n",
"save_total_limit=2,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
@@ -251,7 +251,6 @@
"save_strategy=no,\n",
"save_total_limit=2,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
@@ -740,7 +739,6 @@
"save_strategy=no,\n",
"save_total_limit=2,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
@@ -1322,7 +1320,6 @@
"save_strategy=no,\n",
"save_total_limit=2,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
@@ -1807,7 +1804,6 @@
"save_strategy=no,\n",
"save_total_limit=2,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
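
The two notebooks only lose the sharded_ddp=[] line from their printed TrainingArguments, because recent transformers releases removed that field. A hedged sketch of building comparable arguments once the option is gone (the fsdp line is shown purely as the modern replacement and is an assumption, not part of this patch):

from transformers import TrainingArguments

# Illustrative only: once `sharded_ddp` is gone from TrainingArguments,
# sharded data parallelism is configured through `fsdp` or left disabled.
args = TrainingArguments(
    output_dir="./output",          # placeholder path
    save_strategy="no",
    save_total_limit=2,
    seed=42,
    skip_memory_metrics=True,
    tf32=None,
    torch_compile=False,
    # fsdp="full_shard auto_wrap",  # optional replacement for sharded_ddp
)
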
@@ -26,7 +26,6 @@
get_parameter_names,
has_length,
ALL_LAYERNORM_LAYERS,
ShardedDDPOption,
logger,
)
from typing import List, Optional
@@ -176,7 +175,7 @@ def create_optimizer(self):
"""
if is_sagemaker_mp_enabled():
return super().create_optimizer()
if self.sharded_ddp == ShardedDDPOption.SIMPLE:
if self.is_fsdp_enabled:
return super().create_optimizer()

opt_model = self.model
@@ -237,27 +236,20 @@ def create_optimizer(self):

optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(self.args)

if self.sharded_ddp == ShardedDDPOption.SIMPLE:
self.optimizer = OSS(
params=optimizer_grouped_parameters,
optim=optimizer_cls,
**optimizer_kwargs,
)
else:
self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
if optimizer_cls.__name__ == "Adam8bit":
import bitsandbytes
self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
if optimizer_cls.__name__ == "Adam8bit":
import bitsandbytes

manager = bitsandbytes.optim.GlobalOptimManager.get_instance()
manager = bitsandbytes.optim.GlobalOptimManager.get_instance()

skipped = 0
for module in opt_model.modules():
if isinstance(module, nn.Embedding):
skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values())
logger.info(f"skipped {module}: {skipped/2**20}M params")
manager.register_module_override(module, "weight", {"optim_bits": 32})
logger.debug(f"bitsandbytes: will optimize {module} in fp32")
logger.info(f"skipped: {skipped/2**20}M params")
skipped = 0
for module in opt_model.modules():
if isinstance(module, nn.Embedding):
skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values())
logger.info(f"skipped {module}: {skipped/2**20}M params")
manager.register_module_override(module, "weight", {"optim_bits": 32})
logger.debug(f"bitsandbytes: will optimize {module} in fp32")
logger.info(f"skipped: {skipped/2**20}M params")

return self.optimizer

@@ -297,7 +289,6 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None):
get_parameter_names,
has_length,
ALL_LAYERNORM_LAYERS,
ShardedDDPOption,
logger,
)
from typing import List, Optional
@@ -328,7 +319,7 @@ def create_optimizer(self):
"""
if is_sagemaker_mp_enabled():
return super().create_optimizer()
if self.sharded_ddp == ShardedDDPOption.SIMPLE:
if self.is_fsdp_enabled:
return super().create_optimizer()

opt_model = self.model
@@ -401,27 +392,20 @@ def create_optimizer(self):

# optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(self.args)

if self.sharded_ddp == ShardedDDPOption.SIMPLE:
self.optimizer = OSS(
params=optimizer_grouped_parameters,
optim=optimizer_cls,
**optimizer_kwargs,
)
else:
self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
if optimizer_cls.__name__ == "Adam8bit":
import bitsandbytes

manager = bitsandbytes.optim.GlobalOptimManager.get_instance()

skipped = 0
for module in opt_model.modules():
if isinstance(module, nn.Embedding):
skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values())
logger.info(f"skipped {module}: {skipped/2**20}M params")
manager.register_module_override(module, "weight", {"optim_bits": 32})
logger.debug(f"bitsandbytes: will optimize {module} in fp32")
logger.info(f"skipped: {skipped/2**20}M params")
self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
if optimizer_cls.__name__ == "Adam8bit":
import bitsandbytes

manager = bitsandbytes.optim.GlobalOptimManager.get_instance()

skipped = 0
for module in opt_model.modules():
if isinstance(module, nn.Embedding):
skipped += sum({p.data_ptr(): p.numel() for p in module.parameters()}.values())
logger.info(f"skipped {module}: {skipped/2**20}M params")
manager.register_module_override(module, "weight", {"optim_bits": 32})
logger.debug(f"bitsandbytes: will optimize {module} in fp32")
logger.info(f"skipped: {skipped/2**20}M params")

return self.optimizer
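
Both create_optimizer implementations now fall back to the stock Trainer when FSDP is enabled and drop the old fairscale OSS branch, keeping only the bitsandbytes Adam8bit path. A standalone sketch of that embedding override pattern (placeholder model; assumes bitsandbytes is installed):

import torch.nn as nn
import bitsandbytes

model = nn.Sequential(nn.Embedding(1000, 64), nn.Linear(64, 2))  # placeholder model

# Keep embedding weights in 32-bit inside the 8-bit optimizer, mirroring the
# register_module_override calls in the hunks above.
manager = bitsandbytes.optim.GlobalOptimManager.get_instance()
for module in model.modules():
    if isinstance(module, nn.Embedding):
        manager.register_module_override(module, "weight", {"optim_bits": 32})

optimizer = bitsandbytes.optim.Adam8bit(model.parameters(), lr=1e-4)
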

@@ -31,7 +31,7 @@


class LlavaConfig(MistralConfig):
model_type = "llava"
model_type = "llava_custom"


class LlavaMistralModel(LlavaMetaModel, MistralModel):
@@ -110,5 +110,5 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_
_inputs['images'] = images
return _inputs

AutoConfig.register("llava", LlavaConfig)
AutoConfig.register("llava_custom", LlavaConfig)
AutoModelForCausalLM.register(LlavaConfig, LlavaMistralForCausalLM)
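
Registering the config under model_type "llava_custom" avoids clashing with the built-in "llava" architecture that newer transformers releases already register under that key. A small sketch of the same registration pattern with hypothetical stand-in classes:

from transformers import AutoConfig, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel

class MyConfig(PretrainedConfig):    # hypothetical stand-in for LlavaConfig
    model_type = "llava_custom"      # must not collide with a built-in model_type

class MyModel(PreTrainedModel):      # hypothetical stand-in for LlavaMistralForCausalLM
    config_class = MyConfig

# Map the custom model_type to the config, then the config to the model class.
AutoConfig.register("llava_custom", MyConfig)
AutoModelForCausalLM.register(MyConfig, MyModel)
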
@@ -53,6 +53,7 @@
QUANT_CONFIG,
WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
)
from intel_extension_for_transformers.llm.quantization.utils import replace_linear
from transformers.configuration_utils import PretrainedConfig
@@ -727,6 +728,13 @@ def load_low_bit(cls,
pretrained_model_name_or_path, subfolder, _add_variant(WEIGHTS_INDEX_NAME, variant)
)
is_sharded = True
elif os.path.isfile(
os.path.join(pretrained_model_name_or_path, subfolder, _add_variant(SAFE_WEIGHTS_NAME, variant))
):
# Load from a safetensors checkpoint
archive_file = os.path.join(
pretrained_model_name_or_path, subfolder, _add_variant(SAFE_WEIGHTS_NAME, variant)
)
elif os.path.isfile(os.path.join(subfolder, pretrained_model_name_or_path)):
archive_file = pretrained_model_name_or_path
is_local = True
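
The added branch lets load_low_bit resolve a single-file safetensors checkpoint (model.safetensors) alongside pytorch_model.bin and its sharded index. A rough, self-contained sketch of that resolution order for a local directory (the helper below is hypothetical and ignores variants and remote downloads):

import os

WEIGHTS_NAME = "pytorch_model.bin"
WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json"
SAFE_WEIGHTS_NAME = "model.safetensors"

def resolve_archive(model_dir: str, subfolder: str = ""):
    """Return (archive_file, is_sharded) for a local checkpoint directory."""
    candidates = [
        (WEIGHTS_NAME, False),        # plain PyTorch checkpoint
        (WEIGHTS_INDEX_NAME, True),   # sharded PyTorch checkpoint
        (SAFE_WEIGHTS_NAME, False),   # single-file safetensors checkpoint
    ]
    for name, is_sharded in candidates:
        path = os.path.join(model_dir, subfolder, name)
        if os.path.isfile(path):
            return path, is_sharded
    raise FileNotFoundError(f"no checkpoint found under {model_dir}")
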
@@ -25,9 +25,7 @@
from neural_compressor.utils.pytorch import load
from transformers import AutoModel, PretrainedConfig
from transformers.file_utils import add_start_docstrings
from transformers.modeling_utils import no_init_weights
from transformers.models.auto.auto_factory import _get_model_class
from transformers.utils.generic import ContextManagers
from optimum.exporters import TasksManager

from optimum.intel.neural_compressor import INCConfig
@@ -268,9 +266,7 @@ def _from_pretrained(
decoder = model
else:
model_class = _get_model_class(config, cls.auto_model_class._model_mapping)
init_contexts = [no_init_weights(_enable=True)]
with ContextManagers(init_contexts):
model = model_class(config)
model = model_class(config)

# Load the model from local directory
if os.path.isdir(model_id):
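
With no_init_weights and ContextManagers dropped from the imports, the resolved model class is now constructed directly from its config; the freshly initialized weights are simply overwritten when the real checkpoint is loaded afterwards. A short sketch of that resolution step, using a public config purely as an example:

from transformers import AutoConfig, AutoModelForCausalLM
from transformers.models.auto.auto_factory import _get_model_class  # private helper also used in the diff

config = AutoConfig.from_pretrained("gpt2")  # example config only
model_class = _get_model_class(config, AutoModelForCausalLM._model_mapping)
model = model_class(config)  # plain construction; actual weights are loaded in a later step
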
18 changes: 6 additions & 12 deletions intel_extension_for_transformers/transformers/trainer.py
@@ -49,7 +49,7 @@
from transformers import __version__, Seq2SeqTrainer, Trainer, PreTrainedModel
from transformers.configuration_utils import PretrainedConfig
from transformers.debug_utils import DebugOption, DebugUnderflowOverflow
from transformers.file_utils import (
from transformers.utils import (
CONFIG_NAME,
WEIGHTS_NAME,
is_torch_tpu_available,
@@ -67,7 +67,6 @@
)
from transformers.trainer_utils import (
HPSearchBackend,
ShardedDDPOption,
TrainOutput,
EvalLoopOutput,
EvalPrediction,
@@ -762,7 +761,8 @@ def train(
else:
debug_overflow = DebugUnderflowOverflow(self.model) # noqa

delay_optimizer_creation = self.sharded_ddp is not None and self.sharded_ddp != ShardedDDPOption.SIMPLE
# delay_optimizer_creation = is_sagemaker_mp_enabled() or self.is_fsdp_xla_enabled or self.is_fsdp_enabled
delay_optimizer_creation = is_sagemaker_mp_enabled()

if not delay_optimizer_creation:
self.create_optimizer_and_scheduler(num_training_steps=max_steps)
@@ -1176,9 +1176,7 @@ def training_step(
else:
loss.backward()
else:
if self.do_grad_scaling:
self.scaler.scale(loss).backward()
elif self.use_apex:
if self.use_apex:
with amp.scale_loss(loss, self.optimizer) as scaled_loss:
scaled_loss.backward()
elif NEW_DEEPSPEED_FLAG:
@@ -1265,9 +1263,7 @@ def training_step_length_adaptive(
else:
loss.backward()
else:
if self.do_grad_scaling:
self.scaler.scale(loss).backward()
elif self.use_apex:
if self.use_apex:
with amp.scale_loss(loss, self.optimizer) as scaled_loss:
scaled_loss.backward()
elif NEW_DEEPSPEED_FLAG:
@@ -1360,9 +1356,7 @@ def training_step_length_adaptive(
else:
loss.backward()
else:
if self.do_grad_scaling:
self.scaler.scale(loss).backward()
elif self.use_apex:
if self.use_apex:
with amp.scale_loss(loss, self.optimizer) as scaled_loss:
scaled_loss.backward()
elif NEW_DEEPSPEED_FLAG:
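
The trainer switches its constant imports from transformers.file_utils to transformers.utils, drops ShardedDDPOption, and removes the do_grad_scaling/self.scaler branch (mixed-precision scaling is handled by accelerate in recent Trainer versions). A hedged compatibility sketch for the import change, assuming either module layout may be installed:

# Minimal compatibility sketch: transformers.file_utils was a deprecated alias
# of transformers.utils and has been removed in newer releases.
try:
    from transformers.utils import CONFIG_NAME, WEIGHTS_NAME
except ImportError:  # very old transformers only
    from transformers.file_utils import CONFIG_NAME, WEIGHTS_NAME

print(CONFIG_NAME, WEIGHTS_NAME)  # "config.json" "pytorch_model.bin"
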
@@ -34,6 +34,7 @@
WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json"
QUANT_CONFIG = "quantization_config.json"
SPARSITY_CONFIG = "sparsity_config.json"
SAFE_WEIGHTS_NAME = "model.safetensors"

torch = LazyImport("torch")

2 changes: 1 addition & 1 deletion tests/CI/test_weight_only.py
@@ -153,7 +153,7 @@ def test_auto_model_saving_loading(self):
if isinstance(module, QuantizedLinearQBits):
module_list.append(name)
self.assertTrue(len(module_list) > 0)
model.save_pretrained(self.workspace)
model.save_pretrained(self.workspace, safe_serialization=False)
loaded_model = AutoModelForCausalLM.from_pretrained(self.workspace)
for name, module in loaded_model.named_modules():
if isinstance(module, QuantizedLinearQBits):
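
Recent transformers releases default save_pretrained to safetensors, so the test passes safe_serialization=False explicitly to keep exercising the pytorch_model.bin round trip. A generic sketch of the flag on a stock model (the checkpoint name is only an example, not the quantized model used in the test):

from transformers import AutoModelForCausalLM

# Tiny public checkpoint used purely for illustration.
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
model.save_pretrained("./ckpt_bin", safe_serialization=False)   # writes pytorch_model.bin
model.save_pretrained("./ckpt_safe", safe_serialization=True)   # writes model.safetensors
reloaded = AutoModelForCausalLM.from_pretrained("./ckpt_bin")
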