
Commit bc72e1c

Update deprecated parameters in Hugging Face library (#2982)
* Summary: In the `from_pretrained()` method in `huggingface/transformers`, `torch_dtype` is deprecated and `dtype` replaces it. To prevent deprecation warnings, this PR replaces `torch_dtype` with `dtype`.

  Test plan: CI

  Reference: huggingface/transformers#39782

* fix pre-commit

* revert to source: model uploader
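The change is mechanical and identical in every file below: pass `dtype` instead of the deprecated `torch_dtype` when calling `from_pretrained()`. A minimal before/after sketch, assuming a `transformers` release new enough to accept `dtype` (older releases only understand `torch_dtype`); the model id is only illustrative:

```python
import torch
from transformers import AutoModelForCausalLM

model_id = "microsoft/Phi-4-mini-instruct"  # illustrative checkpoint

# Before: emits a deprecation warning on recent transformers releases
# model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

# After: same behavior, no deprecation warning
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    dtype=torch.bfloat16,  # replaces the deprecated torch_dtype
    device_map="auto",
)
```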
1 parent 9d88c16 commit bc72e1c

File tree

16 files changed: +25 -25 lines changed


.github/scripts/torchao_model_releases/quantize_and_upload.py

Lines changed: 1 addition & 1 deletion
@@ -592,7 +592,7 @@ def _untie_weights_and_save_locally(model_id):
 python -m executorch.examples.models.qwen3.convert_weights $(hf download {quantized_model}) pytorch_model_converted.bin
 ```
 
-Once we have the checkpoint, we export it to ExecuTorch with a max_seq_length/max_context_length of 1024 to the XNNPACK backend as follows.
+Once we have the checkpoint, we export it to ExecuTorch with a max_seq_length/max_context_length of 1024 to the XNNPACK backend as follows.
 
 [TODO: fix config path in note where necessary]
 (Note: ExecuTorch LLM export script requires config.json have certain key names. The correct config to use for the LLM export script is located at examples/models/qwen3/config/4b_config.json within the ExecuTorch repo.)

README.md

Lines changed: 1 addition & 1 deletion
@@ -159,7 +159,7 @@ quantization_config = TorchAoConfig(quant_type=Int4WeightOnlyConfig(group_size=1
 # Load and automatically quantize
 quantized_model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Phi-4-mini-instruct",
-    torch_dtype="auto",
+    dtype="auto",
     device_map="auto",
     quantization_config=quantization_config
 )

benchmarks/_models/eval_hf_models.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def quantize_model_and_save(model_id, quant_config, output_dir="results"):
     quantized_model = AutoModelForCausalLM.from_pretrained(
         model_id,
         device_map="auto",
-        torch_dtype=torch.bfloat16,
+        dtype=torch.bfloat16,
         quantization_config=quantization_config,
     )
     tokenizer = AutoTokenizer.from_pretrained(model_id)

docs/source/serving.rst

Lines changed: 4 additions & 4 deletions
@@ -85,7 +85,7 @@ Install the required packages:
 model = AutoModelForCausalLM.from_pretrained(
     model_path,
     device_map="auto",
-    torch_dtype="auto",
+    dtype="auto",
     trust_remote_code=True,
 )
 tokenizer = AutoTokenizer.from_pretrained(model_path)
@@ -134,7 +134,7 @@ Optionally, we can quantize the embedding and lm_head differently, since those l
 from transformers.modeling_utils import find_tied_parameters
 
 model_id = "microsoft/Phi-4-mini-instruct"
-untied_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
+untied_model = AutoModelForCausalLM.from_pretrained(model_id, dtype="auto", device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 print(untied_model)
@@ -202,7 +202,7 @@ Quantizing the model for mobile deployment using TorchAO's ``Int8DynamicActivati
 quantization_config = TorchAoConfig(quant_type=quant_config, include_embedding=True, untie_embedding_weights=True, modules_to_not_convert=[])
 
 # either use `untied_model_id` or `untied_model_local_path`
-quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, torch_dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
+quantized_model = AutoModelForCausalLM.from_pretrained(untied_model_id, dtype=torch.float32, device_map="auto", quantization_config=quantization_config)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 # Push to hub
@@ -285,7 +285,7 @@ For Phi-4-mini-instruct, when quantized with float8 dynamic quant, we can reduce
 
 # use "microsoft/Phi-4-mini-instruct" or "pytorch/Phi-4-mini-instruct-float8dq"
 model_id = "pytorch/Phi-4-mini-instruct-float8dq"
-quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
+quantized_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", dtype=torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 torch.cuda.reset_peak_memory_stats()
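The last hunk above sits next to the doc's peak-memory measurement. A rough usage sketch of that measurement with the updated keyword, not taken verbatim from the repo; it assumes a CUDA device and the `pytorch/Phi-4-mini-instruct-float8dq` checkpoint mentioned in the docs, and the prompt text is illustrative:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "pytorch/Phi-4-mini-instruct-float8dq"
quantized_model = AutoModelForCausalLM.from_pretrained(
    model_id, device_map="auto", dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

torch.cuda.reset_peak_memory_stats()

inputs = tokenizer("What are we having for dinner?", return_tensors="pt").to("cuda")
outputs = quantized_model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# Peak GPU memory used during generation, in GB
print(f"peak memory: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")
```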

docs/source/torchao_vllm_integration.md

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ quantization_config = TorchAoConfig(
 # Load and automatically quantize the model
 model = AutoModelForCausalLM.from_pretrained(
     "meta-llama/Llama-3.2-1B",
-    torch_dtype="auto",
+    dtype="auto",
     device_map="auto",
     quantization_config=quantization_config
 )

test/integration/test_load_and_run_checkpoint.py

Lines changed: 2 additions & 2 deletions
@@ -193,7 +193,7 @@ def test_deprecated_hf_models(self, model_info):
         with warnings.catch_warnings(record=True) as caught_warnings:
             quantized_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
-                torch_dtype="bfloat16",
+                dtype="bfloat16",
                 device_map="cuda:0",
             )
             # version mismatch check in config.py
@@ -250,7 +250,7 @@ def test_deprecated_hf_models(self, model_info):
         with warnings.catch_warnings(record=True) as caught_warnings:
             _ = AutoModelForCausalLM.from_pretrained(
                 _HIGH_PRECISION_MODEL,
-                torch_dtype="bfloat16",
+                dtype="bfloat16",
                 device_map="cuda:0",
                 quantization_config=quantized_model.config.quantization_config,
             )
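Since the point of the commit is to stop triggering deprecation warnings, a check in the spirit of the `warnings.catch_warnings(record=True)` blocks above could assert that nothing mentioning `torch_dtype` was recorded. A minimal sketch, not part of the actual test file; the model id is an assumption:

```python
import warnings

import torch
from transformers import AutoModelForCausalLM

with warnings.catch_warnings(record=True) as caught_warnings:
    warnings.simplefilter("always")  # record every warning, not just the first
    _ = AutoModelForCausalLM.from_pretrained(
        "microsoft/Phi-4-mini-instruct",  # illustrative model id
        dtype=torch.bfloat16,
    )

# With `dtype` instead of the deprecated `torch_dtype`, no deprecation
# warning about torch_dtype should have been emitted.
assert not any("torch_dtype" in str(w.message) for w in caught_warnings)
```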

test/integration/test_vllm.py

Lines changed: 1 addition & 1 deletion
@@ -153,7 +153,7 @@ def quantize_and_save_model(
     # Load and quantize model
     quantized_model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        torch_dtype="bfloat16",
+        dtype="bfloat16",
         device_map="cuda",
         quantization_config=quantization_config,
     )

torchao/prototype/autoround/autoround_llm.py

Lines changed: 1 addition & 1 deletion
@@ -88,7 +88,7 @@ def main(args):
     # Get the model, tokenizer, and decoder_cls
     model_name_or_path = args.model_name_or_path
     model, tokenizer, decoder_cls = ar_utils.get_float_model_info(
-        model_name_or_path, torch_dtype=torch.bfloat16
+        model_name_or_path, dtype=torch.bfloat16
     )
     # Disable the `use_cache` for calibration stage.
     model.config.use_cache = False

torchao/prototype/autoround/eval_autoround.py

Lines changed: 1 addition & 1 deletion
@@ -86,7 +86,7 @@ def main(args):
     with torch.no_grad():
         model_name_or_path = args.model_name_or_path
         model, tokenizer, decoder_cls = ar_utils.get_float_model_info(
-            model_name_or_path, torch_dtype=torch.bfloat16
+            model_name_or_path, dtype=torch.bfloat16
         )
         model.eval()
         model_device = args.model_device

torchao/prototype/autoround/utils.py

Lines changed: 2 additions & 2 deletions
@@ -140,11 +140,11 @@ def _auto_detect_decoder_cls(model):
     return type(first_module)
 
 
-def get_float_model_info(model_name_or_path, torch_dtype=torch.float32):
+def get_float_model_info(model_name_or_path, dtype=torch.float32):
     import transformers
 
     model = transformers.AutoModelForCausalLM.from_pretrained(
-        model_name_or_path, torch_dtype=torch_dtype
+        model_name_or_path, dtype=dtype
     )
     tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)
     decoder_cls = _auto_detect_decoder_cls(model)
