diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index c6d0f9ce3408..f71641e30f7d 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -22,6 +22,7 @@ import os import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from functools import partial @@ -182,15 +183,21 @@ class ModelArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -389,6 +396,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_image_captioning", model_args, data_args, framework="flax") @@ -448,7 +461,7 @@ def main(): cache_dir=model_args.cache_dir, keep_in_memory=False, data_dir=data_args.data_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -465,7 +478,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -475,18 +488,18 @@ def main(): model_args.model_name_or_path, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) image_processor = AutoImageProcessor.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer.pad_token = tokenizer.convert_ids_to_tokens(model.config.pad_token_id) diff --git a/examples/flax/language-modeling/run_bart_dlm_flax.py b/examples/flax/language-modeling/run_bart_dlm_flax.py index 4452fb8d46ec..259f67f0b17d 100644 --- a/examples/flax/language-modeling/run_bart_dlm_flax.py +++ b/examples/flax/language-modeling/run_bart_dlm_flax.py @@ -26,6 +26,7 @@ import os import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from itertools import chain @@ -168,15 +169,21 @@ class ModelArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -463,6 +470,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_bart_dlm", model_args, data_args, framework="flax") @@ -517,7 +530,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in datasets.keys(): @@ -526,14 +539,14 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -548,7 +561,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in datasets.keys(): @@ -557,14 +570,14 @@ def main(): data_files=data_files, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) datasets["train"] = load_dataset( extension, data_files=data_files, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -576,14 +589,14 @@ def main(): model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: raise ValueError( @@ -596,13 +609,13 @@ def main(): model_args.config_name, cache_dir=model_args.cache_dir, vocab_size=len(tokenizer), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: config = BartConfig.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config = CONFIG_MAPPING[model_args.model_type]() @@ -707,7 +720,7 @@ def group_texts(examples): config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config.vocab_size = len(tokenizer) diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index 65e50c6a4ecd..7c4206a0c7c9 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -27,6 +27,7 @@ import os import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from itertools import chain @@ -169,15 +170,21 @@ class ModelArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -334,6 +341,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_clm", model_args, data_args, framework="flax") @@ -397,7 +410,7 @@ def main(): data_args.dataset_config_name, cache_dir=model_args.cache_dir, keep_in_memory=False, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in dataset.keys(): @@ -406,14 +419,14 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) dataset["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -431,7 +444,7 @@ def main(): data_files=data_files, cache_dir=model_args.cache_dir, **dataset_args, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in dataset.keys(): @@ -441,7 +454,7 @@ def main(): split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, **dataset_args, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) dataset["train"] = load_dataset( extension, @@ -449,7 +462,7 @@ def main(): split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, **dataset_args, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -463,13 +476,13 @@ def main(): config = AutoConfig.from_pretrained( model_args.config_name, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: config = AutoConfig.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config = CONFIG_MAPPING[model_args.model_type]() @@ -480,14 +493,14 @@ def main(): model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: raise ValueError( @@ -501,7 +514,7 @@ def main(): config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: model = FlaxAutoModelForCausalLM.from_config( diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index be6dc78b7d03..d5e44feaf3d8 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -26,6 +26,7 @@ import os import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from itertools import chain @@ -174,15 +175,21 @@ class ModelArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -377,6 +384,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_mlm", model_args, data_args, framework="flax") @@ -434,7 +447,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in datasets.keys(): @@ -443,14 +456,14 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -465,7 +478,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in datasets.keys(): @@ -474,14 +487,14 @@ def main(): data_files=data_files, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) datasets["train"] = load_dataset( extension, data_files=data_files, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -495,13 +508,13 @@ def main(): config = AutoConfig.from_pretrained( model_args.config_name, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: config = AutoConfig.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config = CONFIG_MAPPING[model_args.model_type]() @@ -512,14 +525,14 @@ def main(): model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: raise ValueError( @@ -638,7 +651,7 @@ def group_texts(examples): config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: model = FlaxAutoModelForMaskedLM.from_config( diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index 57f7d7e31bc6..c3afc58207b4 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -25,6 +25,7 @@ import os import sys import time +import warnings from dataclasses import asdict, dataclass, field # You can also adapt this script on your own masked language modeling task. Pointers for this are left as comments. @@ -168,15 +169,21 @@ class ModelArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -504,6 +511,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_t5_mlm", model_args, data_args, framework="flax") @@ -558,7 +571,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in datasets.keys(): @@ -567,14 +580,14 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -589,7 +602,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in datasets.keys(): @@ -598,14 +611,14 @@ def main(): data_files=data_files, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) datasets["train"] = load_dataset( extension, data_files=data_files, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -617,14 +630,14 @@ def main(): model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: raise ValueError( @@ -637,13 +650,13 @@ def main(): model_args.config_name, cache_dir=model_args.cache_dir, vocab_size=len(tokenizer), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: config = T5Config.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config = CONFIG_MAPPING[model_args.model_type]() @@ -738,7 +751,7 @@ def group_texts(examples): config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config.vocab_size = len(tokenizer) diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index 2b59185c9184..925e182e7e96 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -25,6 +25,7 @@ import random import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from pathlib import Path @@ -155,15 +156,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) dtype: Optional[str] = field( default="float32", metadata={ @@ -438,6 +445,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_qa", model_args, data_args, framework="flax") @@ -487,7 +500,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading the dataset from local csv or json file. @@ -507,7 +520,7 @@ def main(): data_files=data_files, field="data", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -520,14 +533,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=True, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # endregion @@ -874,7 +887,7 @@ def write_eval_metric(summary_writer, eval_metrics, step): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), ) diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index d10453ef46a6..83af31d8d22a 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -24,6 +24,7 @@ import os import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from functools import partial @@ -188,15 +189,21 @@ class ModelArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -417,6 +424,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_summarization", model_args, data_args, framework="flax") @@ -475,7 +488,7 @@ def main(): data_args.dataset_config_name, cache_dir=model_args.cache_dir, keep_in_memory=False, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -492,7 +505,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -503,13 +516,13 @@ def main(): config = AutoConfig.from_pretrained( model_args.config_name, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: config = AutoConfig.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config = CONFIG_MAPPING[model_args.model_type]() @@ -520,14 +533,14 @@ def main(): model_args.tokenizer_name, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: raise ValueError( @@ -541,7 +554,7 @@ def main(): config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: model = FlaxAutoModelForSeq2SeqLM.from_config( diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index 0ed62ee8ce33..3f6d0a5eb6c3 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -21,6 +21,7 @@ import random import sys import time +import warnings from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable, Dict, Optional, Tuple @@ -101,15 +102,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -321,6 +328,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_glue", model_args, data_args, framework="flax") @@ -368,7 +381,7 @@ def main(): raw_datasets = load_dataset( "glue", data_args.task_name, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading the dataset from local csv or json file. @@ -381,7 +394,7 @@ def main(): raw_datasets = load_dataset( extension, data_files=data_files, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -411,17 +424,17 @@ def main(): model_args.model_name_or_path, num_labels=num_labels, finetuning_task=data_args.task_name, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.model_name_or_path, use_fast=not model_args.use_slow_tokenizer, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = FlaxAutoModelForSequenceClassification.from_pretrained( model_args.model_name_or_path, config=config, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # Preprocessing the datasets diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index 06f410a3ea1f..f66d9b3128a7 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -21,6 +21,7 @@ import random import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from itertools import chain @@ -149,15 +150,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -377,6 +384,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_ner", model_args, data_args, framework="flax") @@ -422,7 +435,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading the dataset from local csv or json file. @@ -436,7 +449,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -490,7 +503,7 @@ def get_label_list(labels): finetuning_task=data_args.task_name, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path if config.model_type in {"gpt2", "roberta"}: @@ -498,7 +511,7 @@ def get_label_list(labels): tokenizer_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, add_prefix_space=True, ) else: @@ -506,14 +519,14 @@ def get_label_list(labels): tokenizer_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = FlaxAutoModelForTokenClassification.from_pretrained( model_args.model_name_or_path, config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # Preprocessing the datasets diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py index 40331b167802..63b638423a76 100644 --- a/examples/flax/vision/run_image_classification.py +++ b/examples/flax/vision/run_image_classification.py @@ -24,6 +24,7 @@ import os import sys import time +import warnings from dataclasses import asdict, dataclass, field from enum import Enum from pathlib import Path @@ -159,15 +160,21 @@ class ModelArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -257,6 +264,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_image_classification", model_args, data_args, framework="flax") @@ -338,7 +351,7 @@ def main(): num_labels=len(train_dataset.classes), image_size=data_args.image_size, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif model_args.model_name_or_path: config = AutoConfig.from_pretrained( @@ -346,7 +359,7 @@ def main(): num_labels=len(train_dataset.classes), image_size=data_args.image_size, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config = CONFIG_MAPPING[model_args.model_type]() @@ -358,7 +371,7 @@ def main(): config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype), - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: model = FlaxAutoModelForImageClassification.from_config( diff --git a/examples/pytorch/audio-classification/run_audio_classification.py b/examples/pytorch/audio-classification/run_audio_classification.py index 9e6a0bdff08e..c1568867e232 100644 --- a/examples/pytorch/audio-classification/run_audio_classification.py +++ b/examples/pytorch/audio-classification/run_audio_classification.py @@ -152,15 +152,21 @@ class ModelArguments: attention_mask: bool = field( default=True, metadata={"help": "Whether to generate an attention mask in the feature extractor."} ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) freeze_feature_extractor: Optional[bool] = field( default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."} ) @@ -198,6 +204,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_audio_classification", model_args, data_args) @@ -250,13 +262,13 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, split=data_args.train_split_name, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) raw_datasets["eval"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=data_args.eval_split_name, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if data_args.audio_column_name not in raw_datasets["train"].column_names: @@ -280,7 +292,7 @@ def main(): return_attention_mask=model_args.attention_mask, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # `datasets` takes care of automatically loading and resampling the audio, @@ -340,7 +352,7 @@ def compute_metrics(eval_pred): finetuning_task="audio-classification", cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForAudioClassification.from_pretrained( model_args.model_name_or_path, @@ -348,7 +360,7 @@ def compute_metrics(eval_pred): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ) diff --git a/examples/pytorch/contrastive-image-text/run_clip.py b/examples/pytorch/contrastive-image-text/run_clip.py index 7c0a03099159..dad10ea27495 100644 --- a/examples/pytorch/contrastive-image-text/run_clip.py +++ b/examples/pytorch/contrastive-image-text/run_clip.py @@ -26,6 +26,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -86,15 +87,21 @@ class ModelArguments: default=True, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) freeze_vision_model: bool = field( default=False, metadata={"help": "Whether to freeze the vision model parameters or not."} ) @@ -235,6 +242,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_clip", model_args, data_args) @@ -294,7 +307,7 @@ def main(): cache_dir=model_args.cache_dir, keep_in_memory=False, data_dir=data_args.data_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -311,7 +324,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -336,14 +349,14 @@ def main(): model_args.image_processor_name or model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModel.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) config = model.config diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py index 143a15712d52..35a74b253263 100644 --- a/examples/pytorch/image-classification/run_image_classification.py +++ b/examples/pytorch/image-classification/run_image_classification.py @@ -16,6 +16,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -142,15 +143,21 @@ class ModelArguments: metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) ignore_mismatched_sizes: bool = field( default=False, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, @@ -176,6 +183,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_image_classification", model_args, data_args) @@ -229,7 +242,7 @@ def main(): data_args.dataset_config_name, cache_dir=model_args.cache_dir, task="image-classification", - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -276,7 +289,7 @@ def compute_metrics(p): finetuning_task="image-classification", cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForImageClassification.from_pretrained( model_args.model_name_or_path, @@ -284,14 +297,14 @@ def compute_metrics(p): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ) image_processor = AutoImageProcessor.from_pretrained( model_args.image_processor_name or model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # Define torchvision transforms to be applied to each image. diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py index 3ae1caa20548..1c269fba3a2b 100644 --- a/examples/pytorch/image-pretraining/run_mae.py +++ b/examples/pytorch/image-pretraining/run_mae.py @@ -16,6 +16,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -133,15 +134,21 @@ class ModelArguments: metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) mask_ratio: float = field( default=0.75, metadata={"help": "The ratio of the number of masked tokens in the input sequence."} ) @@ -175,6 +182,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_mae", model_args, data_args) @@ -224,7 +237,7 @@ def main(): data_args.dataset_config_name, data_files=data_args.data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # If we don't have a validation split, split off a percentage of train as validation. @@ -242,7 +255,7 @@ def main(): config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.config_name: config = ViTMAEConfig.from_pretrained(model_args.config_name, **config_kwargs) @@ -280,7 +293,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: logger.info("Training new model from scratch") diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py index 21de0b24f7b6..25e780ab48be 100644 --- a/examples/pytorch/image-pretraining/run_mim.py +++ b/examples/pytorch/image-pretraining/run_mim.py @@ -16,6 +16,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -153,15 +154,21 @@ class ModelArguments: metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) image_size: Optional[int] = field( default=None, metadata={ @@ -239,6 +246,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_mim", model_args, data_args) @@ -288,7 +301,7 @@ def main(): data_args.dataset_config_name, data_files=data_args.data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # If we don't have a validation split, split off a percentage of train as validation. @@ -305,7 +318,7 @@ def main(): config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.config_name_or_path: config = AutoConfig.from_pretrained(model_args.config_name_or_path, **config_kwargs) @@ -357,7 +370,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: logger.info("Training new model from scratch") diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 1ee03f100719..c1ed1c3412cc 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -25,6 +25,7 @@ import math import os import sys +import warnings from dataclasses import dataclass, field from itertools import chain from typing import Optional @@ -111,15 +112,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) torch_dtype: Optional[str] = field( default=None, metadata={ @@ -238,6 +245,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_clm", model_args, data_args) @@ -300,7 +313,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, streaming=data_args.streaming, ) if "validation" not in raw_datasets.keys(): @@ -309,7 +322,7 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, streaming=data_args.streaming, ) raw_datasets["train"] = load_dataset( @@ -317,7 +330,7 @@ def main(): data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, streaming=data_args.streaming, ) else: @@ -339,7 +352,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, **dataset_args, ) # If no validation data is there, validation_split_percentage will be used to divide the dataset. @@ -349,7 +362,7 @@ def main(): data_files=data_files, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, **dataset_args, ) raw_datasets["train"] = load_dataset( @@ -357,7 +370,7 @@ def main(): data_files=data_files, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, **dataset_args, ) @@ -373,7 +386,7 @@ def main(): config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.config_name: config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) @@ -391,7 +404,7 @@ def main(): "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.tokenizer_name: tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) @@ -415,7 +428,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, torch_dtype=torch_dtype, low_cpu_mem_usage=model_args.low_cpu_mem_usage, ) diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index b6a490bfb3d8..d6c756edc387 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -25,6 +25,7 @@ import math import os import sys +import warnings from dataclasses import dataclass, field from itertools import chain from typing import Optional @@ -107,15 +108,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) low_cpu_mem_usage: bool = field( default=False, metadata={ @@ -238,6 +245,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_mlm", model_args, data_args) @@ -301,7 +314,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, streaming=data_args.streaming, ) if "validation" not in raw_datasets.keys(): @@ -310,7 +323,7 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, streaming=data_args.streaming, ) raw_datasets["train"] = load_dataset( @@ -318,7 +331,7 @@ def main(): data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, streaming=data_args.streaming, ) else: @@ -335,7 +348,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # If no validation data is there, validation_split_percentage will be used to divide the dataset. @@ -345,14 +358,14 @@ def main(): data_files=data_files, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) raw_datasets["train"] = load_dataset( extension, data_files=data_files, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at @@ -366,7 +379,7 @@ def main(): config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.config_name: config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) @@ -384,7 +397,7 @@ def main(): "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.tokenizer_name: tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) @@ -403,7 +416,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, low_cpu_mem_usage=model_args.low_cpu_mem_usage, ) else: diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index 015e8573568e..30ac5d422578 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -22,6 +22,7 @@ import math import os import sys +import warnings from dataclasses import dataclass, field from itertools import chain from typing import Optional @@ -95,15 +96,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) low_cpu_mem_usage: bool = field( default=False, metadata={ @@ -229,6 +236,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_plm", model_args, data_args) @@ -291,7 +304,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in raw_datasets.keys(): raw_datasets["validation"] = load_dataset( @@ -299,14 +312,14 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) raw_datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -325,14 +338,14 @@ def main(): data_files=data_files, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) raw_datasets["train"] = load_dataset( extension, data_files=data_files, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at @@ -346,7 +359,7 @@ def main(): config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.config_name: config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs) @@ -364,7 +377,7 @@ def main(): "cache_dir": model_args.cache_dir, "use_fast": model_args.use_fast_tokenizer, "revision": model_args.model_revision, - "use_auth_token": True if model_args.use_auth_token else None, + "token": model_args.token, } if model_args.tokenizer_name: tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs) @@ -383,7 +396,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, low_cpu_mem_usage=model_args.low_cpu_mem_usage, ) else: diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index e9c3d089b4ba..61f72fd60528 100755 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -21,6 +21,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from itertools import chain from typing import Optional, Union @@ -79,15 +80,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -225,6 +232,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_swag", model_args, data_args) @@ -292,7 +305,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Downloading and loading the swag dataset from the hub. @@ -300,7 +313,7 @@ def main(): "swag", "regular", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -314,14 +327,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForMultipleChoice.from_pretrained( model_args.model_name_or_path, @@ -329,7 +342,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # When using your own dataset or a different dataset from swag, you will probably need to change this. diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py index 415a7ddac728..d823b28256ba 100755 --- a/examples/pytorch/question-answering/run_qa.py +++ b/examples/pytorch/question-answering/run_qa.py @@ -21,6 +21,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -79,15 +80,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -227,6 +234,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_qa", model_args, data_args) @@ -289,7 +302,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -308,7 +321,7 @@ def main(): data_files=data_files, field="data", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -322,14 +335,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=True, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForQuestionAnswering.from_pretrained( model_args.model_name_or_path, @@ -337,7 +350,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # Tokenizer check: this script requires a fast tokenizer. diff --git a/examples/pytorch/question-answering/run_qa_beam_search.py b/examples/pytorch/question-answering/run_qa_beam_search.py index 71ea498e4ec0..c12dd53ab7aa 100755 --- a/examples/pytorch/question-answering/run_qa_beam_search.py +++ b/examples/pytorch/question-answering/run_qa_beam_search.py @@ -21,6 +21,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -78,15 +79,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -226,6 +233,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_qa_beam_search", model_args, data_args) @@ -288,7 +301,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -306,7 +319,7 @@ def main(): data_files=data_files, field="data", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -320,13 +333,13 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = XLNetTokenizerFast.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = XLNetForQuestionAnswering.from_pretrained( model_args.model_name_or_path, @@ -334,7 +347,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # Preprocessing the datasets. diff --git a/examples/pytorch/question-answering/run_seq2seq_qa.py b/examples/pytorch/question-answering/run_seq2seq_qa.py index f2d9a77b0144..ac3fb64974f8 100644 --- a/examples/pytorch/question-answering/run_seq2seq_qa.py +++ b/examples/pytorch/question-answering/run_seq2seq_qa.py @@ -21,6 +21,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import List, Optional, Tuple @@ -80,15 +81,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -273,6 +280,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_seq2seq_qa", model_args, data_args) @@ -335,7 +348,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -353,7 +366,7 @@ def main(): data_files=data_files, field="data", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -367,14 +380,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSeq2SeqLM.from_pretrained( model_args.model_name_or_path, @@ -382,7 +395,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py index dc191136b604..ce63e15faca7 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py @@ -18,6 +18,7 @@ import os import random import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -241,15 +242,21 @@ class ModelArguments: metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) def main(): @@ -265,6 +272,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_semantic_segmentation", model_args, data_args) @@ -379,7 +392,7 @@ def compute_metrics(eval_pred): id2label=id2label, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSemanticSegmentation.from_pretrained( model_args.model_name_or_path, @@ -387,13 +400,13 @@ def compute_metrics(eval_pred): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) image_processor = AutoImageProcessor.from_pretrained( model_args.image_processor_name or model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # Define torchvision transforms to be applied to each image + target. diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index c8dbdda70dd8..bff864ddb305 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -229,15 +229,21 @@ class DataTrainingArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "If :obj:`True`, will use the token generated when running" - ":obj:`huggingface-cli login` as HTTP bearer authorization for remote files." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) unk_token: str = field( default="[UNK]", metadata={"help": "The unk token for the tokenizer"}, @@ -379,6 +385,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if data_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if data_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + data_args.token = data_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_speech_recognition_ctc", model_args, data_args) @@ -427,7 +439,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, split=data_args.train_split_name, - use_auth_token=data_args.use_auth_token, + token=data_args.token, ) if data_args.audio_column_name not in raw_datasets["train"].column_names: @@ -452,7 +464,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, split=data_args.eval_split_name, - use_auth_token=data_args.use_auth_token, + token=data_args.token, ) if data_args.max_eval_samples is not None: @@ -490,7 +502,9 @@ def remove_special_characters(batch): # the tokenizer # load config config = AutoConfig.from_pretrained( - model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token + model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + token=data_args.token, ) # 4. Next, if no tokenizer file is defined, @@ -546,11 +560,13 @@ def remove_special_characters(batch): # load feature_extractor and tokenizer tokenizer = AutoTokenizer.from_pretrained( tokenizer_name_or_path, - use_auth_token=data_args.use_auth_token, + token=data_args.token, **tokenizer_kwargs, ) feature_extractor = AutoFeatureExtractor.from_pretrained( - model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token + model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + token=data_args.token, ) # adapt config @@ -578,7 +594,7 @@ def remove_special_characters(batch): model_args.model_name_or_path, cache_dir=model_args.cache_dir, config=config, - use_auth_token=data_args.use_auth_token, + token=data_args.token, ) # freeze encoder diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py index f2e2234e86d7..be9021874a48 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc_adapter.py @@ -232,15 +232,21 @@ class DataTrainingArguments: ) }, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "If :obj:`True`, will use the token generated when running" - ":obj:`huggingface-cli login` as HTTP bearer authorization for remote files." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) unk_token: str = field( default="[UNK]", metadata={"help": "The unk token for the tokenizer"}, @@ -375,6 +381,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if data_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if data_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + data_args.token = data_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_speech_recognition_ctc_adapter", model_args, data_args) @@ -423,7 +435,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, split=data_args.train_split_name, - use_auth_token=data_args.use_auth_token, + token=data_args.token, ) if data_args.audio_column_name not in raw_datasets["train"].column_names: @@ -448,7 +460,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, split=data_args.eval_split_name, - use_auth_token=data_args.use_auth_token, + token=data_args.token, ) if data_args.max_eval_samples is not None: @@ -486,7 +498,9 @@ def remove_special_characters(batch): # the tokenizer # load config config = AutoConfig.from_pretrained( - model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token + model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + token=data_args.token, ) # 4. Next, if no tokenizer file is defined, @@ -500,7 +514,10 @@ def remove_special_characters(batch): vocab_dict = {} if tokenizer_name_or_path is not None: # load vocabulary of other adapter languages so that new language can be appended - tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_auth_token=data_args.use_auth_token) + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_name_or_path, + token=data_args.token, + ) vocab_dict = tokenizer.vocab.copy() if tokenizer.target_lang is None: raise ValueError("Make sure to load a multi-lingual tokenizer with a set target language.") @@ -566,11 +583,13 @@ def remove_special_characters(batch): # load feature_extractor and tokenizer tokenizer = AutoTokenizer.from_pretrained( tokenizer_name_or_path, - use_auth_token=data_args.use_auth_token, + token=data_args.token, **tokenizer_kwargs, ) feature_extractor = AutoFeatureExtractor.from_pretrained( - model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token + model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + token=data_args.token, ) # adapt config @@ -595,7 +614,7 @@ def remove_special_characters(batch): model_args.model_name_or_path, cache_dir=model_args.cache_dir, config=config, - use_auth_token=data_args.use_auth_token, + token=data_args.token, ignore_mismatched_sizes=True, ) diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py index 29c99e5ab2a2..aea7d3b7fdbd 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py @@ -22,6 +22,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Union @@ -85,15 +86,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) freeze_feature_encoder: bool = field( default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} ) @@ -278,6 +285,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_speech_recognition_seq2seq", model_args, data_args) @@ -336,7 +349,7 @@ def main(): data_args.dataset_config_name, split=data_args.train_split_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if training_args.do_eval: @@ -345,7 +358,7 @@ def main(): data_args.dataset_config_name, split=data_args.eval_split_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: @@ -370,7 +383,7 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) config.update({"forced_decoder_ids": model_args.forced_decoder_ids, "suppress_tokens": model_args.suppress_tokens}) @@ -383,21 +396,21 @@ def main(): model_args.feature_extractor_name if model_args.feature_extractor_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSpeechSeq2Seq.from_pretrained( model_args.model_name_or_path, config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) if model.config.decoder_start_token_id is None: diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index d5f4243e056d..4a27df7582f6 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -21,6 +21,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -99,15 +100,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) resize_position_embeddings: Optional[bool] = field( default=None, metadata={ @@ -312,6 +319,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_summarization", model_args, data_args) @@ -386,7 +399,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -403,7 +416,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -417,14 +430,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSeq2SeqLM.from_pretrained( model_args.model_name_or_path, @@ -432,7 +445,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch diff --git a/examples/pytorch/text-classification/run_classification.py b/examples/pytorch/text-classification/run_classification.py index 1b680973c7f4..a5a4ad49f852 100755 --- a/examples/pytorch/text-classification/run_classification.py +++ b/examples/pytorch/text-classification/run_classification.py @@ -20,6 +20,7 @@ import os import random import sys +import warnings from dataclasses import dataclass, field from typing import List, Optional @@ -227,15 +228,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) ignore_mismatched_sizes: bool = field( default=False, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, @@ -268,6 +275,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_classification", model_args, data_args) @@ -327,7 +340,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # Try print some info about the dataset logger.info(f"Dataset loaded: {raw_datasets}") @@ -358,7 +371,7 @@ def main(): "csv", data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading a dataset from local json files @@ -366,7 +379,7 @@ def main(): "json", data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset at @@ -468,7 +481,7 @@ def main(): finetuning_task="text-classification", cache_dir=model_args.cache_dir, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if is_regression: @@ -486,7 +499,7 @@ def main(): cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSequenceClassification.from_pretrained( model_args.model_name_or_path, @@ -494,7 +507,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ) diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index 50bbeba7ff0b..3f8bcc2fbb61 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -20,6 +20,7 @@ import os import random import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -188,15 +189,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) ignore_mismatched_sizes: bool = field( default=False, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, @@ -216,6 +223,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_glue", model_args, data_args) @@ -281,7 +294,7 @@ def main(): "glue", data_args.task_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) elif data_args.dataset_name is not None: # Downloading and loading a dataset from the hub. @@ -289,7 +302,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading a dataset from your local files. @@ -318,7 +331,7 @@ def main(): "csv", data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading a dataset from local json files @@ -326,7 +339,7 @@ def main(): "json", data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -361,14 +374,14 @@ def main(): finetuning_task=data_args.task_name, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSequenceClassification.from_pretrained( model_args.model_name_or_path, @@ -376,7 +389,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ) diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py index 84751df54823..459a59282e32 100755 --- a/examples/pytorch/text-classification/run_xnli.py +++ b/examples/pytorch/text-classification/run_xnli.py @@ -21,6 +21,7 @@ import os import random import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -152,15 +153,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) ignore_mismatched_sizes: bool = field( default=False, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, @@ -175,6 +182,12 @@ def main(): parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_xnli", model_args) @@ -232,7 +245,7 @@ def main(): model_args.language, split="train", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: train_dataset = load_dataset( @@ -240,7 +253,7 @@ def main(): model_args.train_language, split="train", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) label_list = train_dataset.features["label"].names @@ -250,7 +263,7 @@ def main(): model_args.language, split="validation", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) label_list = eval_dataset.features["label"].names @@ -260,7 +273,7 @@ def main(): model_args.language, split="test", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) label_list = predict_dataset.features["label"].names @@ -278,7 +291,7 @@ def main(): finetuning_task="xnli", cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, @@ -286,7 +299,7 @@ def main(): cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSequenceClassification.from_pretrained( model_args.model_name_or_path, @@ -294,7 +307,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ) diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py index 256c862a194e..0e9d16041289 100755 --- a/examples/pytorch/token-classification/run_ner.py +++ b/examples/pytorch/token-classification/run_ner.py @@ -22,6 +22,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -79,15 +80,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) ignore_mismatched_sizes: bool = field( default=False, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, @@ -217,6 +224,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_ner", model_args, data_args) @@ -279,7 +292,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -348,7 +361,7 @@ def get_label_list(labels): finetuning_task=data_args.task_name, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path @@ -358,7 +371,7 @@ def get_label_list(labels): cache_dir=model_args.cache_dir, use_fast=True, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, add_prefix_space=True, ) else: @@ -367,7 +380,7 @@ def get_label_list(labels): cache_dir=model_args.cache_dir, use_fast=True, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForTokenClassification.from_pretrained( @@ -376,7 +389,7 @@ def get_label_list(labels): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ) diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index 015e58c97b5f..179ef71ad3d2 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -21,6 +21,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -89,15 +90,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -261,6 +268,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_translation", model_args, data_args) @@ -335,7 +348,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -352,7 +365,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -366,14 +379,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) model = AutoModelForSeq2SeqLM.from_pretrained( model_args.model_name_or_path, @@ -381,7 +394,7 @@ def main(): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch diff --git a/examples/tensorflow/contrastive-image-text/run_clip.py b/examples/tensorflow/contrastive-image-text/run_clip.py index d3e24a44a764..f5519046c68f 100644 --- a/examples/tensorflow/contrastive-image-text/run_clip.py +++ b/examples/tensorflow/contrastive-image-text/run_clip.py @@ -26,6 +26,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -92,15 +93,21 @@ class ModelArguments: default=True, metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) freeze_vision_model: bool = field( default=False, metadata={"help": "Whether to freeze the vision model parameters or not."} ) @@ -245,6 +252,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + if model_args.model_name_or_path is not None: if model_args.vision_model_name_or_path is not None or model_args.text_model_name_or_path is not None: raise ValueError( @@ -315,7 +328,7 @@ def main(): cache_dir=model_args.cache_dir, keep_in_memory=False, data_dir=data_args.data_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -332,7 +345,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -362,14 +375,14 @@ def main(): model_args.image_processor_name or model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) with training_args.strategy.scope(): model = TFAutoModel.from_pretrained( model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Load image_processor, in this script we only use this to get the mean and std for normalization. @@ -377,14 +390,14 @@ def main(): model_args.image_processor_name or model_args.vision_model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) with training_args.strategy.scope(): model = TFVisionTextDualEncoderModel.from_vision_text_pretrained( vision_model_name_or_path=model_args.vision_model_name_or_path, text_model_name_or_path=model_args.text_model_name_or_path, cache_dir=model_args.cache_dir, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) config = model.config diff --git a/examples/tensorflow/image-classification/run_image_classification.py b/examples/tensorflow/image-classification/run_image_classification.py index 83f15fea41d8..23e04c93181d 100644 --- a/examples/tensorflow/image-classification/run_image_classification.py +++ b/examples/tensorflow/image-classification/run_image_classification.py @@ -23,6 +23,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -157,15 +158,21 @@ class ModelArguments: metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) ignore_mismatched_sizes: bool = field( default=False, metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, @@ -226,6 +233,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + if not (training_args.do_train or training_args.do_eval or training_args.do_predict): exit("Must specify at least one of --do_train, --do_eval or --do_predict!") @@ -275,7 +288,7 @@ def main(): data_args.dataset_config_name, cache_dir=model_args.cache_dir, task="image-classification", - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -309,13 +322,13 @@ def main(): finetuning_task="image-classification", cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) image_processor = AutoImageProcessor.from_pretrained( model_args.image_processor_name or model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # If we don't have a validation split, split off a percentage of train as validation. @@ -435,7 +448,7 @@ def compute_metrics(p): from_pt=bool(".bin" in model_path), cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, ) num_replicas = training_args.strategy.num_replicas_in_sync diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py index 650459d41819..7c068d5c8c62 100755 --- a/examples/tensorflow/language-modeling/run_clm.py +++ b/examples/tensorflow/language-modeling/run_clm.py @@ -30,6 +30,7 @@ import os import random import sys +import warnings from dataclasses import dataclass, field from itertools import chain from pathlib import Path @@ -112,15 +113,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) def __post_init__(self): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): @@ -220,6 +227,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_clm", model_args, data_args, framework="tensorflow") @@ -287,7 +300,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in raw_datasets.keys(): raw_datasets["validation"] = load_dataset( @@ -295,14 +308,14 @@ def main(): data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) raw_datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -323,7 +336,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, **dataset_args, ) # If no validation data is there, validation_split_percentage will be used to divide the dataset. @@ -333,7 +346,7 @@ def main(): data_files=data_files, split=f"train[:{data_args.validation_split_percentage}%]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, **dataset_args, ) raw_datasets["train"] = load_dataset( @@ -341,7 +354,7 @@ def main(): data_files=data_files, split=f"train[{data_args.validation_split_percentage}%:]", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, **dataset_args, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py index 89d68ade4d40..7ea1a2f8534a 100755 --- a/examples/tensorflow/language-modeling/run_mlm.py +++ b/examples/tensorflow/language-modeling/run_mlm.py @@ -28,6 +28,7 @@ import os import random import sys +import warnings from dataclasses import dataclass, field from itertools import chain from pathlib import Path @@ -110,15 +111,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) def __post_init__(self): if self.config_overrides is not None and (self.config_name is not None or self.model_name_or_path is not None): @@ -226,6 +233,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_mlm", model_args, data_args, framework="tensorflow") @@ -296,20 +309,20 @@ def main(): raw_datasets = load_dataset( data_args.dataset_name, data_args.dataset_config_name, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) if "validation" not in raw_datasets.keys(): raw_datasets["validation"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[:{data_args.validation_split_percentage}%]", - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) raw_datasets["train"] = load_dataset( data_args.dataset_name, data_args.dataset_config_name, split=f"train[{data_args.validation_split_percentage}%:]", - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -323,7 +336,7 @@ def main(): raw_datasets = load_dataset( extension, data_files=data_files, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at diff --git a/examples/tensorflow/multiple-choice/run_swag.py b/examples/tensorflow/multiple-choice/run_swag.py index 94416c381fc9..170117ad9d72 100644 --- a/examples/tensorflow/multiple-choice/run_swag.py +++ b/examples/tensorflow/multiple-choice/run_swag.py @@ -22,6 +22,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from itertools import chain from pathlib import Path @@ -146,15 +147,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -239,6 +246,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_swag", model_args, data_args, framework="tensorflow") @@ -301,7 +314,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Downloading and loading the swag dataset from the hub. @@ -309,7 +322,7 @@ def main(): "swag", "regular", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -335,14 +348,14 @@ def main(): config_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # endregion @@ -428,7 +441,7 @@ def preprocess_function(examples): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) num_replicas = training_args.strategy.num_replicas_in_sync diff --git a/examples/tensorflow/question-answering/run_qa.py b/examples/tensorflow/question-answering/run_qa.py index a0e847ec4e86..8bbc986d0e90 100755 --- a/examples/tensorflow/question-answering/run_qa.py +++ b/examples/tensorflow/question-answering/run_qa.py @@ -22,6 +22,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from pathlib import Path from typing import Optional @@ -77,15 +78,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -245,6 +252,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_qa", model_args, data_args, framework="tensorflow") @@ -304,7 +317,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -323,7 +336,7 @@ def main(): data_files=data_files, field="data", cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -338,14 +351,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=True, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # endregion @@ -625,7 +638,7 @@ def compute_metrics(p: EvalPrediction): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) if training_args.do_train: training_dataset = model.prepare_tf_dataset( diff --git a/examples/tensorflow/summarization/run_summarization.py b/examples/tensorflow/summarization/run_summarization.py index c691aa9ebecf..82359f0921d4 100644 --- a/examples/tensorflow/summarization/run_summarization.py +++ b/examples/tensorflow/summarization/run_summarization.py @@ -22,6 +22,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -99,15 +100,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -287,6 +294,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_summarization", model_args, data_args, framework="tensorflow") @@ -355,7 +368,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -372,7 +385,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -388,14 +401,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) prefix = data_args.source_prefix if data_args.source_prefix is not None else "" @@ -513,7 +526,7 @@ def postprocess_text(preds, labels): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py index ae2a0de6c733..daeed0c6eab2 100644 --- a/examples/tensorflow/text-classification/run_glue.py +++ b/examples/tensorflow/text-classification/run_glue.py @@ -20,6 +20,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -164,15 +165,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) # endregion @@ -192,6 +199,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_glue", model_args, data_args, framework="tensorflow") @@ -242,7 +255,7 @@ def main(): "glue", data_args.task_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset at # https://huggingface.co/docs/datasets/loading_datasets.html. @@ -284,14 +297,14 @@ def main(): finetuning_task=data_args.task_name, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # endregion @@ -374,7 +387,7 @@ def compute_metrics(preds, label_ids): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # endregion diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py index f6031fc49779..a68aed942d8e 100644 --- a/examples/tensorflow/text-classification/run_text_classification.py +++ b/examples/tensorflow/text-classification/run_text_classification.py @@ -20,6 +20,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from pathlib import Path from typing import Optional @@ -170,15 +171,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) # endregion @@ -198,6 +205,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_text_classification", model_args, data_args, framework="tensorflow") @@ -258,7 +271,7 @@ def main(): "csv", data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: # Loading a dataset from local json files @@ -301,20 +314,20 @@ def main(): num_labels=num_labels, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: config = AutoConfig.from_pretrained( config_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # endregion @@ -402,7 +415,7 @@ def preprocess_function(examples): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # endregion diff --git a/examples/tensorflow/token-classification/run_ner.py b/examples/tensorflow/token-classification/run_ner.py index 2d5ed748fe95..ed8ab00e4f85 100644 --- a/examples/tensorflow/token-classification/run_ner.py +++ b/examples/tensorflow/token-classification/run_ner.py @@ -21,6 +21,7 @@ import logging import os import random +import warnings from dataclasses import dataclass, field from typing import Optional @@ -75,15 +76,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -196,6 +203,12 @@ def main(): parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments)) model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_ner", model_args, data_args, framework="tensorflow") @@ -228,7 +241,7 @@ def main(): raw_datasets = load_dataset( data_args.dataset_name, data_args.dataset_config_name, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -240,7 +253,7 @@ def main(): raw_datasets = load_dataset( extension, data_files=data_files, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading_datasets.html. diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py index 9004f648856f..68e9e9c16b3a 100644 --- a/examples/tensorflow/translation/run_translation.py +++ b/examples/tensorflow/translation/run_translation.py @@ -22,6 +22,7 @@ import logging import os import sys +import warnings from dataclasses import dataclass, field from typing import Optional @@ -93,15 +94,21 @@ class ModelArguments: default="main", metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, ) - use_auth_token: bool = field( - default=False, + token: str = field( + default=None, metadata={ "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." + "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." ) }, ) + use_auth_token: bool = field( + default=None, + metadata={ + "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`." + }, + ) @dataclass @@ -268,6 +275,12 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + if model_args.use_auth_token is not None: + warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning) + if model_args.token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") + model_args.token = model_args.use_auth_token + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The # information sent is the one passed as arguments along with your Python/PyTorch versions. send_example_telemetry("run_translation", model_args, data_args, framework="tensorflow") @@ -322,7 +335,7 @@ def main(): data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) else: data_files = {} @@ -336,7 +349,7 @@ def main(): extension, data_files=data_files, cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, + token=model_args.token, ) # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at # https://huggingface.co/docs/datasets/loading @@ -352,14 +365,14 @@ def main(): model_args.config_name if model_args.config_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) tokenizer = AutoTokenizer.from_pretrained( model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_fast=model_args.use_fast_tokenizer, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) prefix = data_args.source_prefix if data_args.source_prefix is not None else "" @@ -466,7 +479,7 @@ def preprocess_function(examples): config=config, cache_dir=model_args.cache_dir, revision=model_args.model_revision, - token=True if model_args.use_auth_token else None, + token=model_args.token, ) # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch