Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions examples/flax/image-captioning/run_image_captioning_flax.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import os
import sys
import time
import warnings
from dataclasses import asdict, dataclass, field
from enum import Enum
from functools import partial
Expand Down Expand Up @@ -182,15 +183,21 @@ class ModelArguments:
)
},
)
use_auth_token: bool = field(
default=False,
token: str = field(
default=None,
metadata={
"help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script "
"with private models)."
"The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
)
},
)
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)


@dataclass
Expand Down Expand Up @@ -389,6 +396,12 @@ def main():
else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses()

if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token

# Sending telemetry. Tracking example usage helps us better allocate resources to maintain these examples. The
# information sent is the set of arguments passed in, along with your Python/PyTorch versions.
send_example_telemetry("run_image_captioning", model_args, data_args, framework="flax")
Expand Down Expand Up @@ -448,7 +461,7 @@ def main():
cache_dir=model_args.cache_dir,
keep_in_memory=False,
data_dir=data_args.data_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
data_files = {}
Expand All @@ -465,7 +478,7 @@ def main():
extension,
data_files=data_files,
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, Python dict, pandas DataFrame, etc.) at
# https://huggingface.co/docs/datasets/loading_datasets.html.
Expand All @@ -475,18 +488,18 @@ def main():
model_args.model_name_or_path,
seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
image_processor = AutoImageProcessor.from_pretrained(
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
tokenizer.pad_token = tokenizer.convert_ids_to_tokens(model.config.pad_token_id)

Expand Down
43 changes: 28 additions & 15 deletions examples/flax/language-modeling/run_bart_dlm_flax.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import os
import sys
import time
import warnings
from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
Expand Down Expand Up @@ -168,15 +169,21 @@ class ModelArguments:
)
},
)
use_auth_token: bool = field(
default=False,
token: str = field(
default=None,
metadata={
"help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script "
"with private models)."
"The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
)
},
)
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)


@dataclass
Expand Down Expand Up @@ -463,6 +470,12 @@ def main():
else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses()

if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token

# Sending telemetry. Tracking example usage helps us better allocate resources to maintain these examples. The
# information sent is the set of arguments passed in, along with your Python/PyTorch versions.
send_example_telemetry("run_bart_dlm", model_args, data_args, framework="flax")
Expand Down Expand Up @@ -517,7 +530,7 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)

if "validation" not in datasets.keys():
Expand All @@ -526,14 +539,14 @@ def main():
data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
datasets["train"] = load_dataset(
data_args.dataset_name,
data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
data_files = {}
Expand All @@ -548,7 +561,7 @@ def main():
extension,
data_files=data_files,
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)

if "validation" not in datasets.keys():
Expand All @@ -557,14 +570,14 @@ def main():
data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
datasets["train"] = load_dataset(
extension,
data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, Python dict, pandas DataFrame, etc.) at
# https://huggingface.co/docs/datasets/loading_datasets.html.
Expand All @@ -576,14 +589,14 @@ def main():
model_args.tokenizer_name,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
raise ValueError(
Expand All @@ -596,13 +609,13 @@ def main():
model_args.config_name,
cache_dir=model_args.cache_dir,
vocab_size=len(tokenizer),
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
elif model_args.model_name_or_path:
config = BartConfig.from_pretrained(
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
config = CONFIG_MAPPING[model_args.model_type]()
Expand Down Expand Up @@ -707,7 +720,7 @@ def group_texts(examples):
config=config,
seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
config.vocab_size = len(tokenizer)
Expand Down
43 changes: 28 additions & 15 deletions examples/flax/language-modeling/run_clm_flax.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import os
import sys
import time
import warnings
from dataclasses import asdict, dataclass, field
from enum import Enum
from itertools import chain
Expand Down Expand Up @@ -169,15 +170,21 @@ class ModelArguments:
)
},
)
use_auth_token: bool = field(
default=False,
token: str = field(
default=None,
metadata={
"help": (
"Will use the token generated when running `huggingface-cli login` (necessary to use this script "
"with private models)."
"The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
"generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
)
},
)
use_auth_token: bool = field(
default=None,
metadata={
"help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
},
)


@dataclass
Expand Down Expand Up @@ -334,6 +341,12 @@ def main():
else:
model_args, data_args, training_args = parser.parse_args_into_dataclasses()

if model_args.use_auth_token is not None:
warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
if model_args.token is not None:
raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
model_args.token = model_args.use_auth_token

# Sending telemetry. Tracking example usage helps us better allocate resources to maintain these examples. The
# information sent is the set of arguments passed in, along with your Python/PyTorch versions.
send_example_telemetry("run_clm", model_args, data_args, framework="flax")
Expand Down Expand Up @@ -397,7 +410,7 @@ def main():
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
keep_in_memory=False,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)

if "validation" not in dataset.keys():
Expand All @@ -406,14 +419,14 @@ def main():
data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
dataset["train"] = load_dataset(
data_args.dataset_name,
data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
data_files = {}
Expand All @@ -431,7 +444,7 @@ def main():
data_files=data_files,
cache_dir=model_args.cache_dir,
**dataset_args,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)

if "validation" not in dataset.keys():
Expand All @@ -441,15 +454,15 @@ def main():
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
**dataset_args,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
dataset["train"] = load_dataset(
extension,
data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
**dataset_args,
use_auth_token=True if model_args.use_auth_token else None,
token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, Python dict, pandas DataFrame, etc.) at
# https://huggingface.co/docs/datasets/loading_datasets.html.
Expand All @@ -463,13 +476,13 @@ def main():
config = AutoConfig.from_pretrained(
model_args.config_name,
cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
elif model_args.model_name_or_path:
config = AutoConfig.from_pretrained(
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
config = CONFIG_MAPPING[model_args.model_type]()
Expand All @@ -480,14 +493,14 @@ def main():
model_args.tokenizer_name,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
elif model_args.model_name_or_path:
tokenizer = AutoTokenizer.from_pretrained(
model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
raise ValueError(
Expand All @@ -501,7 +514,7 @@ def main():
config=config,
seed=training_args.seed,
dtype=getattr(jnp, model_args.dtype),
token=True if model_args.use_auth_token else None,
token=model_args.token,
)
else:
model = FlaxAutoModelForCausalLM.from_config(
Expand Down
Loading