From a2bac4bc31da2604423d374d0b4ac8075d39385f Mon Sep 17 00:00:00 2001
From: zhyncs
Date: Mon, 17 Jun 2024 13:41:03 +0800
Subject: [PATCH 1/4] [Misc] use AutoTokenizer for benchmark serving when vLLM not installed

---
 benchmarks/backend_request_func.py | 11 ++++++++++-
 benchmarks/benchmark_serving.py    |  5 ++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 52386b8cd62b..e3fcda7b3120 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -4,10 +4,12 @@
 import time
 import traceback
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import List, Optional, Union
 
 import aiohttp
 from tqdm.asyncio import tqdm
+from transformers import (AutoTokenizer, PreTrainedTokenizer,
+                          PreTrainedTokenizerFast)
 
 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
 
@@ -388,6 +390,13 @@ def remove_prefix(text: str, prefix: str) -> str:
     return text
 
 
+def get_tokenizer(
+    pretrained_model_name_or_path: str, trust_remote_code: bool
+) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
+    return AutoTokenizer.from_pretrained(pretrained_model_name_or_path,
+                                         trust_remote_code=trust_remote_code)
+
+
 ASYNC_REQUEST_FUNCS = {
     "tgi": async_request_tgi,
     "vllm": async_request_openai_completions,
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index c136ee572fdf..bf4099f03708 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -39,7 +39,10 @@
 from tqdm.asyncio import tqdm
 from transformers import PreTrainedTokenizerBase
 
-from vllm.transformers_utils.tokenizer import get_tokenizer
+try:
+    from vllm.transformers_utils.tokenizer import get_tokenizer
+except:
+    from backend_request_func import get_tokenizer
 
 
 @dataclass

From 198bd20bf4d2616aa081660fb40dd5a4bd5afec5 Mon Sep 17 00:00:00 2001
From: zhyncs
Date: Mon, 17 Jun 2024 13:49:01 +0800
Subject: [PATCH 2/4] fix lint

---
 benchmarks/benchmark_serving.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
index bf4099f03708..eef03e7d81c3 100644
--- a/benchmarks/benchmark_serving.py
+++ b/benchmarks/benchmark_serving.py
@@ -41,7 +41,7 @@
 
 try:
     from vllm.transformers_utils.tokenizer import get_tokenizer
-except:
+except ImportError:
     from backend_request_func import get_tokenizer
 
 

From 9db70787024c49f7f65c09f7210f9993b9dcb522 Mon Sep 17 00:00:00 2001
From: zhyncs
Date: Mon, 17 Jun 2024 20:02:01 +0800
Subject: [PATCH 3/4] fix comment

---
 benchmarks/backend_request_func.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index e3fcda7b3120..bdbc629d7a4e 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -390,9 +390,26 @@ def remove_prefix(text: str, prefix: str) -> str:
     return text
 
 
+def get_model(pretrained_model_name_or_path: str):
+    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
+        from modelscope import snapshot_download
+    else:
+        from huggingface_hub import snapshot_download
+
+    model_path = snapshot_download(
+        model_id=pretrained_model_name_or_path,
+        local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
+        ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
+    return model_path
+
+
 def get_tokenizer(
     pretrained_model_name_or_path: str, trust_remote_code: bool
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
+    if pretrained_model_name_or_path is not None and not os.path.exists(
+            pretrained_model_name_or_path):
+        pretrained_model_name_or_path = get_model(
+            pretrained_model_name_or_path)
     return AutoTokenizer.from_pretrained(pretrained_model_name_or_path,
                                          trust_remote_code=trust_remote_code)
 

From 14c5919137aca561df46fec72db800a848190d04 Mon Sep 17 00:00:00 2001
From: zhyncs
Date: Mon, 17 Jun 2024 20:08:19 +0800
Subject: [PATCH 4/4] fix lint

---
 benchmarks/backend_request_func.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index bdbc629d7a4e..4350b96b04a6 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -7,6 +7,7 @@
 from typing import List, Optional, Union
 
 import aiohttp
+import huggingface_hub.constants
 from tqdm.asyncio import tqdm
 from transformers import (AutoTokenizer, PreTrainedTokenizer,
                           PreTrainedTokenizerFast)