From 7a16b758ed35c9dae486cb42a6ae6061a83f5004 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sun, 15 Sep 2024 17:51:39 +0800 Subject: [PATCH 1/9] add image input support for serving benchmark --- benchmarks/backend_request_func.py | 1 + benchmarks/benchmark_serving.py | 95 +++++++++++++++++++++++++++--- 2 files changed, 89 insertions(+), 7 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 3243bb94f787..b92e88521e28 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -25,6 +25,7 @@ class RequestFuncInput: best_of: int = 1 use_beam_search: bool = False logprobs: Optional[int] = None + multi_modal_content: Optional[dict] = None @dataclass diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 9ba3f649810b..f5b0cac7638a 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -24,6 +24,7 @@ """ import argparse import asyncio +import base64 import json import os import random @@ -31,11 +32,13 @@ import warnings from dataclasses import dataclass from datetime import datetime -from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Collection import numpy as np from backend_request_func import (ASYNC_REQUEST_FUNCS, RequestFuncInput, RequestFuncOutput) +from datasets import load_dataset +from PIL.Image import Image from tqdm.asyncio import tqdm from transformers import PreTrainedTokenizerBase @@ -119,7 +122,7 @@ def sample_sharegpt_requests( if prompt_len > 1024 or prompt_len + output_len > 2048: # Prune too long sequences. continue - filtered_dataset.append((prompt, prompt_len, output_len)) + filtered_dataset.append((prompt, prompt_len, output_len, None)) return filtered_dataset @@ -189,7 +192,54 @@ def sample_sonnet_requests( message, add_generation_prompt=True, tokenize=False) prompt_len = len(tokenizer(prompt_formatted).input_ids) sampled_requests.append( - (prompt, prompt_formatted, prompt_len, output_len)) + (prompt, prompt_formatted, prompt_len, output_len, None)) + + return sampled_requests + + +def sample_hf_requests( + dataset_path: str, + dataset_subset: str, + dataset_split: str, + num_requests: int, + tokenizer: PreTrainedTokenizerBase, + fixed_output_len: Optional[int] = None, +): + dataset = load_dataset(dataset_path, name=dataset_subset, split=dataset_split, streaming=True) + filtered_dataset = dataset.shuffle().filter(lambda x: len(x["conversations"]) >= 2) + sampled_requests: List[Tuple[str, int, int, Dict[str, Collection[str]]]] = [] + for data in filtered_dataset: + if len(sampled_requests) == num_requests: + break + + # Tokenize the prompts and completions. + prompt = data["conversations"][0]["value"] + prompt_token_ids = tokenizer(prompt).input_ids + completion = data["conversations"][1]["value"] + completion_token_ids = tokenizer(completion).input_ids + prompt_len = len(prompt_token_ids) + output_len = len(completion_token_ids + ) if fixed_output_len is None else fixed_output_len + if prompt_len < 4 or output_len < 4: + # Prune too short sequences. + continue + if prompt_len > 1024 or prompt_len + output_len > 2048: + # Prune too long sequences. 
+ continue + + if "image" in data and isinstance(data["image"], Image): + image: Image = data["image"] + image_base64 = base64.b64encode(image.tobytes()).decode("utf-8") + mm_content = { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_base64}" + }, + } + else: + mm_content = None + + sampled_requests.append((prompt, prompt_len, output_len, mm_content)) return sampled_requests @@ -224,7 +274,7 @@ def sample_random_requests( for j in range(input_lens[i])]) input_requests.append( - (prompt, int(prefix_len + input_lens[i]), int(output_lens[i]))) + (prompt, int(prefix_len + input_lens[i]), int(output_lens[i]), None)) return input_requests @@ -343,7 +393,7 @@ async def benchmark( raise ValueError(f"Unknown backend: {backend}") print("Starting initial single prompt test run...") - test_prompt, test_prompt_len, test_output_len = input_requests[0] + test_prompt, test_prompt_len, test_output_len, test_mm_content = input_requests[0] test_input = RequestFuncInput( model=model_id, prompt=test_prompt, @@ -353,6 +403,7 @@ async def benchmark( logprobs=logprobs, best_of=best_of, use_beam_search=use_beam_search, + multi_modal_content=test_mm_content, ) test_output = await request_func(request_func_input=test_input) if not test_output.success: @@ -373,6 +424,7 @@ async def benchmark( logprobs=logprobs, best_of=best_of, use_beam_search=use_beam_search, + multi_modal_content=test_mm_content, ) profile_output = await request_func(request_func_input=profile_input) if profile_output.success: @@ -385,7 +437,7 @@ async def benchmark( benchmark_start_time = time.perf_counter() tasks: List[asyncio.Task] = [] async for request in get_request(input_requests, request_rate): - prompt, prompt_len, output_len = request + prompt, prompt_len, output_len, mm_content = request request_func_input = RequestFuncInput( model=model_id, prompt=prompt, @@ -395,6 +447,7 @@ async def benchmark( logprobs=logprobs, best_of=best_of, use_beam_search=use_beam_search, + multi_modal_content=mm_content, ) tasks.append( asyncio.create_task( @@ -574,6 +627,16 @@ def main(args: argparse.Namespace): input_requests = [(prompt_formatted, prompt_len, output_len) for prompt, prompt_formatted, prompt_len, output_len in input_requests] + + elif args.dataset_name == "hf": + input_requests = sample_hf_requests( + dataset_path=args.dataset_path, + dataset_subset=args.hf_subset, + dataset_split=args.hf_split, + num_requests=args.num_prompts, + tokenizer=tokenizer, + fixed_output_len=args.hf_output_len, + ) elif args.dataset_name == "random": input_requests = sample_random_requests( @@ -685,7 +748,7 @@ def main(args: argparse.Namespace): "--dataset-name", type=str, default="sharegpt", - choices=["sharegpt", "sonnet", "random"], + choices=["sharegpt", "sonnet", "random", "hf"], help="Name of the dataset to benchmark on.", ) parser.add_argument("--dataset-path", @@ -784,6 +847,24 @@ def main(args: argparse.Namespace): " context. The length range of context in a random " " request is [random-prefix-len, " " random-prefix-len + random-prefix-len * random-range-ratio).") + parser.add_argument( + "--hf-output-len", + type=int, + default=128, + help="Number of input tokens per request, used only for HF dataset.", + ) + parser.add_argument( + "--hf-subset", + type=str, + default=None, + help="Subset of the HF dataset." + ) + parser.add_argument( + "--hf-split", + type=str, + default=None, + help="Split of the HF dataset." 
+ ) parser.add_argument( "--request-rate", type=float, From 78e5cb8487a7799349c7ba0d95f70c124463d498 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sun, 15 Sep 2024 18:06:29 +0800 Subject: [PATCH 2/9] add image input support for serving benchmark --- benchmarks/backend_request_func.py | 8 +++++++- benchmarks/benchmark_serving.py | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index b92e88521e28..6616eb0d3e67 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -318,7 +318,13 @@ async def async_request_openai_chat_completions( "messages": [ { "role": "user", - "content": request_func_input.prompt, + "content": [ + { + "type": "text", + "text": request_func_input.prompt + }, + request_func_input.multi_modal_content or {}, + ], }, ], "temperature": 0.0, diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index f5b0cac7638a..bed3747019e8 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -27,6 +27,7 @@ import base64 import json import os +import io import random import time import warnings @@ -229,7 +230,10 @@ def sample_hf_requests( if "image" in data and isinstance(data["image"], Image): image: Image = data["image"] - image_base64 = base64.b64encode(image.tobytes()).decode("utf-8") + image = image.convert("RGB") + image_data = io.BytesIO() + image.save(image_data, format='JPEG') + image_base64 = base64.b64encode(image_data.getvalue()).decode("utf-8") mm_content = { "type": "image_url", "image_url": { From 011841309828ee36ff4c125eb2390b56e1c1bbad Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sun, 15 Sep 2024 21:24:17 +0800 Subject: [PATCH 3/9] fix no image bench --- benchmarks/backend_request_func.py | 11 +++---- benchmarks/benchmark_serving.py | 47 ++++++++++++++++-------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 6616eb0d3e67..557b1d7a78e8 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -313,18 +313,15 @@ async def async_request_openai_chat_completions( async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session: assert not request_func_input.use_beam_search + content = [{"type": "text", "text": request_func_input.prompt}] + if request_func_input.multi_modal_content: + content.extend(request_func_input.multi_modal_content) payload = { "model": request_func_input.model, "messages": [ { "role": "user", - "content": [ - { - "type": "text", - "text": request_func_input.prompt - }, - request_func_input.multi_modal_content or {}, - ], + "content": content }, ], "temperature": 0.0, diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index bed3747019e8..927b78cbb380 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -25,15 +25,15 @@ import argparse import asyncio import base64 +import io import json import os -import io import random import time import warnings from dataclasses import dataclass from datetime import datetime -from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Collection +from typing import Any, AsyncGenerator, Collection, Dict, List, Optional, Tuple import numpy as np from backend_request_func import (ASYNC_REQUEST_FUNCS, RequestFuncInput, @@ -206,9 +206,14 @@ def sample_hf_requests( tokenizer: 
PreTrainedTokenizerBase, fixed_output_len: Optional[int] = None, ): - dataset = load_dataset(dataset_path, name=dataset_subset, split=dataset_split, streaming=True) - filtered_dataset = dataset.shuffle().filter(lambda x: len(x["conversations"]) >= 2) - sampled_requests: List[Tuple[str, int, int, Dict[str, Collection[str]]]] = [] + dataset = load_dataset(dataset_path, + name=dataset_subset, + split=dataset_split, + streaming=True) + filtered_dataset = dataset.shuffle().filter( + lambda x: len(x["conversations"]) >= 2) + sampled_requests: List[Tuple[str, int, int, Dict[str, + Collection[str]]]] = [] for data in filtered_dataset: if len(sampled_requests) == num_requests: break @@ -233,7 +238,8 @@ def sample_hf_requests( image = image.convert("RGB") image_data = io.BytesIO() image.save(image_data, format='JPEG') - image_base64 = base64.b64encode(image_data.getvalue()).decode("utf-8") + image_base64 = base64.b64encode( + image_data.getvalue()).decode("utf-8") mm_content = { "type": "image_url", "image_url": { @@ -277,8 +283,8 @@ def sample_random_requests( [(offsets[i] + i + j) % tokenizer.vocab_size for j in range(input_lens[i])]) - input_requests.append( - (prompt, int(prefix_len + input_lens[i]), int(output_lens[i]), None)) + input_requests.append((prompt, int(prefix_len + input_lens[i]), + int(output_lens[i]), None)) return input_requests @@ -397,7 +403,8 @@ async def benchmark( raise ValueError(f"Unknown backend: {backend}") print("Starting initial single prompt test run...") - test_prompt, test_prompt_len, test_output_len, test_mm_content = input_requests[0] + test_prompt, test_prompt_len, test_output_len, test_mm_content = ( + input_requests[0]) test_input = RequestFuncInput( model=model_id, prompt=test_prompt, @@ -631,7 +638,7 @@ def main(args: argparse.Namespace): input_requests = [(prompt_formatted, prompt_len, output_len) for prompt, prompt_formatted, prompt_len, output_len in input_requests] - + elif args.dataset_name == "hf": input_requests = sample_hf_requests( dataset_path=args.dataset_path, @@ -857,18 +864,14 @@ def main(args: argparse.Namespace): default=128, help="Number of input tokens per request, used only for HF dataset.", ) - parser.add_argument( - "--hf-subset", - type=str, - default=None, - help="Subset of the HF dataset." - ) - parser.add_argument( - "--hf-split", - type=str, - default=None, - help="Split of the HF dataset." 
- ) + parser.add_argument("--hf-subset", + type=str, + default=None, + help="Subset of the HF dataset.") + parser.add_argument("--hf-split", + type=str, + default=None, + help="Split of the HF dataset.") parser.add_argument( "--request-rate", type=float, From d20b233553373e7304ad98d173b93cdc12fe29e0 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sun, 15 Sep 2024 22:08:20 +0800 Subject: [PATCH 4/9] fix image input --- benchmarks/backend_request_func.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 557b1d7a78e8..3def4a6d67ac 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -315,7 +315,7 @@ async def async_request_openai_chat_completions( assert not request_func_input.use_beam_search content = [{"type": "text", "text": request_func_input.prompt}] if request_func_input.multi_modal_content: - content.extend(request_func_input.multi_modal_content) + content.append(request_func_input.multi_modal_content) payload = { "model": request_func_input.model, "messages": [ From 887bbca5e1c43b70390b3c062fdcf6c7674996ea Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Sun, 15 Sep 2024 22:33:26 +0800 Subject: [PATCH 5/9] add feature assertion --- benchmarks/benchmark_serving.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 927b78cbb380..791f01de1bda 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -210,6 +210,8 @@ def sample_hf_requests( name=dataset_subset, split=dataset_split, streaming=True) + assert "conversations" in dataset.features, ( + "HF Dataset must have 'conversations' column.") filtered_dataset = dataset.shuffle().filter( lambda x: len(x["conversations"]) >= 2) sampled_requests: List[Tuple[str, int, int, Dict[str, From a87b8a47439126ea51eaadc014129ad2e0870f3e Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Mon, 16 Sep 2024 18:27:31 +0800 Subject: [PATCH 6/9] fix typing and argument --- benchmarks/benchmark_serving.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 791f01de1bda..bd24a8200448 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -88,7 +88,7 @@ def sample_sharegpt_requests( num_requests: int, tokenizer: PreTrainedTokenizerBase, fixed_output_len: Optional[int] = None, -) -> List[Tuple[str, int, int]]: +) -> List[Tuple[str, int, int, None]]: if fixed_output_len is not None and fixed_output_len < 4: raise ValueError("output_len too small") # Load the dataset. @@ -135,7 +135,7 @@ def sample_sonnet_requests( output_len: int, prefix_len: int, tokenizer: PreTrainedTokenizerBase, -) -> List[Tuple[str, str, int, int]]: +) -> List[Tuple[str, str, int, int, None]]: assert ( input_len > prefix_len ), "'args.sonnet-input-len' must be greater than 'args.prefix-input-len'." @@ -205,7 +205,7 @@ def sample_hf_requests( num_requests: int, tokenizer: PreTrainedTokenizerBase, fixed_output_len: Optional[int] = None, -): +) -> List[Tuple[str, str, int, Optional[Dict[str, Collection[str]]]]]: dataset = load_dataset(dataset_path, name=dataset_subset, split=dataset_split, @@ -767,7 +767,8 @@ def main(args: argparse.Namespace): parser.add_argument("--dataset-path", type=str, default=None, - help="Path to the dataset.") + help="Path to the sharegpt/sonnet dataset. 
" + "Or the huggingface dataset ID if using HF dataset.") parser.add_argument( "--model", type=str, @@ -863,8 +864,9 @@ def main(args: argparse.Namespace): parser.add_argument( "--hf-output-len", type=int, - default=128, - help="Number of input tokens per request, used only for HF dataset.", + default=None, + help="Output length for each request. Overrides the output lengths " + "from the sampled HF dataset.", ) parser.add_argument("--hf-subset", type=str, From 78c7fb57b26e658cec1fd51c5137504329fe33f0 Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Mon, 16 Sep 2024 20:49:24 +0800 Subject: [PATCH 7/9] group dataset args and add backend check --- benchmarks/benchmark_serving.py | 154 +++++++++++++++++--------------- 1 file changed, 83 insertions(+), 71 deletions(-) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index bd24a8200448..40a7e9c17971 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -407,6 +407,10 @@ async def benchmark( print("Starting initial single prompt test run...") test_prompt, test_prompt_len, test_output_len, test_mm_content = ( input_requests[0]) + if backend != "openai-chat" and test_mm_content is not None: + # multi-modal benchmark is only available on OpenAI Chat backend. + raise ValueError( + "Multi-modal content is only supported for OpenAI Chat backend.") test_input = RequestFuncInput( model=model_id, prompt=test_prompt, @@ -795,26 +799,6 @@ def main(args: argparse.Namespace): default=1000, help="Number of prompts to process.", ) - parser.add_argument( - "--sharegpt-output-len", - type=int, - default=None, - help="Output length for each request. Overrides the output length " - "from the ShareGPT dataset.") - parser.add_argument( - "--sonnet-input-len", - type=int, - default=550, - help= - "Number of input tokens per request, used only for sonnet dataset.", - ) - parser.add_argument( - "--sonnet-output-len", - type=int, - default=150, - help= - "Number of output tokens per request, used only for sonnet dataset.", - ) parser.add_argument( "--logprobs", type=int, @@ -825,57 +809,6 @@ def main(args: argparse.Namespace): "logprob is returned for each token; or (2) if beam search " "is enabled 1 logprob per token is computed"), ) - parser.add_argument( - "--sonnet-prefix-len", - type=int, - default=200, - help= - "Number of prefix tokens per request, used only for sonnet dataset.", - ) - parser.add_argument( - "--random-input-len", - type=int, - default=1024, - help= - "Number of input tokens per request, used only for random sampling.", - ) - parser.add_argument( - "--random-output-len", - type=int, - default=128, - help= - "Number of output tokens per request, used only for random sampling.", - ) - parser.add_argument( - "--random-range-ratio", - type=float, - default=1.0, - help="Range of sampled ratio of input/output length, " - "used only for random sampling.", - ) - parser.add_argument( - "--random-prefix-len", - type=int, - default=0, - help="Number of fixed prefix tokens before random " - " context. The length range of context in a random " - " request is [random-prefix-len, " - " random-prefix-len + random-prefix-len * random-range-ratio).") - parser.add_argument( - "--hf-output-len", - type=int, - default=None, - help="Output length for each request. 
Overrides the output lengths " - "from the sampled HF dataset.", - ) - parser.add_argument("--hf-subset", - type=str, - default=None, - help="Subset of the HF dataset.") - parser.add_argument("--hf-split", - type=str, - default=None, - help="Split of the HF dataset.") parser.add_argument( "--request-rate", type=float, @@ -949,5 +882,84 @@ def main(args: argparse.Namespace): "Use \"--percentile-metrics\" to select metrics.", ) + sonnet_group = parser.add_argument_group("sonnet dataset options") + sonnet_group.add_argument( + "--sonnet-prefix-len", + type=int, + default=200, + help= + "Number of prefix tokens per request, used only for sonnet dataset.", + ) + sonnet_group.add_argument( + "--sonnet-input-len", + type=int, + default=550, + help= + "Number of input tokens per request, used only for sonnet dataset.", + ) + sonnet_group.add_argument( + "--sonnet-output-len", + type=int, + default=150, + help= + "Number of output tokens per request, used only for sonnet dataset.", + ) + + sharegpt_group = parser.add_argument_group("sharegpt dataset options") + sharegpt_group.add_argument( + "--sharegpt-output-len", + type=int, + default=None, + help="Output length for each request. Overrides the output length " + "from the ShareGPT dataset.") + + random_group = parser.add_argument_group("random dataset options") + random_group.add_argument( + "--random-input-len", + type=int, + default=1024, + help= + "Number of input tokens per request, used only for random sampling.", + ) + random_group.add_argument( + "--random-output-len", + type=int, + default=128, + help= + "Number of output tokens per request, used only for random sampling.", + ) + random_group.add_argument( + "--random-range-ratio", + type=float, + default=1.0, + help="Range of sampled ratio of input/output length, " + "used only for random sampling.", + ) + random_group.add_argument( + "--random-prefix-len", + type=int, + default=0, + help="Number of fixed prefix tokens before random " + " context. The length range of context in a random " + " request is [random-prefix-len, " + " random-prefix-len + random-prefix-len * random-range-ratio).") + + hf_group = parser.add_argument_group("hf dataset options") + hf_group.add_argument( + "--hf-output-len", + type=int, + default=None, + help="Output length for each request. 
Overrides the output lengths " + "from the sampled HF dataset.", + ) + hf_group.add_argument("--hf-subset", + type=str, + default=None, + help="Subset of the HF dataset.") + hf_group.add_argument("--hf-split", + type=str, + default=None, + help="Split of the HF dataset.") + args = parser.parse_args() main(args) From 176fe651bfca0951191d4b7b7605a7f34ab0512f Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Mon, 16 Sep 2024 20:57:35 +0800 Subject: [PATCH 8/9] reorder dataset arguments --- benchmarks/benchmark_serving.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 40a7e9c17971..66f6888fa76d 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -882,14 +882,8 @@ def main(args: argparse.Namespace): "Use \"--percentile-metrics\" to select metrics.", ) + # group for dataset specific arguments sonnet_group = parser.add_argument_group("sonnet dataset options") - sonnet_group.add_argument( - "--sonnet-prefix-len", - type=int, - default=200, - help= - "Number of prefix tokens per request, used only for sonnet dataset.", - ) sonnet_group.add_argument( "--sonnet-input-len", type=int, @@ -904,6 +898,13 @@ def main(args: argparse.Namespace): help= "Number of output tokens per request, used only for sonnet dataset.", ) + sonnet_group.add_argument( + "--sonnet-prefix-len", + type=int, + default=200, + help= + "Number of prefix tokens per request, used only for sonnet dataset.", + ) sharegpt_group = parser.add_argument_group("sharegpt dataset options") sharegpt_group.add_argument( @@ -945,13 +946,6 @@ def main(args: argparse.Namespace): " random-prefix-len + random-prefix-len * random-range-ratio).") hf_group = parser.add_argument_group("hf dataset options") - hf_group.add_argument( - "--hf-output-len", - type=int, - default=None, - help="Output length for each request. Overrides the output lengths " - "from the sampled HF dataset.", - ) hf_group.add_argument("--hf-subset", type=str, default=None, @@ -960,6 +954,13 @@ def main(args: argparse.Namespace): type=str, default=None, help="Split of the HF dataset.") + hf_group.add_argument( + "--hf-output-len", + type=int, + default=None, + help="Output length for each request. Overrides the output lengths " + "from the sampled HF dataset.", + ) args = parser.parse_args() main(args) From aab403170f0519ec07166d72425586610a7cb0fc Mon Sep 17 00:00:00 2001 From: Isotr0py <2037008807@qq.com> Date: Mon, 16 Sep 2024 21:28:58 +0800 Subject: [PATCH 9/9] update value error message --- benchmarks/benchmark_serving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 66f6888fa76d..3ace910a6cac 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -410,7 +410,7 @@ async def benchmark( if backend != "openai-chat" and test_mm_content is not None: # multi-modal benchmark is only available on OpenAI Chat backend. raise ValueError( - "Multi-modal content is only supported for OpenAI Chat backend.") + "Multi-modal content is only supported on 'openai-chat' backend.") test_input = RequestFuncInput( model=model_id, prompt=test_prompt,
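
The core technique this series converges on (PATCH 1 introduces it, PATCH 2 corrects it) is embedding each dataset image into the request as a base64-encoded JPEG data URL. PIL's Image.tobytes() returns raw pixel data, not an encoded JPEG stream, so the image has to be re-serialized through an in-memory buffer before base64-encoding. A minimal, self-contained sketch of that step follows; the helper name is illustrative and does not appear in the patches.

import base64
import io

from PIL import Image


def pil_image_to_data_url(image: Image.Image) -> str:
    # JPEG cannot store an alpha channel or palette modes, hence the
    # explicit RGB conversion before encoding (added in PATCH 2).
    image = image.convert("RGB")
    buffer = io.BytesIO()
    # Serialize to a real JPEG stream; tobytes() would yield raw pixels
    # that no image_url consumer could decode (the PATCH 1 bug).
    image.save(buffer, format="JPEG")
    image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{image_base64}"


if __name__ == "__main__":
    # Round-trip a dummy image to show the data-URL prefix.
    print(pil_image_to_data_url(Image.new("RGBA", (64, 64)))[:40])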
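
On the request side, PATCH 3 and PATCH 4 settle the payload shape for the OpenAI chat backend: the text part is always present, and the single multi-modal dict is appended only when it exists. list.append() rather than list.extend() matters here because multi_modal_content is one dict, and extend() would iterate it and splice in its keys ("type", "image_url") as bare strings. Below is a sketch of the resulting /v1/chat/completions body; the prompt text and model id are illustrative values, not taken from the patches.

# Built as in async_request_openai_chat_completions after PATCH 4.
content = [{"type": "text", "text": "What is in this image?"}]
multi_modal_content = {  # shape produced by sample_hf_requests
    "type": "image_url",
    "image_url": {"url": "data:image/jpeg;base64,..."},  # truncated for brevity
}
if multi_modal_content:  # text-only requests skip this (the PATCH 3 fix)
    content.append(multi_modal_content)

payload = {
    "model": "example/vision-model",  # illustrative, not from the patches
    "messages": [{"role": "user", "content": content}],
    "temperature": 0.0,
}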
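
End to end, the new path can be exercised with something like: python benchmarks/benchmark_serving.py --backend openai-chat --model <model> --dataset-name hf --dataset-path <hf-dataset-id> --hf-split train --num-prompts 100. The dataset-specific flag names come from the diffs; the split value is only an example, and --backend/--model are the script's pre-existing arguments. Note that the target HF dataset must expose a "conversations" column in ShareGPT style, per the assertion added in PATCH 5, and that PATCH 7 (message reworded in PATCH 9) makes any backend other than "openai-chat" fail fast with a ValueError as soon as the sampled requests contain image content.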