diff --git a/benchmarks/README.md b/benchmarks/README.md
index 4777d8329f2d..b0417631c514 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -51,6 +51,12 @@ become available.
     <td style="text-align: center;">✅</td>
     <td style="text-align: center;">✅</td>
     <td><code>likaixin/InstructCoder</code></td>
   </tr>
+  <tr>
+    <td><strong>HuggingFace-AIMO</strong></td>
+    <td style="text-align: center;">✅</td>
+    <td style="text-align: center;">✅</td>
+    <td><code>AI-MO/aimo-validation-aime</code>, <code>AI-MO/NuminaMath-1.5</code>, <code>AI-MO/NuminaMath-CoT</code></td>
+  </tr>
   <tr>
     <td><strong>HuggingFace-Other</strong></td>
@@ -187,6 +193,17 @@ python3 vllm/benchmarks/benchmark_serving.py \
     --num-prompts 10
 ```
 
+**`AI-MO/aimo-validation-aime`**
+
+```bash
+python3 vllm/benchmarks/benchmark_serving.py \
+    --model Qwen/QwQ-32B \
+    --dataset-name hf \
+    --dataset-path AI-MO/aimo-validation-aime \
+    --num-prompts 10 \
+    --seed 42
+```
+
 ---
 ## Example - Offline Throughput Benchmark
 
@@ -278,6 +295,18 @@ python3 vllm/benchmarks/benchmark_throughput.py \
     --num-prompts 10
 ```
 
+**`AI-MO/aimo-validation-aime`**
+
+```bash
+python3 benchmarks/benchmark_throughput.py \
+    --model Qwen/QwQ-32B \
+    --backend vllm \
+    --dataset-name hf \
+    --dataset-path AI-MO/aimo-validation-aime \
+    --hf-split train \
+    --num-prompts 10
+```
+
 ### Benchmark with LoRA Adapters
 
 ``` bash
diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index 1ff63f0a4479..d0d7dfa1d795 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -11,10 +11,10 @@
 
 import torch
 import uvloop
-from benchmark_dataset import (BurstGPTDataset, ConversationDataset,
-                               InstructCoderDataset, RandomDataset,
-                               SampleRequest, ShareGPTDataset, SonnetDataset,
-                               VisionArenaDataset)
+from benchmark_dataset import (AIMODataset, BurstGPTDataset,
+                               ConversationDataset, InstructCoderDataset,
+                               RandomDataset, SampleRequest, ShareGPTDataset,
+                               SonnetDataset, VisionArenaDataset)
 from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 from tqdm import tqdm
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
@@ -332,7 +332,10 @@ def get_requests(args, tokenizer):
         common_kwargs['dataset_subset'] = args.hf_subset
         common_kwargs['dataset_split'] = args.hf_split
         sample_kwargs["enable_multimodal_chat"] = True
-
+    elif args.dataset_path in AIMODataset.SUPPORTED_DATASET_PATHS:
+        dataset_cls = AIMODataset
+        common_kwargs['dataset_subset'] = None
+        common_kwargs['dataset_split'] = "train"
     else:
         raise ValueError(f"Unknown dataset name: {args.dataset_name}")
     # Remove None values
@@ -467,12 +470,13 @@ def validate_args(args):
                       since --dataset-name is not 'hf'.",
                       stacklevel=2)
     elif args.dataset_name == "hf":
-        if args.dataset_path in VisionArenaDataset.SUPPORTED_DATASET_PATHS:
-            assert args.backend == "vllm-chat", "VisionArenaDataset needs to use vllm-chat as the backend."  #noqa: E501
-        elif args.dataset_path in InstructCoderDataset.SUPPORTED_DATASET_PATHS:
-            assert args.backend == "vllm", "InstructCoder dataset needs to use vllm as the backend."  #noqa: E501
-        elif args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS:
-            assert args.backend == "vllm-chat", "ConversationDataset needs to use vllm-chat as the backend."  #noqa: E501
+        if args.dataset_path in (
+                VisionArenaDataset.SUPPORTED_DATASET_PATHS.keys()
+                | ConversationDataset.SUPPORTED_DATASET_PATHS):
+            assert args.backend == "vllm-chat", f"{args.dataset_path} needs to use vllm-chat as the backend."  #noqa: E501
+        elif args.dataset_path in (InstructCoderDataset.SUPPORTED_DATASET_PATHS
+                                   | AIMODataset.SUPPORTED_DATASET_PATHS):
+            assert args.backend == "vllm", f"{args.dataset_path} needs to use vllm as the backend."  #noqa: E501
         else:
             raise ValueError(
                 f"{args.dataset_path} is not supported by hf dataset.")