diff --git a/benchmarks/README.md b/benchmarks/README.md
index 4777d8329f2d..b0417631c514 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -51,6 +51,7 @@ become available.
| HuggingFace-InstructCoder | ✅ | ✅ | likaixin/InstructCoder |
+| HuggingFace-AIMO | ✅ | ✅ | AI-MO/aimo-validation-aime, AI-MO/NuminaMath-1.5, AI-MO/NuminaMath-CoT |
| HuggingFace-Other |
@@ -187,6 +193,17 @@ python3 vllm/benchmarks/benchmark_serving.py \
--num-prompts 10
```
+**`AI-MO/aimo-validation-aime`**
+
+``` bash
+python3 vllm/benchmarks/benchmark_serving.py \
+ --model Qwen/QwQ-32B \
+ --dataset-name hf \
+ --dataset-path AI-MO/aimo-validation-aime \
+ --num-prompts 10 \
+ --seed 42
+```
+
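As with the other serving examples in this README, the command above talks to an already-running, OpenAI-compatible vLLM server; one way to start one for this model (a sketch, not part of this diff) is:

```bash
vllm serve Qwen/QwQ-32B
```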
---
## Example - Offline Throughput Benchmark
@@ -278,6 +295,18 @@ python3 vllm/benchmarks/benchmark_throughput.py \
--num-prompts 10
```
+**`AI-MO/aimo-validation-aime`**
+
+```bash
+python3 vllm/benchmarks/benchmark_throughput.py \
+ --model Qwen/QwQ-32B \
+ --backend vllm \
+ --dataset-name hf \
+ --dataset-path AI-MO/aimo-validation-aime \
+ --hf-split train \
+ --num-prompts 10
+```
+
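Before running the throughput benchmark, it can help to peek at what this dataset actually contains. A quick sketch using the `datasets` library; the `problem` column name is an assumption about the AIME validation set's schema, not something this diff specifies:

```python
from datasets import load_dataset

# Load the same split the benchmark uses (--hf-split train).
ds = load_dataset("AI-MO/aimo-validation-aime", split="train")
print(len(ds))           # number of problems available to sample from
print(ds[0]["problem"])  # "problem" field name is assumed, not confirmed here
```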
### Benchmark with LoRA Adapters
``` bash
diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py
index 1ff63f0a4479..d0d7dfa1d795 100644
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -11,10 +11,10 @@
import torch
import uvloop
-from benchmark_dataset import (BurstGPTDataset, ConversationDataset,
-                               InstructCoderDataset, RandomDataset,
-                               SampleRequest, ShareGPTDataset, SonnetDataset,
-                               VisionArenaDataset)
+from benchmark_dataset import (AIMODataset, BurstGPTDataset,
+                               ConversationDataset, InstructCoderDataset,
+                               RandomDataset, SampleRequest, ShareGPTDataset,
+                               SonnetDataset, VisionArenaDataset)
from benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
from tqdm import tqdm
from transformers import (AutoModelForCausalLM, AutoTokenizer,
@@ -332,7 +332,10 @@ def get_requests(args, tokenizer):
            common_kwargs['dataset_subset'] = args.hf_subset
            common_kwargs['dataset_split'] = args.hf_split
            sample_kwargs["enable_multimodal_chat"] = True
-
+        elif args.dataset_path in AIMODataset.SUPPORTED_DATASET_PATHS:
+            dataset_cls = AIMODataset
+            common_kwargs['dataset_subset'] = None
+            common_kwargs['dataset_split'] = "train"
    else:
        raise ValueError(f"Unknown dataset name: {args.dataset_name}")
    # Remove None values
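The new branch dispatches to `AIMODataset`, which this diff imports from `benchmark_dataset` but whose definition it does not include. Purely as an illustrative sketch, assuming it follows the same pattern as the other HuggingFace-backed dataset classes referenced here (a class-level `SUPPORTED_DATASET_PATHS` collection plus a `sample()` method producing `SampleRequest` objects), it might look something like:

```python
# Hypothetical sketch only -- the real AIMODataset lives in
# benchmark_dataset.py, which this diff does not show.
class AIMODataset(HuggingFaceDataset):
    """Math problems from the AI-MO collections, prompted as plain text."""

    # Supported paths, matching the README table above; being a plain set
    # (not a dict) is what lets validate_args union it directly below.
    SUPPORTED_DATASET_PATHS = {
        "AI-MO/aimo-validation-aime",
        "AI-MO/NuminaMath-1.5",
        "AI-MO/NuminaMath-CoT",
    }

    def sample(self, tokenizer, num_requests: int, **kwargs):
        requests = []
        for item in self.data:
            if len(requests) >= num_requests:
                break
            prompt = item["problem"]  # column name assumed
            requests.append(
                SampleRequest(
                    prompt=prompt,
                    prompt_len=len(tokenizer(prompt).input_ids),
                    expected_output_len=2048,  # illustrative default
                ))
        return requests
```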
@@ -467,12 +470,13 @@ def validate_args(args):
                since --dataset-name is not 'hf'.",
                      stacklevel=2)
    elif args.dataset_name == "hf":
-        if args.dataset_path in VisionArenaDataset.SUPPORTED_DATASET_PATHS:
-            assert args.backend == "vllm-chat", "VisionArenaDataset needs to use vllm-chat as the backend." #noqa: E501
-        elif args.dataset_path in InstructCoderDataset.SUPPORTED_DATASET_PATHS:
-            assert args.backend == "vllm", "InstructCoder dataset needs to use vllm as the backend." #noqa: E501
-        elif args.dataset_path in ConversationDataset.SUPPORTED_DATASET_PATHS:
-            assert args.backend == "vllm-chat", "ConversationDataset needs to use vllm-chat as the backend." #noqa: E501
+        if args.dataset_path in (
+                VisionArenaDataset.SUPPORTED_DATASET_PATHS.keys()
+                | ConversationDataset.SUPPORTED_DATASET_PATHS):
+            assert args.backend == "vllm-chat", f"{args.dataset_path} needs to use vllm-chat as the backend." #noqa: E501
+        elif args.dataset_path in (InstructCoderDataset.SUPPORTED_DATASET_PATHS
+                                   | AIMODataset.SUPPORTED_DATASET_PATHS):
+            assert args.backend == "vllm", f"{args.dataset_path} needs to use vllm as the backend." #noqa: E501
        else:
            raise ValueError(
                f"{args.dataset_path} is not supported by hf dataset.")