NVIDIA · kaiyux · Jun 26, 2025 · Jun 26, 2025 · Jun 26, 2025
diff --git a/tensorrt_llm/serve/scripts/benchmark_dataset.py b/tensorrt_llm/serve/scripts/benchmark_dataset.py
@@ -319,6 +319,67 @@ def sample(
         return requests
 
 
+# -----------------------------------------------------------------------------
+# Custom Dataset Implementation
+# -----------------------------------------------------------------------------
+
+
+class CustomDataset(BenchmarkDataset):
+    """TensorRT-LLM custom dataset backed by a JSON Lines file.
+
+    Each non-blank line of the file is one JSON object holding a minimal
+    OpenAI API format request. Example sample (wrapped here for readability;
+    in the file it must occupy a single line):
+
+        {"input": {"messages": [{"role": "system", "content": ""},
+                                {"role": "user", "content": ""}],
+                   "max_tokens": 2048}}
+    """
+
+    def __init__(self, dataset_path: str, **kwargs) -> None:
+        """Store the path, forward ``kwargs`` to the base class, and load."""
+        super().__init__(**kwargs)
+        self.dataset_path = dataset_path
+        self.data = []
+        self.load_data()
+
+    def load_data(self) -> None:
+        """Load the JSON Lines file into ``self.data`` and shuffle it (seeded).
+
+        Raises:
+            ValueError: If ``self.dataset_path`` is None.
+        """
+        if self.dataset_path is None:
+            raise ValueError("--dataset-path is not provided")
+        with open(self.dataset_path, encoding="utf-8") as f:
+            for line in f:
+                # Skip blank lines; json.loads would raise JSONDecodeError.
+                if line.strip():
+                    self.data.append(json.loads(line))
+        random.seed(self.random_seed)
+        random.shuffle(self.data)
+
+    def sample(self, tokenizer: PreTrainedTokenizerBase,
+               num_requests: int) -> list[SampleRequest]:
+        """Return at most ``num_requests`` requests, one per loaded entry."""
+        samples: list = []
+        for entry in self.data:
+            if len(samples) >= num_requests:
+                break
+            # messages[1] is the user turn in the documented sample layout.
+            prompt = entry["input"]["messages"][1]["content"]
+            prompt_ids = tokenizer(prompt).input_ids
+            prompt_len = len(prompt_ids)
+            max_tokens = entry["input"]["max_tokens"]
+            samples.append(
+                SampleRequest(
+                    prompt=prompt,
+                    prompt_len=prompt_len,
+                    expected_output_len=max_tokens,
+                ))
+        return samples
+
+
 # -----------------------------------------------------------------------------
 # ShareGPT Dataset Implementation
 # -----------------------------------------------------------------------------

diff --git a/tensorrt_llm/serve/scripts/benchmark_serving.py b/tensorrt_llm/serve/scripts/benchmark_serving.py
@@ -38,10 +38,10 @@
                                    OPENAI_COMPATIBLE_BACKENDS, RequestFuncInput,
                                    RequestFuncOutput, get_tokenizer)
 from .benchmark_dataset import (AIMODataset, BurstGPTDataset,
-                                ConversationDataset, HuggingFaceDataset,
-                                InstructCoderDataset, RandomDataset,
-                                SampleRequest, ShareGPTDataset, SonnetDataset,
-                                VisionArenaDataset)
+                                ConversationDataset, CustomDataset,
+                                HuggingFaceDataset, InstructCoderDataset,
+                                RandomDataset, SampleRequest, ShareGPTDataset,
+                                SonnetDataset, VisionArenaDataset)
 from .benchmark_utils import convert_to_pytorch_benchmark_format, write_to_json
 
 MILLISECONDS_TO_SECONDS_CONVERSION = 1000
@@ -613,6 +613,13 @@ def main(args: argparse.Namespace):
             output_len=args.hf_output_len,
         )
 
+    elif args.dataset_name == "trtllm_custom":
+        input_requests = CustomDataset(dataset_path=args.dataset_path,
+                                       random_seed=args.seed).sample(
+                                           num_requests=args.num_prompts,
+                                           tokenizer=tokenizer,
+                                       )
+
     else:
         # For datasets that follow a similar structure, use a mapping.
         dataset_mapping = {
@@ -783,7 +790,9 @@ def main(args: argparse.Namespace):
         "--dataset-name",
         type=str,
         default="sharegpt",
-        choices=["sharegpt", "burstgpt", "sonnet", "random", "hf"],
+        choices=[
+            "sharegpt", "burstgpt", "sonnet", "random", "hf", "trtllm_custom"
+        ],
         help="Name of the dataset to benchmark on.",
     )
     parser.add_argument("--dataset-path",