From 50583a613e94a65401e8e21a050834fb7bf4a4a5 Mon Sep 17 00:00:00 2001 From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> Date: Thu, 29 May 2025 04:54:59 +0000 Subject: [PATCH 1/2] Download ShareGPT dataset if no dataset_path is given Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> --- .../serve/scripts/benchmark_dataset.py | 12 ++-- .../serve/scripts/benchmark_serving.py | 9 ++- tensorrt_llm/serve/scripts/benchmark_utils.py | 61 ++++++++++++++++++- 3 files changed, 76 insertions(+), 6 deletions(-) diff --git a/tensorrt_llm/serve/scripts/benchmark_dataset.py b/tensorrt_llm/serve/scripts/benchmark_dataset.py index 494c68f526f..93bb3d338ca 100644 --- a/tensorrt_llm/serve/scripts/benchmark_dataset.py +++ b/tensorrt_llm/serve/scripts/benchmark_dataset.py @@ -25,6 +25,7 @@ import numpy as np import pandas as pd +from benchmark_utils import download_and_cache_file from datasets import load_dataset from transformers import PreTrainedTokenizerBase @@ -242,14 +243,17 @@ class ShareGPTDataset(BenchmarkDataset): Implements the ShareGPT dataset. Loads data from a JSON file and generates sample requests based on conversation turns. """ + URL = "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json" - def __init__(self, **kwargs) -> None: + def __init__(self, download_path: Optional[str] = None, **kwargs) -> None: super().__init__(**kwargs) - self.load_data() + self.load_data(download_path) - def load_data(self) -> None: + def load_data(self, download_path: Optional[str] = None) -> None: if self.dataset_path is None: - raise ValueError("dataset_path must be provided for loading data.") + self.dataset_path = download_and_cache_file( + ShareGPTDataset.URL, download_path, + ShareGPTDataset.URL.split("/")[-1]) with open(self.dataset_path, encoding="utf-8") as f: self.data = json.load(f) diff --git a/tensorrt_llm/serve/scripts/benchmark_serving.py b/tensorrt_llm/serve/scripts/benchmark_serving.py index 70f4576fcfa..b4a6714e32d 100644 --- a/tensorrt_llm/serve/scripts/benchmark_serving.py +++ b/tensorrt_llm/serve/scripts/benchmark_serving.py @@ -612,7 +612,8 @@ def main(args: argparse.Namespace): # For datasets that follow a similar structure, use a mapping. dataset_mapping = { "sharegpt": - lambda: ShareGPTDataset(random_seed=args.seed, + lambda: ShareGPTDataset(download_path=args.download_path, + random_seed=args.seed, dataset_path=args.dataset_path).sample( tokenizer=tokenizer, num_requests=args.num_prompts, @@ -778,6 +779,12 @@ def main(args: argparse.Namespace): default=None, help="Path to the sharegpt/sonnet dataset. " "Or the huggingface dataset ID if using HF dataset.") + parser.add_argument( + "--download-path", + type=str, + default=None, + help="Path to download dataset if dataset-path is None, " + "only sharegpt is supported for now") parser.add_argument( "--max-concurrency", type=int, diff --git a/tensorrt_llm/serve/scripts/benchmark_utils.py b/tensorrt_llm/serve/scripts/benchmark_utils.py index 638ac22b679..5da964541af 100644 --- a/tensorrt_llm/serve/scripts/benchmark_utils.py +++ b/tensorrt_llm/serve/scripts/benchmark_utils.py @@ -6,7 +6,10 @@ import json import math import os -from typing import Any +from typing import Any, Optional + +import requests +from tqdm.asyncio import tqdm def convert_to_pytorch_benchmark_format(args: argparse.Namespace, @@ -69,3 +72,59 @@ def iterencode(self, o: Any, *args, **kwargs) -> Any: def write_to_json(filename: str, records: list) -> None: with open(filename, "w") as f: json.dump(records, f, cls=InfEncoder) + + +def download_and_cache_file(url: str, path: Optional[str], name: str) -> str: + # Adapted from + # https://github.com/sgl-project/sglang/blob/58f10679e1850fdc86046057c23bac5193156de9/python/sglang/bench_serving.py#L586 + """Read and cache a file from a url.""" + + # Check if the path is valid and if the file exists + if path is None or not os.path.exists(path): + raise ValueError("download_path is not provided or does not exist") + filename = os.path.join(path, name) + + if is_file_valid_json(filename): + return filename + + print(f"Downloading from {url} to {filename}") + + # Stream the response to show the progress bar + response = requests.get(url, stream=True) + response.raise_for_status() # Check for request errors + + # Total size of the file in bytes + total_size = int(response.headers.get("content-length", 0)) + chunk_size = 1024 # Download in chunks of 1KB + + # Use tqdm to display the progress bar + with open(filename, "wb") as f, tqdm( + desc=filename, + total=total_size, + unit="B", + unit_scale=True, + unit_divisor=1024, + ) as bar: + for chunk in response.iter_content(chunk_size=chunk_size): + f.write(chunk) + bar.update(len(chunk)) + + return filename + + +def is_file_valid_json(path) -> bool: + # Adapted from + # https://github.com/sgl-project/sglang/blob/58f10679e1850fdc86046057c23bac5193156de9/python/sglang/bench_serving.py#L620 + if not os.path.isfile(path): + return False + + # TODO can fuse into the real file open later + try: + with open(path) as f: + json.load(f) + return True + except json.JSONDecodeError as e: + print( + f"{path} exists but json loading fails ({e=}), thus treat as invalid file" + ) + return False From e85f2de81e13f663b1c31b564031a1d71349f17a Mon Sep 17 00:00:00 2001 From: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> Date: Thu, 29 May 2025 07:13:20 +0000 Subject: [PATCH 2/2] Add request timeout Signed-off-by: Pengyun Lin <81065165+LinPoly@users.noreply.github.com> --- tensorrt_llm/serve/scripts/benchmark_dataset.py | 14 ++++++++++---- tensorrt_llm/serve/scripts/benchmark_serving.py | 5 +++++ tensorrt_llm/serve/scripts/benchmark_utils.py | 5 +++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tensorrt_llm/serve/scripts/benchmark_dataset.py b/tensorrt_llm/serve/scripts/benchmark_dataset.py index 93bb3d338ca..1948d32892a 100644 --- a/tensorrt_llm/serve/scripts/benchmark_dataset.py +++ b/tensorrt_llm/serve/scripts/benchmark_dataset.py @@ -245,15 +245,21 @@ class ShareGPTDataset(BenchmarkDataset): """ URL = "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json" - def __init__(self, download_path: Optional[str] = None, **kwargs) -> None: + def __init__(self, + download_timeout: int, + download_path: Optional[str] = None, + **kwargs) -> None: super().__init__(**kwargs) - self.load_data(download_path) + self.load_data(download_timeout, download_path) - def load_data(self, download_path: Optional[str] = None) -> None: + def load_data(self, + download_timeout: int, + download_path: Optional[str] = None) -> None: if self.dataset_path is None: + logger.warning("dataset_path is not provided") self.dataset_path = download_and_cache_file( ShareGPTDataset.URL, download_path, - ShareGPTDataset.URL.split("/")[-1]) + ShareGPTDataset.URL.split("/")[-1], download_timeout) with open(self.dataset_path, encoding="utf-8") as f: self.data = json.load(f) diff --git a/tensorrt_llm/serve/scripts/benchmark_serving.py b/tensorrt_llm/serve/scripts/benchmark_serving.py index b4a6714e32d..89e452671fe 100644 --- a/tensorrt_llm/serve/scripts/benchmark_serving.py +++ b/tensorrt_llm/serve/scripts/benchmark_serving.py @@ -613,6 +613,7 @@ def main(args: argparse.Namespace): dataset_mapping = { "sharegpt": lambda: ShareGPTDataset(download_path=args.download_path, + download_timeout=args.download_timeout, random_seed=args.seed, dataset_path=args.dataset_path).sample( tokenizer=tokenizer, @@ -785,6 +786,10 @@ def main(args: argparse.Namespace): default=None, help="Path to download dataset if dataset-path is None, " "only sharegpt is supported for now") + parser.add_argument("--download-timeout", + type=int, + default=180, + help="Timeout for downloading datasets") parser.add_argument( "--max-concurrency", type=int, diff --git a/tensorrt_llm/serve/scripts/benchmark_utils.py b/tensorrt_llm/serve/scripts/benchmark_utils.py index 5da964541af..946a6ceeb07 100644 --- a/tensorrt_llm/serve/scripts/benchmark_utils.py +++ b/tensorrt_llm/serve/scripts/benchmark_utils.py @@ -74,7 +74,8 @@ def write_to_json(filename: str, records: list) -> None: json.dump(records, f, cls=InfEncoder) -def download_and_cache_file(url: str, path: Optional[str], name: str) -> str: +def download_and_cache_file(url: str, path: Optional[str], name: str, + timeout: int) -> str: # Adapted from # https://github.com/sgl-project/sglang/blob/58f10679e1850fdc86046057c23bac5193156de9/python/sglang/bench_serving.py#L586 """Read and cache a file from a url.""" @@ -90,7 +91,7 @@ def download_and_cache_file(url: str, path: Optional[str], name: str) -> str: print(f"Downloading from {url} to {filename}") # Stream the response to show the progress bar - response = requests.get(url, stream=True) + response = requests.get(url, stream=True, timeout=timeout) response.raise_for_status() # Check for request errors # Total size of the file in bytes