# Patch summary (free text before the first "diff --git" header).
#
# src/guidellm/benchmark/output.py
#   * In finalize(), the headers/values returned by
#     self._get_benchmark_desc_headers_and_values(benchmark) are appended to
#     benchmark_headers / benchmark_values before the status-based metrics.
#     NOTE(review): that helper is not defined in this patch; presumably it
#     already exists in the target file -- confirm.
#   * Adds _get_benchmark_extras_headers_and_values(), which returns the
#     headers ["Profile", "Backend", "Generator Data"] together with the
#     profile dumped via model_dump_json(), the backend passed through
#     json.dumps(), and requests["attributes"]["data"] passed through
#     json.dumps(). It raises ValueError if the two lists differ in length.
#     NOTE(review): no call site for this helper appears in this patch, yet
#     the updated test expects a "Profile" header; verify finalize() calls it.
#
# tests/unit/benchmark/test_output.py
#   * Removes the skip marker from test_file_csv and additionally asserts
#     that "Profile" is among the CSV headers.
#
# tests/unit/mock_benchmark.py
#   * The mock benchmarker's `requests` dict now carries
#     {"attributes": {"data": ...}}, the key path read by the new helper.
#
# NOTE(review): the copy of this patch under review had its line breaks and
# indentation collapsed. Line structure is restored below; the leading
# whitespace of context/added lines was reconstructed from Python syntax and
# must be confirmed against the target files (or applied with
# whitespace-insensitive matching) -- TODO confirm.

diff --git a/src/guidellm/benchmark/output.py b/src/guidellm/benchmark/output.py
index 6cbb1865..56775dac 100644
--- a/src/guidellm/benchmark/output.py
+++ b/src/guidellm/benchmark/output.py
@@ -567,6 +567,13 @@ async def finalize(self, report: GenerativeBenchmarksReport) -> Path:
                 benchmark_headers: list[str] = []
                 benchmark_values: list[str | float | list[float]] = []
 
+                # Add basic run description info
+                desc_headers, desc_values = (
+                    self._get_benchmark_desc_headers_and_values(benchmark)
+                )
+                benchmark_headers.extend(desc_headers)
+                benchmark_values.extend(desc_values)
+
                 # Add status-based metrics
                 for status in StatusDistributionSummary.model_fields:
                     status_headers, status_values = (
@@ -672,6 +679,21 @@ def _get_benchmark_status_metrics_stats(
         ]
         return headers, values
 
+    def _get_benchmark_extras_headers_and_values(
+        self, benchmark: GenerativeBenchmark,
+    ) -> tuple[list[str], list[str]]:
+        headers = ["Profile", "Backend", "Generator Data"]
+        values: list[str] = [
+            benchmark.benchmarker.profile.model_dump_json(),
+            json.dumps(benchmark.benchmarker.backend),
+            json.dumps(benchmark.benchmarker.requests["attributes"]["data"]),
+        ]
+
+        if len(headers) != len(values):
+            raise ValueError("Headers and values length mismatch.")
+
+        return headers, values
+
 
 @GenerativeBenchmarkerOutput.register("html")
 class GenerativeBenchmarkerHTML(GenerativeBenchmarkerOutput):
diff --git a/tests/unit/benchmark/test_output.py b/tests/unit/benchmark/test_output.py
index 85979c12..6763d978 100644
--- a/tests/unit/benchmark/test_output.py
+++ b/tests/unit/benchmark/test_output.py
@@ -80,7 +80,6 @@ def test_file_yaml():
     mock_path.unlink()
 
 
-@pytest.mark.skip(reason="CSV fix not merged yet")
 @pytest.mark.asyncio
 async def test_file_csv():
     mock_benchmark = mock_generative_benchmark()
@@ -96,6 +95,7 @@ async def test_file_csv():
         rows = list(reader)
 
     assert "Type" in headers
+    assert "Profile" in headers
     assert len(rows) == 1
 
     mock_path.unlink()
diff --git a/tests/unit/mock_benchmark.py b/tests/unit/mock_benchmark.py
index c0d6aa34..cdf4375a 100644
--- a/tests/unit/mock_benchmark.py
+++ b/tests/unit/mock_benchmark.py
@@ -76,7 +76,11 @@ def mock_generative_benchmark() -> GenerativeBenchmark:
         ),
         benchmarker=BenchmarkerDict(
             profile=SynchronousProfile.create("synchronous", rate=None),
-            requests={},
+            requests={
+                "attributes": {
+                    "data": "prompt_tokens=256,output_tokens=128",
+                },
+            },
             backend={},
             environment={},
             aggregators={},