Skip to content

Commit 4331f10

Browse files
authored
Merge d8926f1 into f8f6f9d
2 parents f8f6f9d + d8926f1 commit 4331f10

File tree

3 files changed

+145
-6
lines changed

3 files changed

+145
-6
lines changed

src/guidellm/presentation/data_models.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
import random
22
from collections import defaultdict
33
from math import ceil
4-
from typing import TYPE_CHECKING, Optional, Union
4+
from typing import TYPE_CHECKING, Any, Optional, Union
55

6+
import httpx
67
from pydantic import BaseModel, computed_field
78

89
if TYPE_CHECKING:
910
from guidellm.benchmark.benchmark import GenerativeBenchmark
1011

12+
from guidellm.dataset.file import FileDatasetCreator
13+
from guidellm.dataset.hf_datasets import HFDatasetsCreator
14+
from guidellm.dataset.in_memory import InMemoryDatasetCreator
15+
from guidellm.dataset.synthetic import SyntheticDatasetConfig, SyntheticDatasetCreator
1116
from guidellm.objects.statistics import DistributionSummary
1217

1318

@@ -58,6 +63,39 @@ class Model(BaseModel):
5863
class Dataset(BaseModel):
    """Identifies the dataset that drove a benchmark run."""

    # Human-readable dataset identifier displayed in the UI.
    name: str

    @classmethod
    def from_data(cls, request_loader: Any, random_seed: int = 42) -> "Dataset":
        """Derive a ``Dataset`` from a benchmark request loader.

        Walks the known dataset creators in priority order, asks the first
        one that supports the loader's raw ``data`` to build the dataset,
        and extracts a display name from the result. When no name can be
        extracted, falls back to a creator-specific heuristic.

        Args:
            request_loader: Object exposing ``data``, ``data_args``,
                ``processor`` and ``processor_args`` attributes.
            random_seed: Seed forwarded to ``handle_create`` (defaults to
                42, the value previously hard-coded).

        Returns:
            A ``Dataset`` whose ``name`` is the best available identifier,
            or an empty string when none could be determined.
        """
        creators = [
            InMemoryDatasetCreator,
            SyntheticDatasetCreator,
            FileDatasetCreator,
            HFDatasetsCreator,
        ]
        data = request_loader.data
        data_args = request_loader.data_args
        processor = request_loader.processor
        processor_args = request_loader.processor_args

        dataset_name: Optional[str] = None
        for creator in creators:
            # NOTE(review): is_supported is probed with processor=None,
            # matching the original behavior — confirm this is intended.
            if not creator.is_supported(data, None):
                continue
            dataset = creator.handle_create(
                data, data_args, processor, processor_args, random_seed
            )
            dataset_name = creator.extract_dataset_name(dataset)
            if not dataset_name:
                # Creator-specific fallbacks when the created dataset
                # carries no name of its own.
                if creator is SyntheticDatasetCreator:
                    config = SyntheticDatasetConfig.parse_str(data)
                    dataset_name = config.source
                elif creator in (FileDatasetCreator, HFDatasetsCreator):
                    dataset_name = data
                elif creator is InMemoryDatasetCreator:
                    dataset_name = "In-memory"
            break
        return cls(name=dataset_name or "")
98+
6199

62100
class RunInfo(BaseModel):
63101
model: Model
@@ -71,11 +109,14 @@ def from_benchmarks(cls, benchmarks: list["GenerativeBenchmark"]):
71109
timestamp = max(
72110
bm.run_stats.start_time for bm in benchmarks if bm.start_time is not None
73111
)
112+
response = httpx.get(f"https://huggingface.co/api/models/{model}")
113+
model_json = response.json()
114+
74115
return cls(
75-
model=Model(name=model, size=0),
116+
model=Model(name=model, size=model_json.get("usedStorage", 0)),
76117
task="N/A",
77118
timestamp=timestamp,
78-
dataset=Dataset(name="N/A"),
119+
dataset=Dataset.from_data(benchmarks[0].request_loader),
79120
)
80121

81122

src/ui/lib/components/PageHeader/PageHeader.component.tsx

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
import { Box, Typography } from '@mui/material';
33

44
import { useGetRunInfoQuery } from '../../store/slices/runInfo';
5-
import { formateDate } from '../../utils/helpers';
5+
import { formateDate, getFileSize } from '../../utils/helpers';
66
import { SpecBadge } from '../SpecBadge';
77
import { HeaderCell, HeaderWrapper } from './PageHeader.styles';
88

99
export const Component = () => {
1010
const { data } = useGetRunInfoQuery();
11+
const modelSize = getFileSize(data?.model?.size || 0);
12+
1113
return (
1214
<Box py={2}>
1315
<Typography variant="subtitle2" color="surface.onSurfaceAccent">
@@ -24,11 +26,24 @@ export const Component = () => {
2426
variant="metric2"
2527
withTooltip
2628
/>
29+
<SpecBadge
30+
label="Model size"
31+
value={data?.model?.size ? `${modelSize?.size} ${modelSize?.units}` : '0B'}
32+
variant="body1"
33+
/>
34+
</HeaderCell>
35+
<HeaderCell item xs={5} withDivider>
36+
<SpecBadge
37+
label="Dataset"
38+
value={data?.dataset?.name || 'N/A'}
39+
variant="caption"
40+
withTooltip
41+
/>
2742
</HeaderCell>
2843
<HeaderCell item xs={2} sx={{ paddingRight: 0 }}>
2944
<SpecBadge
3045
label="Time Stamp"
31-
value={data?.timestamp ? formateDate(data?.timestamp) : 'n/a'}
46+
value={data?.timestamp ? formateDate(data?.timestamp) : 'N/A'}
3247
variant="caption"
3348
/>
3449
</HeaderCell>

tests/unit/presentation/test_data_models.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
import pytest
2+
from unittest.mock import MagicMock, patch
23

3-
from guidellm.presentation.data_models import Bucket
4+
from guidellm.dataset.file import FileDatasetCreator
5+
from guidellm.dataset.hf_datasets import HFDatasetsCreator
6+
from guidellm.dataset.in_memory import InMemoryDatasetCreator
7+
from guidellm.dataset.synthetic import SyntheticDatasetCreator
8+
from guidellm.presentation.data_models import Bucket, Dataset
9+
from tests.unit.mock_benchmark import mock_generative_benchmark
410

511

612
@pytest.mark.smoke
@@ -18,3 +24,80 @@ def test_bucket_from_data():
1824
assert buckets[1].value == 8.0
1925
assert buckets[1].count == 5
2026
assert bucket_width == 1
27+
28+
def mock_processor(cls):
    """Return the processor attached to a freshly built mock benchmark."""
    benchmark = mock_generative_benchmark()
    return benchmark.request_loader.processor
30+
31+
def new_handle_create(cls, *args, **kwargs):
    """Patched ``handle_create`` that yields a stub dataset object."""
    stub_dataset = MagicMock()
    return stub_dataset
33+
34+
def new_extract_dataset_name(cls, *args, **kwargs):
    """Patched ``extract_dataset_name`` returning a fixed synthetic-data name."""
    return "data:prideandprejudice.txt.gz"
36+
37+
@pytest.mark.smoke
def test_dataset_from_data_uses_extracted_dataset_name():
    """``Dataset.from_data`` should prefer the name the creator extracts."""
    mock_benchmark = mock_generative_benchmark()
    with (
        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            SyntheticDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name,
        ),
    ):
        dataset = Dataset.from_data(mock_benchmark.request_loader)
        assert dataset.name == "data:prideandprejudice.txt.gz"
47+
48+
def new_is_supported(cls, *args, **kwargs):
    """Patched ``is_supported`` that accepts any input unconditionally."""
    return True
50+
51+
@pytest.mark.smoke
def test_dataset_from_data_with_in_memory_dataset():
    """An in-memory data source maps to the fixed "In-memory" dataset name."""
    benchmark = mock_generative_benchmark()
    with patch.object(InMemoryDatasetCreator, "is_supported", new=new_is_supported):
        result = Dataset.from_data(benchmark.request_loader)
    assert result.name == "In-memory"
57+
58+
def hardcoded_isnt_supported(cls, *args, **kwargs):
    """Patched ``is_supported`` that rejects any input unconditionally."""
    return False
60+
61+
def new_extract_dataset_name_none(cls, *args, **kwargs):
    """Patched ``extract_dataset_name`` that simulates no name being found."""
    return None
63+
64+
@pytest.mark.smoke
def test_dataset_from_data_with_synthetic_dataset():
    """Falls back to the synthetic config's source when no name is extracted."""
    benchmark = mock_generative_benchmark()
    with (
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(SyntheticDatasetCreator, "is_supported", new=new_is_supported),
        patch.object(SyntheticDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            SyntheticDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        result = Dataset.from_data(benchmark.request_loader)
    assert result.name == "data:prideandprejudice.txt.gz"
75+
76+
@pytest.mark.smoke
def test_dataset_from_data_with_file_dataset():
    """Falls back to the raw data path for file-backed datasets."""
    benchmark = mock_generative_benchmark()
    benchmark.request_loader.data = "dataset.yaml"
    with (
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=new_is_supported),
        patch.object(FileDatasetCreator, "handle_create", new=new_handle_create),
        patch.object(
            FileDatasetCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        result = Dataset.from_data(benchmark.request_loader)
    assert result.name == "dataset.yaml"
89+
90+
@pytest.mark.smoke
def test_dataset_from_data_with_hf_dataset():
    """Falls back to the raw dataset id for Hugging Face datasets."""
    benchmark = mock_generative_benchmark()
    benchmark.request_loader.data = "openai/gsm8k"
    with (
        patch.object(
            InMemoryDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(
            SyntheticDatasetCreator, "is_supported", new=hardcoded_isnt_supported
        ),
        patch.object(FileDatasetCreator, "is_supported", new=hardcoded_isnt_supported),
        patch.object(HFDatasetsCreator, "is_supported", new=new_is_supported),
        patch.object(HFDatasetsCreator, "handle_create", new=new_handle_create),
        patch.object(
            HFDatasetsCreator,
            "extract_dataset_name",
            new=new_extract_dataset_name_none,
        ),
    ):
        result = Dataset.from_data(benchmark.request_loader)
    assert result.name == "openai/gsm8k"

0 commit comments

Comments
 (0)