Skip to content

Commit 7b1d947

Browse files
committed
Enable command-line parameters for mixed workload
1 parent fb1b3c6 commit 7b1d947

File tree

4 files changed

+68
-3
lines changed

4 files changed

+68
-3
lines changed

engine/base_client/client.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pathlib import Path
55
from typing import List, Dict, Any, Optional
66
import warnings
7+
import random
78

89
from benchmark import ROOT_DIR
910
from benchmark.dataset import Dataset
@@ -174,7 +175,18 @@ def run_experiment(
174175
upload_end_idx: int = -1,
175176
num_queries: int = -1,
176177
ef_runtime: List[int] = [],
178+
mixed_workload_params: dict = None,
177179
):
180+
181+
# Extract mixed workload parameters
182+
insert_fraction = 0.0
183+
if mixed_workload_params:
184+
insert_fraction = mixed_workload_params.get("insert_fraction", 0.1)
185+
seed = mixed_workload_params.get("seed", None)
186+
if seed is not None:
187+
random.seed(seed) # Set seed for reproducible patterns
188+
189+
print(f"In run_experiment. insert_fraction: {insert_fraction} seed: {seed}" )
178190
results = {"upload": {}, "search": {}}
179191
execution_params = self.configurator.execution_params(
180192
distance=dataset.config.distance, vector_size=dataset.config.vector_size
@@ -273,7 +285,7 @@ def run_experiment(
273285
)
274286

275287
search_stats = searcher.search_all(
276-
dataset.config.distance, reader.read_queries(), num_queries
288+
dataset.config.distance, reader.read_queries(), num_queries, insert_fraction
277289
)
278290
# ensure we specify the client count in the results
279291
search_params["parallel"] = client_count

engine/base_client/search.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ def search_all(
8686
num_queries: int = -1,
8787
insert_fraction: float = 0.0,
8888
):
89-
insert_fraction = self.search_params.get("insert_fraction", 0.0)
89+
print(f"In base_client/search_all. insert_fraction {insert_fraction}")
90+
9091
parallel = self.search_params.get("parallel", 1)
9192
top = self.search_params.get("top", None)
9293
single_search_params = self.search_params.get("search_params", None)
@@ -271,7 +272,6 @@ def process_chunk(chunk, search_one, insert_one, insert_fraction):
271272
if random.random() < insert_fraction:
272273
result = insert_one(query)
273274
else:
274-
# Search
275275
result = search_one(query)
276276
results.append(result)
277277
return results

experiments/configurations/dbpedia-calibration-mixed.json

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@
2828
"parallel": 100,
2929
"algorithm": "hnsw",
3030
"data_type": "FLOAT16"
31+
},
32+
"mixed_workload_params": {
33+
"insert_fraction": 0.1,
34+
"seed": 42
3135
}
3236
},
3337
{
@@ -59,6 +63,10 @@
5963
"parallel": 100,
6064
"algorithm": "hnsw",
6165
"data_type": "FLOAT32"
66+
},
67+
"mixed_workload_params": {
68+
"insert_fraction": 0.1,
69+
"seed": 42
6270
}
6371
},
6472
{
@@ -91,6 +99,10 @@
9199
"parallel": 100,
92100
"data_type": "FLOAT16",
93101
"algorithm": "svs-vamana"
102+
},
103+
"mixed_workload_params": {
104+
"insert_fraction": 0.1,
105+
"seed": 42
94106
}
95107
},
96108
{
@@ -123,6 +135,10 @@
123135
"parallel": 100,
124136
"data_type": "FLOAT32",
125137
"algorithm": "svs-vamana"
138+
},
139+
"mixed_workload_params": {
140+
"insert_fraction": 0.1,
141+
"seed": 42
126142
}
127143
},
128144
{
@@ -156,6 +172,10 @@
156172
"parallel": 100,
157173
"data_type": "FLOAT16",
158174
"algorithm": "svs-vamana"
175+
},
176+
"mixed_workload_params": {
177+
"insert_fraction": 0.1,
178+
"seed": 42
159179
}
160180
},
161181
{
@@ -189,6 +209,10 @@
189209
"parallel": 100,
190210
"data_type": "FLOAT32",
191211
"algorithm": "svs-vamana"
212+
},
213+
"mixed_workload_params": {
214+
"insert_fraction": 0.1,
215+
"seed": 42
192216
}
193217
},
194218
{
@@ -222,6 +246,10 @@
222246
"parallel": 100,
223247
"data_type": "FLOAT32",
224248
"algorithm": "svs-vamana"
249+
},
250+
"mixed_workload_params": {
251+
"insert_fraction": 0.1,
252+
"seed": 42
225253
}
226254
},
227255
{
@@ -255,6 +283,10 @@
255283
"parallel": 100,
256284
"data_type": "FLOAT16",
257285
"algorithm": "svs-vamana"
286+
},
287+
"mixed_workload_params": {
288+
"insert_fraction": 0.1,
289+
"seed": 42
258290
}
259291
},
260292
{
@@ -288,6 +320,10 @@
288320
"parallel": 100,
289321
"data_type": "FLOAT16",
290322
"algorithm": "svs-vamana"
323+
},
324+
"mixed_workload_params": {
325+
"insert_fraction": 0.1,
326+
"seed": 42
291327
}
292328
},
293329
{
@@ -321,6 +357,10 @@
321357
"parallel": 100,
322358
"data_type": "FLOAT32",
323359
"algorithm": "svs-vamana"
360+
},
361+
"mixed_workload_params": {
362+
"insert_fraction": 0.1,
363+
"seed": 42
324364
}
325365
}
326366
]

run.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,14 @@ def run(
3030
ef_runtime: List[int] = typer.Option([], help="Filter search experiments by ef runtime values. Only experiments with these ef values will be run."),
3131
describe: str = typer.Option(None, help="Describe available options: 'datasets' or 'engines'. When used, shows information and exits."),
3232
verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed information when using --describe"),
33+
mixed_workload: bool = typer.Option(False, help="Enable mixed workload mode"),
34+
insert_fraction: float = typer.Option(0.1, help="Fraction of operations that are inserts (0.0-1.0)"),
35+
mixed_workload_seed: int = typer.Option(None, help="Random seed for reproducible mixed workload patterns"),
3336
):
3437
"""
3538
Example:
3639
python3 run.py --engines *-m-16-* --engines qdrant-* --datasets glove-*
40+
python3 run.py --engines redis --datasets glove-* --mixed-workload --insert-fraction 0.2
3741
python3 run.py --describe datasets
3842
python3 run.py --describe engines --verbose
3943
"""
@@ -64,6 +68,14 @@ def run(
6468
if any(fnmatch.fnmatch(name, dataset) for dataset in datasets)
6569
}
6670

71+
mixed_params = {}
72+
if mixed_workload:
73+
mixed_params = {
74+
"insert_fraction": insert_fraction,
75+
"seed": mixed_workload_seed
76+
}
77+
print(f"Running mixed workload. insert_fraction: {insert_fraction} random_seed: {mixed_workload_seed}")
78+
6779
for engine_name, engine_config in selected_engines.items():
6880
for dataset_name, dataset_config in selected_datasets.items():
6981
print(f"Running experiment: {engine_name} - {dataset_name}")
@@ -88,6 +100,7 @@ def run(
88100
upload_end_idx,
89101
queries,
90102
ef_runtime,
103+
mixed_workload_params=mixed_params
91104
)
92105
client.delete_client()
93106

0 commit comments

Comments
 (0)