redis-performance
diff --git a/DOCKER_README.md b/DOCKER_README.md
Lines changed: 6 additions & 6 deletions
diff --git a/README.md b/README.md
Lines changed: 71 additions & 6 deletions
diff --git a/benchmark/dataset.py b/benchmark/dataset.py
Lines changed: 14 additions & 0 deletions
diff --git a/datasets/datasets.json b/datasets/datasets.json
Lines changed: 42 additions & 2 deletions
@@ -58,7 +58,7 @@ docker run --rm redis/vector-db-benchmark:latest run.py --describe datasets
 # Basic Redis benchmark (requires local Redis)
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   redis/vector-db-benchmark:latest \
-  run.py --host localhost --engines redis-default-simple --dataset random-100
+  run.py --host localhost --engines redis-default-simple --datasets random-100
 ```
 
 ## Features
@@ -78,12 +78,12 @@ docker run --rm -v $(pwd)/results:/app/results --network=host \
 ### Redis 8.2 with RediSearch
 ```bash
 # Start Redis 8.2 with built-in vector support
-docker run -d --name redis-test -p 6379:6379 redis:8.2-rc1-bookworm
+docker run -d --name redis-test -p 6379:6379 redis:8.2-bookworm
 
 # Run benchmark
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   redis/vector-db-benchmark:latest \
-  run.py --host localhost --engines redis-default-simple --dataset glove-25-angular
+  run.py --host localhost --engines redis-default-simple --datasets glove-25-angular
 ```
 
 
@@ -103,18 +103,18 @@ docker run --rm redis/vector-db-benchmark:latest run.py --describe engines
 # Quick test with small dataset
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   redis/vector-db-benchmark:latest \
-  run.py --host localhost --engines redis-default-simple --dataset random-100
+  run.py --host localhost --engines redis-default-simple --datasets random-100
 
 # Comprehensive benchmark with multiple configurations
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   redis/vector-db-benchmark:latest \
-  run.py --host localhost --engines "*redis*" --dataset glove-25-angular
+  run.py --host localhost --engines "*redis*" --datasets glove-25-angular
 
 # With Redis authentication
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   -e REDIS_AUTH=mypassword -e REDIS_USER=myuser \
   redis/vector-db-benchmark:latest \
-  run.py --host localhost --engines redis-default-simple --dataset random-100
+  run.py --host localhost --engines redis-default-simple --datasets random-100
 ```
 
 ### Results Analysis
 
@@ -112,13 +112,13 @@ For testing with Redis, start a Redis container first:
 
 ```bash
 # Start Redis container
-docker run -d --name redis-test -p 6379:6379 redis:8.2-rc1-bookworm
+docker run -d --name redis-test -p 6379:6379 redis:8.2-bookworm
 
 # Run benchmark against Redis
 
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   redis/vector-db-benchmark:latest \
-  run.py --host localhost --engines redis-default-simple --dataset random-100
+  run.py --host localhost --engines redis-default-simple --datasets random-100
 
 # Or use the convenience script
 ./docker-run.sh -H localhost -e redis-default-simple -d random-100
@@ -221,14 +221,23 @@ Run the benchmark:
 
 ```bash
 # Basic usage examples
-python run.py --engines redis-default-simple --dataset random-100
-python run.py --engines redis-default-simple --dataset glove-25-angular
-python run.py --engines "*-m-16-*" --dataset "glove-*"
+python run.py --engines redis-default-simple --datasets random-100
+python run.py --engines redis-default-simple --datasets glove-25-angular
+python run.py --engines "*-m-16-*" --datasets "glove-*"
+
+# Using custom engine configurations from a JSON file
+python run.py --engines-file custom_engines.json --datasets glove-25-angular
+
+# Get information about available engines (with pattern matching)
+python run.py --engines "*redis*" --describe engines --verbose
+
+# Get information about engines from a custom file
+python run.py --engines-file custom_engines.json --describe engines --verbose
 
 # Docker usage (recommended)
 docker run --rm -v $(pwd)/results:/app/results --network=host \
   redis/vector-db-benchmark:latest \
-  run.py --host localhost --engines redis-default-simple --dataset random-100
+  run.py --host localhost --engines redis-default-simple --datasets random-100
 
 # Get help
 python run.py --help
@@ -237,6 +246,62 @@ python run.py --help
 The command allows you to specify wildcards for engines and datasets.
 Results of the benchmarks are stored in the `./results/` directory.
 
+## Using Custom Engine Configurations
+
+The benchmark tool supports two ways to specify which engine configurations to use:
+
+### 1. Pattern Matching (Default)
+Use the `--engines` flag with wildcard patterns to select configurations from the `experiments/configurations/` directory:
+
+```bash
+python run.py --engines "*redis*" --datasets glove-25-angular
+python run.py --engines "qdrant-m-*" --datasets random-100
+```
+
+### 2. Custom Configuration File
+Use the `--engines-file` flag to specify a JSON file containing custom engine configurations:
+
+```bash
+python run.py --engines-file my_engines.json --datasets glove-25-angular
+```
+
+The JSON file should contain an array of engine configuration objects. Each configuration must have a `name` field and follow the same structure as configurations in `experiments/configurations/`:
+
+```json
+[
+  {
+    "name": "my-custom-redis-config",
+    "engine": "redis",
+    "connection_params": {},
+    "collection_params": {
+      "algorithm": "hnsw",
+      "data_type": "FLOAT32",
+      "hnsw_config": {
+        "M": 16,
+        "DISTANCE_METRIC": "L2",
+        "EF_CONSTRUCTION": 200
+      }
+    },
+    "search_params": [
+      {
+        "parallel": 1,
+        "top": 10,
+        "search_params": {
+          "ef": 100,
+          "data_type": "FLOAT32"
+        }
+      }
+    ],
+    "upload_params": {
+      "parallel": 16,
+      "data_type": "FLOAT32"
+    }
+  }
+]
+```
+
+**Note:** You cannot use both `--engines` and `--engines-file` at the same time.
+
 ## How to update benchmark parameters?
 
 Each engine has a configuration file, which is used to define the parameters for the benchmark.
 
@@ -1,6 +1,7 @@
 import os
 import shutil
 import tarfile
+import bz2
 import urllib.request
 import urllib.parse
 from dataclasses import dataclass, field
@@ -201,6 +202,19 @@ def _extract_or_move_file(self, tmp_path, target_path):
             with tarfile.open(tmp_path) as file:
                 file.extractall(target_path)
             os.remove(tmp_path)
+        elif tmp_path.endswith(".bz2"):
+            print(f"Extracting bz2: {tmp_path} -> {target_path}")
+            Path(target_path).parent.mkdir(exist_ok=True)
+            # Remove .bz2 extension from target path if present
+            if str(target_path).endswith(".bz2"):
+                final_target_path = str(target_path)[:-4]  # Remove .bz2
+            else:
+                final_target_path = target_path
+            
+            with bz2.BZ2File(tmp_path, 'rb') as f_in:
+                with open(final_target_path, 'wb') as f_out:
+                    shutil.copyfileobj(f_in, f_out)
+            os.remove(tmp_path)
         else:
             print(f"Moving: {tmp_path} -> {target_path}")
             Path(target_path).parent.mkdir(exist_ok=True)
 
@@ -982,13 +982,53 @@
     "vector_count": 100000,
     "description": "Image embeddings"
   },
+  {
+    "name": "dbpedia-openai-1M-512-angular",
+    "vector_size": 512,
+    "distance": "cosine",
+    "type": "h5",
+    "path": "dbpedia-openai-1M-512-angular/dbpedia_openai_1M",
+    "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-512d.hdf5",
+    "vector_count": 1000000,
+    "description": "Knowledge embeddings"
+  },
+  {
+    "name": "dbpedia-openai-1M-1024-angular",
+    "vector_size": 1024,
+    "distance": "cosine",
+    "type": "h5",
+    "path": "dbpedia-openai-1M-1024-angular/dbpedia_openai_1M",
+    "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-1024d.hdf5",
+    "vector_count": 1000000,
+    "description": "Knowledge embeddings"
+  },
   {
     "name": "dbpedia-openai-1M-1536-angular",
     "vector_size": 1536,
     "distance": "cosine",
-    "type": "tar",
+    "type": "h5",
     "path": "dbpedia-openai-1M-1536-angular/dbpedia_openai_1M",
-    "link": "https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz",
+    "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-1536d.hdf5",
+    "vector_count": 1000000,
+    "description": "Knowledge embeddings"
+  },
+  {
+    "name": "dbpedia-openai-1M-2048-angular",
+    "vector_size": 2048,
+    "distance": "cosine",
+    "type": "h5",
+    "path": "dbpedia-openai-1M-2048-angular/dbpedia_openai_1M",
+    "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-2048d.hdf5",
+    "vector_count": 1000000,
+    "description": "Knowledge embeddings"
+  },
+  {
+    "name": "dbpedia-openai-1M-3072-angular",
+    "vector_size": 3072,
+    "distance": "cosine",
+    "type": "h5",
+    "path": "dbpedia-openai-1M-3072-angular/dbpedia_openai_1M",
+    "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-3072d.hdf5",
     "vector_count": 1000000,
     "description": "Knowledge embeddings"
   },