diff --git a/engine/clients/qdrant/configure.py b/engine/clients/qdrant/configure.py
index 50afab5a..efcf37c4 100644
--- a/engine/clients/qdrant/configure.py
+++ b/engine/clients/qdrant/configure.py
@@ -69,18 +69,16 @@ def recreate(self, dataset: Dataset, collection_params):
         if not set(payload_index_params.keys()).issubset(dataset.config.schema.keys()):
             raise ValueError("payload_index_params are not found in dataset schema")
 
+        optimizers_config = self.collection_params.setdefault("optimizers_config", {})
+        # By default, disable index building while uploading
+        optimizers_config.setdefault("max_optimization_threads", 0)
+
         self.client.recreate_collection(
             collection_name=QDRANT_COLLECTION_NAME,
             **vectors_config,
             **self.collection_params
         )
-        self.client.update_collection(
-            collection_name=QDRANT_COLLECTION_NAME,
-            optimizer_config=rest.OptimizersConfigDiff(
-                # indexing_threshold=10000000,
-                max_optimization_threads=0,
-            ),
-        )
+
         for field_name, field_type in dataset.config.schema.items():
             if field_type in ["keyword", "uuid"]:
                 is_tenant = payload_index_params.get(field_name, {}).get(
diff --git a/engine/clients/qdrant/upload.py b/engine/clients/qdrant/upload.py
index d6f19c2b..b4b09f85 100644
--- a/engine/clients/qdrant/upload.py
+++ b/engine/clients/qdrant/upload.py
@@ -58,14 +58,17 @@ def upload_batch(cls, batch: List[Record]):
 
     @classmethod
     def post_upload(cls, _distance):
-        cls.client.update_collection(
-            collection_name=QDRANT_COLLECTION_NAME,
-            optimizer_config=OptimizersConfigDiff(
-                # indexing_threshold=10_000,
-                # Set to a high number to not apply limits, already limited by CPU budget
-                max_optimization_threads=100_000,
-            ),
-        )
+        # If index building is disabled through the collection settings, enable it
+        collection = cls.client.get_collection(collection_name=QDRANT_COLLECTION_NAME)
+        if collection.config.optimizer_config.max_optimization_threads == 0:
+            cls.client.update_collection(
+                collection_name=QDRANT_COLLECTION_NAME,
+                optimizer_config=OptimizersConfigDiff(
+                    # indexing_threshold=10_000,
+                    # Set to a high number to not apply limits, already limited by CPU budget
+                    max_optimization_threads=100_000,
+                ),
+            )
 
         cls.wait_collection_green()
         return {}
diff --git a/experiments/configurations/qdrant-on-disk.json b/experiments/configurations/qdrant-on-disk.json
index 9eb60869..ee5717d2 100644
--- a/experiments/configurations/qdrant-on-disk.json
+++ b/experiments/configurations/qdrant-on-disk.json
@@ -17,7 +17,10 @@
     "engine": "qdrant",
     "connection_params": {},
     "collection_params": {
-      "optimizers_config": { "default_segment_number": 17 },
+      "optimizers_config": {
+        "default_segment_number": 17,
+        "max_optimization_threads": null
+      },
       "quantization_config": { "scalar": {"type": "int8", "quantile": 0.99, "always_ram": false} },
       "vectors_config": { "on_disk": true },
       "hnsw_config": { "on_disk": true, "m": 0, "payload_m": 16 },
@@ -36,7 +39,10 @@
     "engine": "qdrant",
     "connection_params": {},
     "collection_params": {
-      "optimizers_config": { "default_segment_number": 17 },
+      "optimizers_config": {
+        "default_segment_number": 17,
+        "max_optimization_threads": null
+      },
       "quantization_config": { "scalar": {"type": "int8", "quantile": 0.99, "always_ram": false} },
       "vectors_config": { "on_disk": true },
       "hnsw_config": { "on_disk": true, "m": 0, "payload_m": 16 },
diff --git a/experiments/configurations/qdrant-single-node.json b/experiments/configurations/qdrant-single-node.json
index 8beb2c10..d324840e 100644
--- a/experiments/configurations/qdrant-single-node.json
+++ b/experiments/configurations/qdrant-single-node.json
@@ -29,7 +29,8 @@
       "optimizers_config": {
         "max_segment_size": 1000000,
         "default_segment_number": 3,
-        "memmap_threshold": 10000000
+        "memmap_threshold": 10000000,
+        "max_optimization_threads": null
       }
     },
     "search_params": [
@@ -63,7 +64,8 @@
       "optimizers_config": {
         "max_segment_size": 1000000,
         "default_segment_number": 3,
-        "memmap_threshold": 10000000
+        "memmap_threshold": 10000000,
+        "max_optimization_threads": null
       }
     },
     "search_params": [
@@ -93,7 +95,8 @@
       "optimizers_config": {
         "max_segment_size": 1000000,
         "default_segment_number": 3,
-        "memmap_threshold": 10000000
+        "memmap_threshold": 10000000,
+        "max_optimization_threads": null
       }
     },
     "search_params": [
@@ -120,7 +123,8 @@
       "optimizers_config": {
         "max_segment_size": 1000000,
         "default_segment_number": 3,
-        "memmap_threshold": 10000000
+        "memmap_threshold": 10000000,
+        "max_optimization_threads": null
       }
     },
     "search_params": [