From bbd48fbf2a8786c1dbb0762521e5e62b146ec243 Mon Sep 17 00:00:00 2001 From: "Aguerrebere, Cecilia" Date: Thu, 18 Sep 2025 19:35:32 -0700 Subject: [PATCH 1/6] Adding example to build a dynamic index with vector compression. --- examples/cpp/shared/CMakeLists.txt | 1 + ...xample_vamana_with_compression_dynamic.cpp | 133 ++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 examples/cpp/shared/example_vamana_with_compression_dynamic.cpp diff --git a/examples/cpp/shared/CMakeLists.txt b/examples/cpp/shared/CMakeLists.txt index 6f70c68b..6e7cd123 100644 --- a/examples/cpp/shared/CMakeLists.txt +++ b/examples/cpp/shared/CMakeLists.txt @@ -47,3 +47,4 @@ endfunction() create_example_executable(shared shared.cpp) create_example_executable(example_vamana_with_compression_lvq example_vamana_with_compression_lvq.cpp) create_example_executable(example_vamana_with_compression example_vamana_with_compression.cpp) +create_example_executable(example_vamana_with_compression_dynamic example_vamana_with_compression_dynamic.cpp) diff --git a/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp b/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp new file mode 100644 index 00000000..2f122707 --- /dev/null +++ b/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp @@ -0,0 +1,133 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// SVS +#include "svs/core/recall.h" +#include "svs/extensions/flat/leanvec.h" +#include "svs/extensions/flat/lvq.h" +#include "svs/extensions/vamana/leanvec.h" +#include "svs/extensions/vamana/lvq.h" +#include "svs/orchestrators/dynamic_vamana.h" +#include "svs/orchestrators/exhaustive.h" +#include "svs/orchestrators/vamana.h" + + +// Alias for blocked Lean dataset that supports resize/compact +using BlockedLean = svs::leanvec::LeanDataset< + svs::leanvec::UsingLVQ<4>, + svs::leanvec::UsingLVQ<8>, + svs::Dynamic, + svs::Dynamic, + svs::data::Blocked> +>; + + +int main() { + // STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions and using + // 4 and 8 bits for primary and secondary levels respectively. + //! [Compress data] + const size_t num_threads = 4; + size_t padding = 32; + size_t leanvec_dim = 64; + auto threadpool = svs::threads::as_threadpool(num_threads); + auto loaded = svs::VectorDataLoader(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs").load(); + auto data = BlockedLean::reduce( + loaded, std::nullopt, threadpool, padding, svs::lib::MaybeStatic(leanvec_dim) + ); + //! [Compress data] + + // STEP 2: Build Dynamic Vamana Index with initial set of vectors + //! [Index Build] + auto parameters = svs::index::vamana::VamanaBuildParameters{}; + + // Create id labels for build set + std::vector ids_build(loaded.size()); + for (size_t i = 0; i < loaded.size(); ++i) { + ids_build[i] = i; + } + + svs::DynamicVamana index = svs::DynamicVamana::build( + parameters, data, svs::lib::as_span(ids_build), svs::distance::DistanceL2(), num_threads + ); + //! [Index Build] + + // STEP 3: Add and delete vectors as needed. + //! [Delete vectors] + size_t num_to_delete = 100; + std::vector ids_delete(num_to_delete); + for (size_t i = 0; i < ids_delete.size(); ++i) { + ids_delete[i] = i; + } + + fmt::print("Deleting {} vectors.\n", ids_delete.size()); + + index.delete_points(ids_delete); + //! [Delete vectors] + + //! 
[Add vectors] + // Add the deleted vectors back in. + auto points = svs::data::SimpleData( + ids_delete.size(), loaded.dimensions() + ); + + size_t i = 0; + for (const auto& j : ids_delete) { + points.set_datum(i, loaded.get_datum(j)); + ++i; + } + auto points_const_view = points.cview(); + + fmt::print("Adding {} vectors.\n", ids_delete.size()); + + index.add_points(points_const_view, svs::lib::as_span(ids_delete), num_threads); + //! [Add vectors] + + // STEP 4: Search the Index + //! [Perform Queries] + const size_t search_window_size = 50; + const size_t n_neighbors = 10; + index.set_search_window_size(search_window_size); + + auto queries = svs::load_data(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs"); + auto results = index.search(queries, n_neighbors); + //! [Perform Queries] + + //! [Recall] + auto groundtruth = svs::load_data(std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"); + double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors); + + fmt::print("Recall@{} = {:.4f}\n", n_neighbors, recall); + fmt::print("Note that recall is low because this example is using a dummy random dataset.\n"); + //! [Recall] + + // STEP 5: Saving and reloading the index + //! [Saving Loading] + index.save("config", "graph", "data"); + index = svs::DynamicVamana::assemble( + "config", + svs::GraphLoader("graph"), + svs::lib::load_from_disk("data", padding), + svs::distance::DistanceL2(), + num_threads + ); + //! [Saving Loading] + index.set_search_window_size(search_window_size); + results = index.search(queries, n_neighbors); // re-query the reloaded index + recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors); + fmt::print("Recall@{} after saving and reloading = {:.4f}\n", n_neighbors, recall); + + return 0; +} From 4468d1a4f35938c18e808ef860e2ab98500debbb Mon Sep 17 00:00:00 2001 From: "Aguerrebere, Cecilia" Date: Fri, 19 Sep 2025 15:09:25 -0700 Subject: [PATCH 2/6] Adding Python example with LVQ. 
--- .../example_vamana_with_compression_lvq.py | 144 ++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 examples/python/example_vamana_with_compression_lvq.py diff --git a/examples/python/example_vamana_with_compression_lvq.py b/examples/python/example_vamana_with_compression_lvq.py new file mode 100644 index 00000000..03700264 --- /dev/null +++ b/examples/python/example_vamana_with_compression_lvq.py @@ -0,0 +1,144 @@ +# Copyright (C) 2025 Intel Corporation +# +# This software and the related documents are Intel copyrighted materials, +# and your use of them is governed by the express license under which they +# were provided to you ("License"). Unless the License provides otherwise, +# you may not use, modify, copy, publish, distribute, disclose or transmit +# this software or the related documents without Intel's prior written +# permission. +# +# This software and the related documents are provided as is, with no +# express or implied warranties, other than those that are expressly stated +# in the License. + +# Import `unittest` to allow for automated testing. +import unittest + +# [imports] +import os +import svs +# [imports] + +DEBUG_MODE = False +def assert_equal(lhs, rhs, message: str = "", epsilon = 0.05): + if DEBUG_MODE: + print(f"{message}: {lhs} == {rhs}") + else: + assert lhs < rhs + epsilon, message + assert lhs > rhs - epsilon, message + +test_data_dir = None + +def run(): + global test_data_dir # Shared with VamanaExampleTestCase.tearDown for cleanup. + # [generate-dataset] + # Create a test dataset. + # This will create a directory "example_data_vamana" and populate it with three + # entries: + # - data.fvecs: The test dataset. + # - queries.fvecs: The test queries. + # - groundtruth.ivecs: The groundtruth. + test_data_dir = "./example_data_vamana" + svs.generate_test_dataset( + 1000, # Create 1000 vectors in the dataset. + 100, # Generate 100 query vectors. + 256, # Set the vector dimensionality to 256. + test_data_dir, # The directory where results will be generated.
+ data_seed = 1234, # Random number seed for reproducibility. + query_seed = 5678, # Random number seed for reproducibility. + num_threads = 4, # Number of threads to use. + distance = svs.DistanceType.L2, # The distance type to use. + ) + # [generate-dataset] + + # [create-loader] + # We are going to construct an LVQ-compressed dataset on-the-fly from uncompressed data. + # First, we construct a loader for the uncompressed data. + uncompressed_loader = svs.VectorDataLoader( + os.path.join(test_data_dir, "data.fvecs"), + svs.DataType.float32 + ) + + # Next - we construct an LVQLoader which is configured to use LVQ compression with 4 + # bits for the primary and 8 bits for the residual quantization. + B1 = 4 # Number of bits for the first level LVQ quantization + B2 = 8 # Number of bits for the residuals quantization + compressed_loader = svs.LVQLoader(uncompressed_loader, + primary=B1, + residual=B2, + ) + # [create-loader] + + # An index can be constructed using an LVQ dataset. + # [build-parameters] + parameters = svs.VamanaBuildParameters( + graph_max_degree = 64, + window_size = 128, + ) + # [build-parameters] + + # [build-index] + index = svs.Vamana.build( + parameters, + compressed_loader, + svs.DistanceType.L2, + num_threads = 4, + ) + # [build-index] + + # Set the search window size of the index, load the queries, and perform the search. + # [perform-queries] + n_neighbors = 10 + index.search_window_size = 20 + index.num_threads = 4 + + queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) + I, D = index.search(queries, n_neighbors) + # [perform-queries] + + # Compare with the groundtruth. + # [recall] + groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) + recall = svs.k_recall_at(groundtruth, I, n_neighbors, n_neighbors) + print(f"Recall = {recall}") + # [recall] + assert_equal(recall, 0.953) + + # Finally, we can save the index and reload from a previously saved set of files.
+ # [saving-loading] + index.save( + os.path.join(test_data_dir, "example_config"), + os.path.join(test_data_dir, "example_graph"), + os.path.join(test_data_dir, "example_data"), + ) + + index = svs.Vamana( + os.path.join(test_data_dir, "example_config"), + os.path.join(test_data_dir, "example_graph"), + os.path.join(test_data_dir, "example_data"), + svs.DistanceType.L2, + num_threads = 4, + ) + # [saving-loading] + + +##### +##### Main Executable +##### + +if __name__ == "__main__": + run() + +##### +##### As a unit test. +##### + +class VamanaExampleTestCase(unittest.TestCase): + def tearDown(self): + if test_data_dir is not None: + print(f"Removing temporary directory {test_data_dir}") + import shutil # The directory is not empty, so os.rmdir would fail. + shutil.rmtree(test_data_dir) + + def test_all(self): + run() From 67c454edc7d332af8d7f57fd489f0b6764c4615a Mon Sep 17 00:00:00 2001 From: "Aguerrebere, Cecilia" Date: Mon, 29 Sep 2025 11:11:02 -0700 Subject: [PATCH 3/6] Fixing license header. --- .../example_vamana_with_compression_lvq.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/examples/python/example_vamana_with_compression_lvq.py b/examples/python/example_vamana_with_compression_lvq.py index 03700264..47fc47b4 100644 --- a/examples/python/example_vamana_with_compression_lvq.py +++ b/examples/python/example_vamana_with_compression_lvq.py @@ -1,15 +1,17 @@ -# Copyright (C) 2025 Intel Corporation +# Copyright 2025 Intel Corporation # -# This software and the related documents are Intel copyrighted materials, -# and your use of them is governed by the express license under which they -# were provided to you ("License"). Unless the License provides otherwise, -# you may not use, modify, copy, publish, distribute, disclose or transmit -# this software or the related documents without Intel's prior written -# permission. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at # -# This software and the related documents are provided as is, with no -# express or implied warranties, other than those that are expressly stated -# in the License. +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Import `unittest` to allow for automated testing. import unittest From a95d88d4597998ba11fb2f9a877a6cff406d7ac6 Mon Sep 17 00:00:00 2001 From: "Aguerrebere, Cecilia" Date: Mon, 29 Sep 2025 11:51:36 -0700 Subject: [PATCH 4/6] Fixing formatting issues. --- ...xample_vamana_with_compression_dynamic.cpp | 53 +++++++++++-------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp b/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp index 2f122707..ce71ccd4 100644 --- a/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp +++ b/examples/cpp/shared/example_vamana_with_compression_dynamic.cpp @@ -24,49 +24,56 @@ #include "svs/orchestrators/exhaustive.h" #include "svs/orchestrators/vamana.h" - // Alias for blocked Lean dataset that supports resize/compact using BlockedLean = svs::leanvec::LeanDataset< svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, svs::Dynamic, svs::Dynamic, - svs::data::Blocked> ->; - + svs::data::Blocked>>; int main() { - // STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions and using - // 4 and 8 bits for primary and secondary levels respectively. - //! [Compress data] + // STEP 1: Compress Data with LeanVec, reducing dimensionality to leanvec_dim dimensions + // and using 4 and 8 bits for primary and secondary levels respectively. + //! 
[Compress data] const size_t num_threads = 4; size_t padding = 32; size_t leanvec_dim = 64; auto threadpool = svs::threads::as_threadpool(num_threads); - auto loaded = svs::VectorDataLoader(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs").load(); + auto loaded = + svs::VectorDataLoader(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs") + .load(); auto data = BlockedLean::reduce( - loaded, std::nullopt, threadpool, padding, svs::lib::MaybeStatic(leanvec_dim) - ); + loaded, + std::nullopt, + threadpool, + padding, + svs::lib::MaybeStatic(leanvec_dim) + ); //! [Compress data] // STEP 2: Build Dynamic Vamana Index with initial set of vectors //! [Index Build] auto parameters = svs::index::vamana::VamanaBuildParameters{}; - + // Create id labels for build set std::vector ids_build(loaded.size()); for (size_t i = 0; i < loaded.size(); ++i) { - ids_build[i] = i; + ids_build[i] = i; } svs::DynamicVamana index = svs::DynamicVamana::build( - parameters, data, svs::lib::as_span(ids_build), svs::distance::DistanceL2(), num_threads + parameters, + data, + svs::lib::as_span(ids_build), + svs::distance::DistanceL2(), + num_threads ); //! [Index Build] // STEP 3: Add and delete vectors as needed. - //! [Delete vectors] - size_t num_to_delete = 100; + //! [Delete vectors] + size_t num_to_delete = 100; std::vector ids_delete(num_to_delete); for (size_t i = 0; i < ids_delete.size(); ++i) { ids_delete[i] = i; @@ -79,9 +86,8 @@ int main() { //! [Add vectors] // Add the deleted vectors back in. 
- auto points = svs::data::SimpleData( - ids_delete.size(), loaded.dimensions() - ); + auto points = + svs::data::SimpleData(ids_delete.size(), loaded.dimensions()); size_t i = 0; for (const auto& j : ids_delete) { @@ -101,16 +107,21 @@ int main() { const size_t n_neighbors = 10; index.set_search_window_size(search_window_size); - auto queries = svs::load_data(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs"); + auto queries = + svs::load_data(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs"); auto results = index.search(queries, n_neighbors); //! [Perform Queries] //! [Recall] - auto groundtruth = svs::load_data(std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"); + auto groundtruth = svs::load_data( + std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs" + ); double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors); fmt::print("Recall@{} = {:.4f}\n", n_neighbors, recall); - fmt::print("Note that recall is low because this example is using a dummy random dataset.\n"); + fmt::print( + "Note that recall is low because this example is using a dummy random dataset.\n" + ); //! [Recall] // STEP 5: Saving and reloading the index From 66abf52d13967b59e6e0ebee7dc05808ea9be802 Mon Sep 17 00:00:00 2001 From: "Aguerrebere, Cecilia" Date: Mon, 29 Sep 2025 16:53:43 -0700 Subject: [PATCH 5/6] Renaming file as a temporary fix to prevent automated testing. 
--- ...ana_with_compression_lvq.py => vamana_with_compression_lvq.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/python/{example_vamana_with_compression_lvq.py => vamana_with_compression_lvq.py} (100%) diff --git a/examples/python/example_vamana_with_compression_lvq.py b/examples/python/vamana_with_compression_lvq.py similarity index 100% rename from examples/python/example_vamana_with_compression_lvq.py rename to examples/python/vamana_with_compression_lvq.py From 4e4a6fc409164b4d826a434c731ffbabe7e0527f Mon Sep 17 00:00:00 2001 From: "Aguerrebere, Cecilia" Date: Tue, 30 Sep 2025 08:44:21 -0700 Subject: [PATCH 6/6] Updating README in examples/cpp/shared folder. --- examples/cpp/shared/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/cpp/shared/README.md b/examples/cpp/shared/README.md index 43f4378b..4dc96605 100644 --- a/examples/cpp/shared/README.md +++ b/examples/cpp/shared/README.md @@ -17,6 +17,7 @@ These examples utilize LVQ and LeanVec interfaces which are available when linking to a SVS shared/static library, which are published with [releases](https://github.com/intel/ScalableVectorSearch/releases). Note that these examples will _not_ run after building the open source codebase without the shared/static library. These examples include: - [example_vamana_with_compression.cpp](./example_vamana_with_compression.cpp): Demonstrates building, searching, saving, and reloading an index with a LeanVec-compressed dataset. - [example_vamana_with_compression_lvq.cpp](./example_vamana_with_compression_lvq.cpp): Demonstrates building, searching, saving, and reloading an index with a LVQ-compressed dataset. +- [example_vamana_with_compression_dynamic.cpp](./example_vamana_with_compression_dynamic.cpp): Demonstrates building, searching, saving, and reloading a dynamic index (allows vector insertions and deletions over time) with a LeanVec-compressed dataset. 
See [CMakeLists.txt](./CMakeLists.txt) for details on linking to the SVS shared library and follow the commands below to compile and use the SVS shared library to run shared.cpp example: