diff --git a/bindings/python/include/svs/python/conversion.h b/bindings/python/include/svs/python/conversion.h new file mode 100644 index 00000000..ecaf5cec --- /dev/null +++ b/bindings/python/include/svs/python/conversion.h @@ -0,0 +1,24 @@ +/* + * Copyright 2023 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// pybind +#include + +namespace svs::python::conversion { +void wrap(pybind11::module& m); +} // namespace svs::python::conversion diff --git a/bindings/python/include/svs/python/core.h b/bindings/python/include/svs/python/core.h index ed378375..c6bb3631 100644 --- a/bindings/python/include/svs/python/core.h +++ b/bindings/python/include/svs/python/core.h @@ -27,6 +27,14 @@ #include "svs/lib/meta.h" #include "svs/lib/misc.h" +#include "svs/fallback/fallback.h" + +#ifdef USE_PROPRIETARY + +#include "svs/fallback/fallback_python.h" + +#endif // USE_PROPRIETARY + // pybind #include @@ -118,6 +126,22 @@ class UnspecializedGraphLoader { using DistanceL2 = svs::distance::DistanceL2; using DistanceIP = svs::distance::DistanceIP; +///// +///// LVQ +///// + +// Compressors - online compression of existing data +using LVQReloader = svs::quantization::lvq::Reload; +using LVQ = svs::quantization::lvq::ProtoLVQLoader; + +///// +///// LeanVec +///// + +// Dimensionality reduction using LeanVec +using LeanVecReloader = svs::leanvec::Reload; +using LeanVec = svs::leanvec::ProtoLeanVecLoader; + namespace core { void wrap(pybind11::module& m); } // namespace core diff --git a/bindings/python/include/svs/python/dispatch.h b/bindings/python/include/svs/python/dispatch.h index 221d4183..6dff43ea 100644 --- a/bindings/python/include/svs/python/dispatch.h +++ b/bindings/python/include/svs/python/dispatch.h @@ -49,3 +49,83 @@ struct svs::lib::DispatchConverter< return To{object.context().get_directory()}; } }; + +template < + size_t Primary, + size_t Residual, + size_t Extent, + svs::quantization::lvq::LVQPackingStrategy Strategy> +struct svs::lib::DispatchConverter< + svs::lib::SerializedObject, + svs::quantization::lvq::LVQLoader< + Primary, + Residual, + Extent, + Strategy, + svs::python::RebindAllocator>> { + using To = svs::quantization::lvq::LVQLoader< + Primary, + Residual, + Extent, + Strategy, + svs::python::RebindAllocator>; + + using LVQStrategyDispatch = svs::quantization::lvq::LVQStrategyDispatch; + + static int64_t match(const svs::lib::SerializedObject& object) { + // TODO: Use a LoadTable directly instead of forcing reparsing every time. 
+ auto ex = svs::lib::try_load(object); + if (!ex) { + return svs::lib::invalid_match; + } + + return svs::quantization::lvq::overload_score( + ex.value(), LVQStrategyDispatch::Auto + ); + } + + static To convert(const svs::lib::SerializedObject& object) { + return To{ + svs::quantization::lvq::Reload{std::move(object.context().get_directory())}, + 0, + svs::python::RebindAllocator()}; + } +}; + +template +struct svs::lib::DispatchConverter< + svs::lib::SerializedObject, + svs::leanvec::LeanVecLoader< + PrimaryKind, + SecondaryKind, + LeanVecDims, + Extent, + svs::python::RebindAllocator>> { + using To = leanvec::LeanVecLoader< + PrimaryKind, + SecondaryKind, + LeanVecDims, + Extent, + svs::python::RebindAllocator>; + + static int64_t match(const svs::lib::SerializedObject& object) { + // TODO: Use a LoadTable directly instead of forcing reparsing every time. + auto ex = svs::lib::try_load(object); + if (!ex) { + return svs::lib::invalid_match; + } + + return svs::leanvec:: + overload_score(ex.value()); + } + + static To convert(const svs::lib::SerializedObject& object) { + return To{ + leanvec::Reload{object.context().get_directory()}, + LeanVecDims, // TODO: This is a hack for now. Since we're reloading, it doesn't + // matter. + std::nullopt, + 0, + svs::python::RebindAllocator()}; + } +}; diff --git a/bindings/python/include/svs/python/dynamic_vamana.h b/bindings/python/include/svs/python/dynamic_vamana.h index 26c42b68..7d75a2af 100644 --- a/bindings/python/include/svs/python/dynamic_vamana.h +++ b/bindings/python/include/svs/python/dynamic_vamana.h @@ -33,5 +33,50 @@ template void for_standard_specializations(F&& f) { #undef X } +template void for_compressed_specializations(F&& f) { + using Sequential = svs::quantization::lvq::Sequential; +#define X(Dist, Primary, Residual, Strategy, N) \ + f.template operator()() + // Sequential + X(DistanceL2, 4, 0, Sequential, Dynamic); + X(DistanceIP, 4, 0, Sequential, Dynamic); + X(DistanceL2, 4, 4, Sequential, Dynamic); + X(DistanceIP, 4, 4, Sequential, Dynamic); + X(DistanceL2, 4, 8, Sequential, Dynamic); + X(DistanceIP, 4, 8, Sequential, Dynamic); + X(DistanceL2, 8, 0, Sequential, Dynamic); + X(DistanceIP, 8, 0, Sequential, Dynamic); + + // Turbo + using Turbo16x8 = svs::quantization::lvq::Turbo<16, 8>; + X(DistanceL2, 4, 0, Turbo16x8, Dynamic); + X(DistanceIP, 4, 0, Turbo16x8, Dynamic); + X(DistanceL2, 4, 4, Turbo16x8, Dynamic); + X(DistanceIP, 4, 4, Turbo16x8, Dynamic); + X(DistanceL2, 4, 8, Turbo16x8, Dynamic); + X(DistanceIP, 4, 8, Turbo16x8, Dynamic); +#undef X +} + +template void for_leanvec_specializations(F&& f) { +#define X(Dist, Primary, Secondary, L, N) \ + f.template operator()() + X(DistanceL2, svs::Float16, svs::Float16, Dynamic, Dynamic); + X(DistanceIP, svs::Float16, svs::Float16, Dynamic, Dynamic); + + X(DistanceL2, svs::leanvec::UsingLVQ<8>, svs::Float16, Dynamic, Dynamic); + X(DistanceIP, svs::leanvec::UsingLVQ<8>, svs::Float16, Dynamic, Dynamic); + + X(DistanceL2, svs::leanvec::UsingLVQ<8>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic); + X(DistanceIP, svs::leanvec::UsingLVQ<8>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic); + + X(DistanceL2, svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic); + X(DistanceIP, svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic); + + X(DistanceL2, svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<4>, Dynamic, Dynamic); + X(DistanceIP, svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<4>, Dynamic, Dynamic); +#undef X +} + void wrap(pybind11::module& m); 
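For orientation, the specialization tables above determine which LVQ and LeanVec configurations the dynamic index can load; a row such as `X(DistanceIP, 4, 8, Sequential, Dynamic)` corresponds to reloading a saved LVQ dataset with a 4-bit primary and 8-bit residual under sequential packing. A minimal Python-side sketch (the directory path is a placeholder, and the saved dataset is assumed to have been compressed as LVQ-4x8):

```python
import svs

# Reload a previously saved LVQ dataset (placeholder path). The primary/residual
# bit widths are recovered from the saved metadata; with a 4-bit primary and an
# 8-bit residual this exercises the X(..., 4, 8, Sequential, Dynamic) rows above.
loader = svs.LVQLoader(
    "saved/lvq_data",
    padding = 0,
    strategy = svs.LVQStrategy.Sequential,
)
print(loader.primary_bits, loader.residual_bits, loader.strategy)
```

Such a loader is what the dispatcher registered in `dynamic_vamana.cpp` ultimately routes to `assemble_lvq` (or to `assemble_leanvec` for a `LeanVecLoader`).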
} // namespace svs::python::dynamic_vamana diff --git a/bindings/python/include/svs/python/vamana.h b/bindings/python/include/svs/python/vamana.h index f5bc774f..4545f453 100644 --- a/bindings/python/include/svs/python/vamana.h +++ b/bindings/python/include/svs/python/vamana.h @@ -89,6 +89,126 @@ template void for_standard_specializations(F&& f) { #undef XN #undef X } + +// Compressed search specializations. +// Pattern: +// DistanceType, Primary, Residual, Dimensionality, Strategy, EnableBuild +#define X(Dist, P, R, N, S, B) f.template operator()() +template void lvq_specialize_4x0(const F& f) { + using Sequential = svs::quantization::lvq::Sequential; + using Turbo = svs::quantization::lvq::Turbo<16, 8>; + + // Sequential + X(DistanceL2, 4, 0, Dynamic, Sequential, true); + X(DistanceIP, 4, 0, Dynamic, Sequential, true); + X(DistanceCosineSimilarity, 4, 0, Dynamic, Sequential, true); + // Turbo + X(DistanceL2, 4, 0, Dynamic, Turbo, true); + X(DistanceIP, 4, 0, Dynamic, Turbo, true); + X(DistanceCosineSimilarity, 4, 0, Dynamic, Turbo, true); +} + +template void lvq_specialize_4x4(const F& f) { + using Sequential = svs::quantization::lvq::Sequential; + using Turbo = svs::quantization::lvq::Turbo<16, 8>; + + // Sequential + X(DistanceL2, 4, 4, Dynamic, Sequential, true); + X(DistanceIP, 4, 4, Dynamic, Sequential, true); + X(DistanceCosineSimilarity, 4, 4, Dynamic, Sequential, true); + // Turbo + X(DistanceL2, 4, 4, Dynamic, Turbo, true); + X(DistanceIP, 4, 4, Dynamic, Turbo, true); + X(DistanceCosineSimilarity, 4, 4, Dynamic, Turbo, true); +} + +template void lvq_specialize_4x8(const F& f) { + using Sequential = svs::quantization::lvq::Sequential; + using Turbo = svs::quantization::lvq::Turbo<16, 8>; + + // Sequential + X(DistanceL2, 4, 8, Dynamic, Sequential, true); + X(DistanceIP, 4, 8, Dynamic, Sequential, true); + X(DistanceCosineSimilarity, 4, 8, Dynamic, Sequential, true); + // Turbo + X(DistanceL2, 4, 8, Dynamic, Turbo, true); + X(DistanceIP, 4, 8, Dynamic, Turbo, true); + X(DistanceCosineSimilarity, 4, 8, Dynamic, Turbo, true); +} + +template void lvq_specialize_8x0(const F& f) { + using Sequential = svs::quantization::lvq::Sequential; + using Turbo = svs::quantization::lvq::Turbo<16, 4>; + + // Sequential + X(DistanceL2, 8, 0, Dynamic, Sequential, true); + X(DistanceIP, 8, 0, Dynamic, Sequential, true); + X(DistanceCosineSimilarity, 8, 0, Dynamic, Sequential, true); + // Turbo + X(DistanceL2, 8, 0, Dynamic, Turbo, true); + X(DistanceIP, 8, 0, Dynamic, Turbo, true); + X(DistanceCosineSimilarity, 8, 0, Dynamic, Turbo, true); +} + +template void lvq_specialize_8x8(const F& f) { + using Sequential = svs::quantization::lvq::Sequential; + X(DistanceL2, 8, 8, Dynamic, Sequential, false); + X(DistanceIP, 8, 8, Dynamic, Sequential, false); + X(DistanceCosineSimilarity, 8, 8, Dynamic, Sequential, false); +} + +template void compressed_specializations(F&& f) { + lvq_specialize_4x0(f); + lvq_specialize_4x4(f); + lvq_specialize_4x8(f); + lvq_specialize_8x0(f); + lvq_specialize_8x8(f); +} +#undef X + +// LeanVec specializations. 
+// Pattern: +// Primary, Secondary, LeanVec Dimensionality, Dimensionality, DistanceType +#define X(P, S, L, N, D) f.template operator()() +template void leanvec_specialize_unc_unc(const F& f) { + X(float, float, Dynamic, Dynamic, DistanceL2); + X(float, float, Dynamic, Dynamic, DistanceIP); + X(float, float, Dynamic, Dynamic, DistanceCosineSimilarity); + + X(svs::Float16, svs::Float16, Dynamic, Dynamic, DistanceL2); + X(svs::Float16, svs::Float16, Dynamic, Dynamic, DistanceIP); + X(svs::Float16, svs::Float16, Dynamic, Dynamic, DistanceCosineSimilarity); +} + +template void leanvec_specialize_lvq_unc(const F& f) { + X(svs::leanvec::UsingLVQ<8>, svs::Float16, Dynamic, Dynamic, DistanceL2); + X(svs::leanvec::UsingLVQ<8>, svs::Float16, Dynamic, Dynamic, DistanceIP); + X(svs::leanvec::UsingLVQ<8>, svs::Float16, Dynamic, Dynamic, DistanceCosineSimilarity); +} + +template void leanvec_specialize_lvq_lvq(const F& f) { + // clang-format off + X(svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<4>, Dynamic, Dynamic, DistanceL2); + X(svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<4>, Dynamic, Dynamic, DistanceIP); + X(svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<4>, Dynamic, Dynamic, DistanceCosineSimilarity); + + X(svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic, DistanceL2); + X(svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic, DistanceIP); + X(svs::leanvec::UsingLVQ<4>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic, DistanceCosineSimilarity); + + X(svs::leanvec::UsingLVQ<8>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic, DistanceL2); + X(svs::leanvec::UsingLVQ<8>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic, DistanceIP); + X(svs::leanvec::UsingLVQ<8>, svs::leanvec::UsingLVQ<8>, Dynamic, Dynamic, DistanceCosineSimilarity); + // clang-format on +} + +template void leanvec_specializations(F&& f) { + leanvec_specialize_unc_unc(f); + leanvec_specialize_lvq_unc(f); + leanvec_specialize_lvq_lvq(f); +} +#undef X + } // namespace vamana_specializations namespace vamana { diff --git a/bindings/python/src/conversion.cpp b/bindings/python/src/conversion.cpp new file mode 100644 index 00000000..7c641f9a --- /dev/null +++ b/bindings/python/src/conversion.cpp @@ -0,0 +1,168 @@ +/* + * Copyright 2023 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// svs python bindings +#include "svs/python/conversion.h" +#include "svs/python/common.h" +#include "svs/python/core.h" + +// svs +#include "svs/quantization/lvq/lvq_concept.h" + +// pybind +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "pybind11/stl/filesystem.h" + +// stl +#include +#include + +namespace lvq = svs::quantization::lvq; +namespace py = pybind11; + +namespace svs::python { +namespace { + +template void register_specializations(F&& f) { + // Pattern: Primary, Residual, Strategy + f.template operator()<4, 0, lvq::Sequential>(); + f.template operator()<8, 0, lvq::Sequential>(); + f.template operator()<4, 4, lvq::Sequential>(); + f.template operator()<4, 8, lvq::Sequential>(); + f.template operator()<8, 8, lvq::Sequential>(); +} + +template +void compress( + lvq::LVQLoader SVS_UNUSED(dispatch + ), + const std::filesystem::path& data_path, + const std::filesystem::path& centroid_path, + const std::filesystem::path& assignment_path, + const std::filesystem::path& save_path, + size_t num_threads +) { + using dataset_t = + svs::quantization::lvq::LVQDataset; + + auto data = svs::VectorDataLoader(data_path).load(); + auto centroids = svs::VectorDataLoader(centroid_path).load(); + + auto assignments = std::vector(data.size()); + { + auto stream = svs::lib::open_read(assignment_path); + svs::lib::read_binary(stream, assignments); + } + + // Allocate the storage dataset and set copy over the centroids. + auto dst = dataset_t(data.size(), svs::lib::MaybeStatic(data.dimensions())); + dst.reproducibility_set_centroids(centroids.cview()); + + // Compress the dataset into the compressed destination. + auto pool = svs::threads::DefaultThreadPool(num_threads); + svs::threads::parallel_for( + pool, + svs::threads::StaticPartition(data.size()), + [&](auto is, auto SVS_UNUSED(tid)) { + for (auto i : is) { + dst.set_datum(i, data.get_datum(i), assignments.at(i)); + } + } + ); + + // Save the result. + svs::lib::save_to_disk(dst, save_path); +} + +struct Compress { + void operator()( + const LVQ& source, + const std::filesystem::path& data_path, + const std::filesystem::path& centroid_path, + const std::filesystem::path& assignment_path, + const std::filesystem::path& save_path, + size_t num_threads + ) { + auto dispatcher = svs::lib::Dispatcher< + void, + LVQ, + const std::filesystem::path&, + const std::filesystem::path&, + const std::filesystem::path&, + const std::filesystem::path&, + size_t>(); + + register_specializations([&]() { + dispatcher.register_target(&compress); + }); + + dispatcher.invoke( + source, data_path, centroid_path, assignment_path, save_path, num_threads + ); + } +}; + +template +void decompress( + lvq::LVQLoader loader, + const std::filesystem::path& save_path +) { + auto dataset = loader.load(); + auto dst = svs::data::SimpleData(dataset.size(), dataset.dimensions()); + + auto decompressor = dataset.decompressor(); + for (size_t i = 0, imax = dataset.size(); i < imax; ++i) { + dst.set_datum(i, decompressor(dataset.get_datum(i))); + } + svs::lib::save_to_disk(dst, save_path); +} + +struct Decompress { + void operator()(const LVQ& loader, const std::filesystem::path& save_path) { + auto dispatcher = svs::lib::Dispatcher(); + register_specializations([&]() { + dispatcher.register_target(&decompress); + }); + dispatcher.invoke(loader, save_path); + } +}; + +} // namespace + +namespace conversion { + +void wrap(py::module& m) { + auto sub = m.def_submodule( + "reproducibility", "Compatibility methods to reproduce paper results." 
+    );
+
+    sub.def(
+        "compress",
+        Compress(),
+        py::arg("source"),
+        py::arg("data_path"),
+        py::arg("centroid_path"),
+        py::arg("assignment_path"),
+        py::arg("save_path"),
+        py::arg("num_threads") = 1
+    );
+
+    sub.def("decompress", Decompress(), py::arg("source"), py::arg("save_path"));
+}
+
+} // namespace conversion
+} // namespace svs::python
diff --git a/bindings/python/src/core.cpp b/bindings/python/src/core.cpp
index 1070fd90..ce5c423f 100644
--- a/bindings/python/src/core.cpp
+++ b/bindings/python/src/core.cpp
@@ -38,9 +38,533 @@ namespace py = pybind11;
 namespace svs::python {
 namespace {
+
+///// Logging
+enum class LogStream { stdout_, stderr_, null };
+
+void replace_logger_with_sink(svs::logging::sink_ptr sink) {
+    auto current_logger = svs::logging::get();
+    auto current_level = svs::logging::get_level(current_logger);
+    const auto& name = current_logger->name();
+
+    auto new_logger = std::make_shared<::spdlog::logger>(name, std::move(sink));
+    svs::logging::set_level(new_logger, current_level);
+    svs::logging::set(std::move(new_logger));
+}
+
+void set_log_stream(LogStream stream) {
+    auto pick_sink = [stream]() {
+        switch (stream) {
+            using enum LogStream;
+            case stdout_: {
+                return svs::logging::stdout_sink();
+            }
+            case stderr_: {
+                return svs::logging::stderr_sink();
+            }
+            case null: {
+                return svs::logging::null_sink();
+            }
+        }
+        throw ANNEXCEPTION("Unknown Stream: {}\n", static_cast<int>(stream));
+    };
+    replace_logger_with_sink(pick_sink());
+}
+
+void wrap_logging(py::module& m) {
+    auto logging = m.def_submodule("logging", "Logging API");
+
+    // Wrap the logging levels.
+    using Level = svs::logging::Level;
+    const char* logging_enum_description = R"(
+Log levels used by SVS, listed in increasing order of severity.
+Only messages equal to or more severe than the currently configured log level will be
+reported.
+
+See Also
+--------
+svs.logging.set_level, svs.logging.get_level
+)";
+
+    py::enum_<Level>(logging, "level", logging_enum_description)
+        .value("trace", Level::Trace, "The most verbose logging")
+        .value("debug", Level::Debug, "Log diagnostic debug information")
+        .value(
+            "info",
+            Level::Info,
+            "Report general information. Useful for long-running operations"
+        )
+        .value(
+            "warn",
+            Level::Warn,
+            "Report information that is not immediately an error, but could be potentially "
+            "problematic"
+        )
+        .value("error", Level::Error, "Report errors")
+        .value(
+            "critical",
+            Level::Critical,
+            "Report critical messages that generally should not be suppressed"
+        )
+        .value("off", Level::Off, "Disable logging");
+
+    py::enum_<LogStream>(logging, "stream", "Built-in Logging Stream")
+        .value("stdout", LogStream::stdout_, "Route all logging to stdout")
+        .value("stderr", LogStream::stderr_, "Route all logging to stderr")
+        .value("null", LogStream::null, "Suppress all logging")
+        .export_values();
+
+    logging.def(
+        "set_level",
+        [](Level level) { svs::logging::set_level(level); },
+        py::arg("level"),
+        "Set logging to the specified level. Only messages equal to or more severe than "
+        "the set level will be reported."
+    );
+
+    logging.def(
+        "get_level",
+        [&]() { return svs::logging::get_level(); },
+        "Get the current logging level."
+    );
+
+    logging.def(
+        "set_logging_stream",
+        &set_log_stream,
+        py::arg("stream"),
+        R"(
+Route logging to use the specified stream. Note that setting this will supersede
+the default environment variable selection mechanism and all previous calls to
+``svs.logging.set_logging_stream`` and ``svs.logging.set_logging_file``.
+)"
+    );
+
+    logging.def(
+        "set_logging_file",
+        [](const std::filesystem::path& file) {
+            replace_logger_with_sink(svs::logging::file_sink(file.native()));
+        },
+        py::arg("file"),
+        R"(
+Direct all logging messages to the specified file. The caller must have sufficient
+permissions to create the file.
+
+Note that setting this will supersede the default environment variable selection mechanism
+and all previous calls to ``svs.logging.set_logging_stream`` and
+``svs.logging.set_logging_file``.
+)"
+    );
+
+    logging.def(
+        "log_message",
+        [](Level level, const std::string& message) {
+            svs::logging::log(level, "{}", message);
+        },
+        py::arg("level"),
+        py::arg("message"),
+        "Log the message with the given severity level."
+    );
+}
+
+constexpr std::string_view compression_constructor_proto = R"(
+Construct a loader that will lazily compress the results of the data loader.
+Requires an appropriate back-end to be compiled for all combinations of primary and residual
+bits.
+
+Args:
+    loader (:py:class:`svs.VectorDataLoader`): The uncompressed dataset to compress
+        in-memory.
+    primary (int): The number of bits to use for compression in the primary dataset.
+    residual (int): The number of bits to use for compression in the residual dataset.
+        Default: 0.
+    padding (int): The value (in bytes) to align the beginning of each compressed vector.
+        Values of 32 or 64 may offer the best performance at the cost of a lower compression
+        ratio. A value of 0 implies no special alignment.
+    strategy (:py:class:`svs.LVQStrategy`): The packing strategy to use for the compressed
+        codes. See the associated documentation for that enum.
+)";
+
+constexpr std::string_view reload_constructor_proto = R"(
+Reload a compressed dataset from a previously saved dataset.
+Requires an appropriate back-end to be compiled for all combinations of primary and residual
+bits.
+
+Args:
+    directory (str): The directory where the dataset was previously saved.
+    primary (int): The number of bits to use for compression in the primary dataset.
+    residual (int): The number of bits to use for compression in the residual dataset.
+        Default: 0.
+    dims (int): The number of dimensions in the dataset. May provide a performance boost
+        if given and a specialization has been compiled. Default: Dynamic (any dimension).
+    padding (int): The value (in bytes) to align the beginning of each compressed vector.
+        Values of 32 or 64 may offer the best performance at the cost of a lower compression
+        ratio. A value of 0 implies no special alignment. Default: 0.
+    strategy (:py:class:`svs.LVQStrategy`): The packing strategy to use for the compressed
+        codes. See the associated documentation for that enum.
+)";
+
+constexpr std::string_view leanvec_online_proto = R"(
+Construct a loader that will lazily reduce the dimensionality of the data loader.
+Requires an appropriate back-end to be compiled for all combinations of primary and
+secondary types.
+
+Args:
+    loader (:py:class:`svs.VectorDataLoader`): The uncompressed original dataset.
+    leanvec_dims (int): The resulting reduced dimensionality.
+    primary (LeanVecKind): Type of dataset used for the primary (Default: LVQ8).
+    secondary (LeanVecKind): Type of dataset used for the secondary (Default: LVQ8).
+    data_matrix (Optional[numpy.ndarray[numpy.float32]]): Matrix for data transformation
+        [see note 1] (Default: None).
+    query_matrix (Optional[numpy.ndarray[numpy.float32]]): Matrix for query transformation
+        [see note 1] (Default: None).
+    alignment (int): The alignment/padding used in LVQ data types (Default: 32).
+
+**Note 1**: The arguments ``data_matrix`` and ``query_matrix`` are optional and have the
+following requirements for valid combinations:
+
+    a) Neither matrix provided: Transform dataset and queries using a default PCA-based
+       transformation.
+    b) Only ``data_matrix`` provided: The provided matrix is used to transform both the
+       queries and the original dataset.
+    c) Both arguments are provided: Use the respective matrices for transformation.
+)";
+
+constexpr std::string_view leanvec_reload_proto = R"(
+Reload a LeanVec dataset from a previously saved dataset.
+Requires an appropriate back-end to be compiled for all combinations of primary and
+secondary types.
+
+Args:
+    directory (str): The directory where the dataset was previously saved.
+    leanvec_dims (int): The reduced dimensionality.
+        Default: Dynamic (any dimension).
+    dims (int): The number of dimensions in the original dataset.
+        Default: Dynamic (any dimension).
+    primary (LeanVecKind): Type of dataset used for the primary.
+        Default: ``svs.LeanVecKind.lvq8``.
+    secondary (LeanVecKind): Type of dataset used for the secondary.
+        Default: ``svs.LeanVecKind.lvq8``.
+    alignment (int): The alignment/padding used in LVQ data types. Default: 32.
+)";
+
+// Legacy definitions.
+template <size_t Primary, size_t Residual> struct LegacyLVQLoader {
+  public:
+    LegacyLVQLoader(UnspecializedVectorDataLoader loader, size_t padding)
+        : loader_{std::move(loader), Primary, Residual, padding} {}
+
+    LegacyLVQLoader(std::string path, size_t dims, size_t padding)
+        : loader_{LVQReloader{std::move(path)}, padding} {
+        auto throw_err = [&](std::string_view kind, size_t has, size_t expected) {
+            throw ANNEXCEPTION(
+                "Reloaded dataset has {} {} but was expected to have {}!",
+                kind,
+                has,
+                expected
+            );
+        };
+
+        // Make sure the deduced results are correct.
+        if (loader_.primary_ != Primary) {
+            throw_err("primary bits", loader_.primary_, Primary);
+        }
+
+        if (loader_.residual_ != Residual) {
+            throw_err("residual bits", loader_.residual_, Residual);
+        }
+
+        if (dims != Dynamic && dims != loader_.dims_) {
+            throw_err("dimensions", loader_.dims_, dims);
+        }
+    }
+
+    // Implicitly convert to generic LVQ.
+    operator LVQ() const { return loader_; }
+
+  public:
+    LVQ loader_;
+};
+
+template <size_t Primary, size_t Residual, typename Parent>
+void wrap_lvq_alias(
+    Parent& lvq_loader,
+    py::module& m,
+    std::string_view class_name,
+    std::string_view docstring
+) {
+    auto class_def = py::class_<LegacyLVQLoader<Primary, Residual>>{
+        m, std::string(class_name).c_str(), std::string(docstring).c_str()};
+
+    // Define a converting constructor taking the legacy type.
+    lvq_loader.def(
+        py::init([](const LegacyLVQLoader<Primary, Residual>& legacy) { return legacy; }),
+        py::arg("legacy")
+    );
+
+    // Allow implicit conversions from LegacyLVQLoader to LVQLoader.
+    py::implicitly_convertible<LegacyLVQLoader<Primary, Residual>, LVQ>();
+
+    // Alias the datafile constructor.
+    class_def.def(
+        py::init<UnspecializedVectorDataLoader, size_t>(),
+        py::arg("datafile"),
+        py::arg("padding") = 0,
+        std::string(compression_constructor_proto).c_str()
+    );
+
+    // Alias the reload constructor.
+    class_def.def(
+        py::init<std::string, size_t, size_t>(),
+        py::arg("datafile"),
+        py::arg("dims") = svs::Dynamic,
+        py::arg("padding") = 0,
+        std::string(reload_constructor_proto).c_str()
+    );
+}
+
+void wrap_fallback(py::module& m) {
+    using enum svs::fallback::FallbackMode;
+
+    // Strategy Dispatch enum.
+    py::enum_<svs::fallback::FallbackMode>(
+        m, "FallbackMode", "Select the fallback mode for LVQ"
+    )
+        .value("Silent", Silent, "Seamlessly fall back to the default Vamana index.")
+        .value(
+            "Warning",
+            Warning,
+            "Provide results using default Vamana index. 
Logs a warning message indicated " + "LeanVec/LVQ optimizations are unsupported." + ) + .value( + "Error", + Error, + "Enforces an error, stopping execution if LeanVec/LVQ optimizations are not " + "supported." + ) + .export_values(); + + m.def( + "set_fallback_mode", + [](svs::fallback::FallbackMode mode) { svs::fallback::set_mode(mode); }, + py::arg("mode"), + "Set the LVQ mode." + ); + m.def( + "get_fallback_mode", + []() { return svs::fallback::get_mode(); }, + "Get the current LVQ mode." + ); +} + +/// Generate bindings for LVQ compressors and loaders. +void wrap_lvq(py::module& m) { + using enum svs::quantization::lvq::LVQStrategyDispatch; + + // Strategy Dispatch enum. + py::enum_( + m, "LVQStrategy", "Select the packing mode for LVQ" + ) + .value("Auto", Auto, "Let SVS decide the best strategy.") + .value("Sequential", Sequential, "Use the Sequential packing strategy.") + .value("Turbo", Turbo, "Use the best Turbo packing strategy for this architecture.") + .export_values(); + + // Wrap the base class. + auto class_def = py::class_{m, "LVQLoader", "Generic LVQ Loader"}; + class_def + .def( + py::init< + UnspecializedVectorDataLoader, + size_t, + size_t, + size_t, + svs::quantization::lvq::LVQStrategyDispatch>(), + py::arg("datafile"), + py::arg("primary"), + py::arg("residual") = 0, + py::arg("padding") = 0, + py::arg("strategy") = Auto, + std::string(compression_constructor_proto).c_str() + ) + .def( + py::init([](const std::string& path, + size_t padding, + svs::quantization::lvq::LVQStrategyDispatch strategy) { + return LVQ{LVQReloader(path), padding, strategy}; + }), + py::arg("directory"), + py::arg("padding") = 0, + py::arg("strategy") = Auto, + std::string(reload_constructor_proto).c_str() + ) + .def( + "reload_from", + [](const LVQ& loader, const std::string& dir) { + auto copy = loader; + copy.source_ = LVQReloader{dir}; + return copy; + }, + py::arg("directory"), + R"( +Create a copy of the argument loader configured to reload a previously saved LVQ dataset +from the given directory.)" + ) + .def_readonly( + "primary_bits", + &LVQ::primary_, + "The number of bits used for the primary encoding." + ) + .def_readonly( + "residual_bits", + &LVQ::residual_, + "The number of bits used for the residual encoding." + ) + .def_readonly("strategy", &LVQ::strategy_, "The packing strategy to use.") + .def_readonly("dims", &LVQ::dims_, "The number of dimensions."); + + // Compression Sources + wrap_lvq_alias<4, 0>( + class_def, m, "LVQ4", "Perform one level LVQ compression using 4-bits." + ); + wrap_lvq_alias<8, 0>( + class_def, m, "LVQ8", "Perform one level LVQ compression using 8-bits." + ); + wrap_lvq_alias<4, 4>( + class_def, + m, + "LVQ4x4", + "Perform two level compression using 4 bits for the primary and residual." + ); + wrap_lvq_alias<4, 8>( + class_def, + m, + "LVQ4x8", + "Perform two level compression using 4 bits for the primary and 8 bits for the " + "residual residual." + ); + wrap_lvq_alias<8, 8>( + class_def, + m, + "LVQ8x8", + "Perform two level compression using 8 bits for the primary and residual." 
+ ); +} + using MatrixType = float; using MatrixAlloc = svs::lib::Allocator; using MatrixData = svs::data::SimpleData; + +// Helper function to convert leanvec Python matrices to SimpleData +// Bundles both the matrices in a tuple +template +std::optional> convert_leanvec_matrices( + const std::optional& data_matrix, const std::optional& query_matrix +) { + // Convert the matrices from Python arrays to SimpleData + auto data_matrix_ = + transform_optional(create_data, data_matrix); + auto query_matrix_ = + transform_optional(create_data, query_matrix); + + if (data_matrix_.has_value() && !query_matrix_.has_value()) { + fmt::print("Warning: Query matrix not provided, using the Data matrix for both!"); + query_matrix_ = data_matrix_; + } else if (query_matrix_.has_value() && !data_matrix_.has_value()) { + throw ANNEXCEPTION("Invalid option: Query matrix provided but not the Data matrix!" + ); + } + + if (!data_matrix_.has_value()) { + return std::nullopt; + } + + return std::optional>( + std::in_place, std::move(data_matrix_).value(), std::move(query_matrix_).value() + ); +} + +/// Generate bindings for LeanVec compressors and loaders. +void wrap_leanvec(py::module& m) { + using enum svs::leanvec::LeanVecKind; + wrap_logging(m); + + // Kind of data types used for primary and secondary. + py::enum_( + m, "LeanVecKind", "LeanVec primary and secondary types" + ) + .value("float32", float32, "Uncompressed float32") + .value("float16", float16, "Uncompressed float16") + .value("lvq8", lvq8, "Compressed with LVQ 8bits") + .value("lvq4", lvq4, "Compressed with LVQ 4bits"); + + // Wrap the base class. + auto class_def = py::class_{m, "LeanVecLoader", "Generic LeanVec Loader"}; + class_def + .def( + py::init([](UnspecializedVectorDataLoader datafile, + size_t leanvec_dims, + svs::leanvec::LeanVecKind primary_kind, + svs::leanvec::LeanVecKind secondary_kind, + const std::optional>& data_matrix, + const std::optional>& query_matrix, + size_t alignment) { + return LeanVec{ + datafile, + leanvec_dims, + primary_kind, + secondary_kind, + convert_leanvec_matrices(data_matrix, query_matrix), + alignment}; + }), + py::arg("datafile"), + py::arg("leanvec_dims"), + py::arg("primary_kind") = lvq8, + py::arg("secondary_kind") = lvq8, + py::arg("data_matrix") = py::none(), + py::arg("query_matrix") = py::none(), + py::arg("alignment") = 32, + std::string(leanvec_online_proto).c_str() + ) + .def( + py::init([](const std::string& path, size_t alignment) { + return LeanVec{LeanVecReloader(path), alignment}; + }), + py::arg("directory"), + py::arg("alignment") = 32, + std::string(leanvec_reload_proto).c_str() + ) + .def( + "reload_from", + [](const LeanVec& loader, const std::string& dir) { + auto copy = loader; + copy.source_ = LeanVecReloader{dir}; + return copy; + }, + py::arg("directory"), + R"( +Create a copy of the argument loader configured to reload a previously saved LeanVec dataset +from the given directory.)" + ) + .def_readonly( + "leanvec_dims", &LeanVec::leanvec_dims_, "The reduced dimensionality." + ) + .def_readonly("dims", &LeanVec::dims_, "The full-dimensionality.") + .def_readonly( + "primary_kind", + &LeanVec::primary_kind_, + "The encoding of the reduced dimensional dataset." + ) + .def_readonly( + "secondary_kind", + &LeanVec::secondary_kind_, + "The encoding of the full-dimensional dataset." + ) + .def_readwrite( + "alignment", &LeanVec::alignment_, "The alignment to use for LVQ encoded data." + ); +} + } // namespace namespace core { @@ -117,6 +641,15 @@ Construct a new ``svs.GraphLoader``. 
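Taken together, the wrappers above expose the fallback controls and the LVQ / LeanVec loaders to Python. A brief sketch of how they are typically combined (the `.svs` path and dimensionality are placeholders):

```python
import svs

data = svs.VectorDataLoader("data.svs", svs.DataType.float32, dims = 128)

# Two-level LVQ: 4-bit primary, 8-bit residual, Turbo packing.
lvq = svs.LVQLoader(data, primary = 4, residual = 8, strategy = svs.LVQStrategy.Turbo)

# LeanVec: reduce to 64 dimensions with an LVQ8 primary and float16 secondary.
leanvec = svs.LeanVecLoader(
    data,
    leanvec_dims = 64,
    primary_kind = svs.LeanVecKind.lvq8,
    secondary_kind = svs.LeanVecKind.float16,
    alignment = 32,
)

# Warn (rather than silently fall back) if the LVQ/LeanVec backend is unavailable.
svs.set_fallback_mode(svs.FallbackMode.Warning)
```

Either loader can then be passed wherever a `svs.VectorDataLoader` is accepted, e.g. `svs.Vamana.build(...)` or the `svs.Vamana(...)` reload constructor.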
})); py::implicitly_convertible(); + ///// Fallback + wrap_fallback(m); + + ///// LVQ + wrap_lvq(m); + + ///// LeanVec + wrap_leanvec(m); + ///// TOML Reconstructions m.def("__reformat_toml", [](const std::filesystem::path& path) { toml::table t = toml::parse_file(path.c_str()); diff --git a/bindings/python/src/dynamic_vamana.cpp b/bindings/python/src/dynamic_vamana.cpp index a91daafa..4b7bad32 100644 --- a/bindings/python/src/dynamic_vamana.cpp +++ b/bindings/python/src/dynamic_vamana.cpp @@ -43,6 +43,8 @@ namespace svs::python::dynamic_vamana { namespace { +namespace lvq = svs::quantization::lvq; + template svs::DynamicVamana build_from_array( const svs::index::vamana::VamanaBuildParameters& parameters, @@ -214,13 +216,76 @@ svs::DynamicVamana assemble_uncompressed( ); } +template < + typename Dist, + size_t Primary, + size_t Residual, + lvq::LVQPackingStrategy Strategy, + size_t N> +svs::DynamicVamana assemble_lvq( + const std::filesystem::path& config_path, + const UnspecializedGraphLoader& graph_loader, + svs::quantization::lvq::LVQLoader loader, + Dist distance, + size_t num_threads, + bool debug_load_from_static +) { + auto load_graph = svs::lib::Lazy([&]() { + return svs::graphs::SimpleBlockedGraph::load(graph_loader.path()); + }); + + return svs::DynamicVamana::assemble( + config_path, + load_graph, + loader.rebind_alloc(as_blocked), + distance, + num_threads, + debug_load_from_static + ); +} + +template +svs::DynamicVamana assemble_leanvec( + const std::filesystem::path& config_path, + const UnspecializedGraphLoader& graph_loader, + svs::leanvec::LeanVecLoader loader, + Dist distance, + size_t num_threads, + bool debug_load_from_static +) { + auto load_graph = svs::lib::Lazy([&]() { + return svs::graphs::SimpleBlockedGraph::load(graph_loader.path()); + }); + + return svs::DynamicVamana::assemble( + config_path, + load_graph, + loader.rebind_alloc(as_blocked), + distance, + num_threads, + debug_load_from_static + ); +} + template void register_assembly(Dispatcher& dispatcher) { for_standard_specializations([&]() { dispatcher.register_target(&assemble_uncompressed); }); + + for_compressed_specializations( + [&]() { + dispatcher.register_target(&assemble_lvq); + } + ); + + for_leanvec_specializations([&]( + ) { + dispatcher.register_target(&assemble_leanvec); + }); } -using DynamicVamanaAssembleTypes = std::variant; +using DynamicVamanaAssembleTypes = + std::variant; svs::DynamicVamana assemble( const std::string& config_path, diff --git a/bindings/python/src/flat.cpp b/bindings/python/src/flat.cpp index a897dd30..78cc88af 100644 --- a/bindings/python/src/flat.cpp +++ b/bindings/python/src/flat.cpp @@ -24,6 +24,7 @@ #include "svs/lib/datatype.h" #include "svs/lib/dispatcher.h" #include "svs/orchestrators/exhaustive.h" +#include "svs/quantization/lvq/lvq_concept.h" // stl #include @@ -39,6 +40,7 @@ ///// namespace py = pybind11; +namespace lvq = svs::quantization::lvq; namespace svs::python::flat { template void for_standard_specializations(F&& f) { @@ -54,9 +56,22 @@ template void for_standard_specializations(F&& f) { #undef X } +// Compressed search specializations. 
+template void for_lvq_specializations(F&& f) { +#define X(Dist, Primary, Residual, N) f.template operator()() + // Pattern: + // DistanceType, Primary, Residual, Dimensionality + X(DistanceL2, 4, 4, Dynamic); + X(DistanceL2, 8, 0, Dynamic); + + X(DistanceIP, 4, 4, Dynamic); + X(DistanceIP, 8, 0, Dynamic); +#undef X +} + namespace detail { -using FlatSourceTypes = std::variant; +using FlatSourceTypes = std::variant; template svs::Flat assemble_uncompressed( @@ -67,6 +82,15 @@ svs::Flat assemble_uncompressed( return svs::Flat::assemble(std::move(datafile), distance_type, num_threads); } +template +svs::Flat assemble_lvq( + lvq::LVQLoader loader, + D distance, + size_t num_threads +) { + return svs::Flat::assemble(std::move(loader), std::move(distance), num_threads); +} + using AssemblyDispatcher = svs::lib::Dispatcher; @@ -78,6 +102,12 @@ AssemblyDispatcher assembly_dispatcher() { dispatcher.register_target(svs::lib::dispatcher_build_docs, method); }); + // LVQ instantiations. + for_lvq_specializations([&dispatcher]() { + auto method = &assemble_lvq; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + }); + return dispatcher; } ///// @@ -160,8 +190,8 @@ be instantiated based on their applicability to the particular problem instance. The arguments upon which specialization is conducted are: -* `data_loader`: Both kind (type of loader) and inner aspects of the loader like data type - and number of dimensions. +* `data_loader`: Both kind (type of loader) and inner aspects of the loader like data type, + quantization type, and number of dimensions. * `distance`: The distance measure being used. Specializations compiled into the binary are listed below. diff --git a/bindings/python/src/python_bindings.cpp b/bindings/python/src/python_bindings.cpp index e1ac92b6..88a414bd 100644 --- a/bindings/python/src/python_bindings.cpp +++ b/bindings/python/src/python_bindings.cpp @@ -17,6 +17,7 @@ // Dependencies within the python SVS bindings directory. #include "svs/python/allocator.h" #include "svs/python/common.h" +#include "svs/python/conversion.h" #include "svs/python/core.h" #include "svs/python/dynamic_vamana.h" #include "svs/python/flat.h" @@ -202,6 +203,11 @@ Convert the `fvecs` file on disk with 32-bit floating point entries to a `fvecs` // Core data types svs::python::core::wrap(m); +#ifdef USE_PROPRIETARY + // Dataset conversion. + svs::python::conversion::wrap(m); +#endif + // Intel(R) MKL m.def( "have_mkl", diff --git a/bindings/python/src/svs/__init__.py b/bindings/python/src/svs/__init__.py index dd9948e7..43469f8b 100644 --- a/bindings/python/src/svs/__init__.py +++ b/bindings/python/src/svs/__init__.py @@ -31,6 +31,9 @@ k_recall_at, \ generate_test_dataset +# LeanVec computation +from .leanvec import compute_leanvec_matrices + # Make the upgrader available without explicit import. from . 
import upgrader diff --git a/bindings/python/src/svs/common.py b/bindings/python/src/svs/common.py index a0c8e5e2..d827e191 100644 --- a/bindings/python/src/svs/common.py +++ b/bindings/python/src/svs/common.py @@ -58,7 +58,7 @@ def np_to_svs(nptype): if nptype == np.float64: return lib.float64 - raise Exception(f"Could not convert {nptype} to a svs.DataType enum!"); + raise Exception(f"Could not convert {nptype} to a svs.DataType enum!") def read_npy(filename: str): """ @@ -280,3 +280,29 @@ def k_recall_at(gt_idx, result_idx, k: int, at: int): ls_recall = [len(intersect) for intersect in ls_intersection] return sum(ls_recall) / (len(ls_recall) * k) + +def get_lvq_range(data: np.array): + """ + For a given uncompressed dataset, get the difference between the minimum and maximum + values for each vector after LVQ-style preprocessing. + + This pre-processing involves removing the component-wise average of the dataset. + + This is not an efficient function. + + Args: + - data: A 2-D numpy array + + Returns: + - A 1-D numpy array returning the difference between each vector's maximum and + minimum component after pre-processing. + """ + + assert(data.ndim == 2) + center = np.sum(data, axis = 0, dtype = np.float64) / data.shape[0] + centered_data = data - center + + # Obtain the minimum and maximum values for each dimension. + mins = np.min(centered_data, axis = 1) + maxs = np.max(centered_data, axis = 1) + return maxs - mins diff --git a/bindings/python/src/svs/leanvec.py b/bindings/python/src/svs/leanvec.py new file mode 100644 index 00000000..15284725 --- /dev/null +++ b/bindings/python/src/svs/leanvec.py @@ -0,0 +1,24 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
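The `compute_leanvec_matrices` helper added below is consumed from Python roughly as follows (file paths are placeholders); the returned matrices feed the ``data_matrix`` / ``query_matrix`` arguments of ``svs.LeanVecLoader`` for out-of-distribution query workloads:

```python
import svs

data = svs.read_vecs("data.fvecs")          # placeholder paths
queries = svs.read_vecs("queries.fvecs")

# Compute the data/query projection matrices for a 64-dimensional LeanVec dataset.
data_matrix, query_matrix = svs.compute_leanvec_matrices(data, queries, 64)

loader = svs.LeanVecLoader(
    svs.VectorDataLoader("data.svs", svs.DataType.float32),
    leanvec_dims = 64,
    data_matrix = data_matrix,
    query_matrix = query_matrix,
)
```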
+ +import numpy as np +from typing import Tuple + + +def compute_leanvec_matrices(X: np.ndarray, Q: np.ndarray, n_components: int, + n_max_steps: int = 500, rel_tol:float = 1e-3) -> Tuple[np.ndarray, np.ndarray]: + A = np.zeros((Q.shape[1], n_components)) + B = np.zeros((X.shape[1], n_components)) + + return B.astype(np.float32), A.astype(np.float32) diff --git a/bindings/python/src/vamana.cpp b/bindings/python/src/vamana.cpp index 603f5007..fc331889 100644 --- a/bindings/python/src/vamana.cpp +++ b/bindings/python/src/vamana.cpp @@ -49,6 +49,8 @@ ///// namespace py = pybind11; +namespace lvq = svs::quantization::lvq; +namespace leanvec = svs::leanvec; using namespace svs::python::vamana_specializations; @@ -83,12 +85,64 @@ void register_uncompressed_vamana_assemble(Dispatcher& dispatcher) { ); } +template < + size_t Primary, + size_t Residual, + size_t N, + lvq::LVQPackingStrategy Strategy, + typename D> +svs::Vamana assemble_lvq( + const std::filesystem::path& config_path, + const UnspecializedGraphLoader& graph_loader, + lvq::LVQLoader data, + D distance, + size_t num_threads +) { + return svs::Vamana::assemble( + config_path, graph_loader, std::move(data), std::move(distance), num_threads + ); +} + +template void register_lvq_vamana_assemble(Dispatcher& dispatcher) { + compressed_specializations( + [&dispatcher]() { + auto method = &assemble_lvq; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + } + ); +} + +template +svs::Vamana assemble_leanvec( + const std::filesystem::path& config_path, + const UnspecializedGraphLoader& graph_loader, + leanvec::LeanVecLoader data, + D distance, + size_t num_threads +) { + return svs::Vamana::assemble( + config_path, graph_loader, std::move(data), std::move(distance), num_threads + ); +} + +template +void register_leanvec_vamana_assemble(Dispatcher& dispatcher) { + leanvec_specializations( + [&dispatcher]() { + auto method = &assemble_leanvec; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + } + ); +} + template void register_vamana_assembly(Dispatcher& dispatcher) { register_uncompressed_vamana_assemble(dispatcher); + register_lvq_vamana_assemble(dispatcher); + register_leanvec_vamana_assemble(dispatcher); } using VamanaAssembleTypes = - std::variant; + std::variant; ///// ///// Build From File @@ -116,12 +170,60 @@ void register_uncompressed_vamana_build_from_file(Dispatcher& dispatcher) { ); } +template +svs::Vamana build_lvq_from_file( + const svs::index::vamana::VamanaBuildParameters& parameters, + lvq::LVQLoader data, + D distance, + size_t num_threads +) { + return svs::Vamana::build( + parameters, std::move(data), std::move(distance), num_threads + ); +} + +template +void register_lvq_vamana_build_from_file(Dispatcher& dispatcher) { + compressed_specializations( + [&dispatcher]() { + if constexpr (B /* build-enabled*/) { + auto method = &build_lvq_from_file; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + } + } + ); +} + +template +svs::Vamana build_leanvec_from_file( + const svs::index::vamana::VamanaBuildParameters& parameters, + leanvec::LeanVecLoader data, + D distance, + size_t num_threads +) { + return svs::Vamana::build( + parameters, std::move(data), std::move(distance), num_threads + ); +} + +template +void register_leanvec_vamana_build_from_file(Dispatcher& dispatcher) { + leanvec_specializations( + [&dispatcher]() { + auto method = &build_leanvec_from_file; + dispatcher.register_target(svs::lib::dispatcher_build_docs, method); + } + ); +} + template void 
register_vamana_build_from_file(Dispatcher& dispatcher) { register_uncompressed_vamana_build_from_file(dispatcher); + register_lvq_vamana_build_from_file(dispatcher); + register_leanvec_vamana_build_from_file(dispatcher); } -using VamanaBuildTypes = std::variant; +using VamanaBuildTypes = std::variant; ///// ///// Build from Array @@ -310,8 +412,8 @@ be instantiated based on their applicability to the particular problem instance. The arguments upon which specialization is conducted are: -* `data_loader`: Both kind (type of loader) and inner aspects of the loader like data type - and number of dimensions. +* `data_loader`: Both kind (type of loader) and inner aspects of the loader like data type, + quantization type, and number of dimensions. * `distance`: The distance measure being used. Specializations compiled into the binary are listed below. @@ -357,8 +459,9 @@ Construct a Vamana index over the given data file, returning a searchable index. Args: build_parameters (:py:class:`svs.VamanaBuildParameters`): Hyper-parameters controlling index build. - data_loader: The source of the data on-disk. Can be - :py:class:`svs.DataFile` to represent a standard uncompressed dataset. + data_loader: The source of the data on-disk. Can either be + :py:class:`svs.DataFile` to represent a standard uncompressed dataset, or a + compressed loader. distance_type: The similarity-function to use for this index. num_threads: The number of threads to use for index construction. Default: 1. @@ -367,8 +470,8 @@ be instantiated based on their applicability to the particular problem instance. The arguments upon which specialization is conducted are: -* `data_loader`: Both kind (type of loader) and inner aspects of the loader like data type - and number of dimensions. +* `data_loader`: Both kind (type of loader) and inner aspects of the loader like data type, + quantization type, and number of dimensions. * `distance`: The distance measure being used. Specializations compiled into the binary are listed below. diff --git a/bindings/python/tests/common.py b/bindings/python/tests/common.py index 583c91ad..29a1edd4 100644 --- a/bindings/python/tests/common.py +++ b/bindings/python/tests/common.py @@ -24,7 +24,6 @@ # directory of the SVS project. _current_file = Path(__file__).parent.resolve() #/svs/bindings/python/tests ROOT_DIR = _current_file.parents[2] -print("Root:", ROOT_DIR) TEST_DATASET_DIR = ROOT_DIR.joinpath("data", "test_dataset") # Main exports @@ -39,6 +38,8 @@ test_groundtruth_mip = str(TEST_DATASET_DIR.joinpath("groundtruth_mip.ivecs")) test_groundtruth_cosine = str(TEST_DATASET_DIR.joinpath("groundtruth_cosine.ivecs")) test_vamana_reference = str(TEST_DATASET_DIR.joinpath("reference/vamana_reference.toml")) +test_leanvec_data_matrix = str(TEST_DATASET_DIR.joinpath("leanvec_data_matrix.fvecs")) +test_leanvec_query_matrix = str(TEST_DATASET_DIR.joinpath("leanvec_query_matrix.fvecs")) test_number_of_vectors = 10000 test_dimensions = 128 @@ -77,7 +78,7 @@ def get_test_set(A, num_entries: int): """ assert(A.ndim == 2) assert(A.shape[0] >= num_entries) - return A[-num_entries:]; + return A[-num_entries:] def test_threading(f, *args, validate = None, iters = 4, print_times = False): """ @@ -124,6 +125,40 @@ def test_threading(f, *args, validate = None, iters = 4, print_times = False): # speedup when using 4 threads. 
testcase.assertTrue(1.3 * new_time < base_time) +def test_close_lvq(original, reconstructed, primary_bits: int, residual_bits: int = 0): + """ + Test that the reconstructed values are within the expected tolerance for LVQ compressed + data. + + Arguments: + - original: The original, uncompressed data. + - reconstucted: The reconstructed data. + + Keyword Arguments: + - primary_bits: The number of bits in the primary encoding. + - residual_bits: The number of bits in the residual encoding. + """ + + # Obtain the difference between the maximum and minimum values in the pre-processed + # dataset. + spans = svs.common.get_lvq_range(original) + + # Compute the max delta for each component of the dataset. + # NOTE: We *should* divide by another factor of two here, but there are some values in + # the LVQ quantization space that will exceed this threshold due to compression + # limitations. + # + # See the C++ tests for LVQ reconstruction for a more complete explanation. + deltas = spans / (((2 ** primary_bits) - 1) * 2) + if residual_bits != 0: + deltas = deltas / ((2 ** residual_bits) - 1) + + # Ensure that each reconstructed value is within the target threshold (plus a tiny + # fudge factor to help offset rounding imprecision. + upper_bound = np.expand_dims(deltas, axis = 1) + upper_bound = upper_bound + 0.0125 * upper_bound + return np.all(np.abs(original - reconstructed) <= upper_bound) + def test_get_distance(index, distance, data = svs.read_vecs(test_data_vecs), test_distance = True): """ Test the get_distance method of an index by comparing its results with direct distance computation. diff --git a/bindings/python/tests/dataset.py b/bindings/python/tests/dataset.py index bbd7c728..781b1367 100644 --- a/bindings/python/tests/dataset.py +++ b/bindings/python/tests/dataset.py @@ -27,3 +27,19 @@ def is_match(self, d: dict): return False return d["dataset"]["data_type"] == self.data_type + +# LVQ (fallback) datasets +class LVQMatcher(UncompressedMatcher): + def __init__(self, primary: int, residual: int = 0): + super().__init__("float32") + self.primary = primary + self.residual = residual + +# LeanVec (fallback) datasets +class LeanVecMatcher(UncompressedMatcher): + def __init__(self, primary_kind: str, secondary_kind: str, leanvec_dims: int, is_pca: bool = True): + super().__init__("float32") + self.primary_kind = primary_kind + self.secondary_kind = secondary_kind + self.leanvec_dims = leanvec_dims + self.is_pca = is_pca diff --git a/bindings/python/tests/test_common.py b/bindings/python/tests/test_common.py index 8d283c8c..f371ac56 100644 --- a/bindings/python/tests/test_common.py +++ b/bindings/python/tests/test_common.py @@ -141,28 +141,28 @@ def test_vecs_extension_checking(self): self.assertTrue(x.dtype == np.float32) self.assertRaises( RuntimeError, svs.write_vecs, x, os.path.join(self.tempdir_name, "temp.hvecs") - ); + ) # Half x = svs.common.random_dataset(10, 128, dtype = np.float16) self.assertTrue(x.dtype == np.float16) self.assertRaises( RuntimeError, svs.write_vecs, x, os.path.join(self.tempdir_name, "temp.fvecs") - ); + ) # UInt32 x = svs.common.random_dataset(10, 128, dtype = np.uint32) self.assertTrue(x.dtype == np.uint32) self.assertRaises( RuntimeError, svs.write_vecs, x, os.path.join(self.tempdir_name, "temp.bvecs") - ); + ) # UInt8 x = svs.common.random_dataset(10, 128, dtype = np.uint8) self.assertTrue(x.dtype == np.uint8) self.assertRaises( RuntimeError, svs.write_vecs, x, os.path.join(self.tempdir_name, "temp.ivecs") - ); + ) def test_generate_test_dataset(self): 
svs.generate_test_dataset( diff --git a/bindings/python/tests/test_dynamic_vamana.py b/bindings/python/tests/test_dynamic_vamana.py index 84d78217..1586e779 100644 --- a/bindings/python/tests/test_dynamic_vamana.py +++ b/bindings/python/tests/test_dynamic_vamana.py @@ -58,7 +58,7 @@ def recall_check( configdir = os.path.join(tempdir, "config") graphdir = os.path.join(tempdir, "graph") datadir = os.path.join(tempdir, "data") - index.save(configdir, graphdir, datadir); + index.save(configdir, graphdir, datadir) reloaded = svs.DynamicVamana( configdir, diff --git a/bindings/python/tests/test_flat.py b/bindings/python/tests/test_flat.py index fb36b036..3a916478 100644 --- a/bindings/python/tests/test_flat.py +++ b/bindings/python/tests/test_flat.py @@ -53,6 +53,10 @@ def _loaders(self, file: svs.VectorDataLoader): svs.DistanceType.L2: 1.0, svs.DistanceType.MIP: 1.0, }), + (svs.LVQ8(file, 0), { + svs.DistanceType.L2: 0.99997, + svs.DistanceType.MIP: 0.99993, + }), ] def _do_test(self, flat, queries, groundtruth, distance, data = svs.read_vecs(test_data_vecs), expected_recall = 1.0, test_distance = True): diff --git a/bindings/python/tests/test_loader_api.py b/bindings/python/tests/test_loader_api.py index b77b28cd..a86b5e9b 100644 --- a/bindings/python/tests/test_loader_api.py +++ b/bindings/python/tests/test_loader_api.py @@ -18,9 +18,13 @@ import svs # Local dependencies -from .common import test_data_vecs +from .common import \ + isapprox, \ + test_data_svs, \ + test_data_vecs, \ + test_data_dims -DEBUG = False; +DEBUG = False class LoaderAPITester(unittest.TestCase): """ @@ -31,3 +35,66 @@ def _get_basic_loader(self): self.assertEqual(loader.data_type, svs.float32) self.assertEqual(loader.dims, 128) return loader + + def test_lvq_loader(self): + loader = self._get_basic_loader() + + # One Level LVQ - 4 bits. + lvq = svs.LVQLoader(loader, primary = 4) + self.assertEqual(lvq.dims, 128) + self.assertEqual(lvq.primary_bits, 4) + self.assertEqual(lvq.residual_bits, 0) + self.assertEqual(lvq.strategy, svs.LVQStrategy.Auto) + + # One Level LVQ - 8 bits. 
+ lvq = svs.LVQLoader( + loader, primary = 8, strategy = svs.LVQStrategy.Sequential + ) + self.assertEqual(lvq.dims, 128) + self.assertEqual(lvq.primary_bits, 8) + self.assertEqual(lvq.residual_bits, 0) + self.assertEqual(lvq.strategy, svs.LVQStrategy.Sequential) + + # Two level LVQ - 4x8 bits + lvq = svs.LVQLoader( + loader, primary = 4, residual = 8, strategy = svs.LVQStrategy.Turbo + ) + self.assertEqual(lvq.dims, 128) + self.assertEqual(lvq.primary_bits, 4) + self.assertEqual(lvq.residual_bits, 8) + self.assertEqual(lvq.strategy, svs.LVQStrategy.Turbo) + + + # Two level LVQ - 8x8 bits + lvq = svs.LVQLoader(loader, primary = 8, residual = 8) + self.assertEqual(lvq.dims, 128) + self.assertEqual(lvq.primary_bits, 8) + self.assertEqual(lvq.residual_bits, 8) + self.assertEqual(lvq.strategy, svs.LVQStrategy.Auto) + + def test_leanvec_loader(self): + loader = self._get_basic_loader() + + kinds = [ + svs.LeanVecKind.lvq4, + svs.LeanVecKind.lvq8, + svs.LeanVecKind.float16, + svs.LeanVecKind.float32, + ] + + alignments = [0, 32] + dims = [64, 96] + + for (p, s, a, d) in itertools.product(kinds, kinds, alignments, dims): + leanvec = svs.LeanVecLoader( + loader, + d, + primary_kind = p, + secondary_kind = s, + alignment = a + ) + + self.assertEqual(leanvec.dims, 128) + self.assertEqual(leanvec.primary_kind, p) + self.assertEqual(leanvec.secondary_kind, s) + self.assertEqual(leanvec.alignment, a) diff --git a/bindings/python/tests/test_reconstruction.py b/bindings/python/tests/test_reconstruction.py index c8e0e08b..c810fa26 100644 --- a/bindings/python/tests/test_reconstruction.py +++ b/bindings/python/tests/test_reconstruction.py @@ -26,21 +26,72 @@ # Local dependencies from .common import \ + isapprox, \ test_data_svs, \ test_data_vecs, \ + test_data_dims, \ test_graph, \ - test_vamana_config + test_vamana_config, \ + test_close_lvq -DEBUG = False; +DEBUG = False class ReconstructionTester(unittest.TestCase): """ Test the reconstruction interface for indexex. 
""" def _get_loaders(self, loader: svs.VectorDataLoader): + sequential = svs.LVQStrategy.Sequential + turbo = svs.LVQStrategy.Turbo + return [ # Uncompressed loader, + # LVQ + svs.LVQLoader(loader, primary = 8, padding = 0), + svs.LVQLoader(loader, primary = 4, padding = 0), + svs.LVQLoader( + loader, primary = 4, residual = 4, strategy = sequential, padding = 0 + ), + svs.LVQLoader( + loader, primary = 4, residual = 4, strategy = turbo, padding = 0 + ), + svs.LVQLoader( + loader, primary = 4, residual = 8, strategy = sequential, padding = 0 + ), + svs.LVQLoader( + loader, primary = 4, residual = 8, strategy = turbo, padding = 0 + ), + svs.LVQLoader(loader, primary = 8, residual = 8, padding = 0), + + # LeanVec + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.float32, + secondary_kind = svs.LeanVecKind.float32, + ), + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq4, + secondary_kind = svs.LeanVecKind.lvq8, + alignment = 0 + ), + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq8, + secondary_kind = svs.LeanVecKind.lvq8, + alignment = 0 + ), + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq8, + secondary_kind = svs.LeanVecKind.float16, + alignment = 0 + ), ] def _test_misc(self, loader: svs.VectorDataLoader, data): @@ -68,6 +119,30 @@ def _test_misc(self, loader: svs.VectorDataLoader, data): vamana.reconstruct(np.zeros((10, 10), dtype = np.uint64)).shape == (10, 10, d) ) + def _compare_lvq(self, data, reconstructed, loader: svs.LVQLoader): + print(f"LVQ: primary = {loader.primary_bits}, residual = {loader.residual_bits}") + self.assertTrue(isinstance(loader, svs.LVQLoader)) + self.assertTrue(test_close_lvq( + data, + reconstructed, + primary_bits = loader.primary_bits, + residual_bits = loader.residual_bits + )) + + def _compare_leanvec(self, data, reconstructed, loader: svs.LeanVecLoader): + self.assertTrue(isinstance(loader, svs.LeanVecLoader)) + secondary_kind = loader.secondary_kind + if secondary_kind == svs.LeanVecKind.float32: + self.assertTrue(np.array_equal(data, reconstructed)) + elif secondary_kind == svs.LeanVecKind.float16: + self.assertTrue(np.allclose(data, reconstructed)) + elif secondary_kind == svs.LeanVecKind.lvq4: + self.assertTrue(test_close_lvq(data, reconstructed, primary_bits = 4)) + elif secondary_kind == svs.LeanVecKind.lvq8: + self.assertTrue(test_close_lvq(data, reconstructed, primary_bits = 8)) + else: + raise Exception(f"Unknown leanvec kind {secondary_kind}") + def test_reconstruction(self): default_loader = svs.VectorDataLoader(test_data_svs, svs.DataType.float32) all_loaders = self._get_loaders(default_loader) @@ -88,6 +163,10 @@ def test_reconstruction(self): if isinstance(loader, svs.VectorDataLoader): self.assertTrue(np.array_equal(shuffled_data, r)) + elif isinstance(loader, svs.LVQLoader): + self._compare_lvq(shuffled_data, r, loader) + elif isinstance(loader, svs.LeanVecLoader): + self._compare_leanvec(shuffled_data, r, loader) else: raise Exception(f"Unhandled loader kind: {loader}") diff --git a/bindings/python/tests/test_vamana.py b/bindings/python/tests/test_vamana.py index b0a4f063..7aa91144 100644 --- a/bindings/python/tests/test_vamana.py +++ b/bindings/python/tests/test_vamana.py @@ -42,9 +42,12 @@ get_test_set, \ test_get_distance -from .dataset import UncompressedMatcher +from .dataset import \ + UncompressedMatcher, \ + LVQMatcher, \ + LeanVecMatcher -DEBUG = False; +DEBUG = False class 
VamanaTester(unittest.TestCase): """ @@ -60,8 +63,114 @@ def setUp(self): self.reference_results = toml.load(f) def _setup(self, loader: svs.VectorDataLoader): + sequential = svs.LVQStrategy.Sequential + turbo = svs.LVQStrategy.Turbo + + # Generate LeanVec OOD matrices + data = svs.read_vecs(test_data_vecs) + queries = svs.read_vecs(test_queries) + data_matrix, query_matrix = svs.compute_leanvec_matrices(data, queries, 64) + self.loader_and_matcher = [ (loader, UncompressedMatcher("float32")), + # LVQ + (svs.LVQLoader(loader, primary = 8, padding = 0), LVQMatcher(8)), + (svs.LVQLoader(loader, primary = 4, padding = 0), LVQMatcher(4)), + (svs.LVQLoader( + loader, primary = 4, residual = 4, strategy = sequential, padding = 0), + LVQMatcher(4, 4) + ), + (svs.LVQLoader( + loader, primary = 4, residual = 4, strategy = turbo, padding = 0), + LVQMatcher(4, 4) + ), + (svs.LVQLoader( + loader, primary = 4, residual = 8, strategy = sequential, padding = 0), + LVQMatcher(4, 8) + ), + (svs.LVQLoader( + loader, primary = 4, residual = 8, strategy = turbo, padding = 0), + LVQMatcher(4, 8) + ), + (svs.LVQLoader( + loader, primary = 8, residual = 8, padding = 0), + LVQMatcher(8, 8) + ), + + #LeanVec + ( + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.float32, + secondary_kind = svs.LeanVecKind.float32, + ), + LeanVecMatcher("float32", "float32", 64) + ), + ( + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq4, + secondary_kind = svs.LeanVecKind.lvq4, + ), + LeanVecMatcher("lvq4", "lvq4", 64) + ), + ( + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq4, + secondary_kind = svs.LeanVecKind.lvq8, + ), + LeanVecMatcher("lvq4", "lvq8", 64), + ), + ( + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq8, + secondary_kind = svs.LeanVecKind.lvq8, + alignment = 0 + ), + LeanVecMatcher("lvq8", "lvq8", 64) + ), + ( + svs.LeanVecLoader( + loader, + leanvec_dims = 96, + primary_kind = svs.LeanVecKind.float32, + secondary_kind = svs.LeanVecKind.float32, + alignment = 0 + ), + LeanVecMatcher("float32", "float32", 96) + ), + + # LeanVec OOD + ( + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.float32, + secondary_kind = svs.LeanVecKind.float32, + data_matrix = data_matrix, + query_matrix = query_matrix, + alignment = 0 + ), + LeanVecMatcher("float32", "float32", 64, False) + ), + ( + svs.LeanVecLoader( + loader, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq8, + secondary_kind = svs.LeanVecKind.lvq8, + data_matrix = data_matrix, + query_matrix = query_matrix, + alignment = 0 + ), + LeanVecMatcher("lvq8", "lvq8", 64, False) + ) ] def _distance_map(self): @@ -114,7 +223,7 @@ def _test_single_query( queries ): - I_full, D_full = vamana.search(queries, 10); + I_full, D_full = vamana.search(queries, 10) I_single = [] D_single = [] @@ -250,15 +359,18 @@ def _test_basic(self, loader, matcher, first_iter: bool = False): configdir = os.path.join(tempdir, "config") graphdir = os.path.join(tempdir, "graph") datadir = os.path.join(tempdir, "data") - vamana.save(configdir, graphdir, datadir); + vamana.save(configdir, graphdir, datadir) # Reload from raw-files. 
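            # Editorial aside (illustrative, not part of the original test): the
            # saved data directory can also be reopened through a compressed
            # loader, e.g.
            #     reloader = svs.LVQLoader(datadir, strategy = svs.LVQStrategy.Sequential, padding = 32)
            #     svs.Vamana(configdir, graphdir, reloader, num_threads = num_threads)
            # which is what test_lvq_reload below exercises.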
reloaded = svs.Vamana(configdir, graphdir, datadir, svs.DistanceType.L2) - self.assertTrue( - vamana.experimental_backend_string == - reloaded.experimental_backend_string - ) + # Backend strings should match unless this is LVQ loader with a Turbo backend + # TODO: Allow for more introspection in the LVQLoader fields. + if not isinstance(loader, (svs.LVQLoader, svs.LeanVecLoader)): + self.assertTrue( + vamana.experimental_backend_string == + reloaded.experimental_backend_string + ) reloaded.num_threads = num_threads self._test_basic_inner( @@ -282,6 +394,73 @@ def test_basic(self): self._test_basic(loader, matcher, first_iter = first_iter) first_iter = False + def test_lvq_reload(self): + # Test LVQ reloading with different alignemnts and strategies. + default_loader = svs.VectorDataLoader( + test_data_svs, svs.DataType.float32, dims = test_data_dims + ) + + lvq_loader = svs.LVQLoader( + default_loader, + primary = 4, + residual = 8, + strategy = svs.LVQStrategy.Sequential + ) + matcher = LVQMatcher(4, 8) + + num_threads = 2 + vamana = svs.Vamana( + test_vamana_config, + svs.GraphLoader(test_graph), + lvq_loader, + svs.DistanceType.L2, + num_threads = num_threads + ) + + print(f"Testing: {vamana.experimental_backend_string}") + self._test_basic_inner( + vamana, + matcher, + num_threads, + skip_thread_test = False, + first_iter = False, + ) + + # Test saving and reloading. + with TemporaryDirectory() as tempdir: + configdir = os.path.join(tempdir, "config") + graphdir = os.path.join(tempdir, "graph") + datadir = os.path.join(tempdir, "data") + vamana.save(configdir, graphdir, datadir) + + reloader = svs.LVQLoader( + datadir, + strategy = svs.LVQStrategy.Sequential, + padding = 32, + ) + + print("Reloading LVQ with padding") + self._test_basic_inner( + svs.Vamana(configdir, graphdir, reloader, num_threads = num_threads), + matcher, + num_threads, + skip_thread_test = False, + first_iter = False, + ) + + reloader = svs.LVQLoader( + datadir, strategy = svs.LVQStrategy.Turbo, padding = 32, + ) + + print("Reloading LVQ as Turbo") + self._test_basic_inner( + svs.Vamana(configdir, graphdir, reloader, num_threads = num_threads), + matcher, + num_threads, + skip_thread_test = False, + first_iter = False, + ) + def _groundtruth_map(self): return { svs.DistanceType.L2: test_groundtruth_l2, @@ -301,7 +480,7 @@ def _test_build( params = self._get_build_parameters( 'vamana_test_build', distance_map[distance], matcher - ); + ) vamana = svs.Vamana.build(params, loader, distance, num_threads = num_threads) print(f"Building: {vamana.experimental_backend_string}") @@ -353,6 +532,10 @@ def test_build(self): # Build directly from data data = svs.read_vecs(test_data_vecs) + # Generate LeanVec OOD matrices + queries = svs.read_vecs(test_queries) + data_matrix, query_matrix = svs.compute_leanvec_matrices(data, queries, 64) + matcher = UncompressedMatcher("float32") self._test_build(data, svs.DistanceType.L2, matcher) self._test_build(data, svs.DistanceType.MIP, matcher) @@ -378,3 +561,61 @@ def test_build(self): self._test_build(loader, svs.DistanceType.L2, matcher) self._test_build(loader, svs.DistanceType.MIP, matcher) self._test_build(loader, svs.DistanceType.Cosine, matcher) + + data = svs.VectorDataLoader(test_data_svs, svs.DataType.float32, dims = 128) + + # Build from LVQ + loader = svs.LVQ8(data) + matcher = LVQMatcher(8) + self._test_build(loader, svs.DistanceType.L2, matcher) + self._test_build(loader, svs.DistanceType.MIP, matcher) + self._test_build(loader, svs.DistanceType.Cosine, matcher) + + loader = 
svs.LVQ4x4(data) + matcher = LVQMatcher(4, 4) + self._test_build(loader, svs.DistanceType.L2, matcher) + self._test_build(loader, svs.DistanceType.MIP, matcher) + self._test_build(loader, svs.DistanceType.Cosine, matcher) + + loader = svs.LVQ4x8(data) + matcher = LVQMatcher(4, 8) + self._test_build(loader, svs.DistanceType.L2, matcher) + self._test_build(loader, svs.DistanceType.MIP, matcher) + self._test_build(loader, svs.DistanceType.Cosine, matcher) + + # Build from LeanVec + loader = svs.LeanVecLoader( + data, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.float32, + secondary_kind = svs.LeanVecKind.float32 + ) + matcher = LeanVecMatcher("float32", "float32", 64) + self._test_build(loader, svs.DistanceType.L2, matcher) + self._test_build(loader, svs.DistanceType.MIP, matcher) + self._test_build(loader, svs.DistanceType.Cosine, matcher) + + loader = svs.LeanVecLoader( + data, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq8, + secondary_kind = svs.LeanVecKind.lvq8 + ) + matcher = LeanVecMatcher("lvq8", "lvq8", 64) + self._test_build(loader, svs.DistanceType.L2, matcher) + self._test_build(loader, svs.DistanceType.MIP, matcher) + self._test_build(loader, svs.DistanceType.Cosine, matcher) + + # Build from LeanVec OOD + loader = svs.LeanVecLoader( + data, + leanvec_dims = 64, + primary_kind = svs.LeanVecKind.lvq8, + secondary_kind = svs.LeanVecKind.lvq8, + data_matrix = data_matrix, + query_matrix = query_matrix + ) + matcher = LeanVecMatcher("lvq8", "lvq8", 64, False) + self._test_build(loader, svs.DistanceType.L2, matcher) + self._test_build(loader, svs.DistanceType.MIP, matcher) + self._test_build(loader, svs.DistanceType.Cosine, matcher) diff --git a/data/test_dataset/leanvec_data_matrix.fvecs b/data/test_dataset/leanvec_data_matrix.fvecs new file mode 100644 index 00000000..ea76bc30 Binary files /dev/null and b/data/test_dataset/leanvec_data_matrix.fvecs differ diff --git a/data/test_dataset/leanvec_query_matrix.fvecs b/data/test_dataset/leanvec_query_matrix.fvecs new file mode 100644 index 00000000..88209216 Binary files /dev/null and b/data/test_dataset/leanvec_query_matrix.fvecs differ diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index b9f1c98e..998f2138 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -74,6 +74,19 @@ add_test( groundtruth_euclidean.ivecs ) +# The fallback executable. +add_executable(fallback fallback.cpp) +target_include_directories(fallback PRIVATE ${CMAKE_CURRENT_LIST_DIR}) +target_link_libraries(fallback ${SVS_LIB} svs_compile_options svs_native_options) +add_test( + NAME test_fallback + COMMAND + fallback + data_f32.fvecs + queries_f32.fvecs + groundtruth_euclidean.ivecs +) + ##### ##### Dispatcher diff --git a/examples/cpp/fallback.cpp b/examples/cpp/fallback.cpp new file mode 100644 index 00000000..6c3b57d3 --- /dev/null +++ b/examples/cpp/fallback.cpp @@ -0,0 +1,247 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//! 
[Example All] + +//! [Includes] +// SVS Dependencies +#include "svs/fallback/fallback.h" +#include "svs/core/recall.h" // Convenient k-recall@n computation. +#include "svs/orchestrators/vamana.h" // bulk of the dependencies required. + +// Alternative main definition +#include "svsmain.h" + +// stl +#include +#include +#include +#include +//! [Includes] + +//! [Helper Utilities] +double run_recall( + svs::Vamana& index, + const svs::data::SimpleData& queries, + const svs::data::SimpleData& groundtruth, + size_t search_window_size, + size_t num_neighbors, + std::string_view message = "" +) { + index.set_search_window_size(search_window_size); + auto results = index.search(queries, num_neighbors); + double recall = svs::k_recall_at_n(groundtruth, results, num_neighbors, num_neighbors); + if (!message.empty()) { + fmt::print("[{}] ", message); + } + fmt::print("Windowsize = {}, Recall = {}\n", search_window_size, recall); + return recall; +} + +const bool DEBUG = true; +void check(double expected, double got, double eps = 0.001) { + double diff = std::abs(expected - got); + if constexpr (DEBUG) { + fmt::print("Expected {}. Got {}\n", expected, got); + } else { + if (diff > eps) { + throw ANNEXCEPTION("Expected ", expected, ". Got ", got, '!'); + } + } +} +//! [Helper Utilities] + +// Alternative main definition +int svs_main(std::vector args) { + //! [Argument Extraction] + const size_t nargs = args.size(); + if (nargs != 4) { + throw ANNEXCEPTION("Expected 3 arguments. Instead, got ", nargs, '!'); + } + const std::string& data_vecs = args.at(1); + const std::string& query_vecs = args.at(2); + const std::string& groundtruth_vecs = args.at(3); + //! [Argument Extraction] + + // Building the index + + //! [Build Parameters] + auto parameters = svs::index::vamana::VamanaBuildParameters{ + 1.2, // alpha + 64, // graph max degree + 128, // search window size + 1024, // max candidate pool size + 60, // prune to degree + true, // full search history + }; + //! [Build Parameters] + + //! [Index Build] + size_t num_threads = 4; + svs::Vamana index = svs::Vamana::build( + parameters, svs::VectorDataLoader(data_vecs), svs::DistanceL2(), num_threads + ); + //! [Index Build] + + // Searching the index + + //! [Load Aux] + // Load the queries and ground truth. + auto queries = svs::load_data(query_vecs); + auto groundtruth = svs::load_data(groundtruth_vecs); + //! [Load Aux] + + //! [Perform Queries] + index.set_search_window_size(30); + svs::QueryResult results = index.search(queries, 10); + double recall = svs::k_recall_at_n(groundtruth, results); + check(0.8215, recall); + //! [Perform Queries] + + //! [Search Window Size] + auto expected_recall = + std::map({{10, 0.5509}, {20, 0.7281}, {30, 0.8215}, {40, 0.8788}}); + for (auto windowsize : {10, 20, 30, 40}) { + recall = run_recall(index, queries, groundtruth, windowsize, 10, "Sweep"); + check(expected_recall.at(windowsize), recall); + } + //! [Search Window Size] + + // Saving the index + + //! [Saving] + index.save("example_config", "example_graph", "example_data"); + //! [Saving] + + // Reloading a saved index + + //! [Loading] + // We can reload an index from a previously saved set of files. + index = svs::Vamana::assemble( + "example_config", + svs::GraphLoader("example_graph"), + svs::VectorDataLoader("example_data"), + svs::DistanceType::L2, + 4 // num_threads + ); + + recall = run_recall(index, queries, groundtruth, 30, 10, "Reload"); + check(0.8215, recall); + //! [Loading] + + // Search using vector compression + + //! 
[Compressed Loader] + // Quantization + size_t padding = 32; + namespace lvq = svs::quantization::lvq; + namespace leanvec = svs::leanvec; + namespace fallback = svs::fallback; + + // Wrap the compressor object in a lazy functor. + // This will defer loading and compression of the LVQ dataset until the threadpool + // used in the index has been created. + auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader("example_data").load(); + return lvq::LVQDataset<8, 0, 128>::compress(data, threadpool, padding); + }); + index = svs::Vamana::assemble( + "example_config", + svs::GraphLoader("example_graph"), + compressor, + svs::DistanceL2(), + 4 + ); + + //! [Compressed Loader] + + //! [Search Compressed] + recall = run_recall(index, queries, groundtruth, 30, 10, "Compressed Load"); + check(0.8215, recall); + //! [Search Compressed] + + //! [Build Index Compressed] + // Compressed building + index = + svs::Vamana::build(parameters, compressor, svs::DistanceL2(), num_threads); + recall = run_recall(index, queries, groundtruth, 30, 10, "Compressed Build"); + check(0.8212, recall); + //! [Build Index Compressed] + + // ! [Only Loading] + // We can reload an index from a previously saved set of files. + index = svs::Vamana::assemble( + "example_config", + svs::GraphLoader("example_graph"), + svs::VectorDataLoader("example_data"), + svs::DistanceType::L2, + 4 // num_threads + ); + //! [Only Loading] + + //! [Set n-threads] + index.set_threadpool(svs::threads::DefaultThreadPool(4)); + //! [Set n-threads] + + auto compressor_lean = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader("example_data").load(); + return leanvec::LeanDataset, leanvec::UsingLVQ<8>, 64, 128>:: + reduce(data, std::nullopt, threadpool, padding); + }); + index = svs::Vamana::assemble( + "example_config", + svs::GraphLoader("example_graph"), + compressor_lean, + svs::DistanceL2(), + 4 + ); + + //! [Compressed Loader] + + //! [Search Compressed] + recall = run_recall(index, queries, groundtruth, 30, 10, "Compressed Lean Load"); + check(0.8215, recall); + //! [Search Compressed] + + //! [Build Index Compressed] + // Compressed building + index = + svs::Vamana::build(parameters, compressor, svs::DistanceL2(), num_threads); + recall = run_recall(index, queries, groundtruth, 30, 10, "Compressed Build"); + check(0.8212, recall); + //! [Build Index Compressed] + + //! [Only Loading] + // We can reload an index from a previously saved set of files. + index = svs::Vamana::assemble( + "example_config", + svs::GraphLoader("example_graph"), + svs::VectorDataLoader("example_data"), + svs::DistanceType::L2, + 4 // num_threads + ); + //! [Only Loading] + + //! [Set n-threads] + index.set_threadpool(svs::threads::DefaultThreadPool(4)); + //! [Set n-threads] + + return 0; +} + +// Special main providing some helpful utilties. +SVS_DEFINE_MAIN(); +//! [Example All] diff --git a/examples/python/example_fallback.py b/examples/python/example_fallback.py new file mode 100644 index 00000000..c49bd25f --- /dev/null +++ b/examples/python/example_fallback.py @@ -0,0 +1,342 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Import `unittest` to allow for automated testing. +import unittest + +# [imports] +import os +import svs +# [imports] + +DEBUG_MODE = False +def assert_equal(lhs, rhs, message: str = "", expected_alpha = 0.05): + if DEBUG_MODE: + print(f"{message}: {lhs} == {rhs}") + else: + assert lhs < rhs + expected_alpha, f"{message}" + assert lhs > rhs - expected_alpha, f"{message}" + +def run_test_float(index, queries, groundtruth): + expected = { + 10: 0.5664, + 20: 0.7397, + 30: 0.8288, + 40: 0.8837, + } + + for window_size in range(10, 50, 10): + index.search_window_size = window_size + I, D = index.search(queries, 10) + recall = svs.k_recall_at(groundtruth, I, 10, 10) + assert_equal( + recall, expected[window_size], f"Standard Search Check ({window_size})" + ) + +def run_test_two_level4_8(index, queries, groundtruth): + expected = { + 10: 0.5664, + 20: 0.7397, + 30: 0.8288, + 40: 0.8837, + } + + for window_size in range(10, 50, 10): + index.search_window_size = window_size + I, D = index.search(queries, 10) + recall = svs.k_recall_at(groundtruth, I, 10, 10) + assert_equal( + recall, expected[window_size], f"Compressed Search Check ({window_size})" + ) + +def run_test_build_two_level4_8(index, queries, groundtruth): + expected = { + 10: 0.5664, + 20: 0.7397, + 30: 0.8288, + 40: 0.8837, + } + + for window_size in range(10, 50, 10): + index.search_window_size = window_size + I, D = index.search(queries, 10) + recall = svs.k_recall_at(groundtruth, I, 10, 10) + assert_equal( + recall, expected[window_size], f"Compressed Search Check ({window_size})" + ) + +# Shadow this as a global to make it available to the test-case clean-up. +test_data_dir = None + +def run(): + expected_delta = 0.05 + + # ### + # Generating test data + # ### + + # [generate-dataset] + # Create a test dataset. + # This will create a directory "example_data_vamana" and populate it with three + # entries: + # - data.fvecs: The test dataset. + # - queries.fvecs: The test queries. + # - groundtruth.ivecs: The groundtruth. + test_data_dir = "./example_data_vamana" + svs.generate_test_dataset( + 10000, # Create 10000 vectors in the dataset. + 1000, # Generate 1000 query vectors. + 128, # Set the vector dimensionality to 128. + test_data_dir, # The directory where results will be generated. + data_seed = 1234, # Random number seed for reproducibility. + query_seed = 5678, # Random number seed for reproducibility. + num_threads = 4, # Number of threads to use. + distance = svs.DistanceType.L2, # The distance type to use. + ) + # [generate-dataset] + + + # ### + # Building the index + # ### + + # [build-parameters] + # Now, we can build a graph index over the data set. + parameters = svs.VamanaBuildParameters( + graph_max_degree = 64, + window_size = 128, + ) + # [build-parameters] + + # [build-index] + # Build the index. + index = svs.Vamana.build( + parameters, + svs.VectorDataLoader( + os.path.join(test_data_dir, "data.fvecs"), svs.DataType.float32 + ), + svs.DistanceType.L2, + num_threads = 4, + ) + # [build-index] + + # [build-index-fromNumpyArray] + # Build the index. 
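+    # Note: svs.Vamana.build also accepts an in-memory NumPy array in place of a
+    # VectorDataLoader; the next lines read the dataset into an array and rebuild
+    # the same index from it.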
+ data = svs.read_vecs(os.path.join(test_data_dir, "data.fvecs")) + index = svs.Vamana.build( + parameters, + data, + svs.DistanceType.L2, + num_threads = 4, + ) + # [build-index-fromNumpyArray] + + + # ### + # Searching the index + # ### + + # [load-aux] + # Load the queries and ground truth. + queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) + groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) + # [load-aux] + + # [perform-queries] + # Set the search window size of the index and perform queries. + index.search_window_size = 30 + I, D = index.search(queries, 10) + + # Compare with the groundtruth. + recall = svs.k_recall_at(groundtruth, I, 10, 10) + expected_recall = 0.8288 + print(f"Recall = {recall}") + assert recall < expected_recall + expected_delta + assert recall > expected_recall - expected_delta + + # [perform-queries] + + # [search-window-size] + # We can vary the search window size to demonstrate the trade off in accuracy. + for window_size in range(10, 50, 10): + index.search_window_size = window_size + I, D = index.search(queries, 10) + recall = svs.k_recall_at(groundtruth, I, 10, 10) + print(f"Window size = {window_size}, Recall = {recall}") + # [search-window-size] + + ##### Begin Test + run_test_float(index, queries, groundtruth) + ##### End Test + + + # ### + # Saving the index + # ### + + # [saving-results] + # Finally, we can save the results. + index.save( + os.path.join(test_data_dir, "example_config"), + os.path.join(test_data_dir, "example_graph"), + os.path.join(test_data_dir, "example_data"), + ) + # [saving-results] + + + # ### + # Reloading a saved index + # ### + + # [loading] + # We can reload an index from a previously saved set of files. + index = svs.Vamana( + os.path.join(test_data_dir, "example_config"), + svs.GraphLoader(os.path.join(test_data_dir, "example_graph")), + svs.VectorDataLoader( + os.path.join(test_data_dir, "example_data"), svs.DataType.float32 + ), + svs.DistanceType.L2, + num_threads = 4, + ) + + # We can rerun the queries to ensure everything works properly. + index.search_window_size = 30 + I, D = index.search(queries, 10) + + # Compare with the groundtruth. + recall = svs.k_recall_at(groundtruth, I, 10, 10) + print(f"Recall = {recall}") + expected_recall = 0.8288 + assert recall < expected_recall + expected_delta + assert recall > expected_recall - expected_delta + # [loading] + + ##### Begin Test + run_test_float(index, queries, groundtruth) + ##### End Test + + # [only-loading] + # We can reload an index from a previously saved set of files. + index = svs.Vamana( + os.path.join(test_data_dir, "example_config"), + svs.GraphLoader(os.path.join(test_data_dir, "example_graph")), + svs.VectorDataLoader( + os.path.join(test_data_dir, "example_data"), svs.DataType.float32 + ), + svs.DistanceType.L2, + num_threads = 4, + ) + # [only-loading] + + # [runtime-nthreads] + index.num_threads = 4 + # [runtime-nthreads] + + + # ### + # Search using vector compression + # ### + + # [search-compressed-loader] + data_loader = svs.VectorDataLoader( + os.path.join(test_data_dir, "example_data"), # Uncompressed data + svs.DataType.float32, + dims = 128 # Passing dimensionality is optional + ) + B1 = 4 # Number of bits for the first level LVQ quantization + B2 = 8 # Number of bits for the residuals quantization + padding = 32 + strategy = svs.LVQStrategy.Turbo + compressed_loader = svs.LVQLoader(data_loader, + primary=B1, + residual=B2, + strategy=strategy, # Passing the strategy is optional. 
+ padding=padding # Passing padding is optional. + ) + # [search-compressed-loader] + + # [search-compressed] + index = svs.Vamana( + os.path.join(test_data_dir, "example_config"), + svs.GraphLoader(os.path.join(test_data_dir, "example_graph")), + compressed_loader, + # Optional keyword arguments + distance = svs.DistanceType.L2, + num_threads = 4 + ) + + # Compare with the groundtruth.. + index.search_window_size = 30 + I, D = index.search(queries, 10) + recall = svs.k_recall_at(groundtruth, I, 10, 10) + print(f"Compressed recall: {recall}") + expected_recall = 0.8223 + assert recall < expected_recall + expected_delta + assert recall > expected_recall - expected_delta + # [search-compressed] + + ##### Begin Test + run_test_two_level4_8(index, queries, groundtruth) + ##### End Test + + # [build-index-compressed] + # Build the index. + index = svs.Vamana.build( + parameters, + compressed_loader, + svs.DistanceType.L2, + num_threads = 4 + ) + # [build-index-compressed] + + # 1. Building Uncompressed + # 2. Loading Uncompressed + # 3. Loading with a recompressor + + # We can rerun the queries to ensure everything works properly. + index.search_window_size = 30 + I, D = index.search(queries, 10) + + # Compare with the groundtruth. + recall = svs.k_recall_at(groundtruth, I, 10, 10) + print(f"Recall = {recall}") + expected_recall = 0.8221 + assert recall < expected_recall + expected_delta + assert recall > expected_recall - expected_delta + # [loading] + + ##### Begin Test + run_test_build_two_level4_8(index, queries, groundtruth) + ##### End Test + +##### +##### Main Executable +##### + +if __name__ == "__main__": + run() + +##### +##### As a unit test. +##### + +class VamanaExampleTestCase(unittest.TestCase): + def tearDown(self): + if test_data_dir is not None: + print(f"Removing temporary directory {test_data_dir}") + os.rmdir(test_data_dir) + + def test_all(self): + run() diff --git a/examples/python/example_fallback_leanvec.py b/examples/python/example_fallback_leanvec.py new file mode 100644 index 00000000..64668be4 --- /dev/null +++ b/examples/python/example_fallback_leanvec.py @@ -0,0 +1,130 @@ +# Copyright 2025 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Import `unittest` to allow for automated testing. +import unittest + +# [imports] +import os +import svs +# [imports] + +DEBUG_MODE = False +def assert_equal(lhs, rhs, message: str = "", expected_alpha = 0.05): + if DEBUG_MODE: + print(f"{message}: {lhs} == {rhs}") + else: + assert lhs < rhs + expected_alpha, f"{message}" + assert lhs > rhs - expected_alpha, f"{message}" + +test_data_dir = None + +def run(): + expected_delta = 0.05 + + # [generate-dataset] + # Create a test dataset. + # This will create a directory "example_data_vamana" and populate it with three + # entries: + # - data.fvecs: The test dataset. + # - queries.fvecs: The test queries. + # - groundtruth.fvecs: The groundtruth. + test_data_dir = "./example_data_vamana" + svs.generate_test_dataset( + 1000, # Create 1000 vectors in the dataset. 
+ 100, # Generate 100 query vectors. + 256, # Set the vector dimensionality to 256. + test_data_dir, # The directory where results will be generated. + data_seed = 1234, # Random number seed for reproducibility. + query_seed = 5678, # Random number seed for reproducibility. + num_threads = 4, # Number of threads to use. + distance = svs.DistanceType.MIP, # The distance type to use. + ) + # [generate-dataset] + + # [create-loader] + # We are going to construct a LeanVec dataset on-the-fly from uncompressed data. + # First, we construct a loader for the uncompressed data. + uncompressed_loader = svs.VectorDataLoader( + os.path.join(test_data_dir, "data.fvecs"), + svs.DataType.float32 + ) + + # Next - we construct a LeanVecLoader. + # This loader is configured to perform the following: + # - Reduce dimensionality of the primary dataset to 256 dimensions. + # - Use LVQ8 for the primary dataset. + # - Use Float16 for the secondary, unreduced dataset. + leanvec_loader = svs.LeanVecLoader( + uncompressed_loader, + 128, # The reduced number of dimensions. + primary_kind = svs.LeanVecKind.lvq8, # The encoding of the primary dataset. + secondary_kind = svs.LeanVecKind.float16, # The encoding of the secondary dataset. + ) + # [create-loader] + + # [build-and-search-index] + # An index can be constructed using a LeanVec dataset. + # Use an alpha less than 1 since we are using the Inner Product distance. + parameters = svs.VamanaBuildParameters( + alpha = 0.95, + graph_max_degree = 64, + prune_to = 60, + window_size = 128, + ) + + index = svs.Vamana.build( + parameters, + leanvec_loader, + svs.DistanceType.MIP, + num_threads = 4, + ) + + # Load queries and ground-truth. + queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs")) + groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs")) + + # Set the search window size of the index and perform queries. + p = index.search_parameters + p.buffer_config = svs.SearchBufferConfig(30, 60) + index.search_parameters = p + I, D = index.search(queries, 10) + + # Compare with the groundtruth. + recall = svs.k_recall_at(groundtruth, I, 10, 10) + print(f"Recall = {recall}") + expected_recall = 0.976 + assert recall < expected_recall + expected_delta + assert recall > expected_recall - expected_delta + # [build-and-search-index] + +##### +##### Main Executable +##### + +if __name__ == "__main__": + run() + +##### +##### As a unit test. +##### + +class VamanaExampleTestCase(unittest.TestCase): + def tearDown(self): + if test_data_dir is not None: + print(f"Removing temporary directory {test_data_dir}") + os.rmdir(test_data_dir) + + def test_all(self): + run() diff --git a/include/svs/fallback/fallback.h b/include/svs/fallback/fallback.h new file mode 100644 index 00000000..cfa72444 --- /dev/null +++ b/include/svs/fallback/fallback.h @@ -0,0 +1,27 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "svs/fallback/fallback_mode.h" +#include "svs/leanvec/leanvec_concept.h" +#include "svs/quantization/lvq/lvq_concept.h" + +#ifdef USE_PROPRIETARY + +#include "svs/fallback/fallback_cpp.h" + +#endif // USE_PROPRIETARY diff --git a/include/svs/fallback/fallback_mode.h b/include/svs/fallback/fallback_mode.h new file mode 100644 index 00000000..169c9fd1 --- /dev/null +++ b/include/svs/fallback/fallback_mode.h @@ -0,0 +1,44 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace svs { +namespace fallback { + +enum class FallbackMode { Silent, Warning, Error }; + +// Warn by default +inline FallbackMode mode = FallbackMode::Warning; + +inline void set_mode(FallbackMode new_mode) { mode = new_mode; } +inline FallbackMode get_mode() { return mode; } + +class UnsupportedHardwareError : public std::runtime_error { + public: + explicit UnsupportedHardwareError() + : std::runtime_error{"LVQ and Leanvec functionality of SVS is not supported on " + "non-Intel hardware."} {} +}; + +inline constexpr const char* fallback_warning = + "LVQ and Leanvec functionality of SVS is not supported on non-Intel hardware. " + "Using uncompressed data.\n"; + +} // namespace fallback +} // namespace svs diff --git a/include/svs/index/vamana/extensions.h b/include/svs/index/vamana/extensions.h index 58923567..0fbafc8a 100644 --- a/include/svs/index/vamana/extensions.h +++ b/include/svs/index/vamana/extensions.h @@ -583,6 +583,8 @@ struct Reconstruct { // Customization point for reconstructing vectors. inline constexpr Reconstruct reconstruct_accessor{}; +#ifdef USE_PROPRIETARY + template SVS_FORCE_INLINE data::GetDatumAccessor svs_invoke( svs::tag_t SVS_UNUSED(cpo), @@ -591,6 +593,17 @@ SVS_FORCE_INLINE data::GetDatumAccessor svs_invoke( return data::GetDatumAccessor(); } +#else // USE_PROPRIETARY + +template +SVS_FORCE_INLINE data::GetDatumAccessor svs_invoke( + svs::tag_t SVS_UNUSED(cpo), const Data& SVS_UNUSED(dataset) +) { + return data::GetDatumAccessor(); +} + +#endif // USE_PROPRIETARY + ///// ///// Distance ///// diff --git a/include/svs/leanvec/leanvec_common.h b/include/svs/leanvec/leanvec_common.h new file mode 100644 index 00000000..a0d15094 --- /dev/null +++ b/include/svs/leanvec/leanvec_common.h @@ -0,0 +1,56 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +// svs +#include "svs/core/data.h" + +// stl +#include +#include +#include +#include + +// third-party +#include "fmt/core.h" + +namespace svs { +namespace leanvec { + +// Sentinel type to select an LVQ dataset as either the primary or secondary +// dataset for `LeanVec`. +template struct UsingLVQ {}; + +// Hoist out schemas for reuse while auto-loading. +inline constexpr std::string_view lean_dataset_schema = "leanvec_dataset"; +inline constexpr lib::Version lean_dataset_save_version = lib::Version(0, 0, 0); +inline constexpr std::string_view fallback_schema = "leanvec_fallback"; +inline constexpr lib::Version fallback_save_version = lib::Version(0, 0, 0); + +namespace detail { + +template inline constexpr bool is_using_lvq_tag_v = false; +template inline constexpr bool is_using_lvq_tag_v> = true; + +} // namespace detail + +// Compatible type parameters for LeanDatasets +template +concept LeanCompatible = has_datatype_v || detail::is_using_lvq_tag_v; + +} // namespace leanvec +} // namespace svs diff --git a/include/svs/leanvec/leanvec_concept.h b/include/svs/leanvec/leanvec_concept.h new file mode 100644 index 00000000..85277528 --- /dev/null +++ b/include/svs/leanvec/leanvec_concept.h @@ -0,0 +1,500 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "svs/quantization/lvq/lvq_concept.h" + +#ifndef USE_PROPRIETARY + +#include "svs/leanvec/leanvec_fallback.h" + +#else // USE_PROPRIETARY + +#include "svs/leanvec/leanvec.h" + +#endif // USE_PROPRIETARY + +namespace svs { +namespace leanvec { + +///// +///// Load Helpers +///// + +// Types to use for leanvec. +inline constexpr lib::Types LeanVecSourceTypes{}; + +// LeanVec based loaders can either perform LeanVec conversion online, or reload +// a previously saved LeanVec dataset. +struct OnlineLeanVec { + public: + explicit OnlineLeanVec(const std::filesystem::path& path, DataType type) + : path{path} + , type{type} { + if (!lib::in(type, LeanVecSourceTypes)) { + throw ANNEXCEPTION("Invalid type!"); + } + } + + // Members + public: + std::filesystem::path path; + DataType type; +}; + +struct Reload { + public: + explicit Reload(const std::filesystem::path& directory) + : directory{directory} {} + + // Members + public: + std::filesystem::path directory; +}; + +// The various ways we can instantiate LeanVec-based datasets.. +using SourceTypes = std::variant; + +/// A type used to request a specific specialization of LeanVec at runtime. +/// Used for dispatching. 
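+/// The enumerators map onto the supported encodings: `float32` and `float16`
+/// select uncompressed storage, while `lvq4` and `lvq8` select LVQ-compressed
+/// storage with 4 or 8 primary bits (see `leanvec_kind_v` below).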
+enum class LeanVecKind { float32, float16, lvq8, lvq4 }; + +namespace detail { + +template struct LeanVecPicker; + +template <> struct LeanVecPicker { + static constexpr LeanVecKind value = LeanVecKind::float32; +}; +template <> struct LeanVecPicker { + static constexpr LeanVecKind value = LeanVecKind::float16; +}; +template <> struct LeanVecPicker> { + static constexpr LeanVecKind value = LeanVecKind::lvq8; +}; +template <> struct LeanVecPicker> { + static constexpr LeanVecKind value = LeanVecKind::lvq4; +}; + +} // namespace detail + +template +inline constexpr LeanVecKind leanvec_kind_v = detail::LeanVecPicker::value; + +// LeanDataset Matcher +struct Matcher { + private: + struct DatasetLayout { + size_t dims; + LeanVecKind kind; + }; + + static lib::TryLoadResult + detect_data(const lib::ContextFreeNodeView& node) { + // Is it an uncompressed dataset? + auto maybe_uncompressed = lib::try_load(node); + auto failure = lib::Unexpected{lib::TryLoadFailureReason::Other}; + + // On success - determine if this one of the recognized types. + if (maybe_uncompressed) { + const auto& matcher = maybe_uncompressed.value(); + size_t dims = matcher.dims; + switch (matcher.eltype) { + case DataType::float16: { + return DatasetLayout{dims, LeanVecKind::float16}; + } + case DataType::float32: { + return DatasetLayout{dims, LeanVecKind::float32}; + } + default: { + return failure; + } + } + } + + // Failed to match the uncompressed layout. Try LVQ. + auto maybe_lvq = lib::try_load(node); + if (maybe_lvq) { + const auto& matcher = maybe_lvq.value(); + size_t dims = matcher.dims; + size_t primary = matcher.primary; + switch (primary) { + case 4: { + return DatasetLayout{dims, LeanVecKind::lvq4}; + } + case 8: { + return DatasetLayout{dims, LeanVecKind::lvq8}; + } + default: { + return failure; + } + } + } + return lib::Unexpected(lib::TryLoadFailureReason::InvalidSchema); + } + + public: + ///// Loading. + static bool check_load_compatibility(std::string_view schema, lib::Version version) { + if (schema == lean_dataset_schema && version == lean_dataset_save_version) { + return true; + } + if (schema == fallback_schema && version == fallback_save_version) { + return true; + } + return false; + } + + static lib::TryLoadResult try_load(const lib::ContextFreeLoadTable& table) { + auto schema = table.schema(); + // For each of the primary and secondary, use the combinations of expected + // expected types until we have a successful match. 
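+        // Datasets saved under the fallback schema only carry the primary
+        // (uncompressed) data, so the secondary kind is reported as float32 and
+        // the total dimensionality defaults to that of the primary.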
+ auto primary_expected = detect_data(table.at("primary")); + if (!primary_expected) { + return lib::Unexpected(primary_expected.error()); + } + const auto& primary = primary_expected.value(); + + if (schema == lean_dataset_schema) { + auto secondary_expected = detect_data(table.at("secondary")); + if (!secondary_expected) { + return lib::Unexpected(secondary_expected.error()); + } + const auto& secondary = secondary_expected.value(); + return Matcher{ + .leanvec_dims = primary.dims, + .total_dims = secondary.dims, + .primary_kind = primary.kind, + .secondary_kind = secondary.kind}; + } else if (schema == fallback_schema) { + return Matcher{ + .leanvec_dims = primary.dims, + .total_dims = primary.dims, + .primary_kind = primary.kind, + .secondary_kind = LeanVecKind::float32}; + } else { + // TODO raise exception + throw ANNEXCEPTION("Invalid schema!"); + } + } + + static Matcher load(const lib::ContextFreeLoadTable& table) { + auto schema = table.schema(); + // For each of the primary and secondary, use the combinations of expected + // expected types until we have a successful match. + auto primary_expected = detect_data(table.at("primary")); + if (!primary_expected) { + throw ANNEXCEPTION("Could not match the primary dataset!"); + } + const auto& primary = primary_expected.value(); + + if (schema == lean_dataset_schema) { + auto secondary_expected = detect_data(table.at("secondary")); + if (!secondary_expected) { + throw ANNEXCEPTION("Could not match the secondary dataset!"); + } + const auto& secondary = secondary_expected.value(); + return Matcher{ + .leanvec_dims = primary.dims, + .total_dims = secondary.dims, + .primary_kind = primary.kind, + .secondary_kind = secondary.kind}; + } + return Matcher{ + .leanvec_dims = primary.dims, + .total_dims = primary.dims, + .primary_kind = primary.kind, + .secondary_kind = LeanVecKind::float32}; + } + + constexpr bool friend operator==(const Matcher&, const Matcher&) = default; + + ///// Members + size_t leanvec_dims; + size_t total_dims; + LeanVecKind primary_kind; + LeanVecKind secondary_kind; +}; + +// Overload Matching Rules +template +int64_t overload_score( + LeanVecKind primary, size_t primary_dims, LeanVecKind secondary, size_t secondary_dims +) { + // Check primary kind + if (primary != leanvec::leanvec_kind_v) { + return lib::invalid_match; + } + + // Check secondary kind + if (secondary != leanvec::leanvec_kind_v) { + return lib::invalid_match; + } + + // Check extent-tags. + auto extent_match = lib::dispatch_match>( + lib::ExtentArg{secondary_dims} + ); + + // If extents don't match, then we abort immediately. + if (extent_match < 0) { + return lib::invalid_match; + } + + // Check leanvec_dims-tags. + auto leanvec_dims_match = + lib::dispatch_match>(lib::ExtentArg{ + primary_dims}); + + // If leanvec_dims don't match, then we abort immediately. + if (leanvec_dims_match < 0) { + return lib::invalid_match; + } + + return extent_match + leanvec_dims_match; +} + +template +int64_t overload_score(const Matcher& matcher) { + return overload_score( + matcher.primary_kind, + matcher.leanvec_dims, + matcher.secondary_kind, + matcher.total_dims + ); +} + +// Forward Declaration. 
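+// As with LVQ, loading is split into two stages: ProtoLeanVecLoader captures the
+// runtime parameters (kinds, leanvec_dims, optional matrices, alignment) and is
+// refined via dispatch into a fully-typed LeanVecLoader that performs the load.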
+template +struct LeanVecLoader; + +template > struct ProtoLeanVecLoader { + public: + ProtoLeanVecLoader() = default; + explicit ProtoLeanVecLoader( + const UnspecializedVectorDataLoader& datafile, + size_t leanvec_dims, + LeanVecKind primary_kind, + LeanVecKind secondary_kind, + std::optional> matrices, + size_t alignment = 0 + ) + : source_{std::in_place_type, datafile.path_, datafile.type_} + , leanvec_dims_{leanvec_dims} + , dims_{datafile.dims_} + , primary_kind_{primary_kind} + , secondary_kind_{secondary_kind} + , matrices_{std::move(matrices)} + , alignment_{alignment} + , allocator_{datafile.allocator_} {} + + explicit ProtoLeanVecLoader( + Reload reloader, + size_t alignment = 0, + const Alloc& allocator = {} + ) + : source_{std::move(reloader)} + , matrices_{std::nullopt} + , alignment_{alignment} + , allocator_{allocator} { + // Produce a hard error if we cannot load and match the dataset. + auto matcher = lib::load_from_disk(std::get(source_).directory); + primary_kind_ = matcher.primary_kind; + secondary_kind_ = matcher.secondary_kind; + leanvec_dims_ = matcher.leanvec_dims; + dims_ = matcher.total_dims; + } + + template < + typename T1, + typename T2, + size_t LeanVecDims, + size_t Extent, + typename F = std::identity> + LeanVecLoader< + T1, + T2, + LeanVecDims, + Extent, + std::decay_t>> + refine(lib::Val, F&& f = std::identity()) const { + using ARet = std::decay_t>; + // Make sure the pre-set values are correct. + if constexpr (Extent != Dynamic) { + if (Extent != dims_) { + throw ANNEXCEPTION("Invalid Extent specialization!"); + } + } + + if constexpr (LeanVecDims != Dynamic) { + if (LeanVecDims != leanvec_dims_) { + throw ANNEXCEPTION("Invalid LeanVecDims specialization!"); + } + } + + if (leanvec_kind_v != primary_kind_) { + throw ANNEXCEPTION("Invalid Primary kind specialization!"); + } + + if (leanvec_kind_v != secondary_kind_) { + throw ANNEXCEPTION("Invalid Secondary kind specialization!"); + } + + // Convert dynamic Extent matrices to static LeanVecDims + auto matrices = std::optional>(matrices_); + + return LeanVecLoader( + source_, leanvec_dims_, std::move(matrices), alignment_, f(allocator_) + ); + } + + public: + SourceTypes source_; + size_t leanvec_dims_; + size_t dims_; + LeanVecKind primary_kind_; + LeanVecKind secondary_kind_; + std::optional> matrices_; + size_t alignment_; + Alloc allocator_; +}; + +template +struct LeanVecLoader { + public: + using loaded_type = LeanDataset; + + explicit LeanVecLoader( + SourceTypes source, + size_t leanvec_dims, + std::optional> matrices, + size_t alignment, + const Alloc& allocator + ) + : source_{std::move(source)} + , leanvec_dims_{leanvec_dims} + , matrices_{std::move(matrices)} + , alignment_{alignment} + , allocator_{allocator} {} + + loaded_type load() const { + auto pool = threads::SequentialThreadPool(); + return load(pool); + } + + template + LeanVecLoader< + T1, + T2, + LeanVecDims, + Extent, + std::decay_t>> + rebind_alloc(const F& f) { + return LeanVecLoader< + T1, + T2, + LeanVecDims, + Extent, + std::decay_t>>{ + source_, leanvec_dims_, matrices_, alignment_, f(allocator_)}; + } + + template loaded_type load(Pool& threadpool) const { + return std::visit( + [&](auto source) { + using U = std::decay_t; + if constexpr (std::is_same_v) { + return lib::load_from_disk( + source.directory, alignment_, allocator_ + ); + } else { + return lib::match( + LeanVecSourceTypes, + source.type, + [&](lib::Type SVS_UNUSED(type)) { + using rebind_type = detail::select_rebind_allocator_t; + return loaded_type::reduce( + 
data::SimpleData::load(source.path), + matrices_, + threadpool, + alignment_, + leanvec_dims_, + allocator_ + ); + } + ); + } + }, + source_ + ); + } + + private: + SourceTypes source_; + lib::MaybeStatic leanvec_dims_; + std::optional> matrices_; + size_t alignment_; + Alloc allocator_; +}; + +} // namespace leanvec + +// Define dispatch conversion from ProtoLeanVecLoader to LeanVecLoader. +template < + typename Primary, + typename Secondary, + size_t LeanVecDims, + size_t Extent, + typename Alloc> +struct lib::DispatchConverter< + leanvec::ProtoLeanVecLoader, + leanvec::LeanVecLoader> { + static int64_t match(const leanvec::ProtoLeanVecLoader& loader) { + return overload_score( + loader.primary_kind_, loader.leanvec_dims_, loader.secondary_kind_, loader.dims_ + ); + } + + static leanvec::LeanVecLoader + convert(const leanvec::ProtoLeanVecLoader& loader) { + return loader.template refine( + lib::Val() + ); + } + + static std::string description() { + auto dims = []() { + if constexpr (Extent == Dynamic) { + return "any"; + } else { + return Extent; + } + }(); + + auto leanvec_dims = []() { + if constexpr (LeanVecDims == Dynamic) { + return "any"; + } else { + return LeanVecDims; + } + }(); + + return fmt::format("LeanVecLoader dims-{}x{}", dims, leanvec_dims); + } +}; + +} // namespace svs diff --git a/include/svs/leanvec/leanvec_fallback.h b/include/svs/leanvec/leanvec_fallback.h new file mode 100644 index 00000000..0032c619 --- /dev/null +++ b/include/svs/leanvec/leanvec_fallback.h @@ -0,0 +1,177 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "svs/fallback/fallback_mode.h" +#include "svs/leanvec/leanvec_common.h" +#include "svs/quantization/lvq/lvq_fallback.h" + +// #include leanvec_common.h + +namespace fallback = svs::fallback; + +namespace svs { +namespace leanvec { + +template struct LeanVecMatrices { + public: + using leanvec_matrix_type = data::SimpleData; + + LeanVecMatrices() = default; + LeanVecMatrices(leanvec_matrix_type data_matrix, leanvec_matrix_type query_matrix) + : data_matrix_{std::move(data_matrix)} + , query_matrix_{std::move(query_matrix)} { + // Check that the size and dimensionality of both the matrices should be same + if (data_matrix_.size() != query_matrix_.size()) { + throw ANNEXCEPTION("Mismatched data and query matrix sizes!"); + } + if (data_matrix_.dimensions() != query_matrix_.dimensions()) { + throw ANNEXCEPTION("Mismatched data and query matrix dimensions!"); + } + } + + private: + leanvec_matrix_type data_matrix_; + leanvec_matrix_type query_matrix_; +}; + +// is this necessary or duplicate of LVQ? 
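+// The helpers below rebind the dataset allocator while preserving blocked
+// allocation when the source allocator is a data::Blocked wrapper; this appears
+// to duplicate the corresponding LVQ helpers, hence the question above.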
+namespace detail { +template inline constexpr bool is_blocked = false; +template inline constexpr bool is_blocked> = true; + +template > struct select_rebind_allocator { + using type = lib::rebind_allocator_t; +}; +template struct select_rebind_allocator { + using base_allocator = typename A::allocator_type; + using rebind_base_allocator = lib::rebind_allocator_t; + using type = data::Blocked; +}; +template +using select_rebind_allocator_t = typename select_rebind_allocator::type; +} // namespace detail + +template < + typename T1, + typename T2, + size_t LeanVecDims, + size_t Extent, + typename Alloc = lib::Allocator> +class LeanDataset { + public: + using allocator_type = detail::select_rebind_allocator_t; + + private: + data::SimpleData primary_; + + public: + static constexpr bool is_resizeable = detail::is_blocked; + using leanvec_matrices_type = LeanVecMatrices; + using const_value_type = + typename data::SimpleData::const_value_type; + using element_type = float; + using value_type = const_value_type; + using primary_type = data::SimpleData; + + LeanDataset(primary_type primary) + : primary_{std::move(primary)} { + if (fallback::get_mode() == fallback::FallbackMode::Error) { + throw fallback::UnsupportedHardwareError(); + } else if (fallback::get_mode() == fallback::FallbackMode::Warning) { + fmt::print(fallback::fallback_warning); + } + } + + size_t size() const { return primary_.size(); } + size_t dimensions() const { return primary_.dimensions(); } + const_value_type get_datum(size_t i) const { return primary_.get_datum(i); } + void prefetch(size_t i) const { primary_.prefetch(i); } + template void set_datum(size_t i, std::span datum) { + primary_.set_datum(i, datum); + } + + void resize(size_t new_size) + requires is_resizeable + { + primary_.resize(new_size); + } + template + requires is_resizeable + void + compact(std::span new_to_old, Pool& threadpool, size_t batchsize = 1'000'000) { + primary_.compact(new_to_old, threadpool, batchsize); + } + + template + static LeanDataset reduce( + const Dataset& data, + size_t num_threads = 1, + size_t alignment = 0, + lib::MaybeStatic leanvec_dims = {}, + const Alloc& allocator = {} + ) { + return reduce(data, std::nullopt, num_threads, alignment, leanvec_dims, allocator); + } + + template + static LeanDataset reduce( + const Dataset& data, + std::optional matrices, + size_t num_threads = 1, + size_t alignment = 0, + lib::MaybeStatic leanvec_dims = {}, + const Alloc& allocator = {} + ) { + auto pool = threads::NativeThreadPool{num_threads}; + return reduce(data, std::move(matrices), pool, alignment, leanvec_dims, allocator); + } + + template + static LeanDataset reduce( + const Dataset& data, + std::optional SVS_UNUSED(matrices), + Pool& SVS_UNUSED(threadpool), + size_t SVS_UNUSED(alignment) = 0, + lib::MaybeStatic SVS_UNUSED(leanvec_dims) = {}, + const Alloc& allocator = {} + ) { + primary_type primary = + primary_type{data.size(), data.dimensions(), allocator_type{allocator}}; + svs::data::copy(data, primary); + return LeanDataset{primary}; + } + + static constexpr lib::Version save_version = fallback_save_version; + static constexpr std::string_view serialization_schema = fallback_schema; + lib::SaveTable save(const lib::SaveContext& ctx) const { + return lib::SaveTable( + serialization_schema, save_version, {SVS_LIST_SAVE_(primary, ctx)} + ); + } + + static LeanDataset load( + const lib::LoadTable& table, + size_t SVS_UNUSED(alignment) = 0, + const Alloc& allocator = {} + ) { + return LeanDataset{SVS_LOAD_MEMBER_AT_(table, primary, 
allocator)}; + } +}; + +} // namespace leanvec +} // namespace svs diff --git a/include/svs/quantization/lvq/lvq_common.h b/include/svs/quantization/lvq/lvq_common.h new file mode 100644 index 00000000..7968b81d --- /dev/null +++ b/include/svs/quantization/lvq/lvq_common.h @@ -0,0 +1,174 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// svs +#include "svs/core/data.h" +#include "svs/core/distance.h" +#include "svs/core/kmeans.h" +#include "svs/lib/dispatcher.h" +#include "svs/lib/meta.h" +#include "svs/lib/misc.h" +#include "svs/lib/saveload.h" + +// stl +#include +#include +#include +#include + +namespace svs { +namespace quantization { +namespace lvq { + +namespace detail { + +// Trait to determine if an allocator is blocked or not. +// Used to SFINAE away resizing methods if the allocator is not blocked. +template inline constexpr bool is_blocked = false; +template inline constexpr bool is_blocked> = true; + +} // namespace detail + +enum class LVQStrategyDispatch { + Auto, // Choose between sequential and turbo. + Sequential, // Force Sequential + Turbo // Force Turbo +}; + +/// +/// Place-holder to indicate that a given direct compression stores its values as +/// signed integers (taking positive and negative values in accordance with a two-s +/// complement encoding). +/// +struct Signed { + static constexpr std::string_view name = "signed"; +}; + +/// +/// Place-holder to indicate that a given direct compression stores its values as +/// unsigned integers. +/// +struct Unsigned { + static constexpr std::string_view name = "unsigned"; +}; + +// Schemas are independent of most type parameters. +// Hoist them as stand-alone variables to they are accessible to the auto load +// matchers as well. +inline constexpr std::string_view one_level_serialization_schema = "one_level_lvq_dataset"; +inline constexpr lib::Version one_level_save_version = lib::Version(0, 0, 2); +inline constexpr std::string_view two_level_serialization_schema = "two_level_lvq_dataset"; +inline constexpr lib::Version two_level_save_version = lib::Version(0, 0, 3); +inline constexpr std::string_view fallback_serialization_schema = "fallback_dataset"; +inline constexpr lib::Version fallback_save_version = lib::Version(0, 0, 0); + +enum class DatasetSchema { Compressed, ScaledBiased, Fallback }; +/// +/// Support for deduction. 
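+/// get_schema and get_current_version map each DatasetSchema enumerator to the
+/// serialization schema string and version it is saved under, so the auto-load
+/// matchers can recognize previously saved LVQ (and fallback) datasets.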
+/// +inline constexpr std::string_view get_schema(DatasetSchema kind) { + switch (kind) { + using enum DatasetSchema; + case Compressed: { + return "lvq_compressed_dataset"; + } + case ScaledBiased: { + return "lvq_with_scaling_constants"; + } + case Fallback: { + return "uncompressed_data"; + } + } + throw ANNEXCEPTION("Invalid schema!"); +} + +inline constexpr lib::Version get_current_version(DatasetSchema kind) { + switch (kind) { + using enum DatasetSchema; + case Compressed: { + return lib::Version(0, 0, 0); + } + case ScaledBiased: { + return lib::Version(0, 0, 3); + } + case Fallback: { + return lib::Version(0, 0, 0); + } + } + throw ANNEXCEPTION("Invalid schema!"); +} + +struct DatasetSummary { + static bool check_load_compatibility(std::string_view schema, lib::Version version) { + using enum DatasetSchema; + if (schema == get_schema(Compressed) && + version == get_current_version(Compressed)) { + return true; + } + if (schema == get_schema(ScaledBiased) && + version == get_current_version(ScaledBiased)) { + return true; + } + if (schema == get_schema(Fallback) && version == get_current_version(Fallback)) { + return true; + } + return false; + } + + static DatasetSummary load(const lib::ContextFreeLoadTable& table) { + using enum DatasetSchema; + auto schema = table.schema(); + if (schema == get_schema(Compressed)) { + return DatasetSummary{ + .kind = Compressed, + .is_signed = + (lib::load_at(table, "sign") == lvq::Signed::name), + .dims = lib::load_at(table, "ndims"), + .bits = lib::load_at(table, "bits")}; + } + if (schema == get_schema(ScaledBiased)) { + return DatasetSummary{ + .kind = ScaledBiased, + .is_signed = false, // ScaledBiased always uses unsigned codes. + .dims = lib::load_at(table, "logical_dimensions"), + .bits = lib::load_at(table, "bits")}; + } + if (schema == get_schema(Fallback)) { + return DatasetSummary{ + .kind = Fallback, + .is_signed = false, + .dims = lib::load_at(table, "dims"), + .bits = 32}; + } + throw ANNEXCEPTION("Invalid table schema {}!", schema); + } + + ///// Members + // The kind of the leaf dataset. + DatasetSchema kind; + // Whether each LVQ element is signed. + bool is_signed; + // The logical number of dimensions in the dataset. + size_t dims; + // The number of bits used for compression. + size_t bits; +}; + +} // namespace lvq +} // namespace quantization +} // namespace svs diff --git a/include/svs/quantization/lvq/lvq_concept.h b/include/svs/quantization/lvq/lvq_concept.h new file mode 100644 index 00000000..aac4f021 --- /dev/null +++ b/include/svs/quantization/lvq/lvq_concept.h @@ -0,0 +1,425 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#ifndef USE_PROPRIETARY + +#include "svs/quantization/lvq/lvq_fallback.h" + +#else // USE_PROPRIETARY + +#include "svs/quantization/lvq/lvq.h" + +#endif // USE_PROPRIETARY + +namespace svs { +namespace quantization { +namespace lvq { + +///// +///// Load Helpers +///// + +// Types to use for lazy compression. 
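+// OnlineCompression (below) validates the requested element type against this
+// list before a dataset is compressed on the fly.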
+inline constexpr lib::Types CompressionTs{}; + +// How are we expecting to obtain the data. +struct OnlineCompression { + public: + explicit OnlineCompression(const std::filesystem::path& path, DataType type) + : path{path} + , type{type} { + if (!lib::in(type, CompressionTs)) { + throw ANNEXCEPTION("Invalid type!"); + } + } + + ///// Members + std::filesystem::path path; + DataType type; +}; + +/// +/// @brief Dispatch type indicating that a compressed dataset should be reloaded +/// directly. +/// +/// LVQ based loaders can either perform dataset compression online, or reload a +/// previously saved dataset. +/// +/// Using this type in LVQ loader constructors indicates that reloading is +/// desired. +/// +struct Reload { + public: + /// + /// @brief Construct a new Reloader. + /// + /// @param directory The directory where a LVQ compressed dataset was + /// previously saved. + /// + explicit Reload(const std::filesystem::path& directory) + : directory{directory} {} + + ///// Members + std::filesystem::path directory; +}; + +// The various ways we can instantiate LVQ-based datasets.. +using SourceTypes = std::variant; + +// Forward Declaration. +template < + size_t Primary, + size_t Residual, + size_t Extent, + LVQPackingStrategy Strategy, + typename Alloc> +struct LVQLoader; + +struct Matcher { + // Load a matcher for either one or two level datasets. + static bool check_load_compatibility(std::string_view schema, lib::Version version) { + if (schema == one_level_serialization_schema && version == one_level_save_version) { + return true; + } + if (schema == two_level_serialization_schema && version == two_level_save_version) { + return true; + } + if (schema == fallback_serialization_schema && version == fallback_save_version) { + return true; + } + return false; + } + + static Matcher load(const lib::ContextFreeLoadTable& table) { + auto schema = table.schema(); + auto primary_summary = lib::load_at(table, "primary"); + if (schema == one_level_serialization_schema) { + return Matcher{ + .primary = primary_summary.bits, + .residual = 0, + .dims = primary_summary.dims}; + } + if (schema == two_level_serialization_schema) { + auto residual_summary = lib::load_at(table, "residual"); + return Matcher{ + .primary = primary_summary.bits, + .residual = residual_summary.bits, + .dims = primary_summary.dims}; + } + if (schema == fallback_serialization_schema) { + return Matcher{ + .primary = primary_summary.bits, + .residual = 0, + .dims = primary_summary.dims}; + } + throw ANNEXCEPTION( + "Unreachable reached with schema and version ({}, {})!", + table.schema(), + table.version() + ); + } + + static lib::TryLoadResult try_load(const lib::ContextFreeLoadTable& table) { + // The saving and loading framework will check schema compatibility before + // calling try-load. + // + // In that case, the logic behind `try_load` and `load` are the same. + // Note that `load` will throw if sub-keys do not match, but that is okay + // because mismatching sub-keys means we have an invalid schema. + return load(table); + } + + constexpr bool friend operator==(const Matcher&, const Matcher&) = default; + + ///// Members + size_t primary; + size_t residual; + size_t dims; +}; + +template +int64_t overload_match_strategy(LVQStrategyDispatch strategy) { + constexpr bool is_sequential = std::is_same_v; + constexpr bool is_turbo = lvq::TurboLike; + + switch (strategy) { + // If sequential is requested - we can only match sequential. + case LVQStrategyDispatch::Sequential: { + return is_sequential ? 
lib::perfect_match : lib::invalid_match; + } + // If turbo is requested - we can only match turbo. + case LVQStrategyDispatch::Turbo: { + return is_turbo ? lib::perfect_match : lib::invalid_match; + } + case LVQStrategyDispatch::Auto: { + // Preference: + // (1) Turbo + // (2) Sequential + return is_turbo ? 0 : 1; + } + } + throw ANNEXCEPTION("Unreachable!"); +} + +// Compatibility ranking for LVQ +template +int64_t overload_score(size_t p, size_t r, size_t e, LVQStrategyDispatch strategy) { + // Reject easy matches. + if (lvq::check_primary_residual(p, r)) { + return lib::invalid_match; + } + + // Check static dimensionality. + auto extent_match = + lib::dispatch_match>(lib::ExtentArg{e}); + + // If the extent match fails - abort immediately. + if (extent_match < 0) { + return lib::invalid_match; + } + + // We know dimensionality matches, now we have to try to match strategy. + auto strategy_match = overload_match_strategy(strategy); + if (lvq::check_strategy_match(strategy_match)) { + return lib::invalid_match; + } + + // Prioritize matching dimensionality over better strategies. + // Dispatch matching prefers lower return values over larger return values. + // + // By multiplying the `extent_match`, we enter a regime where better extent + // matches always have precedence over strategy matches. + constexpr size_t extent_multiplier = 1000; + return strategy_match + extent_multiplier * extent_match; +} + +template +int64_t overload_score(Matcher matcher, LVQStrategyDispatch strategy) { + return overload_score( + matcher.primary, matcher.residual, matcher.dims, strategy + ); +} + +template > struct ProtoLVQLoader { + public: + // Constructors + ProtoLVQLoader() = default; + + // TODO: Propagate allocator request. + explicit ProtoLVQLoader( + const UnspecializedVectorDataLoader& datafile, + size_t primary, + size_t residual, + size_t alignment = 0, + LVQStrategyDispatch strategy = LVQStrategyDispatch::Auto + ) + : source_{std::in_place_type_t(), datafile.path_, datafile.type_} + , primary_{primary} + , residual_{residual} + , dims_{datafile.dims_} + , alignment_{alignment} + , strategy_{strategy} + , allocator_{datafile.allocator_} {} + + explicit ProtoLVQLoader( + Reload reloader, + size_t alignment, + LVQStrategyDispatch strategy = LVQStrategyDispatch::Auto, + const Alloc& allocator = {} + ) + : source_{std::move(reloader)} + , primary_{0} + , residual_{0} + , dims_{0} + , alignment_{alignment} + , strategy_{strategy} + , allocator_{allocator} { + const auto& directory = std::get(source_).directory; + auto result = lib::try_load_from_disk(directory); + if (!result) { + throw ANNEXCEPTION( + "Cannot determine primary, residual, and dimensions " + "from data source {}. " + "Code {}!", + directory, + static_cast(result.error()) + ); + } + const auto& match = result.value(); + primary_ = match.primary; + residual_ = match.residual; + dims_ = match.dims; + } + + template < + size_t Primary, + size_t Residual, + size_t Extent, + LVQPackingStrategy Strategy, + typename F = std::identity> + LVQLoader< + Primary, + Residual, + Extent, + Strategy, + std::decay_t>> + refine(lib::Val, F&& f = std::identity()) const { + using ARet = std::decay_t>; + // Make sure the pre-set values are correct. 
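+        // A mismatch between the requested specialization and the values recorded in
+        // this loader would produce an unusable dataset, so throw instead of dispatching.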
+ if constexpr (Extent != Dynamic) { + if (Extent != dims_) { + throw ANNEXCEPTION("Invalid specialization!"); + } + } + if (lvq::check_primary_residual(primary_, residual_)) { + throw ANNEXCEPTION("Encoding bits mismatched!"); + } + if (!detail::is_compatible(strategy_)) { + throw ANNEXCEPTION("Trying to dispatch to an inappropriate strategy!"); + } + + return LVQLoader( + source_, alignment_, f(allocator_) + ); + } + + public: + SourceTypes source_; + size_t primary_; + size_t residual_; + size_t dims_; + size_t alignment_; + LVQStrategyDispatch strategy_; + Alloc allocator_; +}; + +template < + size_t Primary, + size_t Residual, + size_t Extent, + LVQPackingStrategy Strategy, + typename Alloc> +struct LVQLoader { + public: + using loaded_type = LVQDataset; + + explicit LVQLoader(SourceTypes source, size_t alignment, const Alloc& allocator) + : source_{std::move(source)} + , alignment_{alignment} + , allocator_{allocator} {} + + loaded_type load() const { + auto pool = threads::SequentialThreadPool(); + return load(pool); + } + + template + LVQLoader< + Primary, + Residual, + Extent, + Strategy, + std::decay_t>> + rebind_alloc(const F& f) { + return LVQLoader< + Primary, + Residual, + Extent, + Strategy, + std::decay_t>>{ + source_, alignment_, f(allocator_)}; + } + + template loaded_type load(Pool& threadpool) const { + return std::visit( + [&](auto source) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return lib::load_from_disk( + source.directory, alignment_, allocator_ + ); + } else { + return lib::match( + CompressionTs, + source.type, + [&](lib::Type SVS_UNUSED(type)) { + return loaded_type::compress( + data::SimpleData::load(source.path), + threadpool, + alignment_, + allocator_ + ); + } + ); + } + }, + source_ + ); + } + + private: + SourceTypes source_; + size_t alignment_; + Alloc allocator_; +}; + +} // namespace lvq +} // namespace quantization + +// Define dispatch conversion from ProtoLVQLoader to LVQLoader. +template < + size_t Primary, + size_t Residual, + size_t Extent, + quantization::lvq::LVQPackingStrategy Strategy, + typename Alloc> +struct lib::DispatchConverter< + quantization::lvq::ProtoLVQLoader, + quantization::lvq::LVQLoader> { + static int64_t match(const quantization::lvq::ProtoLVQLoader& loader) { + return quantization::lvq::overload_score( + loader.primary_, loader.residual_, loader.dims_, loader.strategy_ + ); + } + + static quantization::lvq::LVQLoader + convert(const quantization::lvq::ProtoLVQLoader& loader) { + return loader.template refine(lib::Val( + )); + } + + static std::string description() { + auto dims = []() { + if constexpr (Extent == Dynamic) { + return "any"; + } else { + return Extent; + } + }(); + + return fmt::format( + "LVQLoader {}x{} ({}) with {} dimensions", + Primary, + Residual, + Strategy::name(), + dims + ); + } +}; + +} // namespace svs diff --git a/include/svs/quantization/lvq/lvq_fallback.h b/include/svs/quantization/lvq/lvq_fallback.h new file mode 100644 index 00000000..45fdcdb7 --- /dev/null +++ b/include/svs/quantization/lvq/lvq_fallback.h @@ -0,0 +1,192 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "svs/core/data/simple.h" +#include "svs/fallback/fallback_mode.h" +#include "svs/quantization/lvq/lvq_common.h" + +namespace fallback = svs::fallback; + +namespace svs { +namespace quantization { +namespace lvq { + +struct Sequential { + static constexpr std::string_view name() { return "sequential"; } +}; + +template struct Turbo { + static std::string name() { + return fmt::format("turbo<{}x{}>", Lanes, ElementsPerLane); + } +}; + +namespace detail { + +// Trait to identify and dispatch based on the Turbo class itself. +template inline constexpr bool is_turbo_like_v = false; +template inline constexpr bool is_lvq_packing_strategy_v = false; + +template +inline constexpr bool is_turbo_like_v> = true; + +template <> inline constexpr bool is_lvq_packing_strategy_v = true; +template + +inline constexpr bool is_lvq_packing_strategy_v> = true; + +template > struct select_rebind_allocator { + using type = lib::rebind_allocator_t; +}; +template struct select_rebind_allocator { + using base_allocator = typename A::allocator_type; + using rebind_base_allocator = lib::rebind_allocator_t; + using type = data::Blocked; +}; +template +using select_rebind_allocator_t = typename select_rebind_allocator::type; + +} // namespace detail + +template +concept LVQPackingStrategy = detail::is_lvq_packing_strategy_v; + +template +concept TurboLike = detail::is_turbo_like_v; + +// LVQDataset +template < + size_t Primary, + size_t Residual = 0, + size_t Extent = Dynamic, + LVQPackingStrategy Strategy = Sequential, + typename Alloc = lib::Allocator> +class LVQDataset { + public: + using allocator_type = detail::select_rebind_allocator_t; + + private: + data::SimpleData primary_; + + public: + static constexpr bool is_resizeable = detail::is_blocked; + using const_value_type = + typename data::SimpleData::const_value_type; + using element_type = float; + using value_type = const_value_type; + using primary_type = data::SimpleData; + void resize(size_t new_size) + requires is_resizeable + { + primary_.resize(new_size); + } + template + requires is_resizeable + void + compact(std::span new_to_old, Pool& threadpool, size_t batchsize = 1'000'000) { + primary_.compact(new_to_old, threadpool, batchsize); + } + + template + LVQDataset(Dataset primary) + : primary_{primary} { + if (fallback::get_mode() == fallback::FallbackMode::Error) { + throw fallback::UnsupportedHardwareError(); + } else if (fallback::get_mode() == fallback::FallbackMode::Warning) { + fmt::print(fallback::fallback_warning); + } + } + + size_t size() const { return primary_.size(); } + size_t dimensions() const { return primary_.dimensions(); } + const_value_type get_datum(size_t i) const { return primary_.get_datum(i); } + void prefetch(size_t i) const { primary_.prefetch(i); } + + template + void set_datum( + size_t i, std::span datum, size_t SVS_UNUSED(centroid_selector) = 0 + ) { + primary_.set_datum(i, datum); + } + + template + static LVQDataset compress(const Dataset& data, const Alloc& allocator = {}) { + return compress(data, 1, 0, allocator); + } + + template + static LVQDataset compress( + 
const Dataset& data, + size_t num_threads, + size_t alignment, + const Alloc& allocator = {} + ) { + auto pool = threads::NativeThreadPool{num_threads}; + return compress(data, pool, alignment, allocator); + } + + template + static LVQDataset compress( + const Dataset& data, + Pool& SVS_UNUSED(threadpool), + size_t SVS_UNUSED(alignment), + const Alloc& allocator = {} + ) { + primary_type primary = + primary_type{data.size(), data.dimensions(), allocator_type{allocator}}; + svs::data::copy(data, primary); + return LVQDataset{primary}; + } + + static constexpr lib::Version save_version = fallback_save_version; + static constexpr std::string_view serialization_schema = fallback_serialization_schema; + lib::SaveTable save(const lib::SaveContext& ctx) const { + return lib::SaveTable( + serialization_schema, save_version, {SVS_LIST_SAVE_(primary, ctx)} + ); + } + + static LVQDataset load( + const lib::LoadTable& table, + size_t SVS_UNUSED(alignment) = 0, + const Alloc& allocator = {} + ) { + return LVQDataset{SVS_LOAD_MEMBER_AT_(table, primary, allocator)}; + } +}; + +// No constraints on fallback for primary, residual, strategy +template +inline bool check_primary_residual(size_t SVS_UNUSED(p), size_t SVS_UNUSED(r)) { + return false; +} + +inline bool check_strategy_match(int64_t SVS_UNUSED(strategy_match)) { return false; } + +namespace detail { + +template +constexpr bool is_compatible(LVQStrategyDispatch SVS_UNUSED(strategy)) { + return true; +} + +} // namespace detail + +} // namespace lvq +} // namespace quantization +} // namespace svs diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6ed651f8..a39d5518 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -140,6 +140,8 @@ set(TEST_SOURCES # Inverted ${TEST_DIR}/svs/index/inverted/clustering.cpp ${TEST_DIR}/svs/index/inverted/memory_based.cpp + # Fallback + ${TEST_DIR}/svs/fallback/fallback.cpp # # ${TEST_DIR}/svs/index/vamana/dynamic_index.cpp ) diff --git a/tests/svs/fallback/fallback.cpp b/tests/svs/fallback/fallback.cpp new file mode 100644 index 00000000..47e92763 --- /dev/null +++ b/tests/svs/fallback/fallback.cpp @@ -0,0 +1,636 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// SVS +#include "svs/fallback/fallback.h" +#include "svs/core/recall.h" +#include "svs/lib/static.h" +#include "svs/orchestrators/dynamic_vamana.h" +#include "svs/orchestrators/exhaustive.h" +#include "svs/orchestrators/vamana.h" + +// catch2 +#include "catch2/catch_test_macros.hpp" + +#include "utils.h" + +namespace { + +// SVS setup and parameters +const size_t num_threads = 4; +size_t search_window_size = 20; +size_t n_neighbors = 10; +std::string dfname = "data.vecs"; +std::string dfname_f16 = "data_f16.vecs"; +std::string qfname = "query.vecs"; +std::string qfname_f16 = "query_f16.vecs"; +std::string gtfname = "gt.vecs"; + +const std::filesystem::path& config_path = "./config"; +const std::filesystem::path& graph_path = "./graph"; +// const std::filesystem::path& data_path = "./data"; +const std::filesystem::path& config_path_dynamic = "./config_dynamic"; +const std::filesystem::path& graph_path_dynamic = "./graph_dynamic"; + +void svs_setup() { + // convert to fp16 + auto reader = svs::io::vecs::VecsReader{dfname}; + auto writer = svs::io::vecs::VecsWriter{dfname_f16, reader.ndims()}; + { + for (auto i : reader) { + writer << i; + } + } + writer.flush(); + + reader = svs::io::vecs::VecsReader{qfname}; + writer = svs::io::vecs::VecsWriter{qfname_f16, reader.ndims()}; + { + for (auto i : reader) { + writer << i; + } + } + writer.flush(); +} + +template auto create_lvq_data() { + namespace lvq = svs::quantization::lvq; + + auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader(dfname).load(); + return lvq::LVQDataset::compress(data, threadpool, 32); + }); + + auto threadpool = svs::threads::as_threadpool(num_threads); + auto data = svs::detail::dispatch_load(compressor, threadpool); + fmt::print("Create LVQ data with P={}, R={}, E={}\n", P, R, E); + return data; +} + +template +auto create_blocked_lvq_data() { + namespace lvq = svs::quantization::lvq; + using blocked_type = svs::data::Blocked; + + auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader(dfname).load(); + return lvq::LVQDataset::compress(data, threadpool, 32); + }); + + auto threadpool = svs::threads::as_threadpool(num_threads); + auto data = svs::detail::dispatch_load(compressor, threadpool); + fmt::print("Create Blocked LVQ data with P={}, R={}, E={}\n", P, R, E); + return data; +} + +template +auto create_lvq_data_with_alloc_handle(const A& alloc) { + namespace lvq = svs::quantization::lvq; + + auto compressor = + svs::lib::Lazy([=, &alloc](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader(dfname).load(); + return lvq::LVQDataset::compress(data, threadpool, 32, alloc); + }); + + auto threadpool = svs::threads::as_threadpool(num_threads); + auto data = svs::detail::dispatch_load(compressor, threadpool); + fmt::print("Create LVQ data using AllocatorHandle with P={}, R={}, E={}\n", P, R, E); + return data; +} + +template +auto create_leanvec_data() { + namespace leanvec = svs::leanvec; + assert(D >= 32); + size_t leanvec_dim = (L == svs::Dynamic) ? 
32 : L; + + auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader(dfname).load(); + return leanvec::LeanDataset::reduce( + data, std::nullopt, threadpool, 32, svs::lib::MaybeStatic(leanvec_dim) + ); + }); + + auto threadpool = svs::threads::as_threadpool(num_threads); + auto data = svs::detail::dispatch_load(compressor, threadpool); + fmt::print("Create Leanvec data with L={}, D={}\n", L, D); + return data; +} + +template +auto create_leanvec_data_with_alloc_handle(const A& alloc) { + namespace leanvec = svs::leanvec; + assert(D >= 32); + size_t leanvec_dim = (L == svs::Dynamic) ? 32 : L; + + auto compressor = svs::lib::Lazy([=, + &alloc](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader(dfname).load(); + return leanvec::LeanDataset::reduce( + data, std::nullopt, threadpool, 32, svs::lib::MaybeStatic(leanvec_dim), alloc + ); + }); + + auto threadpool = svs::threads::as_threadpool(num_threads); + auto data = svs::detail::dispatch_load(compressor, threadpool); + fmt::print("Create Leanvec data with L={}, D={}\n", L, D); + return data; +} + +template +auto create_blocked_leanvec_data() { + namespace leanvec = svs::leanvec; + using blocked_type = svs::data::Blocked; + assert(D >= 32); + size_t leanvec_dim = (L == svs::Dynamic) ? 32 : L; + + auto compressor = svs::lib::Lazy([=](svs::threads::ThreadPool auto& threadpool) { + auto data = svs::VectorDataLoader(dfname).load(); + return leanvec::LeanDataset::reduce( + data, std::nullopt, threadpool, 32, svs::lib::MaybeStatic(leanvec_dim) + ); + }); + + auto threadpool = svs::threads::as_threadpool(num_threads); + auto data = svs::detail::dispatch_load(compressor, threadpool); + fmt::print("Create Blocked Leanvec data with L={}, D={}\n", L, D); + return data; +} + +float get_alpha(svs::distance::DistanceL2 /*dist*/) { return 1.2; } + +float get_alpha(svs::distance::DistanceIP /*dist*/) { return 0.9; } + +template +void vamana_build(Data& data, Distance distance) { + auto parameters = svs::index::vamana::VamanaBuildParameters{ + get_alpha(distance), // alpha + 64, // graph max degree + 128, // search window size + 750, // max candidate pool size + 60, // prune to degree + true, // full search history + }; + + auto tic = svs::lib::now(); + svs::Vamana index = + svs::Vamana::build(parameters, data, Distance(), num_threads); + auto build_time = svs::lib::time_difference(tic); + fmt::print("Vamana index build time: {}\n", build_time); + index.save("config", "graph", "data"); +} + +template +void vamana_search(Data& data, Distance distance) { + auto index = svs::Vamana::assemble( + config_path, svs::GraphLoader(graph_path), data, distance, num_threads + ); + + index.set_search_window_size(search_window_size); + const auto query_data = svs::load_data(qfname); + const auto groundtruth = svs::load_data(gtfname); + + auto tic = svs::lib::now(); + auto query_result = index.search(query_data, n_neighbors); + auto search_time = svs::lib::time_difference(tic); + + std::vector qps; + for (int i = 0; i < 5; i++) { + tic = svs::lib::now(); + query_result = index.search(query_data, n_neighbors); + search_time = svs::lib::time_difference(tic); + qps.push_back(query_data.size() / search_time); + } + + auto recall = svs::k_recall_at_n(groundtruth, query_result, 1, 1); + fmt::print("Raw QPS: {:7.3f} \n", fmt::join(qps, ", ")); + fmt::print( + "Vamana search window size: {}, 1-Recall@1: {}, Max QPS: {:7.3f} \n", + search_window_size, + recall, + *std::max_element(qps.begin(), 
qps.end()) + ); +} + +template void vamana_build_search(Data& data) { + vamana_build(data, svs::distance::DistanceL2()); + vamana_search(data, svs::distance::DistanceL2()); + + vamana_build(data, svs::distance::DistanceIP()); + vamana_search(data, svs::distance::DistanceIP()); +} + +template +void dynamic_vamana_build(Data& data, Distance distance) { + auto parameters = svs::index::vamana::VamanaBuildParameters{ + get_alpha(distance), // alpha + 64, // graph max degree + 128, // search window size + 750, // max candidate pool size + 60, // prune to degree + true, // full search history + }; + + auto tic = svs::lib::now(); + std::vector ids(data.size()); + for (size_t i = 0; i < data.size(); ++i) { + ids[i] = i; + } + + svs::DynamicVamana index = svs::DynamicVamana::build( + parameters, data, svs::lib::as_span(ids), Distance(), num_threads + ); + auto build_time = svs::lib::time_difference(tic); + fmt::print("DynamicVamana index build time: {}\n", build_time); + index.save("config_dynamic", "graph_dynamic", "data_dynamic"); +} + +template +void dynamic_vamana_search(Data& data, Distance distance) { + using Idx = uint32_t; + auto index = svs::DynamicVamana::assemble( + config_path_dynamic, + SVS_LAZY(svs::graphs::SimpleBlockedGraph::load(graph_path_dynamic)), + data, + distance, + num_threads + ); + + index.set_search_window_size(search_window_size); + const auto query_data = svs::load_data(qfname); + const auto groundtruth = svs::load_data(gtfname); + + auto tic = svs::lib::now(); + auto query_result = index.search(query_data, n_neighbors); + auto search_time = svs::lib::time_difference(tic); + + std::vector qps; + for (int i = 0; i < 5; i++) { + tic = svs::lib::now(); + query_result = index.search(query_data, n_neighbors); + search_time = svs::lib::time_difference(tic); + qps.push_back(query_data.size() / search_time); + } + + auto recall = svs::k_recall_at_n(groundtruth, query_result, 1, 1); + fmt::print("Raw QPS: {:7.3f} \n", fmt::join(qps, ", ")); + fmt::print( + "Dynamic vamana search window size: {}, 1-Recall@1: {}, Max QPS: {:7.3f} \n", + search_window_size, + recall, + *std::max_element(qps.begin(), qps.end()) + ); +} + +template void dynamic_vamana_build_search(Data& data) { + dynamic_vamana_build(data, svs::distance::DistanceL2()); + dynamic_vamana_search(data, svs::distance::DistanceL2()); + + dynamic_vamana_build(data, svs::distance::DistanceIP()); + dynamic_vamana_search(data, svs::distance::DistanceIP()); +} + +template +void flat_search(Data& data, Distance distance) { + svs::Flat index = svs::Flat::assemble(data, distance, num_threads); + + const auto query_data = svs::load_data(qfname); + const auto groundtruth = svs::load_data(gtfname); + + auto tic = svs::lib::now(); + auto query_result = index.search(query_data, n_neighbors); + auto search_time = svs::lib::time_difference(tic); + + std::vector qps; + for (int i = 0; i < 5; i++) { + tic = svs::lib::now(); + query_result = index.search(query_data, n_neighbors); + search_time = svs::lib::time_difference(tic); + qps.push_back(query_data.size() / search_time); + } + + auto recall = svs::k_recall_at_n(groundtruth, query_result, 1, 1); + fmt::print("Raw QPS: {:7.3f} \n", fmt::join(qps, ", ")); + fmt::print( + "Flat search 1-Recall@1: {}, Max QPS: {:7.3f} \n", + recall, + *std::max_element(qps.begin(), qps.end()) + ); +} + +template void flat_search(Data& data) { + flat_search(data, svs::distance::DistanceL2()); + flat_search(data, svs::distance::DistanceIP()); +} + +template void dynamic_vamana_search() { + // Dynamic Index + 
using S = svs::quantization::lvq::Sequential; + using S1 = svs::quantization::lvq::Turbo<16, 8>; + { + using Alloc = svs::data::Blocked>; + auto data = svs::VectorDataLoader(dfname).load(); + dynamic_vamana_build_search(data); + } + + { + using Alloc = svs::data::Blocked>; + auto data = svs::VectorDataLoader(dfname_f16).load(); + dynamic_vamana_build_search(data); + } + + { + auto data = create_blocked_lvq_data<4, 8, D, S, A>(); + dynamic_vamana_build_search(data); + } + + { + auto data = create_blocked_lvq_data<4, 0, D, S1, A>(); + dynamic_vamana_build_search(data); + } + + { + auto data = create_blocked_lvq_data<4, 4, D, S1, A>(); + dynamic_vamana_build_search(data); + } + + { + auto data = create_blocked_lvq_data<4, 8, D, S1, A>(); + dynamic_vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<8>; + using S = svs::leanvec::UsingLVQ<8>; + auto data = create_blocked_leanvec_data(); + dynamic_vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<4>; + using S = svs::leanvec::UsingLVQ<8>; + auto data = create_blocked_leanvec_data(); + dynamic_vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<8>; + using S = svs::Float16; + auto data = create_blocked_leanvec_data(); + dynamic_vamana_build_search(data); + } +} + +template void flat_search() { + // using S = svs::quantization::lvq::Sequential; + using S1 = svs::quantization::lvq::Turbo<16, 8>; + { + auto data = svs::VectorDataLoader(dfname).load(); + flat_search(data); + } + + { + auto data = svs::VectorDataLoader(dfname_f16).load(); + flat_search(data); + } + + { + auto data = create_lvq_data<4, 8, D, S1, A>(); + flat_search(data); + } +} + +template void vamana_search() { + using S = svs::quantization::lvq::Sequential; + using S1 = svs::quantization::lvq::Turbo<16, 8>; + { + auto data = svs::VectorDataLoader(dfname).load(); + vamana_build_search(data); + } + + { + auto data = svs::VectorDataLoader(dfname_f16).load(); + vamana_build_search(data); + } + + { + auto data = create_lvq_data<4, 0, D, S, A>(); + vamana_build_search(data); + } + + { + auto data = create_lvq_data<4, 4, D, S, A>(); + vamana_build_search(data); + } + + { + auto data = create_lvq_data<4, 8, D, S, A>(); + vamana_build_search(data); + } + + { + auto data = create_lvq_data<4, 0, D, S1, A>(); + vamana_build_search(data); + } + + { + auto data = create_lvq_data<4, 4, D, S1, A>(); + vamana_build_search(data); + } + + { + auto data = create_lvq_data<4, 8, D, S1, A>(); + vamana_build_search(data); + } + + { + auto data = create_lvq_data<8, 0, D, S, A>(); + vamana_build_search(data); + } + + { + auto alloc = svs::make_allocator_handle(svs::HugepageAllocator()); + auto data = + create_lvq_data_with_alloc_handle<4, 4, D, S, decltype(alloc)>(std::move(alloc) + ); + vamana_build_search(data); + } + + { + auto alloc = + svs::make_blocked_allocator_handle(svs::HugepageAllocator()); + auto data = + create_lvq_data_with_alloc_handle<4, 4, D, S, decltype(alloc)>(std::move(alloc) + ); + vamana_build_search(data); + } + + { + auto alloc = + svs::make_blocked_allocator_handle(svs::HugepageAllocator()); + auto data = + create_lvq_data_with_alloc_handle<4, 8, D, S1, decltype(alloc)>(std::move(alloc) + ); + vamana_build_search(data); + } + + { + auto alloc = svs::make_allocator_handle(svs::lib::Allocator()); + auto data = + create_lvq_data_with_alloc_handle<4, 4, D, S, decltype(alloc)>(std::move(alloc) + ); + vamana_build_search(data); + } + + { + auto alloc = svs::make_blocked_allocator_handle(svs::lib::Allocator()); + auto data = + 
create_lvq_data_with_alloc_handle<4, 4, D, S, decltype(alloc)>(std::move(alloc) + ); + vamana_build_search(data); + } + + { + auto alloc = svs::make_blocked_allocator_handle(svs::lib::Allocator()); + auto data = + create_lvq_data_with_alloc_handle<4, 8, D, S1, decltype(alloc)>(std::move(alloc) + ); + vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<8>; + using S = svs::leanvec::UsingLVQ<8>; + auto data = create_leanvec_data(); + vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<8>; + using S = svs::leanvec::UsingLVQ<8>; + auto alloc = svs::make_allocator_handle(svs::lib::Allocator()); + auto data = create_leanvec_data_with_alloc_handle( + std::move(alloc) + ); + vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<8>; + using S = svs::leanvec::UsingLVQ<8>; + auto alloc = svs::make_blocked_allocator_handle(svs::lib::Allocator()); + auto data = create_leanvec_data_with_alloc_handle( + std::move(alloc) + ); + vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<4>; + using S = svs::leanvec::UsingLVQ<8>; + auto data = create_leanvec_data(); + vamana_build_search(data); + } + + { + using P = svs::leanvec::UsingLVQ<8>; + using S = svs::Float16; + auto data = create_leanvec_data(); + vamana_build_search(data); + } + + { + using P = svs::Float16; + using S = svs::Float16; + auto data = create_leanvec_data(); + vamana_build_search(data); + } + + { + using P = svs::Float16; + using S = svs::Float16; + auto alloc = svs::make_allocator_handle(svs::lib::Allocator()); + auto data = create_leanvec_data_with_alloc_handle( + std::move(alloc) + ); + vamana_build_search(data); + } + + { + using P = svs::Float16; + using S = svs::Float16; + auto alloc = svs::make_blocked_allocator_handle(svs::lib::Allocator()); + auto data = create_leanvec_data_with_alloc_handle( + std::move(alloc) + ); + vamana_build_search(data); + } + + { + using P = float; + using S = float; + auto data = create_leanvec_data(); + vamana_build_search(data); + } + + { + using P = float; + using S = float; + auto alloc = svs::make_allocator_handle(svs::lib::Allocator()); + auto data = create_leanvec_data_with_alloc_handle( + std::move(alloc) + ); + vamana_build_search(data); + } + + { + using P = float; + using S = float; + auto alloc = svs::make_blocked_allocator_handle(svs::lib::Allocator()); + auto data = create_leanvec_data_with_alloc_handle( + std::move(alloc) + ); + vamana_build_search(data); + } +} + +} // namespace + +CATCH_TEST_CASE("Shared library", "[shared][shared][shared_search]") { + const size_t D = 512; + size_t dataset_size = 14; + size_t query_size = 3; + using A = svs::lib::Allocator; + using A1 = svs::HugepageAllocator; + generate_random_data(D, dataset_size, query_size); + svs_setup(); + + CATCH_SECTION("Vamana Search") { + vamana_search(); + vamana_search(); + } + + CATCH_SECTION("Flat Search") { + flat_search(); + flat_search(); + } + + CATCH_SECTION("Dynamic Vamana Search") { + dynamic_vamana_search(); + dynamic_vamana_search(); + } +} diff --git a/tests/svs/fallback/utils.h b/tests/svs/fallback/utils.h new file mode 100644 index 00000000..c1475931 --- /dev/null +++ b/tests/svs/fallback/utils.h @@ -0,0 +1,116 @@ +/* + * Copyright 2025 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/***************************************************** + * I/O functions for fvecs, ivecs and xVecs + *****************************************************/ + +#include <cstdio> +#include <cstdlib> +#include <random> +#include <vector> + +int fvec_fwrite(FILE* fo, const float* v, int d) { + int ret; + ret = fwrite(&d, sizeof(int), 1, fo); + if (ret != 1) { + perror("fvec_fwrite: write error 1"); + return -1; + } + ret = fwrite(v, sizeof(float), d, fo); + if (ret != d) { + perror("fvec_fwrite: write error 2"); + return -1; + } + return 0; +} + +int fvecs_write(const char* fname, int d, int n, const float* vf) { + FILE* fo = fopen(fname, "wb"); + if (!fo) { + perror("fvecs_write: cannot open file"); + return -1; + } + + int i; + /* write down the vectors as fvecs */ + for (i = 0; i < n; i++) { + if (fvec_fwrite(fo, vf + i * d, d) < 0) + return -1; + } + fclose(fo); + return n; +} + +int ivec_iwrite(FILE* fo, const int* v, int d) { + int ret; + ret = fwrite(&d, sizeof(int), 1, fo); + if (ret != 1) { + perror("ivec_iwrite: write error 1"); + return -1; + } + ret = fwrite(v, sizeof(int), d, fo); + if (ret != d) { + perror("ivec_iwrite: write error 2"); + return -1; + } + return 0; +} + +int ivecs_write(const char* fname, int d, int n, const int* vf) { + FILE* fo = fopen(fname, "wb"); + if (!fo) { + perror("ivecs_write: cannot open file"); + return -1; + } + + int i; + /* write down the vectors as ivecs */ + for (i = 0; i < n; i++) { + if (ivec_iwrite(fo, vf + i * d, d) < 0) + return -1; + } + fclose(fo); + return n; +} + +void generate_random_data(size_t data_dim, size_t dataset_size, size_t query_size) { + float dataset_std = 1.0f, query_std = 0.1f; + + srand(100); + std::default_random_engine generator; + std::normal_distribution<float> dataset_dist(0.0f, dataset_std); + std::normal_distribution<float> query_dist(0.0f, query_std); + std::uniform_int_distribution<> uni_dist(0, dataset_size - 1); + + std::vector<float> dataset(dataset_size * data_dim); + for (size_t i = 0; i < dataset.size(); ++i) { + dataset[i] = dataset_dist(generator); + } + + std::vector<float> queries(query_size * data_dim); + std::vector<int> gt(query_size); + for (size_t i = 0; i < query_size; ++i) { + int e = uni_dist(generator); + for (size_t j = 0; j < data_dim; ++j) { + queries[i * data_dim + j] = dataset[e * data_dim + j] + query_dist(generator); + } + gt[i] = e; + } + + fvecs_write("data.vecs", data_dim, dataset_size, dataset.data()); + fvecs_write("query.vecs", data_dim, query_size, queries.data()); + ivecs_write("gt.vecs", 1, query_size, gt.data()); +}
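For orientation, here is a minimal usage sketch of the fallback LVQ path exercised by the new test; it is not part of the diff. It assumes a build without USE_PROPRIETARY (so lvq::LVQDataset stores the vectors uncompressed, as implemented in lvq_fallback.h above) and that "data.vecs" / "query.vecs" already exist; the file names, dimensionality, alignment, and parameter values are placeholders borrowed from the test, not fixed API requirements.

// Minimal sketch, mirroring tests/svs/fallback/fallback.cpp. File names, the
// dynamic dimensionality, alignment, and build parameters are assumptions.
#include "svs/orchestrators/vamana.h"
#include "svs/quantization/lvq/lvq_concept.h"

int main() {
    namespace lvq = svs::quantization::lvq;
    const size_t num_threads = 4;

    // Load raw float vectors and "compress" them. Without USE_PROPRIETARY the fallback
    // LVQDataset copies the data into plain float storage (see lvq_fallback.h) and,
    // depending on svs::fallback::get_mode(), either prints a warning or throws
    // UnsupportedHardwareError.
    auto raw = svs::VectorDataLoader<float>("data.vecs").load();
    auto threadpool = svs::threads::as_threadpool(num_threads);
    auto data = lvq::LVQDataset<4, 8>::compress(raw, threadpool, /*alignment=*/32);

    // Build and query a Vamana index over the dataset with the same parameters the
    // fallback test uses.
    auto parameters = svs::index::vamana::VamanaBuildParameters{
        1.2f, // alpha for L2
        64,   // graph max degree
        128,  // search window size
        750,  // max candidate pool size
        60,   // prune to degree
        true  // full search history
    };
    svs::Vamana index = svs::Vamana::build<float>(
        parameters, data, svs::distance::DistanceL2(), num_threads
    );
    index.set_search_window_size(20);

    auto queries = svs::load_data<float>("query.vecs");
    auto result = index.search(queries, /*n_neighbors=*/10);
    (void)result; // e.g. compare against ground truth with svs::k_recall_at_n, as the test does
    return 0;
}

The intent of the lvq_concept.h switch appears to be that this code compiles unchanged when the proprietary LVQ kernels are enabled; only the storage and distance computations inside LVQDataset differ.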