1 change: 1 addition & 0 deletions examples/cpp/shared/CMakeLists.txt
@@ -47,3 +47,4 @@ endfunction()
create_example_executable(shared shared.cpp)
create_example_executable(example_vamana_with_compression_lvq example_vamana_with_compression_lvq.cpp)
create_example_executable(example_vamana_with_compression example_vamana_with_compression.cpp)
create_example_executable(example_vamana_with_compression_dynamic example_vamana_with_compression_dynamic.cpp)
1 change: 1 addition & 0 deletions examples/cpp/shared/README.md
@@ -17,6 +17,7 @@
These examples utilize LVQ and LeanVec interfaces which are available when linking to a SVS shared/static library, which are published with [releases](https://github.com/intel/ScalableVectorSearch/releases). Note that these examples will _not_ run after building the open source codebase without the shared/static library. These examples include:
- [example_vamana_with_compression.cpp](./example_vamana_with_compression.cpp): Demonstrates building, searching, saving, and reloading an index with a LeanVec-compressed dataset.
- [example_vamana_with_compression_lvq.cpp](./example_vamana_with_compression_lvq.cpp): Demonstrates building, searching, saving, and reloading an index with an LVQ-compressed dataset.
- [example_vamana_with_compression_dynamic.cpp](./example_vamana_with_compression_dynamic.cpp): Demonstrates building, searching, saving, and reloading a dynamic index (supporting vector insertions and deletions over time) with a LeanVec-compressed dataset.

See [CMakeLists.txt](./CMakeLists.txt) for details on linking to the SVS shared library, and follow the commands below to compile against the SVS shared library and run the shared.cpp example:

144 changes: 144 additions & 0 deletions examples/cpp/shared/example_vamana_with_compression_dynamic.cpp
@@ -0,0 +1,144 @@
/*
* Copyright 2025 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// SVS
#include "svs/core/recall.h"
#include "svs/extensions/flat/leanvec.h"
#include "svs/extensions/flat/lvq.h"
#include "svs/extensions/vamana/leanvec.h"
#include "svs/extensions/vamana/lvq.h"
#include "svs/orchestrators/dynamic_vamana.h"
#include "svs/orchestrators/exhaustive.h"
#include "svs/orchestrators/vamana.h"

// Alias for a blocked LeanVec dataset that supports resizing and compaction
using BlockedLean = svs::leanvec::LeanDataset<
svs::leanvec::UsingLVQ<4>,
svs::leanvec::UsingLVQ<8>,
svs::Dynamic,
svs::Dynamic,
svs::data::Blocked<svs::HugepageAllocator<std::byte>>>;
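// A blocked backing store (here using a hugepage allocator) lets the dataset grow and
// shrink as vectors are inserted and deleted, which is what makes it usable with the
// dynamic index built below.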

int main() {
    // STEP 1: Compress the data with LeanVec, reducing the dimensionality to leanvec_dim
    // and using 4 and 8 bits for the primary and secondary levels, respectively.
//! [Compress data]
const size_t num_threads = 4;
size_t padding = 32;
size_t leanvec_dim = 64;
auto threadpool = svs::threads::as_threadpool(num_threads);
auto loaded =
svs::VectorDataLoader<float>(std::filesystem::path(SVS_DATA_DIR) / "data_f32.svs")
.load();
auto data = BlockedLean::reduce(
loaded,
std::nullopt,
threadpool,
padding,
svs::lib::MaybeStatic<svs::Dynamic>(leanvec_dim)
);
//! [Compress data]
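    // Note: passing std::nullopt above is assumed to let LeanVec derive the
    // dimensionality-reducing transform from the data itself; a precomputed transform
    // could be supplied through that argument instead (check the `reduce` overloads of
    // your SVS version).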

// STEP 2: Build Dynamic Vamana Index with initial set of vectors
//! [Index Build]
auto parameters = svs::index::vamana::VamanaBuildParameters{};
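    // The default build parameters are used here. Fields such as graph_max_degree,
    // window_size, and alpha can be adjusted on this struct to trade build time for
    // graph quality (consult the VamanaBuildParameters documentation for your SVS
    // version).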

// Create id labels for build set
std::vector<size_t> ids_build(loaded.size());
for (size_t i = 0; i < loaded.size(); ++i) {
ids_build[i] = i;
}

svs::DynamicVamana index = svs::DynamicVamana::build<float>(
parameters,
data,
svs::lib::as_span(ids_build),
svs::distance::DistanceL2(),
num_threads
);
//! [Index Build]

    // STEP 3: Delete and add vectors as needed.
//! [Delete vectors]
size_t num_to_delete = 100;
std::vector<size_t> ids_delete(num_to_delete);
for (size_t i = 0; i < ids_delete.size(); ++i) {
ids_delete[i] = i;
}

fmt::print("Deleting {} vectors.\n", ids_delete.size());

index.delete_points(ids_delete);
//! [Delete vectors]
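    // Deletions are typically applied lazily. Depending on the SVS version, calling
    // index.consolidate() and index.compact() afterwards reclaims the space held by
    // deleted vectors (see the DynamicVamana API of your release).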

//! [Add vectors]
// Add the deleted vectors back in.
auto points =
svs::data::SimpleData<float, svs::Dynamic>(ids_delete.size(), loaded.dimensions());

size_t i = 0;
for (const auto& j : ids_delete) {
points.set_datum(i, loaded.get_datum(j));
++i;
}
auto points_const_view = points.cview();

fmt::print("Adding {} vectors.\n", ids_delete.size());

index.add_points(points_const_view, svs::lib::as_span(ids_delete), num_threads);
//! [Add vectors]

// STEP 4: Search the Index
//! [Perform Queries]
const size_t search_window_size = 50;
const size_t n_neighbors = 10;
index.set_search_window_size(search_window_size);
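    // A larger search window generally yields higher recall at the cost of additional
    // query latency; 50 is an illustrative value rather than a tuned one.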

auto queries =
svs::load_data<float>(std::filesystem::path(SVS_DATA_DIR) / "queries_f32.fvecs");
auto results = index.search(queries, n_neighbors);
//! [Perform Queries]

//! [Recall]
auto groundtruth = svs::load_data<int>(
std::filesystem::path(SVS_DATA_DIR) / "groundtruth_euclidean.ivecs"
);
double recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);

fmt::print("Recall@{} = {:.4f}\n", n_neighbors, recall);
fmt::print(
"Note that recall is low because this example is using a dummy random dataset.\n"
);
//! [Recall]

// STEP 5: Saving and reloading the index
//! [Saving Loading]
index.save("config", "graph", "data");
index = svs::DynamicVamana::assemble<float>(
"config",
svs::GraphLoader("graph"),
svs::lib::load_from_disk<BlockedLean>("data", padding),
svs::distance::DistanceL2(),
num_threads
);
//! [Saving Loading]
    index.set_search_window_size(search_window_size);
    results = index.search(queries, n_neighbors);
    recall = svs::k_recall_at_n(groundtruth, results, n_neighbors, n_neighbors);

fmt::print("Recall@{} after saving and reloading = {:.4f}\n", n_neighbors, recall);

return 0;
}
144 changes: 144 additions & 0 deletions examples/python/vamana_with_compression_lvq.py
@@ -0,0 +1,144 @@
# Copyright 2025 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Import `unittest` and `shutil` to allow for automated testing and clean-up.
import shutil
import unittest

# [imports]
import os
import svs
# [imports]

DEBUG_MODE = False
def assert_equal(lhs, rhs, message: str = "", epsilon = 0.05):
if DEBUG_MODE:
print(f"{message}: {lhs} == {rhs}")
else:
assert lhs < rhs + epsilon, message
assert lhs > rhs - epsilon, message

test_data_dir = None

def run():
    # Expose the generated data directory to the unit-test clean-up below.
    global test_data_dir
# [generate-dataset]
# Create a test dataset.
# This will create a directory "example_data_vamana" and populate it with three
# entries:
# - data.fvecs: The test dataset.
# - queries.fvecs: The test queries.
    # - groundtruth.ivecs: The groundtruth.
test_data_dir = "./example_data_vamana"
svs.generate_test_dataset(
1000, # Create 1000 vectors in the dataset.
100, # Generate 100 query vectors.
256, # Set the vector dimensionality to 256.
test_data_dir, # The directory where results will be generated.
data_seed = 1234, # Random number seed for reproducibility.
query_seed = 5678, # Random number seed for reproducibility.
num_threads = 4, # Number of threads to use.
distance = svs.DistanceType.L2, # The distance type to use.
)
# [generate-dataset]

# [create-loader]
    # We are going to construct an LVQ-compressed dataset on-the-fly from uncompressed data.
# First, we construct a loader for the uncompressed data.
uncompressed_loader = svs.VectorDataLoader(
os.path.join(test_data_dir, "data.fvecs"),
svs.DataType.float32
)

    # Next, we construct an LVQLoader which is configured to apply LVQ compression with
    # 4 bits for the primary quantization and 8 bits for the residual quantization.
B1 = 4 # Number of bits for the first level LVQ quantization
B2 = 8 # Number of bits for the residuals quantization
compressed_loader = svs.LVQLoader(uncompressed_loader,
primary=B1,
residual=B2,
)
# [create-loader]
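    # Using fewer primary/residual bits shrinks the compressed dataset further but can
    # reduce search accuracy; a 4-bit primary with an 8-bit residual is a common
    # starting point.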

    # An index can be constructed using an LVQ-compressed dataset.
# [build-parameters]
parameters = svs.VamanaBuildParameters(
graph_max_degree = 64,
window_size = 128,
)
# [build-parameters]
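    # Larger values of `graph_max_degree` and `window_size` generally produce a higher
    # quality graph (better recall) at the cost of longer build times and more memory.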

# [build-index]
index = svs.Vamana.build(
parameters,
compressed_loader,
svs.DistanceType.L2,
num_threads = 4,
)
# [build-index]

    # Load the queries, then set the search window size of the index and perform the queries.
# [perform-queries]
n_neighbors = 10
index.search_window_size = 20
index.num_threads = 4

queries = svs.read_vecs(os.path.join(test_data_dir, "queries.fvecs"))
I, D = index.search(queries, n_neighbors)
# [perform-queries]

# Compare with the groundtruth.
# [recall]
groundtruth = svs.read_vecs(os.path.join(test_data_dir, "groundtruth.ivecs"))
recall = svs.k_recall_at(groundtruth, I, n_neighbors, n_neighbors)
print(f"Recall = {recall}")
# [recall]
assert_equal(recall, 0.953)

# Finally, we can save the index and reload from a previously saved set of files.
# [saving-loading]
index.save(
os.path.join(test_data_dir, "example_config"),
os.path.join(test_data_dir, "example_graph"),
os.path.join(test_data_dir, "example_data"),
)

index = svs.Vamana(
os.path.join(test_data_dir, "example_config"),
os.path.join(test_data_dir, "example_graph"),
os.path.join(test_data_dir, "example_data"),
svs.DistanceType.L2,
num_threads = 4,
)
# [saving-loading]
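    # The three path arguments are the directories for the index configuration, the
    # graph, and the (compressed) data, matching what `index.save` wrote above.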


#####
##### Main Executable
#####

if __name__ == "__main__":
run()

#####
##### As a unit test.
#####

class VamanaExampleTestCase(unittest.TestCase):
def tearDown(self):
if test_data_dir is not None:
print(f"Removing temporary directory {test_data_dir}")
            shutil.rmtree(test_data_dir)

def test_all(self):
run()