diff --git a/Makefile b/Makefile index 9bdc2e15a..dceaeb363 100644 --- a/Makefile +++ b/Makefile @@ -115,13 +115,13 @@ batchdep: build/.supraseal-install batchdep: $(BUILD_DEPS) .PHONY: batchdep -batch: CURIO_TAGS+= supraseal +batch: CURIO_TAGS+= supraseal_nvme batch: CGO_LDFLAGS_ALLOW='.*' batch: batchdep batch-build .PHONY: batch -batch-calibnet: CURIO_TAGS+= supraseal +batch-calibnet: CURIO_TAGS+= supraseal_nvme batch-calibnet: CURIO_TAGS+= calibnet batch-calibnet: CGO_LDFLAGS_ALLOW='.*' batch-calibnet: batchdep batch-build diff --git a/cmd/curio/batch.go b/cmd/curio/batch.go new file mode 100644 index 000000000..0b624dbea --- /dev/null +++ b/cmd/curio/batch.go @@ -0,0 +1,69 @@ +package main + +import ( + "fmt" + + "github.com/urfave/cli/v2" + "golang.org/x/xerrors" + + "github.com/filecoin-project/curio/cmd/curio/internal/translations" + "github.com/filecoin-project/curio/lib/supraffi" +) + +var batchCmd = &cli.Command{ + Name: "batch", + Usage: translations.T("Manage batch sealing operations"), + Subcommands: []*cli.Command{ + batchSetupCmd, + }, +} + +var batchSetupCmd = &cli.Command{ + Name: "setup", + Usage: translations.T("Setup SPDK for batch sealing (configures hugepages and binds NVMe devices)"), + Description: translations.T(`Setup SPDK for batch sealing operations. 
+ +This command automatically: +- Downloads SPDK if not already available +- Configures 1GB hugepages (36 pages minimum) +- Binds NVMe devices for use with SupraSeal + +Requires root/sudo access for SPDK setup operations.`), + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "hugepages", + Usage: translations.T("Number of 1GB hugepages to configure"), + Value: 36, + }, + &cli.IntFlag{ + Name: "min-pages", + Usage: translations.T("Minimum number of hugepages required"), + Value: 36, + }, + }, + Action: func(cctx *cli.Context) error { + nrHuge := cctx.Int("hugepages") + minPages := cctx.Int("min-pages") + + fmt.Println("Setting up SPDK for batch sealing...") + fmt.Printf("Configuring %d hugepages (minimum required: %d)\n", nrHuge, minPages) + + err := supraffi.CheckAndSetupSPDK(nrHuge, minPages) + if err != nil { + return xerrors.Errorf("SPDK setup failed: %w\n\n"+ + "Please ensure you have:\n"+ + "1. Root/sudo access for SPDK setup\n"+ + "2. Raw NVMe devices available (no filesystems on them)\n"+ + "3. Sufficient hugepages configured (see documentation)", err) + } + + fmt.Println("✓ SPDK setup completed successfully") + fmt.Println("\nNext steps:") + fmt.Println("1. Verify hugepages: cat /proc/meminfo | grep Huge") + fmt.Println("2. Configure your batch sealing layer (see documentation)") + fmt.Println("3. Start batch sealing operations") + + return nil + }, +} + diff --git a/cmd/curio/main.go b/cmd/curio/main.go index 4413f4e94..44873427b 100644 --- a/cmd/curio/main.go +++ b/cmd/curio/main.go @@ -72,6 +72,7 @@ func main() { ffiCmd, calcCmd, toolboxCmd, + batchCmd, } jaeger := tracing.SetupJaegerTracing("curio") diff --git a/documentation/en/supraseal.md b/documentation/en/supraseal.md index 0dfc0c772..a075d20c6 100644 --- a/documentation/en/supraseal.md +++ b/documentation/en/supraseal.md @@ -166,13 +166,33 @@ The build should be run on the target machine. 
Binaries won't be portable betwee ### Setup NVMe devices for SPDK: -{% hint style="info" %} -This is only needed while batch sealing is in beta, future versions of Curio will handle this automatically. +{% hint style="success" %} +SPDK setup can be done automatically using the Curio CLI command: {% endhint %} +```bash +sudo curio batch setup +``` + +This command will: +- Download SPDK if not already available +- Configure 1GB hugepages (36 pages by default) +- Bind NVMe devices for use with SupraSeal + +You can customize the number of hugepages: + +```bash +sudo curio batch setup --hugepages 36 --min-pages 36 +``` + +Alternatively, if you need to manually check SPDK status or unbind devices, you can use: + ```bash cd extern/supraseal/deps/spdk-v24.05/ -env NRHUGE=36 ./scripts/setup.sh +# Check status +sudo ./scripts/setup.sh status +# Manually run setup (not normally needed) +sudo env NRHUGE=36 ./scripts/setup.sh ``` ### Benchmark NVME IOPS diff --git a/documentation/zh/supraseal.md b/documentation/zh/supraseal.md index 344ff4607..fd0fae19e 100644 --- a/documentation/zh/supraseal.md +++ b/documentation/zh/supraseal.md @@ -307,13 +307,33 @@ Hugepagesize: 1048576 kB ### Setup NVMe devices for SPDK: ### 为SPDK设置NVMe设备: -{% hint style="info" %} -这只在批量密封处于测试阶段时需要,Curio的未来版本将自动处理这个问题。 +{% hint style="success" %} +可以使用 Curio CLI 命令自动完成 SPDK 设置: {% endhint %} +```bash +sudo curio batch setup +``` + +此命令将: +- 如果尚未可用,则下载 SPDK +- 配置 1GB 大页面(默认 36 页) +- 绑定 NVMe 设备以供 SupraSeal 使用 + +您可以自定义大页面数量: + +```bash +sudo curio batch setup --hugepages 36 --min-pages 36 +``` + +或者,如果您需要手动检查 SPDK 状态或解绑设备,可以使用: + ```bash cd extern/supraseal/deps/spdk-v24.05/ -env NRHUGE=36 ./scripts/setup.sh +# 检查状态 +sudo ./scripts/setup.sh status +# 手动运行设置(通常不需要) +sudo env NRHUGE=36 ./scripts/setup.sh ``` diff --git a/extern/supraseal/README.md b/extern/supraseal/README.md index 5fdefe96a..df69d5d50 100644 --- a/extern/supraseal/README.md +++ b/extern/supraseal/README.md @@ -225,7 +225,7 @@ During the build 
process it will clone and build SPDK, sppark, and blst. ./build.sh ``` -SPDK must be setup after every reboot: +With Curio's batch sealer, SPDK can be set up by running `sudo curio batch setup`; to run the setup manually instead: ``` cd deps/spdk-v24.05 sudo env NRHUGE=36 ./scripts/setup.sh diff --git a/extern/supraseal/build.sh b/extern/supraseal/build.sh index 0bbfec436..da22a8bab 100755 --- a/extern/supraseal/build.sh +++ b/extern/supraseal/build.sh @@ -144,7 +144,9 @@ CXXSTD=`$CXX -dM -E -x c++ /dev/null | \ INCLUDE="-I$SPDK/include -I$SPDK/isa-l/.. -I$SPDK/dpdk/build/include" CFLAGS="$SECTOR_SIZE $INCLUDE -g -O2" -CXXFLAGS="$CFLAGS -march=native $CXXSTD \ +# Use x86-64-v3 CPU flags to match GOAMD64=v3 requirement (equivalent to Intel Haswell/AMD Excavator+) +# Code marked with GCC target_clones (sha/sha_ext_mbx2_wrapper.cpp) adds CPU-specific variants selected at runtime +CXXFLAGS="$CFLAGS -march=x86-64-v3 -mtune=generic $CXXSTD \ -fPIC -fno-omit-frame-pointer -fno-strict-aliasing \ -fstack-protector -fno-common \ -D_GNU_SOURCE -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 \ @@ -275,33 +277,38 @@ if [ ! 
-d "deps/blst" ]; then git clone https://github.com/supranational/blst.git deps/blst (cd deps/blst git checkout bef14ca512ea575aff6f661fdad794263938795d - ./build.sh -march=native) + ./build.sh -march=x86-64-v3) fi -$CC -c sha/sha_ext_mbx2.S -o obj/sha_ext_mbx2.o - -# Generate .h files for the Poseidon constants -xxd -i poseidon/constants/constants_2 > obj/constants_2.h -xxd -i poseidon/constants/constants_4 > obj/constants_4.h -xxd -i poseidon/constants/constants_8 > obj/constants_8.h -xxd -i poseidon/constants/constants_11 > obj/constants_11.h -xxd -i poseidon/constants/constants_16 > obj/constants_16.h -xxd -i poseidon/constants/constants_24 > obj/constants_24.h -xxd -i poseidon/constants/constants_36 > obj/constants_36.h +# Generate .h files for the Poseidon constants (needed for tree_r binaries) +# These are fast and can run in parallel, but must complete before tree_r binaries compile +xxd -i poseidon/constants/constants_2 > obj/constants_2.h & +xxd -i poseidon/constants/constants_4 > obj/constants_4.h & +xxd -i poseidon/constants/constants_8 > obj/constants_8.h & +xxd -i poseidon/constants/constants_11 > obj/constants_11.h & +xxd -i poseidon/constants/constants_16 > obj/constants_16.h & +xxd -i poseidon/constants/constants_24 > obj/constants_24.h & +xxd -i poseidon/constants/constants_36 > obj/constants_36.h & + +# Compile all object files in parallel - these are independent of constants headers +# SHA extension code with x86-64-v3 flags to match GOAMD64=v3 +# The code uses SHA-NI instructions, which are NOT part of x86-64-v3 (Intel Goldmont/Ice Lake and later, AMD Zen and later) +# sha/sha_ext_mbx2_wrapper.cpp checks CPUID for SHA-NI at runtime before calling into this code +$CC -c -march=x86-64-v3 -mtune=generic sha/sha_ext_mbx2.S -o obj/sha_ext_mbx2.o & +# Compile multiversion wrapper for CPU-specific optimizations +$CXX $CXXFLAGS -Ideps/blst/src -c sha/sha_ext_mbx2_wrapper.cpp -o obj/sha_ext_mbx2_wrapper.o & # PC1 $CXX $CXXFLAGS -Ideps/sppark/util -o obj/pc1.o -c pc1/pc1.cpp & -# PC2 +# PC2 - compile once with unified 
interface supporting both NVMe and FileReader $CXX $CXXFLAGS -o obj/streaming_node_reader_nvme.o -c nvme/streaming_node_reader_nvme.cpp & $CXX $CXXFLAGS -o obj/ring_t.o -c nvme/ring_t.cpp & -$NVCC $CFLAGS $CUDA_ARCH -std=c++17 -DNO_SPDK -Xcompiler -march=native \ +# Single compilation of pc2.cu - works with both reader types via template interface +$NVCC $CFLAGS $CUDA_ARCH -std=c++17 -DNO_SPDK \ + -Xcompiler -march=x86-64-v3,-mtune=generic \ -Xcompiler -Wall,-Wextra,-Wno-subobject-linkage,-Wno-unused-parameter \ -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -c pc2/cuda/pc2.cu -o obj/pc2.o & -# File-reader variant of pc2 for tree_r_file -$NVCC $CFLAGS $CUDA_ARCH -std=c++17 -DNO_SPDK -DSTREAMING_NODE_READER_FILES -DRENAME_PC2_HASH_FILES -Xcompiler -march=native \ - -Xcompiler -Wall,-Wextra,-Wno-subobject-linkage,-Wno-unused-parameter \ - -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -c pc2/cuda/pc2.cu -o obj/pc2_files.o & $CXX $CXXFLAGS $INCLUDE -Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src \ -c sealing/supra_seal.cpp -o obj/supra_seal.o -Wno-subobject-linkage & @@ -311,74 +318,48 @@ $CXX $CXXFLAGS $INCLUDE -DSTREAMING_NODE_READER_FILES -Iposeidon -Ideps/sppark - wait -# Sppark object dedupe -nm obj/pc2.o | grep -E 'select_gpu|all_gpus|cuda_available|gpu_props|ngpus|drop_gpu_ptr_t|clone_gpu_ptr_t' | awk '{print $3 " supra_" $3}' > symbol_rename.txt -nm obj/pc2_files.o | grep -E 'select_gpu|all_gpus|cuda_available|gpu_props|ngpus|drop_gpu_ptr_t|clone_gpu_ptr_t' | awk '{print $3 " supra_" $3}' >> symbol_rename.txt -# Deduplicate symbol rename entries -sort -u -o symbol_rename.txt symbol_rename.txt - -for obj in obj/pc1.o obj/pc2.o obj/pc2_files.o obj/ring_t.o obj/streaming_node_reader_nvme.o obj/supra_seal.o obj/supra_tree_r_file.o obj/sha_ext_mbx2.o; do - objcopy --redefine-syms=symbol_rename.txt $obj -done - -# Weaken duplicate symbols between pc2.o and pc2_files.o to avoid multiple-definition at link time -nm -g --defined-only obj/pc2.o | awk 
'{print $3}' | sort -u > obj/syms_pc2.txt -nm -g --defined-only obj/pc2_files.o | awk '{print $3}' | sort -u > obj/syms_pc2_files.txt -comm -12 obj/syms_pc2.txt obj/syms_pc2_files.txt | grep -v '^pc2_hash_files' > obj/syms_dups.txt -if [ -s obj/syms_dups.txt ]; then - while read -r sym; do - objcopy --weaken-symbol="$sym" obj/pc2_files.o - done < obj/syms_dups.txt -fi - -rm symbol_rename.txt - +# All object files and constants headers are now ready ar rvs obj/libsupraseal.a \ obj/pc1.o \ obj/pc2.o \ - obj/pc2_files.o \ obj/ring_t.o \ obj/streaming_node_reader_nvme.o \ obj/supra_seal.o \ obj/supra_tree_r_file.o \ - obj/sha_ext_mbx2.o + obj/sha_ext_mbx2.o \ + obj/sha_ext_mbx2_wrapper.o +# Build binaries in parallel +# Note: tree_r binaries compile poseidon.cpp which includes constants_*.h files +# These are guaranteed to be ready after the wait above $CXX $CXXFLAGS -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src \ -o bin/seal demos/main.cpp \ -Lobj -lsupraseal \ $LDFLAGS -Ldeps/blst -lblst -L$CUDA/lib64 -lcudart_static -lgmp -lconfig++ & -# tree-r CPU only -$CXX $SECTOR_SIZE $CXXSTD -pthread -g -O3 -march=native \ +# tree-r CPU only (uses poseidon.cpp which includes constants_*.h) +$CXX $SECTOR_SIZE $CXXSTD -pthread -g -O3 -march=x86-64-v3 -mtune=generic \ -Wall -Wextra -Werror -Wno-subobject-linkage \ tools/tree_r.cpp poseidon/poseidon.cpp \ -o bin/tree_r_cpu -Iposeidon -Ideps/sppark -Ideps/blst/src -L deps/blst -lblst & # tree-r CPU + GPU $NVCC $SECTOR_SIZE -DNO_SPDK -DSTREAMING_NODE_READER_FILES \ - $CUDA_ARCH -std=c++17 -g -O3 -Xcompiler -march=native \ + $CUDA_ARCH -std=c++17 -g -O3 -Xcompiler -march=x86-64-v3,-mtune=generic \ -Xcompiler -Wall,-Wextra,-Werror \ -Xcompiler -Wno-subobject-linkage,-Wno-unused-parameter \ -x cu tools/tree_r.cpp -o bin/tree_r \ -Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -L deps/blst -lblst -lconfig++ & # tree-d CPU only -$CXX -DRUNTIME_SECTOR_SIZE $CXXSTD -g -O3 -march=native \ +$CXX -DRUNTIME_SECTOR_SIZE $CXXSTD -g 
-O3 -march=x86-64-v3 -mtune=generic \ -Wall -Wextra -Werror -Wno-subobject-linkage \ tools/tree_d.cpp \ -o bin/tree_d_cpu -Ipc1 -L deps/blst -lblst & # Standalone GPU pc2 $NVCC $SECTOR_SIZE -DNO_SPDK -DSTREAMING_NODE_READER_FILES \ - $CUDA_ARCH -std=c++17 -g -O3 -Xcompiler -march=native \ - -Xcompiler -Wall,-Wextra,-Werror \ - -Xcompiler -Wno-subobject-linkage,-Wno-unused-parameter \ - -x cu tools/tree_r.cpp -o bin/tree_r \ - -Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -L deps/blst -lblst -lconfig++ & - -# Standalone GPU pc2 -$NVCC $SECTOR_SIZE -DNO_SPDK -DSTREAMING_NODE_READER_FILES \ - $CUDA_ARCH -std=c++17 -g -O3 -Xcompiler -march=native \ + $CUDA_ARCH -std=c++17 -g -O3 -Xcompiler -march=x86-64-v3,-mtune=generic \ -Xcompiler -Wall,-Wextra,-Werror \ -Xcompiler -Wno-subobject-linkage,-Wno-unused-parameter \ -x cu tools/pc2.cu -o bin/pc2 \ diff --git a/extern/supraseal/pc2/cuda/pc2.cu b/extern/supraseal/pc2/cuda/pc2.cu index 7c7a92613..74e95cad3 100644 --- a/extern/supraseal/pc2/cuda/pc2.cu +++ b/extern/supraseal/pc2/cuda/pc2.cu @@ -6,16 +6,6 @@ #ifndef __CUDA_ARCH__ -#ifdef RENAME_PC2_HASH_FILES -#define pc2_hash pc2_hash_files -#define pc2_t pc2_files_t -#define gpu_resource_t gpu_resource_files_t -#define buf_to_disk_t buf_to_disk_files_t -#define pc2_batcher_t pc2_batcher_files_t -#define tree_address_t tree_address_files_t -#define do_pc2_cleanup do_pc2_files_cleanup -#endif - #include #include #include @@ -27,10 +17,12 @@ #include "pc2.cuh" #include "cuda_lambda_t.hpp" #include "../../util/util.hpp" +#include "../pc2_internal.hpp" -template -pc2_t::pc2_t(topology_t& _topology, - bool _tree_r_only, streaming_node_reader_t& _reader, +// Template class that works with either reader type +template +pc2_t::pc2_t(topology_t& _topology, + bool _tree_r_only, Reader& _reader, size_t _nodes_to_read, size_t _batch_size, size_t _stream_count, const char** _data_filenames, const char* _output_dir) : @@ -137,8 +129,8 @@ pc2_t::pc2_t(topology_t& _topology, } 
} -template -pc2_t::~pc2_t() { +template +pc2_t::~pc2_t() { while (resources.size() > 0) { gpu_resource_t* r = resources.back(); select_gpu(r->gpu); @@ -170,8 +162,8 @@ pc2_t::~pc2_t() { cudaHostUnregister(page_buffer); } /* -template -void pc2_t::get_filenames(const char* output_dir, +template +void pc2_t::get_filenames(const char* output_dir, std::vector& directories, std::vector& p_aux_filenames, std::vector>& tree_c_filenames, @@ -293,8 +285,8 @@ void pc2_t::get_filenames(const char* output_dir, } */ -template -void pc2_t::get_filenames(const char* output_dir, +template +void pc2_t::get_filenames(const char* output_dir, std::vector& directories, std::vector& p_aux_filenames, std::vector>& tree_c_filenames, @@ -441,8 +433,8 @@ void pc2_t::add_paths_for_sector(const char* output_dir, sealed_filenames.push_back(fname); } -template -void pc2_t::open_files() { +template +void pc2_t::open_files() { std::vector directories; std::vector> tree_c_filenames; std::vector> tree_r_filenames; @@ -503,8 +495,8 @@ void pc2_t::open_files() { } } -template -void pc2_t::hash() { +template +void pc2_t::hash() { thread_pool_t pool(1); pool.spawn([&]() { // Affinitize the thread in the pool @@ -730,8 +722,8 @@ struct pc2_batcher_t { }; -template -void pc2_t::hash_gpu(size_t partition) { +template +void pc2_t::hash_gpu(size_t partition) { assert (stream_count % ngpus() == 0); nodes_per_stream = nodes_to_read / stream_count; @@ -1193,8 +1185,8 @@ void pc2_t::hash_gpu(size_t partition) { //printf("num_writes %ld\n", num_writes); } -template -void pc2_t::hash_cpu(fr_t* roots, size_t partition, fr_t* input, +template +void pc2_t::hash_cpu(fr_t* roots, size_t partition, fr_t* input, std::vector*>* tree_files, size_t file_offset) { // This count is one layer above the leaves @@ -1313,15 +1305,16 @@ void pc2_t::write_roots(fr_t* roots_c, fr_t* roots_r) { } } -template -void pc2_hash(topology_t& topology, - bool tree_r_only, - streaming_node_reader_t& reader, - size_t nodes_to_read, size_t 
batch_size, - size_t stream_count, - const char** data_filenames, const char* output_dir) { - pc2_t pc2(topology, tree_r_only, reader, nodes_to_read, batch_size, stream_count, - data_filenames, output_dir); +// Implementation that works with any reader type satisfying the interface +template +void pc2_hash_impl(topology_t& topology, + bool tree_r_only, + Reader& reader, + size_t nodes_to_read, size_t batch_size, + size_t stream_count, + const char** data_filenames, const char* output_dir) { + pc2_t pc2(topology, tree_r_only, reader, nodes_to_read, batch_size, stream_count, + data_filenames, output_dir); pc2.hash(); } @@ -1333,7 +1326,7 @@ void do_pc2_cleanup(const char* output_dir) { std::vector> tree_r_filenames; std::vector sealed_filenames; - pc2_t::get_filenames(output_dir, + pc2_t>::get_filenames(output_dir, directories, p_aux_filenames, tree_c_filenames, @@ -1441,7 +1434,6 @@ template void pc2_hash(topology_t&, bool, streaming_nod template void pc2_hash(topology_t&, bool, streaming_node_reader_t&, size_t, size_t, size_t, const char**, const char*); template void pc2_hash(topology_t&, bool, streaming_node_reader_t&, size_t, size_t, size_t, const char**, const char*); - #ifdef RUNTIME_SECTOR_SIZE template void do_pc2_cleanup(const char* output_dir); template void do_pc2_cleanup(const char* output_dir); diff --git a/extern/supraseal/pc2/cuda/pc2.cuh b/extern/supraseal/pc2/cuda/pc2.cuh index 9456866c8..fecef84ac 100644 --- a/extern/supraseal/pc2/cuda/pc2.cuh +++ b/extern/supraseal/pc2/cuda/pc2.cuh @@ -166,12 +166,12 @@ struct buf_to_disk_t { bool reverse; }; -template +template> class pc2_t { private: topology_t& topology; bool tree_r_only; - streaming_node_reader_t& reader; + Reader& reader; size_t nodes_to_read; size_t batch_size; tree_address_t tree_c_address; @@ -305,7 +305,7 @@ private: public: pc2_t(topology_t& _topology, - bool _tree_r_only, streaming_node_reader_t& _reader, + bool _tree_r_only, Reader& _reader, size_t _nodes_to_read, size_t _batch_size, 
size_t _stream_count, const char** data_filenames, const char* output_dir); ~pc2_t(); diff --git a/extern/supraseal/pc2/pc2_internal.hpp b/extern/supraseal/pc2/pc2_internal.hpp index bb5154057..c24bb46ff 100644 --- a/extern/supraseal/pc2/pc2_internal.hpp +++ b/extern/supraseal/pc2/pc2_internal.hpp @@ -5,19 +5,49 @@ #include "../sealing/constants.hpp" #include "../sealing/data_structures.hpp" #include "../sealing/topology_t.hpp" -#ifdef STREAMING_NODE_READER_FILES -#include "../c1/streaming_node_reader_files.hpp" -#else + +// Include NVMe reader implementation (default) #include "../nvme/streaming_node_reader_nvme.hpp" -#endif +// Include file reader implementation with renamed class to avoid conflict +#define streaming_node_reader_t streaming_node_reader_files_t +#include "../c1/streaming_node_reader_files.hpp" +#undef streaming_node_reader_t + +// Unified pc2_hash template that works with either reader type +// Both implementations satisfy the same interface +template +void pc2_hash_impl(topology_t& topology, + bool tree_r_only, + Reader& _reader, + size_t _nodes_to_read, size_t _batch_size, + size_t _stream_count, + const char** data_filenames, const char* output_dir); + +// Convenience wrappers for backward compatibility template void pc2_hash(topology_t& topology, bool tree_r_only, streaming_node_reader_t& _reader, size_t _nodes_to_read, size_t _batch_size, size_t _stream_count, - const char** data_filenames, const char* output_dir); + const char** data_filenames, const char* output_dir) { + pc2_hash_impl>( + topology, tree_r_only, _reader, _nodes_to_read, _batch_size, + _stream_count, data_filenames, output_dir); +} + +template +void pc2_hash_files(topology_t& topology, + bool tree_r_only, + streaming_node_reader_files_t& _reader, + size_t _nodes_to_read, size_t _batch_size, + size_t _stream_count, + const char** data_filenames, const char* output_dir) { + pc2_hash_impl>( + topology, tree_r_only, _reader, _nodes_to_read, _batch_size, + _stream_count, 
data_filenames, output_dir); +} template void do_pc2_cleanup(const char* output_dir); diff --git a/extern/supraseal/sealing/supra_tree_r_file.cpp b/extern/supraseal/sealing/supra_tree_r_file.cpp index 905aa1ae1..48bc75d8e 100644 --- a/extern/supraseal/sealing/supra_tree_r_file.cpp +++ b/extern/supraseal/sealing/supra_tree_r_file.cpp @@ -9,17 +9,6 @@ #include "../util/util.hpp" #include "../pc2/pc2_internal.hpp" -// Forward declaration for renamed file-reader variant compiled from pc2.cu -template -void pc2_hash_files(topology_t& topology, - bool tree_r_only, - streaming_node_reader_t& reader, - size_t nodes_to_read, - size_t batch_size, - size_t stream_count, - const char** data_filenames, - const char* output_dir); - // CUDA-based tree-r from last-layer file(s) using the file-streaming reader // Always uses P::PARALLEL_SECTORS == 1 template @@ -35,7 +24,7 @@ static int tree_r_file_impl(const char* last_layer_filename, std::vector layer_filenames; layer_filenames.push_back(std::string(last_layer_filename)); - streaming_node_reader_t> node_reader(P::GetSectorSize(), layer_filenames); + streaming_node_reader_files_t> node_reader(P::GetSectorSize(), layer_filenames); node_reader.alloc_slots(stream_count * 2, P::GetNumLayers() * batch_size, true); diff --git a/extern/supraseal/sha/sha_ext_mbx2_wrapper.cpp b/extern/supraseal/sha/sha_ext_mbx2_wrapper.cpp new file mode 100644 index 000000000..7e0275597 --- /dev/null +++ b/extern/supraseal/sha/sha_ext_mbx2_wrapper.cpp @@ -0,0 +1,71 @@ +// Copyright Supranational LLC +// Wrapper for sha_ext_mbx2 with GCC multiversioning for optimal performance +// across different CPU architectures (Intel, AMD, older hardware) + +#include +#include + +extern "C" { + // Assembly implementation using SHA-NI extensions + void sha_ext_mbx2(uint32_t* digest, uint32_t** replica_id_buf, + uint32_t** data_buf, size_t offset, + size_t blocks, size_t repeat); +} + +// Runtime CPU feature detection for SHA-NI +static bool has_sha_ni() { + unsigned int 
eax, ebx, ecx, edx; + // Check if CPUID supports extended features (leaf 7) + __asm__("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(7), "c"(0)); + // SHA-NI is bit 29 of EBX (CPUID.07H.EBX.SHA [bit 29]) + return (ebx & (1U << 29)) != 0; +} + +// Fallback implementation for CPUs without SHA-NI +// Uses blst's portable SHA-256 implementation +static void sha_ext_mbx2_fallback(uint32_t* digest, uint32_t** replica_id_buf, + uint32_t** data_buf, size_t offset, + size_t blocks, size_t repeat) { + extern void blst_sha256_block(uint32_t* h, const void* in, size_t blocks); + + // Simple fallback: hash each replica_id and data pair + // This is a simplified version - the actual implementation is more complex + for (size_t r = 0; r < repeat; r++) { + for (size_t b = 0; b < blocks; b++) { + if (replica_id_buf[b]) { + blst_sha256_block(digest, replica_id_buf[b], 1); + } + if (data_buf[b]) { + blst_sha256_block(digest, data_buf[b], 1); + } + } + } +} + +// Multiversion function with target_clones for optimal performance +// Provides optimized versions for different CPU architectures +__attribute__((target_clones("default", "sse2", "sse4.2", "avx", "avx2", "avx512f"))) +void sha_ext_mbx2_multiversion(uint32_t* digest, uint32_t** replica_id_buf, + uint32_t** data_buf, size_t offset, + size_t blocks, size_t repeat) { + // Use SHA-NI version if available (Intel Haswell+, AMD Zen+) + // GCC will select the best version at runtime based on CPU capabilities + static bool sha_ni_available = has_sha_ni(); + + if (sha_ni_available) { + sha_ext_mbx2(digest, replica_id_buf, data_buf, offset, blocks, repeat); + } else { + sha_ext_mbx2_fallback(digest, replica_id_buf, data_buf, offset, blocks, repeat); + } +} + +// Export the multiversion function with the original name +// This allows existing code to use the optimized version transparently +extern "C" void sha_ext_mbx2_optimized(uint32_t* digest, uint32_t** replica_id_buf, + uint32_t** data_buf, size_t offset, + size_t 
blocks, size_t repeat) { + sha_ext_mbx2_multiversion(digest, replica_id_buf, data_buf, offset, blocks, repeat); +} + diff --git a/lib/supraffi/no_nvme.go b/lib/supraffi/no_nvme.go new file mode 100644 index 000000000..0b90925ef --- /dev/null +++ b/lib/supraffi/no_nvme.go @@ -0,0 +1,96 @@ +//go:build !supraseal_nvme + +package supraffi + +import ( + "bytes" + "encoding/binary" + "fmt" +) + +// GetHealthInfo retrieves health information for all NVMe devices +// This function requires supraseal_nvme build tag +func GetHealthInfo() ([]HealthInfo, error) { + return nil, fmt.Errorf("GetHealthInfo: supraseal_nvme build tag not enabled") +} + +// SetupSPDK runs the SPDK setup script to configure NVMe devices for use with SupraSeal. +func SetupSPDK(nrHuge int) error { + return fmt.Errorf("SetupSPDK: supraseal_nvme build tag not enabled") +} + +// CheckAndSetupSPDK checks if SPDK is set up, and if not, runs the setup script. +func CheckAndSetupSPDK(nrHuge int, minPages int) error { + return fmt.Errorf("CheckAndSetupSPDK: supraseal_nvme build tag not enabled") +} + +// SupraSealInit initializes the supra seal with a sector size and optional config file. +// Requires NVMe devices for batch sealing. +func SupraSealInit(sectorSize uint64, configFile string) { + panic("SupraSealInit: supraseal_nvme build tag required for batch sealing") +} + +// Pc1 performs the pc1 operation for batch sealing. +// Requires NVMe devices for layer storage. 
+func Pc1(blockOffset uint64, replicaIDs [][32]byte, parentsFilename string, sectorSize uint64) int { + panic("Pc1: supraseal_nvme build tag required for batch sealing") +} + +type Path struct { + Replica string + Cache string +} + +// GenerateMultiString generates a //multi// string from an array of Path structs +func GenerateMultiString(paths []Path) (string, error) { + var buffer bytes.Buffer + buffer.WriteString("//multi//") + + for _, path := range paths { + replicaPath := []byte(path.Replica) + cachePath := []byte(path.Cache) + + // Write the length and path for the replica + if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(replicaPath))); err != nil { + return "", err + } + buffer.Write(replicaPath) + + // Write the length and path for the cache + if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(cachePath))); err != nil { + return "", err + } + buffer.Write(cachePath) + } + + return buffer.String(), nil +} + +// Pc2 performs the pc2 operation for batch sealing. +// Requires NVMe devices for layer storage. +func Pc2(blockOffset uint64, numSectors int, outputDir string, sectorSize uint64) int { + panic("Pc2: supraseal_nvme build tag required for batch sealing") +} + +// C1 performs the c1 operation for batch sealing. +// Requires NVMe devices for batch operations. +func C1(blockOffset uint64, numSectors, sectorSlot int, replicaID, seed, ticket []byte, cachePath, parentsFilename, replicaPath string, sectorSize uint64) int { + panic("C1: supraseal_nvme build tag required for batch sealing") +} + +// GetMaxBlockOffset returns the highest available block offset from NVMe devices. +func GetMaxBlockOffset(sectorSize uint64) uint64 { + panic("GetMaxBlockOffset: supraseal_nvme build tag required") +} + +// GetSlotSize returns the size in blocks required for the given number of sectors. +// Used for batch sealing with NVMe devices. 
+func GetSlotSize(numSectors int, sectorSize uint64) uint64 { + panic("GetSlotSize: supraseal_nvme build tag required") +} + +// GetCommR returns comm_r after calculating from p_aux file. Returns true on success. +// Used in batch sealing context. +func GetCommR(commR []byte, cachePath string) bool { + panic("GetCommR: supraseal_nvme build tag required") +} diff --git a/lib/supraffi/no_supraseal.go b/lib/supraffi/no_supraseal.go deleted file mode 100644 index ae1ffe6ef..000000000 --- a/lib/supraffi/no_supraseal.go +++ /dev/null @@ -1,126 +0,0 @@ -//go:build !supraseal - -package supraffi - -import ( - "bytes" - "encoding/binary" -) - -// SupraSealInit initializes the supra seal with a sector size and optional config file. -func SupraSealInit(sectorSize uint64, configFile string) { - panic("SupraSealInit: supraseal build tag not enabled") -} - -// Pc1 performs the pc1 operation. -func Pc1(blockOffset uint64, replicaIDs [][32]byte, parentsFilename string, sectorSize uint64) int { - panic("Pc1: supraseal build tag not enabled") -} - -func TreeRFile(lastLayerFilename, dataFilename, outputDir string, sectorSize uint64) int { - panic("TreeRFile: supraseal build tag not enabled") -} - -type Path struct { - Replica string - Cache string -} - -// GenerateMultiString generates a //multi// string from an array of Path structs -func GenerateMultiString(paths []Path) (string, error) { - var buffer bytes.Buffer - buffer.WriteString("//multi//") - - for _, path := range paths { - replicaPath := []byte(path.Replica) - cachePath := []byte(path.Cache) - - // Write the length and path for the replica - if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(replicaPath))); err != nil { - return "", err - } - buffer.Write(replicaPath) - - // Write the length and path for the cache - if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(cachePath))); err != nil { - return "", err - } - buffer.Write(cachePath) - } - - return buffer.String(), nil -} - -func 
GetHealthInfo() ([]HealthInfo, error) { - panic("GetHealthInfo: supraseal build tag not enabled") -} - -// Pc2 performs the pc2 operation. -func Pc2(blockOffset uint64, numSectors int, outputDir string, sectorSize uint64) int { - panic("Pc2: supraseal build tag not enabled") -} - -// Pc2Cleanup deletes files associated with pc2. -func Pc2Cleanup(numSectors int, outputDir string, sectorSize uint64) int { - panic("Pc2Cleanup: supraseal build tag not enabled") -} - -// C1 performs the c1 operation. -func C1(blockOffset uint64, numSectors, sectorSlot int, replicaID, seed, ticket []byte, cachePath, parentsFilename, replicaPath string, sectorSize uint64) int { - panic("C1: supraseal build tag not enabled") -} - -// GetMaxBlockOffset returns the highest available block offset. -func GetMaxBlockOffset(sectorSize uint64) uint64 { - panic("GetMaxBlockOffset: supraseal build tag not enabled") -} - -// GetSlotSize returns the size in blocks required for the given number of sectors. -func GetSlotSize(numSectors int, sectorSize uint64) uint64 { - panic("GetSlotSize: supraseal build tag not enabled") -} - -// GetCommCFromTree returns comm_c after calculating from tree file(s). -func GetCommCFromTree(commC []byte, cachePath string, sectorSize uint64) bool { - panic("GetCommCFromTree: supraseal build tag not enabled") -} - -// GetCommC returns comm_c from p_aux file. -func GetCommC(commC []byte, cachePath string) bool { - panic("GetCommC: supraseal build tag not enabled") -} - -// SetCommC sets comm_c in the p_aux file. -func SetCommC(commC []byte, cachePath string) bool { - panic("SetCommC: supraseal build tag not enabled") -} - -// GetCommRLastFromTree returns comm_r_last after calculating from tree file(s). -func GetCommRLastFromTree(commRLast []byte, cachePath string, sectorSize uint64) bool { - panic("GetCommRLastFromTree: supraseal build tag not enabled") -} - -// GetCommRLast returns comm_r_last from p_aux file. 
-func GetCommRLast(commRLast []byte, cachePath string) bool { - panic("GetCommRLast: supraseal build tag not enabled") -} - -// SetCommRLast sets comm_r_last in the p_aux file. -func SetCommRLast(commRLast []byte, cachePath string) bool { - panic("SetCommRLast: supraseal build tag not enabled") -} - -// GetCommR returns comm_r after calculating from p_aux file. -func GetCommR(commR []byte, cachePath string) bool { - panic("GetCommR: supraseal build tag not enabled") -} - -// GetCommD returns comm_d from tree_d file. -func GetCommD(commD []byte, cachePath string) bool { - panic("GetCommD: supraseal build tag not enabled") -} - -// GetCCCommD returns comm_d for a cc sector. -func GetCCCommD(commD []byte, sectorSize int) bool { - panic("GetCCCommD: supraseal build tag not enabled") -} diff --git a/lib/supraffi/seal.go b/lib/supraffi/seal.go index 5cf5dcda8..60857f84b 100644 --- a/lib/supraffi/seal.go +++ b/lib/supraffi/seal.go @@ -1,171 +1,21 @@ -//go:build supraseal +//go:build linux package supraffi /* - #cgo CFLAGS: -I${SRCDIR}/../../extern/supraseal/sealing - #cgo LDFLAGS: -fno-omit-frame-pointer -Wl,-z,noexecstack -Wl,-z,relro,-z,now -fuse-ld=bfd -L${SRCDIR}/../../extern/supraseal/obj -L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/build/lib -L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/isa-l/.libs -L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/isa-l-crypto/.libs -lsupraseal -Wl,--whole-archive -Wl,--no-as-needed -lspdk_log -lspdk_bdev_malloc -lspdk_bdev_null -lspdk_bdev_nvme -lspdk_bdev_passthru -lspdk_bdev_lvol -lspdk_bdev_raid -lspdk_bdev_error -lspdk_bdev_gpt -lspdk_bdev_split -lspdk_bdev_delay -lspdk_bdev_zone_block -lspdk_blobfs_bdev -lspdk_blobfs -lspdk_blob_bdev -lspdk_lvol -lspdk_blob -lspdk_nvme -lspdk_bdev_ftl -lspdk_ftl -lspdk_bdev_aio -lspdk_bdev_virtio -lspdk_virtio -lspdk_vfio_user -lspdk_accel_ioat -lspdk_ioat -lspdk_scheduler_dynamic -lspdk_env_dpdk -lspdk_scheduler_dpdk_governor -lspdk_scheduler_gscheduler -lspdk_sock_posix 
-lspdk_event -lspdk_event_bdev -lspdk_bdev -lspdk_notify -lspdk_dma -lspdk_event_accel -lspdk_accel -lspdk_event_vmd -lspdk_vmd -lspdk_event_sock -lspdk_init -lspdk_thread -lspdk_trace -lspdk_sock -lspdk_rpc -lspdk_jsonrpc -lspdk_json -lspdk_util -lspdk_keyring -lspdk_keyring_file -lspdk_keyring_linux -lspdk_event_keyring -Wl,--no-whole-archive ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/build/lib/libspdk_env_dpdk.a -Wl,--whole-archive ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_bus_pci.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_cryptodev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_dmadev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_eal.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_ethdev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_hash.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_kvargs.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_log.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mbuf.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mempool.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mempool_ring.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_net.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_pci.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_power.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_rcu.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_ring.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_telemetry.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_vhost.a -Wl,--no-whole-archive -lnuma -lisal -lisal_crypto -pthread -ldl -lrt -luuid -lssl -lcrypto -lm -laio 
-lfuse3 -larchive -lkeyutils -lcudart_static -L${SRCDIR}/../../extern/supraseal/deps/blst -lblst -lconfig++ -lgmp -lstdc++ + #cgo CFLAGS: -I${SRCDIR}/../../extern/supraseal/sealing -fno-omit-frame-pointer + #cgo LDFLAGS: -Wl,-z,noexecstack -Wl,-z,relro,-z,now -L${SRCDIR}/../../extern/supraseal/obj -lsupraseal -lcudart_static -L${SRCDIR}/../../extern/supraseal/deps/blst -lblst -lconfig++ -lgmp -lstdc++ -pthread -ldl -lrt #include #include #include "supra_seal.h" #include - -typedef struct nvme_health_info { - uint8_t critical_warning; - int16_t temperature; - uint8_t available_spare; - uint8_t available_spare_threshold; - uint8_t percentage_used; - uint64_t data_units_read; - uint64_t data_units_written; - uint64_t host_read_commands; - uint64_t host_write_commands; - uint64_t controller_busy_time; - uint64_t power_cycles; - uint64_t power_on_hours; - uint64_t unsafe_shutdowns; - uint64_t media_errors; - uint64_t num_error_info_log_entries; - uint32_t warning_temp_time; - uint32_t critical_temp_time; - int16_t temp_sensors[8]; - } nvme_health_info_t; - -size_t get_nvme_health_info(nvme_health_info_t* health_infos, size_t max_controllers); - */ import "C" import ( - "bytes" - "encoding/binary" "fmt" - "time" "unsafe" ) -/* -root = {SRCDIR}/../../extern/supraseal/ - -+ c++ -Ideps/spdk-v24.05/include -Ideps/spdk-v24.05/isa-l/.. -Ideps/spdk-v24.05/dpdk/build/include --g -O2 -march=native -fPIC -fno-omit-frame-pointer -fno-strict-aliasing -fstack-protector -fno-common --D_GNU_SOURCE -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 --DSPDK_GIT_COMMIT=4be6d3043 --pthread -Wall -Wextra -Wno-unused-variable -Wno-unused-parameter -Wno-missing-field-initializers -Wformat -Wformat-security --Ideps/spdk-v24.05/include -Ideps/spdk-v24.05/isa-l/.. 
-Ideps/spdk-v24.05/dpdk/build/include --Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -c sealing/supraseal.cpp -o obj/supraseal.o -Wno-subobject-linkage - ---- -NOTE: The below lines match the top of the file, just in a moderately more readable form. - --#cgo LDFLAGS: --fno-omit-frame-pointer --Wl,-z,noexecstack --Wl,-z,relro,-z,now --fuse-ld=bfd --L${SRCDIR}/../../extern/supraseal/obj --L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/build/lib --L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/isa-l/.libs --L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/isa-l-crypto/.libs --lsupraseal --Wl,--whole-archive --Wl,--no-as-needed --lspdk_log --lspdk_bdev_malloc --lspdk_bdev_null --lspdk_bdev_nvme --lspdk_bdev_passthru --lspdk_bdev_lvol --lspdk_bdev_raid --lspdk_bdev_error --lspdk_bdev_gpt --lspdk_bdev_split --lspdk_bdev_delay --lspdk_bdev_zone_block --lspdk_blobfs_bdev --lspdk_blobfs --lspdk_blob_bdev --lspdk_lvol --lspdk_blob --lspdk_nvme --lspdk_bdev_ftl --lspdk_ftl --lspdk_bdev_aio --lspdk_bdev_virtio --lspdk_virtio --lspdk_vfio_user --lspdk_accel_ioat --lspdk_ioat --lspdk_scheduler_dynamic --lspdk_env_dpdk --lspdk_scheduler_dpdk_governor --lspdk_scheduler_gscheduler --lspdk_sock_posix --lspdk_event --lspdk_event_bdev --lspdk_bdev --lspdk_notify --lspdk_dma --lspdk_event_accel --lspdk_accel --lspdk_event_vmd --lspdk_vmd --lspdk_event_sock --lspdk_init --lspdk_thread --lspdk_trace --lspdk_sock --lspdk_rpc --lspdk_jsonrpc --lspdk_json --lspdk_util --lspdk_keyring --lspdk_keyring_file --lspdk_keyring_linux --lspdk_event_keyring --Wl,--no-whole-archive -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/build/lib/libspdk_env_dpdk.a --Wl,--whole-archive -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_bus_pci.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_cryptodev.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_dmadev.a 
-${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_eal.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_ethdev.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_hash.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_kvargs.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_log.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mbuf.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mempool.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mempool_ring.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_net.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_pci.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_power.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_rcu.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_ring.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_telemetry.a -${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_vhost.a --Wl,--no-whole-archive --lnuma --lisal --lisal_crypto --pthread --ldl --lrt --luuid --lssl --lcrypto --lm --laio --lfuse3 --larchive --lkeyutils --lcudart_static --L${SRCDIR}/../../extern/supraseal/deps/blst -lblst --lconfig++ --lgmp --lstdc++ - -*/ - const libsupra_version = 0x10_00_01 func init() { @@ -176,237 +26,14 @@ func init() { } } -// SupraSealInit initializes the supra seal with a sector size and optional config file. 
-func SupraSealInit(sectorSize uint64, configFile string) { - cConfigFile := C.CString(configFile) - defer C.free(unsafe.Pointer(cConfigFile)) - C.supra_seal_init(C.size_t(sectorSize), cConfigFile) -} - -// GetHealthInfo retrieves health information for all NVMe devices -func GetHealthInfo() ([]HealthInfo, error) { - // Allocate space for raw C struct - const maxControllers = 64 - rawInfos := make([]C.nvme_health_info_t, maxControllers) - - // Get health info from C - count := C.get_nvme_health_info( - (*C.nvme_health_info_t)(unsafe.Pointer(&rawInfos[0])), - C.size_t(maxControllers), - ) - - if count == 0 { - return nil, fmt.Errorf("no NVMe controllers found") - } - - // Convert C structs to Go structs - healthInfos := make([]HealthInfo, count) - for i := 0; i < int(count); i++ { - raw := &rawInfos[i] - - // Convert temperature sensors, filtering out unused ones - sensors := make([]float64, 0, 8) - for _, temp := range raw.temp_sensors { - if temp != 0 { - sensors = append(sensors, float64(temp)) - } - } - - // todo likely not entirely correct - healthInfos[i] = HealthInfo{ - CriticalWarning: byte(raw.critical_warning), - Temperature: float64(raw.temperature), // celsius?? 
- TemperatureSensors: sensors, - WarningTempTime: time.Duration(raw.warning_temp_time) * time.Minute, - CriticalTempTime: time.Duration(raw.critical_temp_time) * time.Minute, - AvailableSpare: uint8(raw.available_spare), - AvailableSpareThreshold: uint8(raw.available_spare_threshold), - PercentageUsed: uint8(raw.percentage_used), - DataUnitsRead: uint64(raw.data_units_read), - DataUnitsWritten: uint64(raw.data_units_written), - HostReadCommands: uint64(raw.host_read_commands), - HostWriteCommands: uint64(raw.host_write_commands), - ControllerBusyTime: time.Duration(raw.controller_busy_time) * time.Minute, - PowerCycles: uint64(raw.power_cycles), - PowerOnHours: time.Duration(raw.power_on_hours) * time.Hour, - UnsafeShutdowns: uint64(raw.unsafe_shutdowns), - MediaErrors: uint64(raw.media_errors), - ErrorLogEntries: uint64(raw.num_error_info_log_entries), - } - } - - return healthInfos, nil -} - +// TreeRFile builds tree-r from a last-layer file (optionally with a staged data file). +// Used for snap updates, does not require NVMe devices. func TreeRFile(lastLayerFilename, dataFilename, outputDir string, sectorSize uint64) int { cLastLayerFilename := C.CString(lastLayerFilename) cDataFilename := C.CString(dataFilename) cOutputDir := C.CString(outputDir) - return int(C.tree_r_file(cLastLayerFilename, cDataFilename, cOutputDir, C.size_t(sectorSize))) -} - -// Pc1 performs the pc1 operation. 
-func Pc1(blockOffset uint64, replicaIDs [][32]byte, parentsFilename string, sectorSize uint64) int { - flatReplicaIDs := make([]byte, len(replicaIDs)*32) - for i, id := range replicaIDs { - copy(flatReplicaIDs[i*32:], id[:]) - } - numSectors := len(replicaIDs) - - cReplicaIDs := (*C.uint8_t)(unsafe.Pointer(&flatReplicaIDs[0])) - cParentsFilename := C.CString(parentsFilename) - defer C.free(unsafe.Pointer(cParentsFilename)) - return int(C.pc1(C.uint64_t(blockOffset), C.size_t(numSectors), cReplicaIDs, cParentsFilename, C.size_t(sectorSize))) -} - -type Path struct { - Replica string - Cache string -} - -// GenerateMultiString generates a //multi// string from an array of Path structs -func GenerateMultiString(paths []Path) (string, error) { - var buffer bytes.Buffer - buffer.WriteString("//multi//") - - for _, path := range paths { - replicaPath := []byte(path.Replica) - cachePath := []byte(path.Cache) - - // Write the length and path for the replica - if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(replicaPath))); err != nil { - return "", err - } - buffer.Write(replicaPath) - - // Write the length and path for the cache - if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(cachePath))); err != nil { - return "", err - } - buffer.Write(cachePath) - } - - return buffer.String(), nil -} - -// Pc2 performs the pc2 operation. 
-func Pc2(blockOffset uint64, numSectors int, outputDir string, sectorSize uint64) int { - /* - int pc2(size_t block_offset, size_t num_sectors, const char* output_dir, - const char** data_filenames, size_t sector_size); - */ - cOutputDir := C.CString(outputDir) - defer C.free(unsafe.Pointer(cOutputDir)) - - // data filenames is for unsealed data to be encoded - // https://github.com/supranational/supra_seal/blob/a64e4060fbffea68adc0ac4512062e5a03e76048/pc2/cuda/pc2.cu#L329 - // not sure if that works correctly, but that's where we could encode data in the future - // for now pass a null as the pointer to the array of filenames - - var cDataFilenames **C.char - cDataFilenames = nil - - return int(C.pc2(C.size_t(blockOffset), C.size_t(numSectors), cOutputDir, cDataFilenames, C.size_t(sectorSize))) -} - -// Pc2Cleanup deletes files associated with pc2. -func Pc2Cleanup(numSectors int, outputDir string, sectorSize uint64) int { - cOutputDir := C.CString(outputDir) + defer C.free(unsafe.Pointer(cLastLayerFilename)) + defer C.free(unsafe.Pointer(cDataFilename)) defer C.free(unsafe.Pointer(cOutputDir)) - return int(C.pc2_cleanup(C.size_t(numSectors), cOutputDir, C.size_t(sectorSize))) -} - -// C1 performs the c1 operation. 
-// Outputs to cachePath/commit-phase1-output -func C1(blockOffset uint64, numSectors, sectorSlot int, replicaID, seed, ticket []byte, cachePath, parentsFilename, replicaPath string, sectorSize uint64) int { - cReplicaID := (*C.uint8_t)(unsafe.Pointer(&replicaID[0])) - cSeed := (*C.uint8_t)(unsafe.Pointer(&seed[0])) - cTicket := (*C.uint8_t)(unsafe.Pointer(&ticket[0])) - cCachePath := C.CString(cachePath) - cParentsFilename := C.CString(parentsFilename) - cReplicaPath := C.CString(replicaPath) - defer C.free(unsafe.Pointer(cCachePath)) - defer C.free(unsafe.Pointer(cParentsFilename)) - defer C.free(unsafe.Pointer(cReplicaPath)) - return int(C.c1(C.size_t(blockOffset), C.size_t(numSectors), C.size_t(sectorSlot), cReplicaID, cSeed, cTicket, cCachePath, cParentsFilename, cReplicaPath, C.size_t(sectorSize))) -} - -// GetMaxBlockOffset returns the highest available block offset. -func GetMaxBlockOffset(sectorSize uint64) uint64 { - return uint64(C.get_max_block_offset(C.size_t(sectorSize))) -} - -// GetSlotSize returns the size in blocks required for the given number of sectors. -func GetSlotSize(numSectors int, sectorSize uint64) uint64 { - return uint64(C.get_slot_size(C.size_t(numSectors), C.size_t(sectorSize))) -} - -// GetCommCFromTree returns comm_c after calculating from tree file(s). Returns true on success. -func GetCommCFromTree(commC []byte, cachePath string, sectorSize uint64) bool { - cCommC := (*C.uint8_t)(unsafe.Pointer(&commC[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.get_comm_c_from_tree(cCommC, cCachePath, C.size_t(sectorSize))) -} - -// GetCommC returns comm_c from p_aux file. Returns true on success. -func GetCommC(commC []byte, cachePath string) bool { - cCommC := (*C.uint8_t)(unsafe.Pointer(&commC[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.get_comm_c(cCommC, cCachePath)) -} - -// SetCommC sets comm_c in the p_aux file. 
Returns true on success. -func SetCommC(commC []byte, cachePath string) bool { - cCommC := (*C.uint8_t)(unsafe.Pointer(&commC[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.set_comm_c(cCommC, cCachePath)) -} - -// GetCommRLastFromTree returns comm_r_last after calculating from tree file(s). Returns true on success. -func GetCommRLastFromTree(commRLast []byte, cachePath string, sectorSize uint64) bool { - cCommRLast := (*C.uint8_t)(unsafe.Pointer(&commRLast[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.get_comm_r_last_from_tree(cCommRLast, cCachePath, C.size_t(sectorSize))) -} - -// GetCommRLast returns comm_r_last from p_aux file. Returns true on success. -func GetCommRLast(commRLast []byte, cachePath string) bool { - cCommRLast := (*C.uint8_t)(unsafe.Pointer(&commRLast[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.get_comm_r_last(cCommRLast, cCachePath)) -} - -// SetCommRLast sets comm_r_last in the p_aux file. -func SetCommRLast(commRLast []byte, cachePath string) bool { - cCommRLast := (*C.uint8_t)(unsafe.Pointer(&commRLast[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.set_comm_r_last(cCommRLast, cCachePath)) -} - -// GetCommR returns comm_r after calculating from p_aux file. Returns true on success. -func GetCommR(commR []byte, cachePath string) bool { - cCommR := (*C.uint8_t)(unsafe.Pointer(&commR[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.get_comm_r(cCommR, cCachePath)) -} - -// GetCommD returns comm_d from tree_d file. Returns true on success. 
-func GetCommD(commD []byte, cachePath string) bool { - cCommD := (*C.uint8_t)(unsafe.Pointer(&commD[0])) - cCachePath := C.CString(cachePath) - defer C.free(unsafe.Pointer(cCachePath)) - return bool(C.get_comm_d(cCommD, cCachePath)) -} - -// GetCCCommD returns comm_d for a cc sector. Returns true on success. -func GetCCCommD(commD []byte, sectorSize int) bool { - cCommD := (*C.uint8_t)(unsafe.Pointer(&commD[0])) - return bool(C.get_cc_comm_d(cCommD, C.size_t(sectorSize))) + return int(C.tree_r_file(cLastLayerFilename, cDataFilename, cOutputDir, C.size_t(sectorSize))) } diff --git a/lib/supraffi/seal_nonlinux.go b/lib/supraffi/seal_nonlinux.go new file mode 100644 index 000000000..2c8c51a6f --- /dev/null +++ b/lib/supraffi/seal_nonlinux.go @@ -0,0 +1,10 @@ +//go:build !linux + +package supraffi + +// TreeRFile builds tree-r from a last-layer file (optionally with a staged data file). +// Used for snap updates, does not require NVMe devices. +// This is a stub implementation for non-Linux platforms. +func TreeRFile(lastLayerFilename, dataFilename, outputDir string, sectorSize uint64) int { + panic("TreeRFile: supraseal is only available on Linux") +} diff --git a/lib/supraffi/seal_nvme.go b/lib/supraffi/seal_nvme.go new file mode 100644 index 000000000..4bdd4d1ff --- /dev/null +++ b/lib/supraffi/seal_nvme.go @@ -0,0 +1,200 @@ +//go:build supraseal_nvme + +package supraffi + +/* + #cgo CFLAGS: -I${SRCDIR}/../../extern/supraseal/sealing -Ideps/spdk-v24.05/include -Ideps/spdk-v24.05/isa-l/.. 
-Ideps/spdk-v24.05/dpdk/build/include -fno-omit-frame-pointer + #cgo linux LDFLAGS: -Wl,-z,noexecstack -Wl,-z,relro,-z,now -L${SRCDIR}/../../extern/supraseal/obj -L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/build/lib -L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/isa-l/.libs -L${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/isa-l-crypto/.libs -lsupraseal -Wl,--whole-archive -Wl,--no-as-needed -lspdk_log -lspdk_bdev_malloc -lspdk_bdev_null -lspdk_bdev_nvme -lspdk_bdev_passthru -lspdk_bdev_lvol -lspdk_bdev_raid -lspdk_bdev_error -lspdk_bdev_gpt -lspdk_bdev_split -lspdk_bdev_delay -lspdk_bdev_zone_block -lspdk_blobfs_bdev -lspdk_blobfs -lspdk_blob_bdev -lspdk_lvol -lspdk_blob -lspdk_nvme -lspdk_bdev_ftl -lspdk_ftl -lspdk_bdev_aio -lspdk_bdev_virtio -lspdk_virtio -lspdk_vfio_user -lspdk_accel_ioat -lspdk_ioat -lspdk_scheduler_dynamic -lspdk_env_dpdk -lspdk_scheduler_dpdk_governor -lspdk_scheduler_gscheduler -lspdk_sock_posix -lspdk_event -lspdk_event_bdev -lspdk_bdev -lspdk_notify -lspdk_dma -lspdk_event_accel -lspdk_accel -lspdk_event_vmd -lspdk_vmd -lspdk_event_sock -lspdk_init -lspdk_thread -lspdk_trace -lspdk_sock -lspdk_rpc -lspdk_jsonrpc -lspdk_json -lspdk_util -lspdk_keyring -lspdk_keyring_file -lspdk_keyring_linux -lspdk_event_keyring -Wl,--no-whole-archive ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/build/lib/libspdk_env_dpdk.a -Wl,--whole-archive ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_bus_pci.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_cryptodev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_dmadev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_eal.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_ethdev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_hash.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_kvargs.a 
${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_log.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mbuf.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mempool.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_mempool_ring.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_net.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_pci.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_power.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_rcu.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_ring.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_telemetry.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v24.05/dpdk/build/lib/librte_vhost.a -Wl,--no-whole-archive -lnuma -lisal -lisal_crypto -pthread -ldl -lrt -luuid -lssl -lcrypto -lm -laio -lfuse3 -larchive -lkeyutils -lcudart_static -L${SRCDIR}/../../extern/supraseal/deps/blst -lblst -lconfig++ -lgmp -lstdc++ + #include + #include + #include "supra_seal.h" + #include + +typedef struct nvme_health_info { + uint8_t critical_warning; + int16_t temperature; + uint8_t available_spare; + uint8_t available_spare_threshold; + uint8_t percentage_used; + uint64_t data_units_read; + uint64_t data_units_written; + uint64_t host_read_commands; + uint64_t host_write_commands; + uint64_t controller_busy_time; + uint64_t power_cycles; + uint64_t power_on_hours; + uint64_t unsafe_shutdowns; + uint64_t media_errors; + uint64_t num_error_info_log_entries; + uint32_t warning_temp_time; + uint32_t critical_temp_time; + int16_t temp_sensors[8]; + } nvme_health_info_t; + +size_t get_nvme_health_info(nvme_health_info_t* health_infos, size_t max_controllers); + +*/ +import "C" +import ( + "bytes" + "encoding/binary" + "fmt" + "time" + "unsafe" +) + +// GetHealthInfo retrieves health information for all NVMe 
devices +// This function is only available when built with supraseal_nvme tag +func GetHealthInfo() ([]HealthInfo, error) { + // Allocate space for raw C struct + const maxControllers = 64 + rawInfos := make([]C.nvme_health_info_t, maxControllers) + + // Get health info from C + count := C.get_nvme_health_info( + (*C.nvme_health_info_t)(unsafe.Pointer(&rawInfos[0])), + C.size_t(maxControllers), + ) + + if count == 0 { + return nil, fmt.Errorf("no NVMe controllers found") + } + + // Convert C structs to Go structs + healthInfos := make([]HealthInfo, count) + for i := 0; i < int(count); i++ { + raw := &rawInfos[i] + + // Convert temperature sensors, filtering out unused ones + sensors := make([]float64, 0, 8) + for _, temp := range raw.temp_sensors { + if temp != 0 { + sensors = append(sensors, float64(temp)) + } + } + + // todo likely not entirely correct + healthInfos[i] = HealthInfo{ + CriticalWarning: byte(raw.critical_warning), + Temperature: float64(raw.temperature), // celsius?? + TemperatureSensors: sensors, + WarningTempTime: time.Duration(raw.warning_temp_time) * time.Minute, + CriticalTempTime: time.Duration(raw.critical_temp_time) * time.Minute, + AvailableSpare: uint8(raw.available_spare), + AvailableSpareThreshold: uint8(raw.available_spare_threshold), + PercentageUsed: uint8(raw.percentage_used), + DataUnitsRead: uint64(raw.data_units_read), + DataUnitsWritten: uint64(raw.data_units_written), + HostReadCommands: uint64(raw.host_read_commands), + HostWriteCommands: uint64(raw.host_write_commands), + ControllerBusyTime: time.Duration(raw.controller_busy_time) * time.Minute, + PowerCycles: uint64(raw.power_cycles), + PowerOnHours: time.Duration(raw.power_on_hours) * time.Hour, + UnsafeShutdowns: uint64(raw.unsafe_shutdowns), + MediaErrors: uint64(raw.media_errors), + ErrorLogEntries: uint64(raw.num_error_info_log_entries), + } + } + + return healthInfos, nil +} + +// SupraSealInit initializes the supra seal with a sector size and optional config file. 
+// Requires NVMe devices for batch sealing. +func SupraSealInit(sectorSize uint64, configFile string) { + cConfigFile := C.CString(configFile) + defer C.free(unsafe.Pointer(cConfigFile)) + C.supra_seal_init(C.size_t(sectorSize), cConfigFile) +} + +// Pc1 performs the pc1 operation for batch sealing. +// Requires NVMe devices for layer storage. +func Pc1(blockOffset uint64, replicaIDs [][32]byte, parentsFilename string, sectorSize uint64) int { + flatReplicaIDs := make([]byte, len(replicaIDs)*32) + for i, id := range replicaIDs { + copy(flatReplicaIDs[i*32:], id[:]) + } + numSectors := len(replicaIDs) + + cReplicaIDs := (*C.uint8_t)(unsafe.Pointer(&flatReplicaIDs[0])) + cParentsFilename := C.CString(parentsFilename) + defer C.free(unsafe.Pointer(cParentsFilename)) + return int(C.pc1(C.uint64_t(blockOffset), C.size_t(numSectors), cReplicaIDs, cParentsFilename, C.size_t(sectorSize))) +} + +type Path struct { + Replica string + Cache string +} + +// GenerateMultiString generates a //multi// string from an array of Path structs +func GenerateMultiString(paths []Path) (string, error) { + var buffer bytes.Buffer + buffer.WriteString("//multi//") + + for _, path := range paths { + replicaPath := []byte(path.Replica) + cachePath := []byte(path.Cache) + + // Write the length and path for the replica + if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(replicaPath))); err != nil { + return "", err + } + buffer.Write(replicaPath) + + // Write the length and path for the cache + if err := binary.Write(&buffer, binary.LittleEndian, uint32(len(cachePath))); err != nil { + return "", err + } + buffer.Write(cachePath) + } + + return buffer.String(), nil +} + +// Pc2 performs the pc2 operation for batch sealing. +// Requires NVMe devices for layer storage. 
+func Pc2(blockOffset uint64, numSectors int, outputDir string, sectorSize uint64) int { + cOutputDir := C.CString(outputDir) + defer C.free(unsafe.Pointer(cOutputDir)) + + // Pass nil for data_filenames (CC sectors only) + var cDataFilenames **C.char + return int(C.pc2(C.size_t(blockOffset), C.size_t(numSectors), cOutputDir, cDataFilenames, C.size_t(sectorSize))) +} + +// C1 performs the c1 operation for batch sealing. +// Outputs to cachePath/commit-phase1-output +// Requires NVMe devices for batch operations. +func C1(blockOffset uint64, numSectors, sectorSlot int, replicaID, seed, ticket []byte, cachePath, parentsFilename, replicaPath string, sectorSize uint64) int { + cReplicaID := (*C.uint8_t)(unsafe.Pointer(&replicaID[0])) + cSeed := (*C.uint8_t)(unsafe.Pointer(&seed[0])) + cTicket := (*C.uint8_t)(unsafe.Pointer(&ticket[0])) + cCachePath := C.CString(cachePath) + cParentsFilename := C.CString(parentsFilename) + cReplicaPath := C.CString(replicaPath) + defer C.free(unsafe.Pointer(cCachePath)) + defer C.free(unsafe.Pointer(cParentsFilename)) + defer C.free(unsafe.Pointer(cReplicaPath)) + return int(C.c1(C.size_t(blockOffset), C.size_t(numSectors), C.size_t(sectorSlot), cReplicaID, cSeed, cTicket, cCachePath, cParentsFilename, cReplicaPath, C.size_t(sectorSize))) +} + +// GetMaxBlockOffset returns the highest available block offset from NVMe devices. +func GetMaxBlockOffset(sectorSize uint64) uint64 { + return uint64(C.get_max_block_offset(C.size_t(sectorSize))) +} + +// GetSlotSize returns the size in blocks required for the given number of sectors. +// Used for batch sealing with NVMe devices. +func GetSlotSize(numSectors int, sectorSize uint64) uint64 { + return uint64(C.get_slot_size(C.size_t(numSectors), C.size_t(sectorSize))) +} + +// GetCommR returns comm_r after calculating from p_aux file. Returns true on success. +// Used in batch sealing context. 
+func GetCommR(commR []byte, cachePath string) bool { + cCommR := (*C.uint8_t)(unsafe.Pointer(&commR[0])) + cCachePath := C.CString(cachePath) + defer C.free(unsafe.Pointer(cCachePath)) + return bool(C.get_comm_r(cCommR, cCachePath)) +} diff --git a/lib/supraffi/spdk_setup.go b/lib/supraffi/spdk_setup.go new file mode 100644 index 000000000..d61e10533 --- /dev/null +++ b/lib/supraffi/spdk_setup.go @@ -0,0 +1,193 @@ +//go:build supraseal_nvme + +package supraffi + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + + logging "github.com/ipfs/go-log/v2" + "golang.org/x/xerrors" + + "github.com/filecoin-project/curio/lib/hugepageutil" +) + +var spdkLog = logging.Logger("supraffi-spdk") + +const ( + spdkVersion = "v24.05" + spdkRepo = "https://github.com/spdk/spdk" + spdkDir = "spdk-v24.05" +) + +// downloadSPDK downloads the SPDK repository if it doesn't exist. +// It performs a shallow clone to save time and space since we only need the setup script. +func downloadSPDK(spdkPath string) error { + // Check if SPDK directory already exists + if _, err := os.Stat(spdkPath); err == nil { + // Check if setup script exists + setupScript := filepath.Join(spdkPath, "scripts", "setup.sh") + if _, err := os.Stat(setupScript); err == nil { + spdkLog.Infow("SPDK already exists", "path", spdkPath) + return nil + } + } + + spdkLog.Infow("downloading SPDK", "version", spdkVersion, "path", spdkPath) + + // Create parent directory if it doesn't exist + parentDir := filepath.Dir(spdkPath) + if err := os.MkdirAll(parentDir, 0755); err != nil { + return xerrors.Errorf("creating SPDK parent directory: %w", err) + } + + // Remove existing directory if it's incomplete + if _, err := os.Stat(spdkPath); err == nil { + spdkLog.Warnw("removing incomplete SPDK directory", "path", spdkPath) + if err := os.RemoveAll(spdkPath); err != nil { + return xerrors.Errorf("removing incomplete SPDK directory: %w", err) + } + } + + // Clone SPDK repository (shallow clone to save time/space) + // We use 
--depth 1 to only get the latest commit, and --branch to get the specific version
+	cmd := exec.Command("git", "clone", "--branch", spdkVersion, "--depth", "1", "--recursive", spdkRepo, spdkPath)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+
+	spdkLog.Info("cloning SPDK repository (this may take a moment)...")
+	if err := cmd.Run(); err != nil {
+		return xerrors.Errorf("cloning SPDK repository: %w. Make sure git is installed", err)
+	}
+
+	// Verify setup script exists
+	setupScript := filepath.Join(spdkPath, "scripts", "setup.sh")
+	if _, err := os.Stat(setupScript); err != nil {
+		return xerrors.Errorf("SPDK setup script not found after clone: %w", err)
+	}
+
+	spdkLog.Info("SPDK downloaded successfully")
+	return nil
+}
+
+// spdkDirWritable reports whether dir can be used as a download target.
+// It creates dir (and any missing parents) and then creates and removes a
+// probe file inside it. os.MkdirAll alone is not a sufficient check because
+// it returns nil for a directory that already exists but is read-only.
+func spdkDirWritable(dir string) bool {
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return false
+	}
+
+	probe, err := os.CreateTemp(dir, ".curio-spdk-probe-*")
+	if err != nil {
+		return false
+	}
+
+	// Best-effort cleanup: a leftover empty probe file is harmless.
+	_ = probe.Close()
+	_ = os.Remove(probe.Name())
+	return true
+}
+
+// findOrDownloadSPDK finds the SPDK directory or downloads it if missing.
+//
+// Candidate locations are tried in order: three paths relative to the
+// executable, a system-wide share directory, the user's home directory, and
+// finally the OS temp directory. The first candidate that already contains
+// scripts/setup.sh is returned. Otherwise SPDK is downloaded into the first
+// candidate whose parent directory is writable, falling back to the temp
+// directory when none is.
+//
+// Returns the path to the SPDK directory.
+func findOrDownloadSPDK() (string, error) {
+	execPath, err := os.Executable()
+	if err != nil {
+		return "", xerrors.Errorf("getting executable path: %w", err)
+	}
+	execDir := filepath.Dir(execPath)
+	tempPath := filepath.Join(os.TempDir(), "curio-spdk", spdkDir)
+
+	// Try different possible locations for SPDK
+	possiblePaths := []string{
+		filepath.Join(execDir, "extern/supraseal/deps", spdkDir),
+		filepath.Join(execDir, "../extern/supraseal/deps", spdkDir),
+		filepath.Join(execDir, "../../extern/supraseal/deps", spdkDir),
+		filepath.Join("/usr/local/share/curio/extern/supraseal/deps", spdkDir),
+	}
+	// Only consider the home directory when it is actually known; an empty
+	// HOME would otherwise produce a path relative to the working directory.
+	if home, err := os.UserHomeDir(); err == nil && home != "" {
+		possiblePaths = append(possiblePaths, filepath.Join(home, ".curio/spdk", spdkDir))
+	}
+	// NOTE(review): the temp directory is typically world-writable and the
+	// script found here is later executed as root. Consider verifying
+	// ownership of this directory or dropping it from the search list.
+	possiblePaths = append(possiblePaths, tempPath)
+
+	// First, try to find existing SPDK
+	for _, path := range possiblePaths {
+		if _, err := os.Stat(filepath.Join(path, "scripts", "setup.sh")); err == nil {
+			spdkLog.Infow("found existing SPDK", "path", path)
+			return path, nil
+		}
+	}
+
+	// SPDK not found: download it to the first candidate we can write to,
+	// preferring locations relative to the executable and falling back to the
+	// temp directory.
+	downloadPath := tempPath
+	for _, path := range possiblePaths {
+		if spdkDirWritable(filepath.Dir(path)) {
+			downloadPath = path
+			break
+		}
+	}
+
+	if err := downloadSPDK(downloadPath); err != nil {
+		return "", err
+	}
+
+	return downloadPath, nil
+}
+
+// runSPDKSetupScript runs scripts/setup.sh from the SPDK checkout at spdkPath
+// with NRHUGE set to nrHuge. The script is invoked through sudo unless the
+// process is already running as root. Its output is forwarded to this
+// process's stdout and stderr.
+func runSPDKSetupScript(spdkPath string, nrHuge int) error {
+	setupScript := filepath.Join(spdkPath, "scripts", "setup.sh")
+
+	spdkLog.Infow("running SPDK setup", "script", setupScript, "hugepages", nrHuge)
+
+	// NRHUGE is passed via env(1) so that it reaches the script even when
+	// sudo resets the environment.
+	argv := []string{"env", fmt.Sprintf("NRHUGE=%d", nrHuge), setupScript}
+	if os.Geteuid() != 0 {
+		argv = append([]string{"sudo"}, argv...)
+	}
+
+	cmd := exec.Command(argv[0], argv[1:]...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+
+	spdkLog.Info("running SPDK setup script (this may take a moment and require sudo)...")
+
+	if err := cmd.Run(); err != nil {
+		return xerrors.Errorf("running SPDK setup script: %w", err)
+	}
+
+	spdkLog.Info("SPDK setup completed successfully")
+	return nil
+}
+
+// SetupSPDK runs the SPDK setup script to configure NVMe devices for use with SupraSeal.
+// It locates SPDK (downloading it if necessary) and runs scripts/setup.sh with
+// NRHUGE set to nrHuge. Root privileges are required; sudo is used when the
+// process is not already running as root.
+func SetupSPDK(nrHuge int) error {
+	spdkPath, err := findOrDownloadSPDK()
+	if err != nil {
+		return xerrors.Errorf("finding/downloading SPDK: %w", err)
+	}
+
+	return runSPDKSetupScript(spdkPath, nrHuge)
+}
+
+// CheckAndSetupSPDK ensures SPDK is available and that at least minPages
+// hugepages are configured.
+//
+// SPDK is always located (and downloaded if missing). The setup script is run
+// with NRHUGE=nrHuge only when the hugepage check fails; when hugepages are
+// already sufficient the script is not run at all. After a setup run the
+// hugepage check is repeated and its failure is returned as an error.
+func CheckAndSetupSPDK(nrHuge int, minPages int) error {
+	// First ensure SPDK is available (download if needed)
+	spdkPath, err := findOrDownloadSPDK()
+	if err != nil {
+		return xerrors.Errorf("ensuring SPDK is available: %w", err)
+	}
+
+	// Nothing more to do when hugepages are already configured.
+	checkErr := hugepageutil.CheckHugePages(minPages)
+	if checkErr == nil {
+		return nil
+	}
+
+	spdkLog.Warnw("hugepages not configured, attempting SPDK setup", "err", checkErr)
+	// Reuse the path resolved above instead of searching for SPDK again.
+	if setupErr := runSPDKSetupScript(spdkPath, nrHuge); setupErr != nil {
+		return xerrors.Errorf("SPDK setup failed: %w (original hugepage check error: %v)", setupErr, checkErr)
+	}
+
+	// Verify hugepages are now configured
+	if err := hugepageutil.CheckHugePages(minPages); err != nil {
+		return xerrors.Errorf("hugepages still not configured after SPDK setup: %w", err)
+	}
+
+	return nil
+}
diff --git a/tasks/sealsupra/task_supraseal.go b/tasks/sealsupra/task_supraseal.go
index 21533560f..4c6fb05a5 100644
--- a/tasks/sealsupra/task_supraseal.go
+++ b/tasks/sealsupra/task_supraseal.go
@@ -85,6 +85,16 @@ func NewSupraSeal(sectorSize string, batchSize, pipelines int, dualHashers bool,
 	}
 
 	log.Infow("start supraseal init")
+
+	// Automatically setup SPDK (configure hugepages and bind NVMe devices)
+	log.Infow("checking and setting up SPDK for supraseal")
+	if err := supraffi.CheckAndSetupSPDK(36, 36); err != nil {
+		return nil, nil, nil, xerrors.Errorf("SPDK setup failed: %w. Please ensure you have:\n"+
+			"1. Configured 1GB hugepages (add 'hugepages=36 default_hugepagesz=1G hugepagesz=1G' to /etc/default/grub)\n"+
+			"2. Raw NVMe devices available (no filesystems on them)\n"+
+			"3. Root/sudo access for SPDK setup", err)
+	}
+
 	var configFile string
 	if configFile = os.Getenv(suprasealConfigEnv); configFile == "" {
 		// not set from env (should be the case in most cases), auto-generate a config
@@ -97,7 +107,7 @@ func NewSupraSeal(sectorSize string, batchSize, pipelines int, dualHashers bool,
 
 		log.Infow("nvme devices", "nvmeDevices", nvmeDevices)
 		if len(nvmeDevices) == 0 {
-			return nil, nil, nil, xerrors.Errorf("no nvme devices found, run spdk setup.sh")
+			return nil, nil, nil, xerrors.Errorf("no nvme devices found. Please ensure you have raw NVMe devices (without filesystems) available")
 		}
 
 		cfgFile, err := os.CreateTemp("", "supraseal-config-*.cfg")