diff --git a/.cirrus.yml b/.cirrus.yml index 7f38da64d..4077de6de 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -5,10 +5,13 @@ task: name: FreeBSD freebsd_instance: image_family: freebsd-14-2-snap - install_script: pkg install -y cmake git samtools + install_script: | + # pkg update -f + pkg install -y cmake git samtools rust + export PATH=/usr/local/bin:$PATH compile_script: | mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DHAVE_TESTS=1 -DENABLE_WERROR=1 -DHAVE_SSE4_1=1 .. + cmake -DCMAKE_BUILD_TYPE=Release -DHAVE_TESTS=1 -DENABLE_WERROR=1 -DHAVE_SSE4_1=1 -DRust_COMPILER=/usr/local/bin/rustc .. make -j $(sysctl -n hw.ncpu) test_script: ./util/regression/run_regression.sh ./build/src/mmseqs SCRATCH @@ -24,7 +27,9 @@ task: echo "deb [trusted=yes] http://archive.debian.org/debian jessie main" > /etc/apt/sources.list echo "deb [trusted=yes] http://archive.debian.org/debian-security jessie/updates main" >> /etc/apt/sources.list apt-get update --yes --force-yes || true - apt-get install wget clang-4.0 libc++-dev make git ca-certificates --yes --force-yes --no-install-suggests --no-install-recommends + apt-get install -y --no-install-suggests --no-install-recommends wget clang-4.0 libc++-dev make git ca-certificates curl + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + export PATH=/root/.cargo/bin:$PATH env: CC: clang-4.0 CXX: clang++-4.0 @@ -33,20 +38,24 @@ task: echo "deb [trusted=yes] http://archive.debian.org/debian jessie main" > /etc/apt/sources.list echo "deb [trusted=yes] http://archive.debian.org/debian-security jessie/updates main" >> /etc/apt/sources.list apt-get update --yes --force-yes || true - apt-get install wget gcc-4.9 g++-4.9 make git ca-certificates --yes --force-yes --no-install-suggests --no-install-recommends + apt-get install -y --no-install-suggests --no-install-recommends wget gcc-4.9 g++-4.9 make git ca-certificates curl + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + export 
PATH=/root/.cargo/bin:$PATH env: CC: gcc-4.9 CXX: g++-4.9 + compile_script: | mkdir -p /opt/cmake-3.31.0 wget https://cmake.org/files/v3.31/cmake-3.31.0-linux-x86_64.sh chmod +x cmake-3.31.0-linux-x86_64.sh ./cmake-3.31.0-linux-x86_64.sh --skip-license --prefix=/opt/cmake-3.31.0 export PATH=/opt/cmake-3.31.0/bin/:$PATH + export PATH=/root/.cargo/bin:$PATH mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DHAVE_TESTS=1 -DENABLE_WERROR=0 -DHAVE_SSE4_1=1 -DREQUIRE_OPENMP=0 .. + cmake -DCMAKE_BUILD_TYPE=Release -DHAVE_TESTS=1 -DENABLE_WERROR=0 -DHAVE_SSE4_1=1 -DREQUIRE_OPENMP=0 -DRust_COMPILER=/root/.cargo/bin/rustc .. make -j $(nproc --all) test_script: | export PATH=/opt/cmake-3.31.0/bin/:$PATH + export PATH=/root/.cargo/bin:$PATH MMSEQS_NUM_THREADS=4 ./util/regression/run_regression.sh ./build/src/mmseqs SCRATCH SEARCH - diff --git a/.github/workflows/Dockerfile.GPU-buster-cross-sbsa b/.github/workflows/Dockerfile.GPU-buster-cross-sbsa index 9c153265a..330007498 100644 --- a/.github/workflows/Dockerfile.GPU-buster-cross-sbsa +++ b/.github/workflows/Dockerfile.GPU-buster-cross-sbsa @@ -7,10 +7,12 @@ RUN mkdir -p /work/build && cd /work/build; \ -DCMAKE_TOOLCHAIN_FILE=/opt/toolchain.cmake \ -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP_AARCH64}" -DOpenMP_C_LIB_NAMES=omp -DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP_AARCH64}" -DOpenMP_CXX_LIB_NAMES=omp -DOpenMP_omp_LIBRARY=${LIBOMP_AARCH64}/libomp.a \ -DCMAKE_POLICY_DEFAULT_CMP0074=NEW -DCMAKE_POLICY_DEFAULT_CMP0144=NEW \ + -DRust_TOOLCHAIN=stable-x86_64-unknown-linux-gnu \ + -DRust_CARGO_TARGET=aarch64-unknown-linux-gnu \ -DFORCE_STATIC_DEPS=1 -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75-real;80-real;86-real;89-real;90" ..; \ cmake --build . 
-j$(nproc --all) -v; -RUN if readelf -a /work/build/src/mmseqs | grep -i "Shared library" | grep -P -v "(linux-vdso|ld-linux-aarch64|libc|libm|libdl|librt|libpthread).so" | grep -q .; then \ +RUN if readelf -a /work/build/src/mmseqs | grep -i "Shared library" | grep -P -v "(linux-vdso|ld-linux-aarch64|libc|libm|libdl|librt|libpthread|libutil).so" | grep -q .; then \ echo "Error: unwanted libraries found"; \ readelf -a /work/build/src/mmseqs | grep -i "Shared library"; \ exit 1; \ diff --git a/.github/workflows/Dockerfile.GPU-manylinux2014 b/.github/workflows/Dockerfile.GPU-manylinux2014 index 0175ec4ae..b21f05506 100644 --- a/.github/workflows/Dockerfile.GPU-manylinux2014 +++ b/.github/workflows/Dockerfile.GPU-manylinux2014 @@ -14,7 +14,7 @@ RUN mkdir -p /work/build && cd /work/build; \ -DFORCE_STATIC_DEPS=1 -DENABLE_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES="75-real;80-real;86-real;89-real;90" ..; \ cmake --build . -j$(nproc --all) -v; -RUN if ldd /work/build/src/mmseqs | grep -P -v "linux-vdso.so|/lib64/(ld-linux-x86-64|libc|libm|libdl|librt|libpthread).so" | grep -q .; then \ +RUN if ldd /work/build/src/mmseqs | grep -P -v "linux-vdso.so|/lib64/(ld-linux-x86-64|libc|libm|libdl|librt|libpthread|libutil).so" | grep -q .; then \ echo "Error: unwanted libraries found"; \ ldd /work/build/src/mmseqs; \ exit 1; \ diff --git a/.github/workflows/mac-arm64.yml b/.github/workflows/mac-arm64.yml index 8ac27ffb0..9527a269c 100644 --- a/.github/workflows/mac-arm64.yml +++ b/.github/workflows/mac-arm64.yml @@ -20,6 +20,7 @@ jobs: - name: Build run: | mkdir -p build + rustup update cd build LIBOMP=$(brew --prefix libomp) cmake \ diff --git a/CMakeLists.txt b/CMakeLists.txt index a301bea35..5e9480c6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ set(HAVE_ARM8 0 CACHE BOOL "Have ARMv8 CPU") set(HAVE_S390X 0 CACHE BOOL "Have s390x architecture") set(NATIVE_ARCH 1 CACHE BOOL "Assume native architecture for SIMD. 
Use one of the HAVE_* options or set CMAKE_CXX_FLAGS to the appropriate flags if you disable this.") set(USE_SYSTEM_ZSTD 0 CACHE BOOL "Use zstd provided by system instead of bundled version") +set(IGNORE_RUST_VERSION 0 CACHE BOOL "Ignore Rust version check") set(ENABLE_CUDA 0 CACHE BOOL "Enable CUDA") set(FORCE_STATIC_DEPS 0 CACHE BOOL "Force static linking of deps") @@ -196,6 +197,53 @@ set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -std=c++1y") # set(MMSEQS_CXX_FLAGS "${MMSEQS_CXX_FLAGS} -stdlib=libc++") # endif () +set(ENV{CARGO_NET_OFFLINE} true) +add_subdirectory(lib/corrosion) +# don't try to link to gcc_s, its not needed anyway +list(REMOVE_ITEM Rust_CARGO_TARGET_LINK_NATIVE_LIBS "gcc_s") +set(Rust_CARGO_TARGET_LINK_NATIVE_LIBS ${Rust_CARGO_TARGET_LINK_NATIVE_LIBS} CACHE INTERNAL "") + +if(Rust_VERSION VERSION_LESS "1.68.0" AND NOT IGNORE_RUST_VERSION) + message(FATAL_ERROR "Rust >= 1.68.0 is required") +endif() + +set(RUST_FEATURE "" CACHE INTERNAL "") +if(HAVE_AVX2) + set(RUST_FEATURE simd_avx2 CACHE INTERNAL "") +elseif(HAVE_SSE4_1 OR HAVE_SSE2) + set(RUST_FEATURE simd_sse2 CACHE INTERNAL "") +elseif(HAVE_ARM8) + set(RUST_FEATURE simd_neon CACHE INTERNAL "") +endif() + +if (NATIVE_ARCH AND (RUST_FEATURE STREQUAL "")) + if(EMSCRIPTEN) + set(RUST_FEATURE simd_wasm CACHE INTERNAL "") + elseif(ARM) + set(RUST_FEATURE simd_neon CACHE INTERNAL "") + elseif(X86 OR X64) + if (HAVE_AVX2_EXTENSIONS) + set(RUST_FEATURE simd_avx2 CACHE INTERNAL "") + elseif (HAVE_SSE2_EXTENSIONS) + set(RUST_FEATURE simd_sse2 CACHE INTERNAL "") + endif() + endif() +endif() +if(RUST_FEATURE STREQUAL "") + message(WARNING "No SIMD support for this architecture. 
Alignment performance will be bad") + set(RUST_FEATURE no_simd CACHE INTERNAL "") +endif() +message("-- Rust Feature: ${RUST_FEATURE}") +corrosion_import_crate( + MANIFEST_PATH lib/block-aligner/c/Cargo.toml + FEATURES "${RUST_FEATURE}" + CRATE_TYPES staticlib +) +include_directories(lib/block-aligner/c) +if(EMSCRIPTEN) + corrosion_add_target_local_rustflags(block_aligner_c "-Clink-args=--no-entry -sRELOCATABLE=1") +endif() + if (USE_SYSTEM_ZSTD) include(FindPackageHandleStandardArgs) find_path(ZSTD_INCLUDE_DIRS NAMES zstd.h REQUIRED) diff --git a/Dockerfile b/Dockerfile index c288363a4..325e6f335 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,9 @@ RUN dpkg --add-architecture $TARGETARCH \ dpkg -i cuda-keyring_1.1-1_all.deb; \ apt-get update && apt-get install -y cuda-nvcc-12-6 cuda-cudart-dev-12-6 ninja-build; \ fi; \ - rm -rf /var/lib/apt/lists/*; + rm -rf /var/lib/apt/lists/*; \ + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | \ + sh -s -- --profile minimal --default-host ${ARCH}-unknown-linux-gnu -y WORKDIR /opt/build ADD . . diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ea5e680f9..52a4a4f8e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -44,6 +44,11 @@ jobs: steps: - checkout: self submodules: true + - script: | + rustup update + rustup target install x86_64-apple-darwin + rustup target install aarch64-apple-darwin + displayName: Install Rust Toolchain - script: | cd ${BUILD_SOURCESDIRECTORY} ./util/build_osx.sh . 
build @@ -89,14 +94,13 @@ jobs: STATIC: 0 MPI: 0 BUILD_TYPE: ASanOpt - steps: - checkout: self submodules: true - script: | # sudo add-apt-repository ppa:ubuntu-toolchain-r/test sudo apt-get update - sudo apt-get install -y gcc-11 g++-11 libgcc-11-dev build-essential + sudo apt-get install -y gcc-11 g++-11 libgcc-11-dev rustc cargo build-essential retryCountOnTaskFailure: "2" displayName: Install newer G++ - script: | @@ -197,6 +201,7 @@ jobs: HEREDOC sudo apt-get update sudo apt-get -y install -o APT::Immediate-Configure=false crossbuild-essential-$ARCH zlib1g-dev:$ARCH libbz2-dev:$ARCH + rustup target add ${CPREF}-unknown-linux-gnu retryCountOnTaskFailure: "2" displayName: Install Toolchain - script: | @@ -205,6 +210,7 @@ jobs: CC=${CPREF}-linux-gnu-gcc CXX=${CPREF}-linux-gnu-g++ \ cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DHAVE_TESTS=1 -DFORCE_STATIC_DEPS=1 \ -DOpenMP_C_FLAGS="-fopenmp -I${LIBGOMP}" -DOpenMP_C_LIB_NAMES=gomp -DOpenMP_CXX_FLAGS="-fopenmp -I${LIBGOMP}" -DOpenMP_CXX_LIB_NAMES=gomp -DOpenMP_gomp_LIBRARY=${LIBGOMP}/libgomp.a \ + -DRust_CARGO_TARGET=${CPREF}-unknown-linux-gnu \ -DENABLE_WERROR=1 -DHAVE_${SIMD}=1 .. 
make -j $(nproc --all) displayName: Build MMseqs2 @@ -215,6 +221,8 @@ jobs: - job: build_windows displayName: Windows + # TODO: no rust support in cygwin yet, re-enable soon + condition: 'false' pool: vmImage: 'windows-2019' variables: @@ -279,7 +287,7 @@ jobs: - build_ubuntu - build_ubuntu_gpu - build_ubuntu_cross - - build_windows + # - build_windows steps: - script: | cd "${BUILD_SOURCESDIRECTORY}" @@ -405,19 +413,19 @@ jobs: archiveType: tar - script: rm "${BUILD_SOURCESDIRECTORY}/mmseqs/bin/mmseqs" - - task: DownloadPipelineArtifact@1 - inputs: - artifactName: mmseqs-win64 - targetPath: $(Build.SourcesDirectory) - - script: | - unzip "${BUILD_SOURCESDIRECTORY}/mmseqs-win64.zip" - chmod +x mmseqs/mmseqs.bat mmseqs/bin/* - - task: ArchiveFiles@2 - inputs: - rootFolderOrFile: $(Build.SourcesDirectory)/mmseqs - archiveFile: $(Build.SourcesDirectory)/mmseqs-win64.zip - includeRootFolder: true - archiveType: zip + # - task: DownloadPipelineArtifact@1 + # inputs: + # artifactName: mmseqs-win64 + # targetPath: $(Build.SourcesDirectory) + # - script: | + # unzip "${BUILD_SOURCESDIRECTORY}/mmseqs-win64.zip" + # chmod +x mmseqs/mmseqs.bat mmseqs/bin/* + # - task: ArchiveFiles@2 + # inputs: + # rootFolderOrFile: $(Build.SourcesDirectory)/mmseqs + # archiveFile: $(Build.SourcesDirectory)/mmseqs-win64.zip + # includeRootFolder: true + # archiveType: zip - task: DownloadSecureFile@1 inputs: secureFile: secretKeyPleaseDontSteal @@ -430,6 +438,8 @@ jobs: cp mmseqs/userguide.pdf userguide.pdf # disabled: mmseqs-linux-ppc64le-power8.tar.gz # -F file[]=@mmseqs-linux-ppc64le-power8.tar.gz -F signature[]=@mmseqs-linux-ppc64le-power8.tar.gz.sig + # mmseqs-win64.zip + # -F file[]=@mmseqs-win64.zip -F signature[]=@mmseqs-win64.zip.sig ssh-keygen -Y sign -f ~/.ssh/id_rsa -n file \ userguide.pdf \ mmseqs-osx-universal.tar.gz \ @@ -439,8 +449,7 @@ jobs: mmseqs-linux-gpu.tar.gz \ mmseqs-linux-gpu-arm64.tar.gz \ mmseqs-linux-arm64.tar.gz \ - mmseqs-linux-ppc64le-power9.tar.gz \ - 
mmseqs-win64.zip + mmseqs-linux-ppc64le-power9.tar.gz curl --fail --retry 5 --retry-all-errors -X POST \ -F file[]=@userguide.pdf -F signature[]=@userguide.pdf.sig \ -F file[]=@mmseqs-osx-universal.tar.gz -F signature[]=@mmseqs-osx-universal.tar.gz.sig \ @@ -451,7 +460,6 @@ jobs: -F file[]=@mmseqs-linux-gpu-arm64.tar.gz -F signature[]=@mmseqs-linux-gpu-arm64.tar.gz.sig \ -F file[]=@mmseqs-linux-arm64.tar.gz -F signature[]=@mmseqs-linux-arm64.tar.gz.sig \ -F file[]=@mmseqs-linux-ppc64le-power9.tar.gz -F signature[]=@mmseqs-linux-ppc64le-power9.tar.gz.sig \ - -F file[]=@mmseqs-win64.zip -F signature[]=@mmseqs-win64.zip.sig \ -F identifier="mmseqs" -F directory="${BUILD_SOURCEVERSION}" \ https://mmseqs.com/upload diff --git a/lib/block-aligner/.github/workflows/ci.yaml b/lib/block-aligner/.github/workflows/ci.yaml new file mode 100644 index 000000000..66bbb392f --- /dev/null +++ b/lib/block-aligner/.github/workflows/ci.yaml @@ -0,0 +1,107 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + ci-x86: + runs-on: ${{matrix.os}} + + strategy: + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v2 + + - name: Install Rust Nightly + uses: actions-rs/toolchain@v1 + + - name: Test AVX2 + run: | + cargo --version + cargo test --all-targets --features simd_avx2 + cargo test --doc --features simd_avx2 + + - name: Test SSE2 + run: | + cargo --version + cargo test --all-targets --features simd_sse2 + cargo test --doc --features simd_sse2 + + - name: Install Rust Stable + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + + - name: Test AVX2 + run: | + cargo --version + cargo build --features simd_avx2 + + - name: Test SSE2 + run: | + cargo --version + cargo build --features simd_sse2 + + ci-wasm32: + runs-on: ${{matrix.os}} + + strategy: + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v2 + + - name: Install Rust Nightly + uses: actions-rs/toolchain@v1 + with: + target: 
wasm32-wasi + + - name: Test WASM + run: | + WASMTIME_DIR=wasmtime-v2.0.2-x86_64-linux + curl -OL https://github.com/bytecodealliance/wasmtime/releases/download/v2.0.2/${WASMTIME_DIR}.tar.xz + tar -xvf ${WASMTIME_DIR}.tar.xz + mv ${WASMTIME_DIR}/wasmtime ./ + cargo --version + CARGO_TARGET_WASM32_WASI_RUNNER="./wasmtime --wasm-features simd --" cargo test --target=wasm32-wasi --features simd_wasm --all-targets -- --nocapture + + - name: Install Rust Stable + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + target: wasm32-wasi + + - name: Test WASM + run: | + cargo --version + cargo build --target=wasm32-wasi --features simd_wasm + + ci-aarch64: + runs-on: ${{matrix.os}} + + strategy: + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v2 + + - name: Install Rust Nightly + uses: actions-rs/toolchain@v1 + with: + target: aarch64-unknown-linux-gnu + + - name: Test Neon + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends gcc-aarch64-linux-gnu libc6-dev-arm64-cross qemu-user + cargo --version + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" cargo test --target=aarch64-unknown-linux-gnu --all-targets --features simd_neon diff --git a/lib/block-aligner/.gitignore b/lib/block-aligner/.gitignore new file mode 100644 index 000000000..b1b118d3a --- /dev/null +++ b/lib/block-aligner/.gitignore @@ -0,0 +1,13 @@ +/target +Cargo.lock +data/* +!data/README.md +!data/uc30_pairwise_aln.sh +!data/uc30_0.95_pairwise_aln.sh +c/a.out +c/target +c/example +c/align_prefix +c/align_local +vis/.ipynb_checkpoints +vis/*.log diff --git a/lib/block-aligner/Cargo.toml b/lib/block-aligner/Cargo.toml new file mode 100644 index 000000000..13c3f13e2 --- /dev/null +++ b/lib/block-aligner/Cargo.toml @@ -0,0 +1,59 @@ +[package] +name = "block-aligner" +version = "0.4.0" +authors = ["c0deb0t "] +edition = "2018" +license = 
"MIT" +description = "SIMD-accelerated library for computing global and X-drop affine gap penalty sequence-to-sequence or sequence-to-profile alignments using an adaptive block-based algorithm." +homepage = "https://github.com/Daniel-Liu-c0deb0t/block-aligner" +repository = "https://github.com/Daniel-Liu-c0deb0t/block-aligner" +readme = "README.md" +keywords = ["SIMD", "string-distance", "alignment", "biology", "edit-distance"] +categories = ["algorithms", "hardware-support", "science", "text-processing"] +exclude = ["vis/*", "!vis/block_img1.png"] + +[package.metadata.docs.rs] +features = ["simd_avx2"] + +[features] +# Enable SSE2 +simd_sse2 = [] +# Enable AVX2 +simd_avx2 = [] +# Enable WASM SIMD +simd_wasm = [] +# Enable Neon +simd_neon = [] + +# No SIMD +no_simd = [] + +# Print lots of debug information +debug = ["debug_size"] +# Print only the final block sizes +debug_size = [] +# Prepare code for analysis by llvm-mca +mca = [] + +[profile.release] +debug-assertions = false +lto = "thin" +panic = "abort" + +[profile.bench] +debug-assertions = false +lto = "thin" + +[dev-dependencies] +bio = "^0.33" +simulate-seqs = { git = "https://github.com/Daniel-Liu-c0deb0t/simulate-seqs" } + +[target.'cfg(not(any(target_arch = "wasm32", target_arch = "aarch64")))'.dev-dependencies] +parasailors = { git = "https://github.com/Daniel-Liu-c0deb0t/parasailors-new" } +image = "^0.23.14" +imageproc = "^0.22.0" +# Includes small changes that ensures wfa2 code autovectorizes and other updates +rust-wfa2 = { git = "https://github.com/pairwise-alignment/rust-wfa2" } +# Removed logging and extra dependencies +edlib_rs = { git = "https://github.com/Daniel-Liu-c0deb0t/edlib-rs" } +ksw2-sys = { git = "https://github.com/pairwise-alignment/ksw2-sys" } diff --git a/lib/block-aligner/LICENSE b/lib/block-aligner/LICENSE new file mode 100644 index 000000000..a4d125849 --- /dev/null +++ b/lib/block-aligner/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 Daniel Liu + +Permission is 
hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/block-aligner/README.md b/lib/block-aligner/README.md new file mode 100644 index 000000000..adcad6e7c --- /dev/null +++ b/lib/block-aligner/README.md @@ -0,0 +1,198 @@ +# block aligner +[![CI](https://github.com/Daniel-Liu-c0deb0t/block-aligner/actions/workflows/ci.yaml/badge.svg)](https://github.com/Daniel-Liu-c0deb0t/block-aligner/actions/workflows/ci.yaml) +[![License](https://img.shields.io/github/license/Daniel-Liu-c0deb0t/block-aligner)](LICENSE) +[![Crates.io](https://img.shields.io/crates/v/block-aligner)](https://crates.io/crates/block_aligner) +[![Docs.rs](https://docs.rs/block-aligner/badge.svg)](https://docs.rs/block-aligner) + +SIMD-accelerated library for computing global and X-drop affine gap penalty sequence-to-sequence or +sequence-to-profile alignments using an adaptive block-based algorithm. + +Preprint paper available [here](https://www.biorxiv.org/content/10.1101/2021.11.08.467651). + +

+ +

+ ## Example +```rust +use block_aligner::{cigar::*, scan_block::*, scores::*}; + +let min_block_size = 32; +let max_block_size = 256; + +// A gap of length n will cost: open + extend * (n - 1) +let gaps = Gaps { open: -2, extend: -1 }; + +// Note that PaddedBytes, Block, and Cigar can be initialized with sequence length +// and block size upper bounds and be reused later for shorter sequences, to avoid +// repeated allocations. +let r = PaddedBytes::from_bytes::<NucMatrix>(b"TTAAAAAAATTTTTTTTTTTT", max_block_size); +let q = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", max_block_size); + +// Align with traceback, but no X-drop threshold (global alignment). +let mut a = Block::<true, false>::new(q.len(), r.len(), max_block_size); +a.align(&q, &r, &NW1, gaps, min_block_size..=max_block_size, 0); +let res = a.res(); + +assert_eq!(res, AlignResult { score: 7, query_idx: 24, reference_idx: 21 }); + +let mut cigar = Cigar::new(res.query_idx, res.reference_idx); +// Compute traceback and resolve =/X (matches/mismatches). +a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar); + +assert_eq!(cigar.to_string(), "2=6I16=3D"); +``` +See the [docs](https://docs.rs/block-aligner) for detailed API information. + +## Algorithm +Block aligner provides a new efficient way to compute pairwise alignments on proteins, DNA sequences, +and byte strings with dynamic programming. +Block aligner also supports aligning sequences to profiles, which are position-specific +scoring matrices and position-specific gap open costs. + +It works by calculating scores in a small square block that is shifted down or right in a greedy +manner, based on the scores at the edges of the block. +This dynamic approach results in a much smaller calculated block area compared to previous approaches, +though at the expense of some accuracy. +The block can also go back to a previous best checkpoint and grow larger, to handle difficult regions +with large gaps. 
+The block size can also dynamically shrink when it detects that a large block is not needed. +Both block growing and shrinking are based on heuristics. + +By trading off some accuracy for speed, block aligner is able to efficiently handle a variety of scoring matrices and +adapt to sequences of varying sequence identities. In practice, it is still very accurate on a variety of protein and +nucleotide sequences. + +Block aligner is designed to exploit SIMD parallelism on modern CPUs. +Currently, SSE2 (128-bit vectors), AVX2 (256-bit vectors), Neon (128-bit vectors), and WASM SIMD (128-bit vectors) are supported. +For score calculations, 16-bit score values (lanes) and 32-bit per-block offsets are used. + +Block aligner behaves similarly to an (adaptive) banded aligner when the minimum and maximum block sizes are set to +the same value. + +## Tuning block sizes + +For long, noisy Nanopore reads, a min block size of ~1% sequence length and a max block size +of ~10% sequence length performs well (tested with reads up to ~50 kbp). +For proteins, a min block size of 32 and a max block size of 256 performs well. +Using a minimum block size that is at least 32 is recommended for most applications. +Using a maximum block size greater than `2^14 = 16384` is not recommended. +If the alignment scores are saturating (score too large), then use a smaller block size. +Let me know how block aligner performs on your data! + +## Install +This library can be used on both stable and nightly Rust channels. +The nightly channel is needed for running tests and benchmarks. Additionally, the tests +and benchmarks need to run on Linux or MacOS. + +To use this as a crate in your Rust project, add the following to your `Cargo.toml`: +``` +[dependencies] +block-aligner = { version = "0.4", features = ["simd_avx2"] } +``` +Use the `simd_sse2`, `simd_neon`, or `simd_wasm` feature flag for x86 SSE2, ARM Neon, or WASM SIMD support, respectively. 
+It is your responsibility to ensure that the correct feature is enabled and supported by the +platform that runs the code because this library does not automatically detect the supported +SIMD instruction set. More information on specifying different features for different platforms +with the same dependency is available [here](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#platform-specific-dependencies). +Here's a simple example: +``` +[target.'cfg(target_arch = "x86_64")'.dependencies] +block-aligner = { version = "0.4", features = ["simd_avx2"] } +[target.'cfg(target_arch = "aarch64")'.dependencies] +block-aligner = { version = "0.4", features = ["simd_neon"] } +``` + +For developing, testing, or using the C API, you should clone this repo +and use Rust nightly. In general, when building, you need to specify the +correct feature flags through the command line. + +For x86 AVX2: +``` +cargo build --features simd_avx2 --release +``` + +For x86 SSE2: +``` +cargo build --features simd_sse2 --release +``` + +For ARM Neon: +``` +cargo build --target=aarch64-unknown-linux-gnu --features simd_neon --release +``` + +For WASM SIMD: +``` +cargo build --target=wasm32-wasi --features simd_wasm --release +``` + +To run WASM programs, you will need [`wasmtime`](https://github.com/bytecodealliance/wasmtime) +installed and on your `$PATH`. + +## C API +There are C bindings for block aligner. More information on how to use them is located in +the [C readme](c/README.md). +See the `3di` branch for an example of using block aligner to do local alignment in C, +along with block aligner modifications to support aligning with amino acid 3D interaction (3Di) information. + +Most of the instructions below are for benchmarking and testing block aligner. + +## Data +Some Illumina/Nanopore (DNA), Uniclust30 (protein), and SCOP (protein profile) data are used in some tests and benchmarks. 
+You will need to download them by following the instructions in the [data readme](data/README.md). + +## Test +Run `scripts/test_avx2.sh` or `scripts/test_wasm.sh` to run tests. +CI will run these tests when commits are pushed to this repo. +More testing and evaluating scripts are available in the `scripts` directory. + +For debugging, there exists a `debug` feature flag that prints out a lot of +useful info about the internal state of the aligner while it runs. +There is another feature flag, `debug_size`, that prints the sizes of blocks after they grow. +To manually inspect alignments, run `scripts/debug_avx2.sh` with two sequences as arguments. + +## Docs +Run `scripts/doc_avx2.sh` or `scripts/doc_wasm.sh` to build the docs locally. + +## Benchmark +Run `scripts/bench_avx2.sh` or `scripts/bench_wasm.sh` for basic benchmarks. +See the `scripts` directory for more benchmark scripts on real data. + +## Data analysis and visualizations +Use the Jupyter notebook in the `vis/` directory to gather data and plot them. An easier way +to run the whole notebook is to run the `vis/run_vis.sh` script. + +## Profiling with MacOS Instruments +Use +``` +brew install cargo-instruments +RUSTFLAGS="-g" cargo instruments --example profile --release --features simd_avx2 --open +``` + +## Analyzing performance with LLVM-MCA +Use +``` +scripts/build_ir_asm.sh +``` +to generate assembly output and run LLVM-MCA. + +## Viewing the assembly +Use either `scripts/build_ir_asm.sh`, `objdump -d` on a binary (avoids recompiling code in +some cases), or a more advanced tool like Ghidra (has a decompiler, too). + +## Compare (relatively unused) +Edits were made to [Hajime Suzuki](https://github.com/ocxtal)'s adaptive banding benchmark code +and difference recurrence benchmark code. These edits are available [here](https://github.com/Daniel-Liu-c0deb0t/adaptivebandbench) +and [here](https://github.com/Daniel-Liu-c0deb0t/diff-bench-paper), respectively. 
+Go to those repos, then follow the instructions for installing and running the code. + +If you run the scripts in those repos for comparing scores produced by different algorithms, +you should get `.tsv` generated files. Then, in this repo's directory, run +``` +scripts/compare_avx2.sh /path/to/file.tsv 50 +``` +to get the comparisons. The X-drop threshold is specified after the path. + +## Old ideas and history +See the [ideas](ideas.md) file. diff --git a/lib/block-aligner/benches/prefix_scan.rs b/lib/block-aligner/benches/prefix_scan.rs new file mode 100644 index 000000000..44dc4c2de --- /dev/null +++ b/lib/block-aligner/benches/prefix_scan.rs @@ -0,0 +1,42 @@ +#![feature(test)] +#![cfg(feature = "simd_avx2")] + +extern crate test; +use test::{Bencher, black_box}; + +use block_aligner::avx2::*; + +#[repr(align(32))] +struct A([i16; L]); + +#[bench] +fn bench_opt_prefix_scan(b: &mut Bencher) { + #[target_feature(enable = "avx2")] + unsafe fn inner(b: &mut Bencher) { + let vec = A([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 12, 13, 14, 11]); + let vec = simd_load(vec.0.as_ptr() as *const Simd); + + b.iter(|| { + let gap = simd_set1_i16(-1); + let (_, consts) = get_prefix_scan_consts(gap); + simd_prefix_scan_i16(black_box(vec), gap, consts) + }); + } + unsafe { inner(b); } +} + +#[bench] +fn bench_naive_prefix_scan(b: &mut Bencher) { + #[target_feature(enable = "avx2")] + unsafe fn inner(b: &mut Bencher) { + let vec = A([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 12, 13, 14, 11]); + let vec = simd_load(vec.0.as_ptr() as *const Simd); + + b.iter(|| { + let gap = simd_set1_i16(-1); + let (_, consts) = get_prefix_scan_consts(gap); + simd_naive_prefix_scan_i16(black_box(vec), gap, consts) + }); + } + unsafe { inner(b); } +} diff --git a/lib/block-aligner/benches/rand_scan.rs b/lib/block-aligner/benches/rand_scan.rs new file mode 100644 index 000000000..e55d94e8e --- /dev/null +++ b/lib/block-aligner/benches/rand_scan.rs @@ -0,0 +1,175 @@ +#![feature(test)] + +extern crate test; 
+use test::{Bencher, black_box}; + +use bio::alignment::pairwise::*; +use bio::scores::blosum62; + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))] +use bio::alignment::distance::simd::bounded_levenshtein; + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))] +use parasailors::{Matrix, *}; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use block_aligner::cigar::*; +use simulate_seqs::*; + +fn bench_rustbio_aa_core(b: &mut Bencher, len: usize) { + let mut rng = StdRng::seed_from_u64(1234); + let r = black_box(rand_str(len, &AMINO_ACIDS, &mut rng)); + let q = black_box(rand_mutate(&r, K, &AMINO_ACIDS, &mut rng)); + + b.iter(|| { + let mut bio_aligner = Aligner::with_capacity(q.len(), r.len(), -10, -1, &blosum62); + bio_aligner.global(&q, &r).score + }); +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))] +fn bench_parasailors_aa_core(b: &mut Bencher, len: usize) { + let mut rng = StdRng::seed_from_u64(1234); + let r = black_box(rand_str(len, &AMINO_ACIDS, &mut rng)); + let q = black_box(rand_mutate(&r, K, &AMINO_ACIDS, &mut rng)); + let matrix = Matrix::new(MatrixType::Blosum62); + let profile = parasailors::Profile::new(&q, &matrix); + + b.iter(|| { + global_alignment_score(&profile, &r, 11, 1) + }); +} + +fn bench_scan_aa_core(b: &mut Bencher, len: usize, insert: bool) { + let mut rng = StdRng::seed_from_u64(1234); + let r = black_box(rand_str(len, &AMINO_ACIDS, &mut rng)); + let q = if insert { + black_box(rand_mutate_insert(&r, K, &AMINO_ACIDS, len / 10, &mut rng)) + } else { + black_box(rand_mutate(&r, K, &AMINO_ACIDS, &mut rng)) + }; + let r = PaddedBytes::from_bytes::(&r, 2048); + let q = PaddedBytes::from_bytes::(&q, 2048); + let bench_gaps = Gaps { open: -11, extend: -1 }; + + b.iter(|| { + let mut a = Block::::new(q.len(), r.len(), 2048); + a.align(&q, &r, &BLOSUM62, bench_gaps, 32..=2048, 0); + a.res() + }); +} + +fn bench_scan_aa_core_small(b: &mut Bencher, len: usize) { + let mut rng = 
StdRng::seed_from_u64(1234); + let r = black_box(rand_str(len, &AMINO_ACIDS, &mut rng)); + let q = black_box(rand_mutate(&r, K, &AMINO_ACIDS, &mut rng)); + let r = PaddedBytes::from_bytes::(&r, 2048); + let q = PaddedBytes::from_bytes::(&q, 2048); + let bench_gaps = Gaps { open: -11, extend: -1 }; + + b.iter(|| { + let mut a = Block::::new(q.len(), r.len(), 32); + a.align(&q, &r, &BLOSUM62, bench_gaps, 32..=32, 0); + a.res() + }); +} + +fn bench_scan_aa_core_trace(b: &mut Bencher, len: usize) { + let mut rng = StdRng::seed_from_u64(1234); + let r = black_box(rand_str(len, &AMINO_ACIDS, &mut rng)); + let q = black_box(rand_mutate(&r, K, &AMINO_ACIDS, &mut rng)); + let r = PaddedBytes::from_bytes::(&r, 2048); + let q = PaddedBytes::from_bytes::(&q, 2048); + let bench_gaps = Gaps { open: -11, extend: -1 }; + + b.iter(|| { + let mut a = Block::::new(q.len(), r.len(), 2048); + a.align(&q, &r, &BLOSUM62, bench_gaps, 32..=2048, 0); + //a.res() + let mut cigar = Cigar::new(q.len(), r.len()); + a.trace().cigar(q.len(), r.len(), &mut cigar); + (a.res(), cigar) + }); +} + +fn bench_scan_nuc_core(b: &mut Bencher, len: usize) { + let mut rng = StdRng::seed_from_u64(1234); + let r = black_box(rand_str(len, &NUC, &mut rng)); + let q = black_box(rand_mutate(&r, K, &NUC, &mut rng)); + let r = PaddedBytes::from_bytes::(&r, 2048); + let q = PaddedBytes::from_bytes::(&q, 2048); + let bench_gaps = Gaps { open: -2, extend: -1 }; + + b.iter(|| { + let mut a = Block::::new(q.len(), r.len(), 2048); + a.align(&q, &r, &NW1, bench_gaps, 32..=2048, 0); + a.res() + }); +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))] +fn bench_triple_accel_core(b: &mut Bencher, len: usize) { + let mut rng = StdRng::seed_from_u64(1234); + let r = black_box(rand_str(len, &NUC, &mut rng)); + let q = black_box(rand_mutate(&r, K, &NUC, &mut rng)); + + b.iter(|| { + bounded_levenshtein(&q, &r, K as u32) + }); +} + +#[bench] +fn bench_scan_aa_10_100(b: &mut Bencher) { bench_scan_aa_core::<10>(b, 
100, false); } +#[bench] +fn bench_scan_aa_100_1000(b: &mut Bencher) { bench_scan_aa_core::<100>(b, 1000, false); } +#[bench] +fn bench_scan_aa_1000_10000(b: &mut Bencher) { bench_scan_aa_core::<1000>(b, 10000, false); } + +#[bench] +fn bench_scan_aa_10_100_insert(b: &mut Bencher) { bench_scan_aa_core::<10>(b, 100, true); } +#[bench] +fn bench_scan_aa_100_1000_insert(b: &mut Bencher) { bench_scan_aa_core::<100>(b, 1000, true); } +#[bench] +fn bench_scan_aa_1000_10000_insert(b: &mut Bencher) { bench_scan_aa_core::<1000>(b, 10000, true); } + +#[bench] +fn bench_scan_aa_10_100_small(b: &mut Bencher) { bench_scan_aa_core_small::<10>(b, 100); } +#[bench] +fn bench_scan_aa_100_1000_small(b: &mut Bencher) { bench_scan_aa_core_small::<100>(b, 1000); } +#[bench] +fn bench_scan_aa_1000_10000_small(b: &mut Bencher) { bench_scan_aa_core_small::<1000>(b, 10000); } + +#[bench] +fn bench_scan_aa_10_100_trace(b: &mut Bencher) { bench_scan_aa_core_trace::<10>(b, 100); } +#[bench] +fn bench_scan_aa_100_1000_trace(b: &mut Bencher) { bench_scan_aa_core_trace::<100>(b, 1000); } +#[bench] +fn bench_scan_aa_1000_10000_trace(b: &mut Bencher) { bench_scan_aa_core_trace::<1000>(b, 10000); } + +#[bench] +fn bench_scan_nuc_100_1000(b: &mut Bencher) { bench_scan_nuc_core::<100>(b, 1000); } +#[bench] +fn bench_scan_nuc_1000_10000(b: &mut Bencher) { bench_scan_nuc_core::<1000>(b, 10000); } + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))] +#[bench] +fn bench_triple_accel_100_1000(b: &mut Bencher) { bench_triple_accel_core::<100>(b, 1000); } +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))] +#[bench] +fn bench_triple_accel_1000_10000(b: &mut Bencher) { bench_triple_accel_core::<1000>(b, 10000); } + +#[bench] +fn bench_rustbio_aa_10_100(b: &mut Bencher) { bench_rustbio_aa_core::<10>(b, 100); } +#[bench] +fn bench_rustbio_aa_100_1000(b: &mut Bencher) { bench_rustbio_aa_core::<100>(b, 1000); } + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))] +#[bench] 
+fn bench_parasailors_aa_10_100(b: &mut Bencher) { bench_parasailors_aa_core::<10>(b, 100); }
+#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))]
+#[bench]
+fn bench_parasailors_aa_100_1000(b: &mut Bencher) { bench_parasailors_aa_core::<100>(b, 1000); }
+#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon")))]
+#[bench]
+fn bench_parasailors_aa_1000_10000(b: &mut Bencher) { bench_parasailors_aa_core::<1000>(b, 10000); }
diff --git a/lib/block-aligner/c/Cargo.toml b/lib/block-aligner/c/Cargo.toml
new file mode 100644
index 000000000..6b53bfcb3
--- /dev/null
+++ b/lib/block-aligner/c/Cargo.toml
@@ -0,0 +1,36 @@
+# Minimal Cargo.toml to avoid downloading dependencies.
+
+[package]
+name = "block_aligner_c"
+version = "0.4.0"
+authors = ["c0deb0t "]
+edition = "2018"
+
+[lib]
+crate-type = ["lib", "cdylib", "staticlib"]
+path = "../src/lib.rs"
+
+[features]
+# Enable SSE2
+simd_sse2 = []
+# Enable AVX2
+simd_avx2 = []
+# Enable WASM SIMD
+simd_wasm = []
+# Enable Neon
+simd_neon = []
+
+# No SIMD
+no_simd = []
+
+# Print lots of debug information
+debug = ["debug_size"]
+# Print only the final block sizes
+debug_size = []
+# Prepare code for analysis by llvm-mca
+mca = []
+
+[profile.release]
+debug-assertions = false
+lto = "thin"
+panic = "abort"
diff --git a/lib/block-aligner/c/Makefile b/lib/block-aligner/c/Makefile
new file mode 100644
index 000000000..70440fcd8
--- /dev/null
+++ b/lib/block-aligner/c/Makefile
@@ -0,0 +1,26 @@
+CC=clang
+# Compile flags: block_aligner.h is generated into this directory.
+CFLAGS=-I.
+# Link flags are kept apart from CFLAGS so the library can follow the sources on the
+# link line; linkers resolve symbols left to right and may skip a library named too early.
+LDFLAGS=-Ltarget/release
+LDLIBS=-lblock_aligner_c
+
+# The programs stay phony: they depend on the phony block_aligner target and relink every run.
+.PHONY: all block_aligner example align_prefix align_local
+
+all: example align_prefix align_local
+
+# Build the Rust library, then regenerate block_aligner.h with cbindgen.
+block_aligner:
+	cargo build --release --features simd_avx2 --offline
+	cbindgen --config cbindgen.toml --crate block_aligner_c --output block_aligner.h --quiet .
+
+example: block_aligner example.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.c $(LDLIBS)
+
+align_prefix: block_aligner align_prefix.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ align_prefix.c $(LDLIBS)
+
+align_local: block_aligner align_local.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ align_local.c $(LDLIBS)
diff --git a/lib/block-aligner/c/README.md b/lib/block-aligner/c/README.md
new file mode 100644
index 000000000..043bd2d49
--- /dev/null
+++ b/lib/block-aligner/c/README.md
@@ -0,0 +1,23 @@
+# C API
+This directory contains an example of how to use the C API of block aligner.
+
+Currently, only sequence to sequence and sequence to profile alignment with
+proteins is supported with the C API. Other features may be added if there
+is demand for them.
+
+## Running the example
+1. `cd` into this directory.
+2. Run `make`. This will build block aligner in release mode, use cbindgen
+to generate the header file, and make sure block aligner is linked to the
+example program. Make sure you have [cbindgen](https://github.com/eqrion/cbindgen) installed:
+the `block_aligner` target of the `Makefile` runs it on every build to regenerate the bindings.
+3. Run `./example`. This will run the example program to perform alignment
+calculations.
+
+The generated header file, `c/block_aligner.h`, should be included in
+code that calls block aligner functions. It is C++ compatible.
+Like in the example `Makefile`, the `block_aligner_c` library in `c/target/release`
+must be linked to any C/C++ code that calls block aligner functions.
+
+Note that this directory has a minimal `Cargo.toml` that has no dependencies, so
+block aligner can be compiled offline.
diff --git a/lib/block-aligner/c/align_local.c b/lib/block-aligner/c/align_local.c new file mode 100644 index 000000000..e8339db68 --- /dev/null +++ b/lib/block-aligner/c/align_local.c @@ -0,0 +1,217 @@ +#include +#include + +#include "block_aligner.h" + +// max block size, assume that two seqs will not have a larger gap than this +#define MAX_SIZE 4096 +// threshold number of iterations where the score does not change for terminating alignment +#define ITER 2 + +// compute traceback, which is slightly slower +void align_prefix_trace(BlockHandle block, PaddedBytes* a, PaddedBytes* a_3di, PosBias* a_bias, PaddedBytes* b, PaddedBytes* b_3di, PosBias* b_bias, const AAMatrix* matrix, const AAMatrix* matrix_3di, Gaps gaps, AlignResult* res, Cigar* cigar, size_t* min_size) { + size_t num_iter = 0; + res->score = -1000000000; + res->query_idx = -1; + res->reference_idx = -1; + + // exponential search on min_size until either max_size is reached or score does not change for ITER iterations + while (*min_size <= MAX_SIZE && num_iter < ITER) { + // allow max block size to grow + SizeRange range = {.min = *min_size, .max = MAX_SIZE}; + // estimated x-drop threshold + int32_t x_drop = -(*min_size * gaps.extend + gaps.open); + block_align_3di_aa_trace_xdrop(block, a, a_3di, a_bias, b, b_3di, b_bias, matrix, matrix_3di, gaps, range, x_drop); + int32_t prev_score = res->score; + *res = block_res_aa_trace_xdrop(block); + + if (res->score == prev_score) { + num_iter++; + } else { + num_iter = 1; + } + + *min_size *= 2; + } + + block_cigar_aa_trace_xdrop(block, res->query_idx, res->reference_idx, cigar); +} + +// do not compute traceback, which is slightly faster +void align_prefix_no_trace(BlockHandle block, PaddedBytes* a, PaddedBytes* a_3di, PosBias* a_bias, PaddedBytes* b, PaddedBytes* b_3di, PosBias* b_bias, const AAMatrix* matrix, const AAMatrix* matrix_3di, Gaps gaps, AlignResult* res, size_t* min_size) { + size_t num_iter = 0; + res->score = -1000000000; + res->query_idx = 
-1; + res->reference_idx = -1; + + // exponential search on min_size until either max_size is reached or score does not change for ITER iterations + while (*min_size <= MAX_SIZE && num_iter < ITER) { + // allow max block size to grow + SizeRange range = {.min = *min_size, .max = MAX_SIZE}; + // estimated x-drop threshold + int32_t x_drop = -(*min_size * gaps.extend + gaps.open); + block_align_3di_aa_xdrop(block, a, a_3di, a_bias, b, b_3di, b_bias, matrix, matrix_3di, gaps, range, x_drop); + int32_t prev_score = res->score; + *res = block_res_aa_xdrop(block); + + if (res->score == prev_score) { + num_iter++; + } else { + num_iter = 1; + } + + *min_size *= 2; + } +} + +void rev_arr(int16_t* arr, size_t len) { + for (int i = 0; i < len / 2; i++) { + int16_t temp = arr[i]; + arr[i] = arr[len - 1 - i]; + arr[len - 1 - i] = temp; + } +} + +void rev_str(char* arr, size_t len) { + for (int i = 0; i < len / 2; i++) { + char temp = arr[i]; + arr[i] = arr[len - 1 - i]; + arr[len - 1 - i] = temp; + } +} + +typedef struct LocalAln { + size_t a_start; + size_t b_start; + size_t a_end; + size_t b_end; + int32_t score; +} LocalAln; + +// note: traceback cigar string will be reversed, but LocalAln will contain correct start and end positions +LocalAln align_local(BlockHandle block_trace, BlockHandle block_no_trace, size_t a_len, char* a_str, PaddedBytes* a, char* a_3di_str, PaddedBytes* a_3di, int16_t* a_bias_arr, PosBias* a_bias, size_t b_len, char* b_str, PaddedBytes* b, char* b_3di_str, PaddedBytes* b_3di, int16_t* b_bias_arr, PosBias* b_bias, const AAMatrix* matrix, const AAMatrix* matrix_3di, Gaps gaps, size_t a_idx, size_t b_idx, Cigar* cigar) { + LocalAln res_aln; + AlignResult res; + size_t min_size = 32; + + // forwards alignment starting at (a_idx, b_idx) + block_set_bytes_padded_aa(a, (uint8_t*)(a_str + a_idx), a_len - a_idx, MAX_SIZE); + block_set_bytes_padded_aa(a_3di, (uint8_t*)(a_3di_str + a_idx), a_len - a_idx, MAX_SIZE); + block_set_pos_bias(a_bias, a_bias_arr + 
a_idx, a_len - a_idx); + block_set_bytes_padded_aa(b, (uint8_t*)(b_str + b_idx), b_len - b_idx, MAX_SIZE); + block_set_bytes_padded_aa(b_3di, (uint8_t*)(b_3di_str + b_idx), b_len - b_idx, MAX_SIZE); + block_set_pos_bias(b_bias, b_bias_arr + b_idx, b_len - b_idx); + + align_prefix_no_trace(block_no_trace, a, a_3di, a_bias, b, b_3di, b_bias, matrix, matrix_3di, gaps, &res, &min_size); + + res_aln.a_end = a_idx + res.query_idx; + res_aln.b_end = b_idx + res.reference_idx; + + // reversed alignment starting at the max score location from forwards alignment + a_idx = a_len - (a_idx + res.query_idx); + b_idx = b_len - (b_idx + res.reference_idx); + // reverse all the sequences + rev_str(a_str, a_len); + rev_str(a_3di_str, a_len); + rev_arr(a_bias_arr, a_len); + rev_str(b_str, b_len); + rev_str(b_3di_str, b_len); + rev_arr(b_bias_arr, b_len); + + block_set_bytes_padded_aa(a, (uint8_t*)(a_str + a_idx), a_len - a_idx, MAX_SIZE); + block_set_bytes_padded_aa(a_3di, (uint8_t*)(a_3di_str + a_idx), a_len - a_idx, MAX_SIZE); + block_set_pos_bias(a_bias, a_bias_arr + a_idx, a_len - a_idx); + block_set_bytes_padded_aa(b, (uint8_t*)(b_str + b_idx), b_len - b_idx, MAX_SIZE); + block_set_bytes_padded_aa(b_3di, (uint8_t*)(b_3di_str + b_idx), b_len - b_idx, MAX_SIZE); + block_set_pos_bias(b_bias, b_bias_arr + b_idx, b_len - b_idx); + + // start at a reasonable min_size based on the forwards alignment + min_size >>= ITER; + + align_prefix_trace(block_trace, a, a_3di, a_bias, b, b_3di, b_bias, matrix, matrix_3di, gaps, &res, cigar, &min_size); + + res_aln.a_start = a_len - (a_idx + res.query_idx); + res_aln.b_start = b_len - (b_idx + res.reference_idx); + res_aln.score = res.score; + return res_aln; +} + +void example(void) { + char a_str[] = "AAAAAAAA"; + char b_str[] = "AARAAAA"; + char a_3di_str[] = "AAAAAAAA"; + char b_3di_str[] = "AAAAAAA"; + int16_t a_bias_arr[8] = {0}; + int16_t b_bias_arr[7] = {0}; + size_t a_len = strlen(a_str); + size_t b_len = strlen(b_str); + Gaps gaps = 
{.open = -11, .extend = -1}; + + // position to start aligning at + size_t a_idx = 3; + size_t b_idx = 3; + + // note: instead of a_len or b_len, it is possible to use really large lengths + // and reuse data structures to avoid allocations + PaddedBytes* a = block_new_padded_aa(a_len, MAX_SIZE); + PaddedBytes* a_3di = block_new_padded_aa(a_len, MAX_SIZE); + PosBias* a_bias = block_new_pos_bias(a_len, MAX_SIZE); + PaddedBytes* b = block_new_padded_aa(b_len, MAX_SIZE); + PaddedBytes* b_3di = block_new_padded_aa(b_len, MAX_SIZE); + PosBias* b_bias = block_new_pos_bias(b_len, MAX_SIZE); + + AAMatrix* matrix_3di = block_new_simple_aamatrix(1, -1); + + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 20; j++) { + uint8_t c = i + 'A'; + uint8_t d = j + 'A'; + // set to actual scores instead of zeros! + block_set_aamatrix(matrix_3di, c, d, 0); + } + } + + // block_trace, block_no_trace, and cigar can also be pre-allocated with really large lengths + BlockHandle block_trace = block_new_aa_trace_xdrop(a_len, b_len, MAX_SIZE); + BlockHandle block_no_trace = block_new_aa_xdrop(a_len, b_len, MAX_SIZE); + Cigar* cigar = block_new_cigar(a_len, b_len); + + // alignment performs no allocations + LocalAln local_aln = align_local(block_trace, block_no_trace, a_len, a_str, a, a_3di_str, a_3di, a_bias_arr, a_bias, b_len, b_str, b, b_3di_str, b_3di, b_bias_arr, b_bias, &BLOSUM62, matrix_3di, gaps, a_idx, b_idx, cigar); + + printf("a: %s\na_3di: %s\nb: %s\nb_3di: %s\nscore: %d\nstart idx: (%lu, %lu)\nend idx: (%lu, %lu)\n", + a_str, + a_3di_str, + b_str, + b_3di_str, + local_aln.score, + local_aln.a_start, + local_aln.b_start, + local_aln.a_end, + local_aln.b_end); + + size_t cigar_len = block_len_cigar(cigar); + // Note: 'M' signals either a match or mismatch + char ops_char[] = {' ', 'M', '=', 'X', 'I', 'D'}; + for (int i = 0; i < cigar_len; i++) { + // cigar string is reversed + OpLen o = block_get_cigar(cigar, cigar_len - 1 - i); + printf("%lu%c", o.len, ops_char[o.op]); + } + 
printf("\n"); + + block_free_cigar(cigar); + block_free_aa_trace_xdrop(block_trace); + block_free_aa_xdrop(block_no_trace); + block_free_padded_aa(a); + block_free_padded_aa(a_3di); + block_free_pos_bias(a_bias); + block_free_padded_aa(b); + block_free_padded_aa(b_3di); + block_free_pos_bias(b_bias); + block_free_aamatrix(matrix_3di); +} + +int main() { + example(); +} diff --git a/lib/block-aligner/c/align_prefix.c b/lib/block-aligner/c/align_prefix.c new file mode 100644 index 000000000..5dee318ca --- /dev/null +++ b/lib/block-aligner/c/align_prefix.c @@ -0,0 +1,109 @@ +#include +#include + +#include "block_aligner.h" + +// max block size, assume that two seqs will not have a larger gap than this +#define MAX_SIZE 4096 + +void align_prefix(BlockHandle block, PaddedBytes* a, PaddedBytes* a_3di, PosBias* a_bias, PaddedBytes* b, PaddedBytes* b_3di, PosBias* b_bias, const AAMatrix* matrix, const AAMatrix* matrix_3di, Gaps gaps, int32_t target_score, AlignResult* res, Cigar* cigar) { + size_t min_size = 32; + res->score = -1000000000; + res->query_idx = -1; + res->reference_idx = -1; + + // exponential search on min_size until either max_size is reached or target_score is reached + while (min_size <= MAX_SIZE && res->score < target_score) { + // allow max block size to grow + SizeRange range = {.min = min_size, .max = MAX_SIZE}; + // estimated x-drop threshold + int32_t x_drop = -(min_size * gaps.extend + gaps.open); + block_align_3di_aa_trace_xdrop(block, a, a_3di, a_bias, b, b_3di, b_bias, matrix, matrix_3di, gaps, range, x_drop); + *res = block_res_aa_trace_xdrop(block); + min_size *= 2; + } + + block_cigar_aa_trace_xdrop(block, res->query_idx, res->reference_idx, cigar); +} + +void example(void) { + const char* a_str = "AAAAAAAA"; + const char* b_str = "AARAAAA"; + const char* a_3di_str = "AAAAAAAA"; + const char* b_3di_str = "AAAAAAA"; + const int16_t a_bias_arr[8] = {0}; + const int16_t b_bias_arr[7] = {0}; + size_t a_len = strlen(a_str); + size_t b_len = 
strlen(b_str); + Gaps gaps = {.open = -11, .extend = -1}; + + int32_t target_score = 23; + + // note: instead of a_len or b_len, it is possible to use really large lengths + // and reuse data structures to avoid allocations + PaddedBytes* a = block_new_padded_aa(a_len, MAX_SIZE); + PaddedBytes* a_3di = block_new_padded_aa(a_len, MAX_SIZE); + PosBias* a_bias = block_new_pos_bias(a_len, MAX_SIZE); + PaddedBytes* b = block_new_padded_aa(b_len, MAX_SIZE); + PaddedBytes* b_3di = block_new_padded_aa(b_len, MAX_SIZE); + PosBias* b_bias = block_new_pos_bias(b_len, MAX_SIZE); + + AAMatrix* matrix_3di = block_new_simple_aamatrix(1, -1); + + // setting bytes, biases, and scoring matrix does not allocate + block_set_bytes_padded_aa(a, (const uint8_t*)a_str, a_len, MAX_SIZE); + block_set_bytes_padded_aa(a_3di, (const uint8_t*)a_3di_str, a_len, MAX_SIZE); + block_set_pos_bias(a_bias, a_bias_arr, a_len); + block_set_bytes_padded_aa(b, (const uint8_t*)b_str, b_len, MAX_SIZE); + block_set_bytes_padded_aa(b_3di, (const uint8_t*)b_3di_str, b_len, MAX_SIZE); + block_set_pos_bias(b_bias, b_bias_arr, b_len); + + for (int i = 0; i < 20; i++) { + for (int j = 0; j < 20; j++) { + uint8_t c = i + 'A'; + uint8_t d = j + 'A'; + // set to actual scores instead of zeros! 
+ block_set_aamatrix(matrix_3di, c, d, 0); + } + } + + // block and cigar can also be pre-allocated with really large lengths + BlockHandle block = block_new_aa_trace_xdrop(a_len, b_len, MAX_SIZE); + Cigar* cigar = block_new_cigar(a_len, b_len); + AlignResult res; + + // alignment performs no allocations + align_prefix(block, a, a_3di, a_bias, b, b_3di, b_bias, &BLOSUM62, matrix_3di, gaps, target_score, &res, cigar); + + printf("a: %s\na_3di: %s\nb: %s\nb_3di: %s\nscore: %d\nidx: (%lu, %lu)\n", + a_str, + a_3di_str, + b_str, + b_3di_str, + res.score, + res.query_idx, + res.reference_idx); + + size_t cigar_len = block_len_cigar(cigar); + // Note: 'M' signals either a match or mismatch + char ops_char[] = {' ', 'M', '=', 'X', 'I', 'D'}; + for (int i = 0; i < cigar_len; i++) { + OpLen o = block_get_cigar(cigar, i); + printf("%lu%c", o.len, ops_char[o.op]); + } + printf("\n"); + + block_free_cigar(cigar); + block_free_aa_trace_xdrop(block); + block_free_padded_aa(a); + block_free_padded_aa(a_3di); + block_free_pos_bias(a_bias); + block_free_padded_aa(b); + block_free_padded_aa(b_3di); + block_free_pos_bias(b_bias); + block_free_aamatrix(matrix_3di); +} + +int main() { + example(); +} diff --git a/lib/block-aligner/c/block_aligner.h b/lib/block-aligner/c/block_aligner.h new file mode 100644 index 000000000..eef9974a6 --- /dev/null +++ b/lib/block-aligner/c/block_aligner.h @@ -0,0 +1,667 @@ +#ifndef block_aligner_h +#define block_aligner_h + +/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */ + +#include +#include +#include +#include +#define ALIGNED(n) __attribute__ ((aligned(n))) + +/** + * A match/mismatch, insertion, or deletion operation. + * + * When aligning `q` against `r`, this represents the edit operations to get from `r` to `q`. + */ +enum Operation +#ifdef __cplusplus + : uint8_t +#endif // __cplusplus + { + /** + * Placeholder variant. + */ + Sentinel = 0, + /** + * Match or mismatch. 
+ * + * This is a diagonal transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + */ + M = 1, + /** + * Match. + */ + Eq = 2, + /** + * Mismatch. + */ + X = 3, + /** + * Insertion. + * + * When aligning sequences `q` against `r`, this is a gap in `r`. + * This is a row transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + */ + I = 4, + /** + * Deletion. + * + * When aligning sequences `q` against `r`, this is a gap in `q`. + * This is a column transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + */ + D = 5, +}; +#ifndef __cplusplus +typedef uint8_t Operation; +#endif // __cplusplus + +/** + * Amino acid scoring matrix. + */ +typedef struct AAMatrix AAMatrix; + +/** + * Amino acid position specific scoring matrix. + */ +typedef struct AAProfile AAProfile; + +/** + * A CIGAR string that holds a list of operations. + */ +typedef struct Cigar Cigar; + +/** + * Nucleotide scoring matrix. + */ +typedef struct NucMatrix NucMatrix; + +/** + * A padded string that helps avoid out of bounds access when using SIMD. + * + * A single padding byte in inserted before the start of the string, + * and `block_size` bytes are inserted after the end of the string. + */ +typedef struct PaddedBytes PaddedBytes; + +/** + * Positional score bias for scores. + */ +typedef struct PosBias PosBias; + +/** + * An operation and how many times that operation is repeated. + */ +typedef struct OpLen { + Operation op; + uintptr_t len; +} OpLen; + +/** + * A handle for a block in block aligner. + */ +typedef void *BlockHandle; + +/** + * Open and extend gap costs. + * + * Open cost must include the extend cost. For example, with `Gaps { open: -11, extend: -1 }`, + * a gap of length 1 costs -11, and a gap of length 2 costs -12. + */ +typedef struct Gaps { + int8_t open; + int8_t extend; +} Gaps; + +/** + * Represents a range that has inclusive lower and upper bounds. 
+ */ +typedef struct SizeRange { + uintptr_t min; + uintptr_t max; +} SizeRange; + +/** + * Resulting score and alignment end position. + */ +typedef struct AlignResult { + int32_t score; + uintptr_t query_idx; + uintptr_t reference_idx; +} AlignResult; + +/** + * Arbitrary bytes scoring matrix. + */ +typedef struct ByteMatrix { + int8_t match_score; + int8_t mismatch_score; +} ByteMatrix; + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +extern const struct NucMatrix NW1; + +extern const struct AAMatrix BLOSUM45; + +extern const struct AAMatrix BLOSUM50; + +extern const struct AAMatrix BLOSUM62; + +extern const struct AAMatrix BLOSUM80; + +extern const struct AAMatrix BLOSUM90; + +extern const struct AAMatrix PAM100; + +extern const struct AAMatrix PAM120; + +extern const struct AAMatrix PAM160; + +extern const struct AAMatrix PAM200; + +extern const struct AAMatrix PAM250; + +extern const struct ByteMatrix BYTES1; + +/** + * Create a new simple AAMatrix with custom match and mismatch scores. + * + * Note that the match score must be positive and the mismatch score must be negative. + */ +struct AAMatrix *block_new_simple_aamatrix(int8_t match_score, int8_t mismatch_score); + +/** + * Set an entry in the AAMatrix. + */ +void block_set_aamatrix(struct AAMatrix *matrix, uint8_t a, uint8_t b, int8_t score); + +void block_set_aamatrix_num(struct AAMatrix *matrix, int8_t a, int8_t b, int8_t score); +/** + * Frees an AAMatrix. + */ +void block_free_aamatrix(struct AAMatrix *matrix); + +/** + * Create a new profile of a specific length, with default (large negative) values. + * + * Note that internally, the created profile is longer than a conventional position-specific scoring + * matrix (and `str_len`) by 1, so the profile will have the same length as the number of + * columns in the DP matrix. + * The first column of scores in the profile should be large negative values (padding). 
+ * This allows gap open costs to be specified for the first column of the DP matrix. + */ +struct AAProfile *block_new_aaprofile(uintptr_t str_len, uintptr_t block_size, int8_t gap_extend); + +/** + * Get the length of the profile. + */ +uintptr_t block_len_aaprofile(const struct AAProfile *profile); + +/** + * Clear the profile so it can be used for profile lengths less than or equal + * to the length this struct was created with. + */ +void block_clear_aaprofile(struct AAProfile *profile, uintptr_t str_len); + +/** + * Set the score for a position and byte. + * + * The first column (`i = 0`) should be padded with large negative values. + * Therefore, set values starting from `i = 1`. + */ +void block_set_aaprofile(struct AAProfile *profile, uintptr_t i, uint8_t b, int8_t score); + +/** + * Set the scores for all positions in the position specific scoring matrix. + * + * The profile should be first `clear`ed before it is reused with different lengths. + * + * Use `order` to specify the order of bytes that is used in the `scores` matrix. + * Scores (in `scores`) should be stored in row-major order, where each row is a different position + * and each column is a different byte. + */ +void block_set_all_aaprofile(struct AAProfile *profile, + const uint8_t *order, + uintptr_t order_len, + const int8_t *scores, + uintptr_t scores_len, + uintptr_t left_shift, + uintptr_t right_shift); + +/** + * Set the scores for all positions in reverse in the position specific scoring matrix. + * + * The profile should be first `clear`ed before it is reused with different lengths. + * + * Use `order` to specify the order of bytes that is used in the `scores` matrix. + * Scores (in `scores`) should be stored in row-major order, where each row is a different position + * and each column is a different byte. 
+ */ +void block_set_all_rev_aaprofile(struct AAProfile *profile, + const uint8_t *order, + uintptr_t order_len, + const int8_t *scores, + uintptr_t scores_len, + uintptr_t left_shift, + uintptr_t right_shift); + +int8_t* aaprofile_pos_aa(struct AAProfile *profile); + +int16_t* aaprofile_aa_pos(struct AAProfile *profile); + +/** + * Set the gap open cost for a column. + * + * When aligning a sequence `q` to a profile `r`, this is the gap open cost at column `i` for a + * column transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + * This represents starting a gap in `q`. + */ +void block_set_gap_open_C_aaprofile(struct AAProfile *profile, uintptr_t i, int8_t gap); + +/** + * Set the gap close cost for a column. + * + * When aligning a sequence `q` to a profile `r`, this is the gap close cost at column `i` for + * ending column transitions in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + * This represents ending a gap in `q`. + */ +void block_set_gap_close_C_aaprofile(struct AAProfile *profile, uintptr_t i, int8_t gap); + +/** + * Set the gap open cost for a row. + * + * When aligning a sequence `q` to a profile `r`, this is the gap open cost at column `i` for + * a row transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + * This represents starting a gap in `r`. + */ +void block_set_gap_open_R_aaprofile(struct AAProfile *profile, uintptr_t i, int8_t gap); + +/** + * Set the gap open cost for all column transitions. + */ + void block_set_all_gap_open_C_aaprofile(struct AAProfile *profile, int8_t gap); + + /** + * Set the gap close cost for all column transitions. + */ + void block_set_all_gap_close_C_aaprofile(struct AAProfile *profile, int8_t gap); + + /** + * Set the gap open cost for all row transitions. + */ + void block_set_all_gap_open_R_aaprofile(struct AAProfile *profile, int8_t gap); + +/** + * Get the score for a position and byte. 
+ */ +int8_t block_get_aaprofile(const struct AAProfile *profile, uintptr_t i, uint8_t b); + +/** + * Get the gap extend cost. + */ +int8_t block_get_gap_extend_aaprofile(const struct AAProfile *profile); + +size_t block_get_curr_len_aaprofile(const struct AAProfile *profile); + +/** + * Frees an AAProfile. + */ +void block_free_aaprofile(struct AAProfile *profile); + +/** + * Create a new empty CIGAR string. + */ +struct Cigar *block_new_cigar(uintptr_t query_len, uintptr_t reference_len); + +/** + * Get the operation at a certain index in a CIGAR string. + */ +struct OpLen block_get_cigar(const struct Cigar *cigar, uintptr_t i); + +/** + * Get the length of a CIGAR string. + */ +uintptr_t block_len_cigar(const struct Cigar *cigar); + +/** + * Frees a CIGAR string. + */ +void block_free_cigar(struct Cigar *cigar); + +/** + * Create a new empty padded amino acid string. + */ +struct PaddedBytes *block_new_padded_aa(uintptr_t len, uintptr_t max_size); + +/** + * Write to a padded amino acid string. + */ +void block_set_bytes_padded_aa(struct PaddedBytes *padded, + const uint8_t *s, + uintptr_t len, + uintptr_t max_size); + +void block_set_bytes_padded_aa_numsequence(struct PaddedBytes *padded, + const uint8_t *s, + uintptr_t len, + uintptr_t max_size); +/** + * Frees a padded amino acid string. + */ +void block_free_padded_aa(struct PaddedBytes *padded); + +/** + * Create a new zero initialized positional score bias vector. + */ +struct PosBias *block_new_pos_bias(uintptr_t len, uintptr_t max_size); + +/** + * Write to the positional score bias vector. + */ +void block_set_pos_bias(struct PosBias *bias, const int16_t *b, uintptr_t len); + +/** + * Frees the positional score bias vector. + */ +void block_free_pos_bias(struct PosBias *bias); + +/** + *Create a new block aligner instance for global alignment of amino acid strings (no traceback). 
+ */ +BlockHandle block_new_aa(uintptr_t query_len, uintptr_t reference_len, uintptr_t max_size); + +/** + *Global alignment of two amino acid strings (no traceback). + */ +void block_align_aa(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + const struct AAMatrix *m, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *Global alignment of an amino acid sequence to a profile (no traceback). + */ +void block_align_profile_aa(BlockHandle b, + const struct PaddedBytes *q, + const struct AAProfile *r, + struct SizeRange s, + int32_t x); + +/** + *Global alignment of two amino acid strings with 3di (no traceback). + */ +void block_align_3di_aa(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *q_3di, + const struct PosBias *q_bias, + const struct PaddedBytes *r, + const struct PaddedBytes *r_3di, + const struct PosBias *r_bias, + const struct AAMatrix *m, + const struct AAMatrix *m_3di, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *Retrieves the result of global alignment of two amino acid strings (no traceback). + */ +struct AlignResult block_res_aa(BlockHandle b); + +/** + *Don't use. + */ +void _block_cigar_aa(BlockHandle b, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Don't use. + */ +void _block_cigar_eq_aa(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Frees the block used for global alignment of two amino acid strings (no traceback). + */ +void block_free_aa(BlockHandle b); + +/** + *Create a new block aligner instance for X-drop alignment of amino acid strings (no traceback). + */ +BlockHandle block_new_aa_xdrop(uintptr_t query_len, uintptr_t reference_len, uintptr_t max_size); + +/** + *X-drop alignment of two amino acid strings (no traceback). 
+ */ +void block_align_aa_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + const struct AAMatrix *m, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *X-drop alignment of an amino acid sequence to a profile (no traceback). + */ +void block_align_profile_aa_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct AAProfile *r, + struct SizeRange s, + int32_t x); + +/** + *X-drop alignment of two amino acid strings with 3di (no traceback). + */ +void block_align_3di_aa_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *q_3di, + const struct PosBias *q_bias, + const struct PaddedBytes *r, + const struct PaddedBytes *r_3di, + const struct PosBias *r_bias, + const struct AAMatrix *m, + const struct AAMatrix *m_3di, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *Retrieves the result of X-drop alignment of two amino acid strings (no traceback). + */ +struct AlignResult block_res_aa_xdrop(BlockHandle b); + +/** + *Don't use. + */ +void _block_cigar_aa_xdrop(BlockHandle b, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Don't use. + */ +void _block_cigar_eq_aa_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Frees the block used for X-drop alignment of two amino acid strings (no traceback). + */ +void block_free_aa_xdrop(BlockHandle b); + +/** + *Create a new block aligner instance for global alignment of amino acid strings, with traceback. + */ +BlockHandle block_new_aa_trace(uintptr_t query_len, uintptr_t reference_len, uintptr_t max_size); + +/** + *Global alignment of two amino acid strings, with traceback. 
+ */ +void block_align_aa_trace(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + const struct AAMatrix *m, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *Global alignment of an amino acid sequence to a profile, with traceback. + */ +void block_align_profile_aa_trace(BlockHandle b, + const struct PaddedBytes *q, + const struct AAProfile *r, + struct SizeRange s, + int32_t x); + +/** + *Global alignment of two amino acid strings with 3di, with traceback. + */ +void block_align_3di_aa_trace(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *q_3di, + const struct PosBias *q_bias, + const struct PaddedBytes *r, + const struct PaddedBytes *r_3di, + const struct PosBias *r_bias, + const struct AAMatrix *m, + const struct AAMatrix *m_3di, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *Retrieves the result of global alignment of two amino acid strings, with traceback. + */ +struct AlignResult block_res_aa_trace(BlockHandle b); + +/** + *Retrieves the resulting CIGAR string from global alignment of two amino acid strings, with traceback. + */ +void block_cigar_aa_trace(BlockHandle b, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Retrieves the resulting CIGAR string from global alignment of two amino acid strings, with traceback containing =/X. + */ +void block_cigar_eq_aa_trace(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Frees the block used for global alignment of two amino acid strings, with traceback. + */ +void block_free_aa_trace(BlockHandle b); + +/** + *Create a new block aligner instance for X-drop alignment of amino acid strings, with traceback. + */ +BlockHandle block_new_aa_trace_xdrop(uintptr_t query_len, + uintptr_t reference_len, + uintptr_t max_size); + +/** + *X-drop alignment of two amino acid strings, with traceback. 
+ */ +void block_align_aa_trace_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + const struct AAMatrix *m, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *X-drop alignment of an amino acid sequence to a profile, with traceback. + */ +void block_align_profile_aa_trace_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct AAProfile *r, + struct SizeRange s, + int32_t x); + + +/** + *X-drop alignment of two amino acid strings with posbias with traceback. + */ + void block_align_aa_trace_xdrop_posbias(BlockHandle b, + const struct PaddedBytes *q, + const struct PosBias *q_bias, + const struct PaddedBytes *r, + const struct PosBias *r_bias, + const struct AAMatrix *m, + struct Gaps g, + struct SizeRange s, + int32_t x); + + /** + *X-drop alignment of two amino acid strings with 3di, with traceback. + */ +void block_align_3di_aa_trace_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *q_3di, + const struct PosBias *q_bias, + const struct PaddedBytes *r, + const struct PaddedBytes *r_3di, + const struct PosBias *r_bias, + const struct AAMatrix *m, + const struct AAMatrix *m_3di, + struct Gaps g, + struct SizeRange s, + int32_t x); + +/** + *Retrieves the result of X-drop alignment of two amino acid strings, with traceback. + */ +struct AlignResult block_res_aa_trace_xdrop(BlockHandle b); + +/** + *Retrieves the resulting CIGAR string from X-drop alignment of two amino acid strings, with traceback. + */ +void block_cigar_aa_trace_xdrop(BlockHandle b, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Retrieves the resulting CIGAR string from X-drop alignment of two amino acid strings, with traceback containing =/X. 
+ */ +void block_cigar_eq_aa_trace_xdrop(BlockHandle b, + const struct PaddedBytes *q, + const struct PaddedBytes *r, + uintptr_t query_idx, + uintptr_t reference_idx, + struct Cigar *cigar); + +/** + *Frees the block used for X-drop alignment of two amino acid strings, with traceback. + */ +void block_free_aa_trace_xdrop(BlockHandle b); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif /* block_aligner_h */ diff --git a/lib/block-aligner/c/cbindgen.toml b/lib/block-aligner/c/cbindgen.toml new file mode 100644 index 000000000..1fa8471be --- /dev/null +++ b/lib/block-aligner/c/cbindgen.toml @@ -0,0 +1,12 @@ +language = "C" +include_guard = "block_aligner_h" +cpp_compat = true +documentation = true +autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" +after_includes = "#define ALIGNED(n) __attribute__ ((aligned(n)))" + +[layout] +aligned_n = "ALIGNED" + +[parse] +expand = ["block_aligner_c"] diff --git a/lib/block-aligner/c/example.c b/lib/block-aligner/c/example.c new file mode 100644 index 000000000..cdd163c79 --- /dev/null +++ b/lib/block-aligner/c/example.c @@ -0,0 +1,125 @@ +#include +#include + +#include "block_aligner.h" + +void example1(void) { + // global seq-seq alignment + const char* a_str = "AAAAAAAA"; + const char* b_str = "AARAAAA"; + size_t a_len = strlen(a_str); + size_t b_len = strlen(b_str); + SizeRange range = {.min = 32, .max = 32}; + Gaps gaps = {.open = -11, .extend = -1}; + + PaddedBytes* a = block_new_padded_aa(a_len, range.max); + PaddedBytes* b = block_new_padded_aa(b_len, range.max); + block_set_bytes_padded_aa(a, (const uint8_t*)a_str, a_len, range.max); + block_set_bytes_padded_aa(b, (const uint8_t*)b_str, b_len, range.max); + + BlockHandle block = block_new_aa(a_len, b_len, range.max); + block_align_aa(block, a, b, &BLOSUM62, gaps, range, 0); + AlignResult res = block_res_aa(block); + + printf("a: %s\nb: %s\nscore: %d\nidx: (%lu, %lu)\n", + a_str, + b_str, + 
res.score, + res.query_idx, + res.reference_idx); + + block_free_aa(block); + block_free_padded_aa(a); + block_free_padded_aa(b); +} + +void example2(void) { + // global seq-seq alignment with traceback + const char* a_str = "AAAAAAAA"; + const char* b_str = "AARAAAA"; + size_t a_len = strlen(a_str); + size_t b_len = strlen(b_str); + SizeRange range = {.min = 32, .max = 32}; + Gaps gaps = {.open = -11, .extend = -1}; + + PaddedBytes* a = block_new_padded_aa(a_len, range.max); + PaddedBytes* b = block_new_padded_aa(b_len, range.max); + block_set_bytes_padded_aa(a, (const uint8_t*)a_str, a_len, range.max); + block_set_bytes_padded_aa(b, (const uint8_t*)b_str, b_len, range.max); + + BlockHandle block = block_new_aa_trace(a_len, b_len, range.max); + block_align_aa_trace(block, a, b, &BLOSUM62, gaps, range, 0); + AlignResult res = block_res_aa_trace(block); + + printf("a: %s\nb: %s\nscore: %d\nidx: (%lu, %lu)\n", + a_str, + b_str, + res.score, + res.query_idx, + res.reference_idx); + + Cigar* cigar = block_new_cigar(res.query_idx, res.reference_idx); + block_cigar_eq_aa_trace(block, a, b, res.query_idx, res.reference_idx, cigar); + size_t cigar_len = block_len_cigar(cigar); + // Note: 'M' signals either a match or mismatch + char ops_char[] = {' ', 'M', '=', 'X', 'I', 'D'}; + for (int i = 0; i < cigar_len; i++) { + OpLen o = block_get_cigar(cigar, i); + printf("%lu%c", o.len, ops_char[o.op]); + } + printf("\n"); + + block_free_cigar(cigar); + block_free_aa_trace(block); + block_free_padded_aa(a); + block_free_padded_aa(b); +} + +void example3(void) { + // global seq-profile alignment + const char* a_str = "AAAAAAAA"; + size_t a_len = strlen(a_str); + size_t b_len = 7; + SizeRange range = {.min = 32, .max = 32}; + + PaddedBytes* a = block_new_padded_aa(a_len, range.max); + block_set_bytes_padded_aa(a, (const uint8_t*)a_str, a_len, range.max); + + AAProfile* b = block_new_aaprofile(b_len, range.max, -1); + for (int i = 1; i <= b_len; i++) { + for (int c = 'A'; c <= 'Z'; 
c++) { + if (c == a_str[i - 1]) { + block_set_aaprofile(b, i, c, 1); + } else { + block_set_aaprofile(b, i, c, -1); + } + } + } + + for (int i = 0; i < b_len; i++) { + block_set_gap_open_C_aaprofile(b, i, -10); + block_set_gap_close_C_aaprofile(b, i, 0); + block_set_gap_open_R_aaprofile(b, i, -10); + } + + BlockHandle block = block_new_aa(a_len, b_len, range.max); + block_align_profile_aa(block, a, b, range, 0); + AlignResult res = block_res_aa(block); + + printf("a: %s\nb len: %lu\nscore: %d\nidx: (%lu, %lu)\n", + a_str, + b_len, + res.score, + res.query_idx, + res.reference_idx); + + block_free_aa(block); + block_free_padded_aa(a); + block_free_aaprofile(b); +} + +int main() { + example1(); + example2(); + example3(); +} diff --git a/lib/block-aligner/data/README.md b/lib/block-aligner/data/README.md new file mode 100644 index 000000000..43400d741 --- /dev/null +++ b/lib/block-aligner/data/README.md @@ -0,0 +1,62 @@ +# Data +Many testing and benchmark programs require large files of sequence data +that should be placed in this directory. + +Below are instructions for how to download the necessary data. Make sure +you are in this directory (`cd data`). + +## 25kbp Nanopore data +This data is from the difference recurrence [paper](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2014-8) +by Suzuki and Kasahara. + +1. `curl -OL https://github.com/Daniel-Liu-c0deb0t/diff-bench-paper/releases/download/v1.0/sequences.txt.gz` +2. `gunzip sequences.txt.gz` + +Since these reads are filtered to only have gaps smaller than 20bp, it is not representative of typical reads. Therefore, +this dataset will be rarely used. + +## \<10kbp and \<50kbp Nanopore data +This data is from the BiWFA [repository](https://github.com/smarco/BiWFA-paper/tree/main/evaluation/data) +and reformatted. + +1. `curl -OL https://github.com/Daniel-Liu-c0deb0t/block-aligner/releases/download/v0.0.0/seq_pairs.10kbps.5000.txt.gz` +2. 
`curl -OL https://github.com/Daniel-Liu-c0deb0t/block-aligner/releases/download/v0.0.0/seq_pairs.50kbps.10000.txt.gz` +3. `gunzip seq_pairs.10kbps.5000.txt.gz` +4. `gunzip seq_pairs.50kbps.10000.txt.gz` + +These files contain pairs of reads that are alignable. + +## Illumina and 1kbp Nanopore data +This data is from the Wavefront Aligner [paper](https://academic.oup.com/bioinformatics/article/37/4/456/5904262). + +1. `curl -OL https://github.com/Daniel-Liu-c0deb0t/block-aligner/releases/download/v0.0.0/real.illumina.b10M.txt.gz` +2. `curl -OL https://github.com/Daniel-Liu-c0deb0t/block-aligner/releases/download/v0.0.0/real.ont.b10M.txt.gz` +3. `gunzip real.illumina.b10M.txt.gz` +4. `gunzip real.ont.b10M.txt.gz` + +The Illumina, 1kbp Nanopore, and 25kbp Nanopore datasets are just a list of reads, where every two reads +form a pair that is alignable. + +## Uniclust30 data +This data is generated with [mmseqs2](https://github.com/soedinglab/MMseqs2) +and the [Uniclust30](https://uniclust.mmseqs.com/) dataset. +Two datasets with two different coverage percentages are used: `0.8` +(default in `mmseqs2`) and `0.95`. Using a higher coverage helps gather +sequences that are "globally alignable", as `mmseqs2` uses local alignment. +The dataset with the lower coverage percentage is expected to be more challenging. + +Scripts for generating the data: [`0.8` coverage](uc30_pairwise_aln.sh) +and [`0.95` coverage](uc30_0.95_pairwise_aln.sh). + +1. `curl -OL https://github.com/Daniel-Liu-c0deb0t/block-aligner/releases/download/v0.0.0/uc30.tar.gz` +2. `curl -OL https://github.com/Daniel-Liu-c0deb0t/block-aligner/releases/download/v0.0.0/uc30_0.95.tar.gz` +3. `tar -xvf uc30.tar.gz` +4. `tar -xvf uc30_0.95.tar.gz` + +## SCOP PSSM data +This data is generated with `mmseqs2` and the [SCOPe](https://scop.berkeley.edu/astral/ver=2.01) dataset. +This data is used for aligning sequences to profiles (position-specific scoring matrices) of protein domains. + +1. `mkdir scop && cd scop` +2.
`curl -OL https://github.com/Daniel-Liu-c0deb0t/block-aligner/releases/download/v0.0.0/scop.tar.gz` +3. `tar -xvf scop.tar.gz` diff --git a/lib/block-aligner/data/uc30_0.95_pairwise_aln.sh b/lib/block-aligner/data/uc30_0.95_pairwise_aln.sh new file mode 100755 index 000000000..e63c7de6d --- /dev/null +++ b/lib/block-aligner/data/uc30_0.95_pairwise_aln.sh @@ -0,0 +1,9 @@ +mmseqs align seqdb seqdb uc30_clu uc30_aln --comp-bias-corr 0 -a -c 0.95 --threads 128 +mmseqs convertalis seqdb seqdb uc30_aln uc30_aln.m8 --format-output query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,qseq,tseq --threads 16 +awk '$1!=prev && $3 < 0.4 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_0.95_30_40.m8 +awk '$1!=prev && $3 > 0.4 && $3 < 0.5 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_0.95_40_50.m8 +awk '$1!=prev && $3 > 0.5 && $3 < 0.6 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_0.95_50_60.m8 +awk '$1!=prev && $3 > 0.6 && $3 < 0.7 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_0.95_60_70.m8 +awk '$1!=prev && $3 > 0.7 && $3 < 0.8 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_0.95_70_80.m8 +awk '$1!=prev && $3 > 0.8 && $3 < 0.9 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_0.95_80_90.m8 +awk '$1!=prev && $3 > 0.9 && $3 < 1.0 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_0.95_90_100.m8 diff --git a/lib/block-aligner/data/uc30_pairwise_aln.sh b/lib/block-aligner/data/uc30_pairwise_aln.sh new file mode 100755 index 000000000..229594203 --- /dev/null +++ b/lib/block-aligner/data/uc30_pairwise_aln.sh @@ -0,0 +1,9 @@ +mmseqs align seqdb seqdb uc30_clu uc30_aln --comp-bias-corr 0 -a --threads 128 +mmseqs convertalis seqdb seqdb uc30_aln uc30_aln.m8 --format-output query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,qseq,tseq --threads 16 +awk '$1!=prev && $3 < 0.4 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_30_40.m8 +awk '$1!=prev 
&& $3 > 0.4 && $3 < 0.5 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_40_50.m8 +awk '$1!=prev && $3 > 0.5 && $3 < 0.6 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_50_60.m8 +awk '$1!=prev && $3 > 0.6 && $3 < 0.7 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_60_70.m8 +awk '$1!=prev && $3 > 0.7 && $3 < 0.8 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_70_80.m8 +awk '$1!=prev && $3 > 0.8 && $3 < 0.9 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_80_90.m8 +awk '$1!=prev && $3 > 0.9 && $3 < 1.0 {print; prev=$1}' uc30_aln.m8 | shuf | head -n 1000 > uc30_90_100.m8 diff --git a/lib/block-aligner/examples/accuracy.rs b/lib/block-aligner/examples/accuracy.rs new file mode 100644 index 000000000..e9388d675 --- /dev/null +++ b/lib/block-aligner/examples/accuracy.rs @@ -0,0 +1,246 @@ +//use parasailors::{Matrix, *}; + +use bio::alignment::pairwise::*; +use bio::scores::blosum62; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use simulate_seqs::*; + +use std::{env, str, cmp}; + +fn test(iter: usize, len: usize, k: usize, slow: bool, insert_len: Option, nuc: bool, max_size: usize, verbose: bool) -> (usize, f64, i32, i32) { + let mut wrong = 0usize; + let mut wrong_avg = 0f64; + let mut wrong_min = i32::MAX; + let mut wrong_max = i32::MIN; + let mut rng = StdRng::seed_from_u64(1234); + let nw = |a, b| if a == b { 1 } else { -1 }; + + for _i in 0..iter { + let r = rand_str(len, if nuc { &NUC } else { &AMINO_ACIDS }, &mut rng); + let q = match insert_len { + Some(len) => rand_mutate_insert(&r, k, if nuc { &NUC } else { &AMINO_ACIDS }, len, &mut rng), + None => rand_mutate(&r, k, if nuc { &NUC } else { &AMINO_ACIDS }, &mut rng) + }; + + // rust-bio + let bio_score = if nuc { + let mut bio_aligner = Aligner::with_capacity(q.len(), r.len(), -1, -1, &nw); + bio_aligner.global(&q, &r).score + } else { + let mut bio_aligner = Aligner::with_capacity(q.len(), r.len(), -10, -1, &blosum62); + 
bio_aligner.global(&q, &r).score + }; + + // parasailors + /* + let matrix = Matrix::new(MatrixType::Blosum62); + let profile = Profile::new(&q, &matrix); + let parasail_score = global_alignment_score(&profile, &r, 11, 1); + */ + + // ours + let scan_score = if slow { + slow_align(&q, &r) + } else { + if nuc { + let run_gaps = Gaps { open: -2, extend: -1 }; + let r_padded = PaddedBytes::from_bytes::(&r, 2048); + let q_padded = PaddedBytes::from_bytes::(&q, 2048); + let mut block_aligner = Block::::new(q.len(), r.len(), max_size); + block_aligner.align(&q_padded, &r_padded, &NW1, run_gaps, 32..=max_size, 0); + block_aligner.res().score + } else { + let run_gaps = Gaps { open: -11, extend: -1 }; + let r_padded = PaddedBytes::from_bytes::(&r, 2048); + let q_padded = PaddedBytes::from_bytes::(&q, 2048); + let mut block_aligner = Block::::new(q.len(), r.len(), max_size); + block_aligner.align(&q_padded, &r_padded, &BLOSUM62, run_gaps, 32..=max_size, 0); + block_aligner.res().score + } + }; + + if bio_score != scan_score { + wrong += 1; + let score_diff = bio_score - scan_score; + wrong_avg += (score_diff as f64) / (bio_score as f64); + wrong_min = cmp::min(wrong_min, score_diff); + wrong_max = cmp::max(wrong_max, score_diff); + + if verbose { + println!( + "bio: {}, ours: {}\nq: {}\nr: {}", + bio_score, + scan_score, + str::from_utf8(&q).unwrap(), + str::from_utf8(&r).unwrap() + ); + } + } + } + + (wrong, wrong_avg / (wrong as f64), wrong_min, wrong_max) +} + +fn main() { + let arg1 = env::args().skip(1).next(); + let slow = false; + let nuc = true; + let verbose = arg1.is_some() && arg1.unwrap() == "-v"; + let iters = [100, 100, 10]; + let lens = [100, 1000, 10000]; + let rcp_ks = [10.0, 5.0, 2.0]; + let inserts = [false, true]; + let max_sizes = [32, 2048]; + + let mut total_wrong = 0usize; + let mut total = 0usize; + + println!("\nlen, k, insert, iter, max size, wrong, wrong % error, wrong min, wrong max\n"); + + for (&len, &iter) in lens.iter().zip(&iters) { + for 
&rcp_k in &rcp_ks { + for &insert in &inserts { + for &max_size in &max_sizes { + let insert_len = if insert { Some(len / 10) } else { None }; + let (wrong, wrong_avg, wrong_min, wrong_max) = test(iter, len, ((len as f64) / rcp_k) as usize, slow, insert_len, nuc, max_size, verbose); + println!( + "\n{}, {}, {}, {}, {}, {}, {}, {}, {}\n", + len, + ((len as f64) / rcp_k) as usize, + insert, + iter, + max_size, + wrong, + wrong_avg, + wrong_min, + wrong_max + ); + total_wrong += wrong; + total += iter; + } + } + } + } + + println!("\n# total: {}, wrong: {}", total, total_wrong); + println!("# Done!"); +} + +// Scalar version of the block aligner algorithm for testing +// purposes. May not exactly match the implementation of the +// vectorized version. +#[allow(non_snake_case)] +fn slow_align(q: &[u8], r: &[u8]) -> i32 { + let mut block_width = 16usize; + let mut block_height = 16usize; + let block_grow = 16usize; + let max_size = 256usize; + let i_step = 4usize; + let j_step = 4usize; + let mut y_drop = 3i32; + let y_drop_grow = 2i32; + + let mut D = vec![i32::MIN; (q.len() + 1 + max_size) * (r.len() + 1 + max_size)]; + let mut R = vec![i32::MIN; (q.len() + 1 + max_size) * (r.len() + 1 + max_size)]; + let mut C = vec![i32::MIN; (q.len() + 1 + max_size) * (r.len() + 1 + max_size)]; + D[0 + 0 * (q.len() + 1 + max_size)] = 0; + let mut i = 0usize; + let mut j = 0usize; + let mut dir = 0; + let mut best_max = 0; + + //println!("start"); + + loop { + let max = match dir { + 0 => { // right + calc_block(q, r, &mut D, &mut R, &mut C, i, j, block_width, block_height, max_size, -11, -1) + }, + _ => { // down + calc_block(q, r, &mut D, &mut R, &mut C, i, j, block_width, block_height, max_size, -11, -1) + } + }; + + if i + block_height > q.len() && j + block_width > r.len() { + break; + } + + let right_max = block_max(&D, q.len() + 1 + max_size, i, j + block_width - 1, 1, block_height); + let down_max = block_max(&D, q.len() + 1 + max_size, i + block_height - 1, j, block_width, 
1); + best_max = cmp::max(best_max, max); + + if block_width < max_size && cmp::max(right_max, down_max) < best_max - y_drop { + block_width += block_grow; + block_height += block_grow; + y_drop += y_drop_grow; + //println!("i: {}, j: {}, w: {}", i, j, block_width); + continue; + } + + if j + block_width > r.len() { + i += i_step; + dir = 1; + } else if i + block_height > q.len() { + j += j_step; + dir = 0; + } else { + if down_max > right_max { + i += i_step; + dir = 1; + } else if right_max > down_max { + j += j_step; + dir = 0; + } else { + j += j_step; + dir = 0; + } + } + } + + D[q.len() + r.len() * (q.len() + 1 + max_size)] +} + +#[allow(non_snake_case)] +fn block_max(D: &[i32], col_len: usize, start_i: usize, start_j: usize, block_width: usize, block_height: usize) -> i32 { + let mut max = i32::MIN; + for i in start_i..start_i + block_height { + for j in start_j..start_j + block_width { + max = cmp::max(max, D[i + j * col_len]); + } + } + max +} + +#[allow(non_snake_case)] +fn calc_block(q: &[u8], r: &[u8], D: &mut [i32], R: &mut [i32], C: &mut [i32], start_i: usize, start_j: usize, block_width: usize, block_height: usize, max_size: usize, gap_open: i32, gap_extend: i32) -> i32 { + let idx = |i: usize, j: usize| { i + j * (q.len() + 1 + max_size) }; + let mut max = i32::MIN; + + for i in start_i..start_i + block_height { + for j in start_j..start_j + block_width { + if D[idx(i, j)] != i32::MIN { + continue; + } + + R[idx(i, j)] = if i == 0 { i32::MIN } else { cmp::max( + R[idx(i - 1, j)].saturating_add(gap_extend), + D[idx(i - 1, j)].saturating_add(gap_open) + ) }; + C[idx(i, j)] = if j == 0 { i32::MIN } else { cmp::max( + C[idx(i, j - 1)].saturating_add(gap_extend), + D[idx(i, j - 1)].saturating_add(gap_open) + ) }; + D[idx(i, j)] = cmp::max( + if i == 0 || j == 0 || i > q.len() || j > r.len() { i32::MIN } else { + D[idx(i - 1, j - 1)].saturating_add(blosum62(q[i - 1], r[j - 1])) + }, + cmp::max(R[idx(i, j)], C[idx(i, j)]) + ); + max = cmp::max(max, 
D[idx(i, j)]); + } + } + + max +} diff --git a/lib/block-aligner/examples/block_img.rs b/lib/block-aligner/examples/block_img.rs new file mode 100644 index 000000000..274fb980c --- /dev/null +++ b/lib/block-aligner/examples/block_img.rs @@ -0,0 +1,87 @@ +#[cfg(not(feature = "simd_avx2"))] +fn main() {} + +#[cfg(feature = "simd_avx2")] +fn main() { + use block_aligner::scan_block::*; + use block_aligner::scores::*; + use block_aligner::cigar::*; + + use image::{Rgb, RgbImage, ColorType}; + use image::codecs::png::{PngEncoder, CompressionType, FilterType}; + use imageproc::drawing::*; + use imageproc::rect::Rect; + + use std::env; + use std::io::BufWriter; + use std::fs::File; + + let args = env::args().skip(1); + + let seqs = [ + // uc30_50_60 + (b"MVQATTWKKAIPGLSDEASSSPASELRAPLGGVRAMTMNELTRYSIKEPPSDELGSQLVNLYLQQLHTRYPFLDPAELWRLQKARTPVAHSESGNLSMTQRYGIFKLYMVFAIGATLLQLTNKSAEVSPERFYMTALQHMAAAKVPRTVQNIEAMTLLVVYHLRSASGLGLWYMIGLAMRTCIDLGLHRKNHERGLAPLVIQMHRRLFWTVYSLEIVIAISLGRPLSISERQIDVELPDTISVASVPCPSSPGETPVQPTSSNDNLQLANLLFQLRSIEARIHHSIYRTDKPLSALLPKLDKIYKQLEVWRLASIESLPPDGHVLDYPLLLYHRAVRMLIQPFMTILPVSDPYYVLCLRAAGSVCQMHKRLHQTIGYGHSFIAVQTIFVAGVTLLYGLWTQTHLVWSVTLADDLRACSLVLFVMSERAPWVRKYRDAFEVLVDAAMEKLRSGESSLAEMVAVAQTQAQAQSQSQGPRVGQFASGDETMRGPNPDTGPGSSSYGNGNGEHGGESGDVWRLVTELADWIDQDQETTPKWMPNFEALQSLS".to_vec(), b"MTSETQNSVSPPLAMPGAVAVNPRKRGRTAYVADDASSIAYTRALEERVAFLENKLAQVPTPEATTTPRETASNYSVPSGRDKNALSDVVAHVSLGNFEAPAYVGPSSGLSLALNLGEMVQATVWNKMLPDIQDGTTGNQANCINPSPRCITVEDLLAHSVKEPPSDEQGSQMLKAYTSQLHSKYPFLEPEELWKLHSERLTLAAKPTQTLTRIERFGIFKLYLVYAMGATLVQLTQRGPVLSPEALYITALQHISAARESRTVQNIEAMTLLVMFHLRSTSSHGLWYMIGLAMRTSIDLGLHRAAHEQNLDGPIVQRRRRLFWSVYSLERTIAVSLGRPLSIADNQIDVELPNTSINESPSASVIVGNDITLALVLFKLRRIESKIHHSVYRTDKTLDSLRPKLDRLHQQLKIWRNSLTDWIPTGHPDLNYALLLYNRALRLLIQPFLPILPATDPFYGLCMRAAGDICQAHKRLHQTLDYGHSFIAVQTVFVAGVTLVYGLWTQGNALWSVAVSNDIRACSLVLFVMSERAPWVRKYRDAFEVLVNAAMEKLQDSEAGLAEMASAQMRAGKAPGAADSRGVQNPDLSGNETTTRPMDSSSNQFLMSEDGGIALGEFEGAWPMVAELANWIDQDTEGGSPVWMPNFELLQSLSGTWNE".to_vec()), + + // 
uc30_70_80 + (b"MATFVGLSTSAGRDWTKIEKLASSMFCPLKLILMPVLLDYSLGLNDLIELTVHVGDSALLGCVFQITEEKCVTKVDWMFSSGEHAKDDYVLYYYANLSVPVGRFQNRVSLVGDILRNDGSLLLENVEEADQGTYTCEIRLEKESLVFKKAVALHVLPEEPKELTVHVGDSTQLGCVFQSTEEKRMTRVDWTFSSGEHTKEEVVLRYYPKPSVPVGYFQGWGRFQNRVTLVGDTSYNDASILLQGVKESDRGSYTCSIHLGNLTFRKTTVLRVIVKEPQTSVTPLALRPEILGGNQLVIIVGIVCGTILLLPVLILIVKRTHRNKSSALGQNRKKGSIFSGRCRGQMVKRSKAKGWEGASAGSSGGFGANSAWPPPWGRSPWSWVSLSFCCPLPAQPHLPRPGFLQHPIPWRPTLLTHLKLCGQKDGS".to_vec(), b"MFYPPKRILVPVLLSYFLGLNDLIVSSVELTVHVGDSALLGCIFQSTEEKLVTKVDWMFSSGEHFKDDYVLFYYANISVPVGRFQNRVSLVGDILHHDGSLLLQNVEEADQGNYTCEIRFKMESLVFKKAVVLHVLPEEPKELMAHVGDSTQMGCVFHSTEEKHMTRVDWMFSSGEHTKEEIVLRYYPKLKAAMGYPQNWGRFQNRVNLVGDTSHNDGSIVLHRVKESDGGSYTCSIHLGNLTVRKTTVLHVILKEPRTLVTSVTLRPEILGGNQLVIIVGVVCATILLLPVLILIVKRTYGNKSSVTSTTLVKNLENTKKANPEKHIYSSITMQEVTDEGSSGKSEATYMTMHPVWPSLRSAPTSPSDKKSDGGMPRTEQAF".to_vec()) + ]; + + let cell_size = 1; + let bg_color = Rgb([255u8, 255u8, 255u8]); + let fg_colors = [Rgb([50u8, 50u8, 50u8]), Rgb([50u8, 50u8, 50u8]), Rgb([50u8, 50u8, 50u8])]; + let trace_color = Rgb([255u8, 0u8, 0u8]); + + for (i, img_path) in args.enumerate() { + let q = &seqs[i].0; + let r = &seqs[i].1; + + let r_padded = PaddedBytes::from_bytes::(r, 2048); + let q_padded = PaddedBytes::from_bytes::(q, 2048); + let run_gaps = Gaps { open: -11, extend: -1 }; + + let mut block_aligner = Block::::new(q.len(), r.len(), 256); + block_aligner.align(&q_padded, &r_padded, &BLOSUM62, run_gaps, 32..=256, 0); + let blocks = block_aligner.trace().blocks(); + let mut cigar = Cigar::new(q.len(), r.len()); + block_aligner.trace().cigar(q.len(), r.len(), &mut cigar); + + let img_width = ((r.len() + 1) * cell_size) as u32; + let img_height = ((q.len() + 1) * cell_size) as u32; + let fg_color = fg_colors[i]; + let mut img = RgbImage::new(img_width, img_height); + + println!("path: {}, img size: {} x {}", img_path, img_width, img_height); + + draw_filled_rect_mut(&mut img, Rect::at(0, 0).of_size(img_width, img_height), bg_color); + + for block in 
&blocks { + if block.width == 0 || block.height == 0 { continue; } + let x = (block.col * cell_size) as i32; + let y = (block.row * cell_size) as i32; + let width = (block.width * cell_size) as u32; + let height = (block.height * cell_size) as u32; + + draw_filled_rect_mut(&mut img, Rect::at(x, y).of_size(width, height), fg_color); + draw_hollow_rect_mut(&mut img, Rect::at(x, y).of_size(width, height), bg_color); + } + + let mut x = cell_size / 2; + let mut y = cell_size / 2; + let vec = cigar.to_vec(); + + for op_len in &vec { + let (next_x, next_y) = match op_len.op { + Operation::M => (x + op_len.len * cell_size, y + op_len.len * cell_size), + Operation::I => (x, y + op_len.len * cell_size), + _ => (x + op_len.len * cell_size, y) + }; + draw_line_segment_mut(&mut img, (x as f32, y as f32), (next_x as f32, next_y as f32), trace_color); + x = next_x; + y = next_y; + } + + let writer = BufWriter::new(File::create(img_path).unwrap()); + let encoder = PngEncoder::new_with_quality(writer, CompressionType::Best, FilterType::Sub); + encoder.encode(img.as_raw(), img.width(), img.height(), ColorType::Rgb8).unwrap(); + } +} diff --git a/lib/block-aligner/examples/compare.rs b/lib/block-aligner/examples/compare.rs new file mode 100644 index 000000000..7443d7b01 --- /dev/null +++ b/lib/block-aligner/examples/compare.rs @@ -0,0 +1,251 @@ +use block_aligner::scan_block::*; +use block_aligner::scores::*; + +use std::{env, cmp}; +use std::fs::File; +use std::io::{BufRead, BufReader}; + +fn test(file_name: &str, max_size: usize, x_drop: i32) -> (usize, usize, f64, usize, f64) { + let reader = BufReader::new(File::open(file_name).unwrap()); + let mut count = 0; + let mut other_better = 0; + let mut other_better_avg = 0f64; + let mut us_better = 0; + let mut us_better_avg = 0f64; + //let mut slow_better = 0; + //let mut slow_equal = 0; + + for line in reader.lines() { + let line = line.unwrap(); + let mut row = line.split_ascii_whitespace().take(5); + let q = 
row.next().unwrap().to_ascii_uppercase(); + let r = row.next().unwrap().to_ascii_uppercase(); + let other_score = row.next().unwrap().parse::().unwrap(); + let _other_i = row.next().unwrap().parse::().unwrap(); + let _other_j = row.next().unwrap().parse::().unwrap(); + + //let x_drop = 100; + //let x_drop = 50; + //let matrix = NucMatrix::new_simple(2, -3); + let matrix = NucMatrix::new_simple(1, -1); + let r_padded = PaddedBytes::from_bytes::(r.as_bytes(), 2048); + let q_padded = PaddedBytes::from_bytes::(q.as_bytes(), 2048); + //let run_gaps = Gaps { open: -5, extend: -1 }; + let run_gaps = Gaps { open: -2, extend: -1 }; + + // ours + let mut block_aligner = Block::::new(q.len(), r.len(), max_size); + block_aligner.align(&q_padded, &r_padded, &matrix, run_gaps, 32..=max_size, x_drop); + let scan_res = block_aligner.res(); + let scan_score = scan_res.score; + + if scan_score > other_score { + us_better += 1; + us_better_avg += ((scan_score - other_score) as f64) / (scan_score as f64); + } + + if scan_score < other_score { + other_better += 1; + other_better_avg += ((other_score - scan_score) as f64) / (other_score as f64); + + /*let slow_score = slow_align(q.as_bytes(), r.as_bytes(), x_drop); + if slow_score > other_score { + slow_better += 1; + } + if slow_score == other_score { + slow_equal += 1; + } + println!("ours: {}, other: {}, slow: {}", scan_score, other_score, slow_score);*/ + } + + count += 1; + } + //println!("slow better: {}, slow equal: {}", slow_better, slow_equal); + + (count, other_better, other_better_avg / (other_better as f64), us_better, us_better_avg / (us_better as f64)) +} + +fn main() { + let mut args = env::args().skip(1); + let other_file = args.next().expect("Pass in the path to a tab-separated file to compare to!"); + let x_drop = args.next().expect("Pass in an X-drop threshold!").parse::().unwrap(); + let max_sizes = [32, 64]; + + println!("max size, total, other better, other % better, us better, us % better"); + + for &max_size in 
&max_sizes { + let (count, other_better, other_better_avg, us_better, us_better_avg) = test(&other_file, max_size, x_drop); + + println!( + "\n{}, {}, {}, {}, {}, {}", + max_size, + count, + other_better, + other_better_avg, + us_better, + us_better_avg + ); + } + + println!("# Done!"); +} + +// Scalar version of the block aligner algorithm for testing +// purposes. May not exactly match the implementation of the +// vectorized version. +// +// Also possible to simulate diagonal adaptive banding methods. +#[allow(dead_code)] +#[allow(non_snake_case)] +fn slow_align(q: &[u8], r: &[u8], x_drop: i32) -> i32 { + let block_size = 32usize; + let step = 8usize; + //let step = 1usize; + + let mut D = vec![i32::MIN; (q.len() + 1 + block_size) * (r.len() + 1 + block_size)]; + let mut R = vec![i32::MIN; (q.len() + 1 + block_size) * (r.len() + 1 + block_size)]; + let mut C = vec![i32::MIN; (q.len() + 1 + block_size) * (r.len() + 1 + block_size)]; + D[0 + 0 * (q.len() + 1 + block_size)] = 0; + //let max = calc_block(q, r, &mut D, &mut R, &mut C, 0, 0, block_size, block_size, block_size, -2, -1); + let mut i = 0usize; + let mut j = 0usize; + let mut dir = 0; + //let mut best_max = max; + let mut best_max = 0; + + loop { + let max = match dir { + 0 => { // right + calc_block(q, r, &mut D, &mut R, &mut C, i, j, block_size, block_size, block_size, -2, -1) + //calc_diag(q, r, &mut D, &mut R, &mut C, i, j, block_size, -2, -1) + }, + _ => { // down + calc_block(q, r, &mut D, &mut R, &mut C, i, j, block_size, block_size, block_size, -2, -1) + //calc_diag(q, r, &mut D, &mut R, &mut C, i, j, block_size, -2, -1) + } + }; + + //let max = block_max(&D, q.len() + 1 + block_size, i + block_size / 2 - 1, j + block_size / 2, 1, 1); + let right_max = block_sum(&D, q.len() + 1 + block_size, i, j + block_size - 1, 1, step); + let down_max = block_sum(&D, q.len() + 1 + block_size, i + block_size - 1, j, step, 1); + best_max = cmp::max(best_max, max); + + if max < best_max - x_drop { + return 
best_max; + } + + if i + block_size > q.len() && j + block_size > r.len() { + return best_max; + } + + if j + block_size > r.len() { + i += step; + dir = 1; + continue; + } + if i + block_size > q.len() { + j += step; + dir = 0; + continue; + } + if down_max > right_max { + i += step; + dir = 1; + } else { + j += step; + dir = 0; + } + } +} + +#[allow(dead_code)] +#[allow(non_snake_case)] +fn block_max(D: &[i32], col_len: usize, start_i: usize, start_j: usize, block_width: usize, block_height: usize) -> i32 { + let mut max = i32::MIN; + for i in start_i..start_i + block_height { + for j in start_j..start_j + block_width { + max = cmp::max(max, D[i + j * col_len]); + } + } + max +} + +#[allow(dead_code)] +#[allow(non_snake_case)] +fn block_sum(D: &[i32], col_len: usize, start_i: usize, start_j: usize, block_width: usize, block_height: usize) -> i32 { + let mut sum = 0; + for i in start_i..start_i + block_height { + for j in start_j..start_j + block_width { + sum += D[i + j * col_len]; + } + } + sum +} + +#[allow(dead_code)] +#[allow(non_snake_case)] +fn calc_diag(q: &[u8], r: &[u8], D: &mut [i32], R: &mut [i32], C: &mut [i32], start_i: usize, start_j: usize, block_size: usize, gap_open: i32, gap_extend: i32) -> i32 { + let idx = |i: usize, j: usize| { i + j * (q.len() + 1 + block_size) }; + let mut max = i32::MIN; + + for off in 0..block_size { + let i = start_i + block_size - 1 - off; + let j = start_j + off; + + if D[idx(i, j)] != i32::MIN { + max = cmp::max(max, D[idx(i, j)]); + continue; + } + + R[idx(i, j)] = if i == 0 { i32::MIN } else { cmp::max( + R[idx(i - 1, j)].saturating_add(gap_extend), + D[idx(i - 1, j)].saturating_add(gap_open) + ) }; + C[idx(i, j)] = if j == 0 { i32::MIN } else { cmp::max( + C[idx(i, j - 1)].saturating_add(gap_extend), + D[idx(i, j - 1)].saturating_add(gap_open) + ) }; + D[idx(i, j)] = cmp::max( + if i == 0 || j == 0 || i > q.len() || j > r.len() { i32::MIN } else { + D[idx(i - 1, j - 1)].saturating_add(if q[i - 1] == r[j - 1] { 1 } 
else { -1 }) + }, + cmp::max(R[idx(i, j)], C[idx(i, j)]) + ); + max = cmp::max(max, D[idx(i, j)]); + } + + max +} + +#[allow(dead_code)] +#[allow(non_snake_case)] +fn calc_block(q: &[u8], r: &[u8], D: &mut [i32], R: &mut [i32], C: &mut [i32], start_i: usize, start_j: usize, block_width: usize, block_height: usize, block_size: usize, gap_open: i32, gap_extend: i32) -> i32 { + let idx = |i: usize, j: usize| { i + j * (q.len() + 1 + block_size) }; + let mut max = i32::MIN; + + for i in start_i..start_i + block_height { + for j in start_j..start_j + block_width { + if D[idx(i, j)] != i32::MIN { + continue; + } + + R[idx(i, j)] = if i == 0 { i32::MIN } else { cmp::max( + R[idx(i - 1, j)].saturating_add(gap_extend), + D[idx(i - 1, j)].saturating_add(gap_open) + ) }; + C[idx(i, j)] = if j == 0 { i32::MIN } else { cmp::max( + C[idx(i, j - 1)].saturating_add(gap_extend), + D[idx(i, j - 1)].saturating_add(gap_open) + ) }; + D[idx(i, j)] = cmp::max( + if i == 0 || j == 0 || i > q.len() || j > r.len() { i32::MIN } else { + D[idx(i - 1, j - 1)].saturating_add(if q[i - 1] == r[j - 1] { 1 } else { -1 }) + }, + cmp::max(R[idx(i, j)], C[idx(i, j)]) + ); + max = cmp::max(max, D[idx(i, j)]); + } + } + + max +} diff --git a/lib/block-aligner/examples/debug.rs b/lib/block-aligner/examples/debug.rs new file mode 100644 index 000000000..c5e0b2437 --- /dev/null +++ b/lib/block-aligner/examples/debug.rs @@ -0,0 +1,46 @@ +use bio::alignment::pairwise::*; +use bio::scores::blosum62; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use block_aligner::cigar::*; + +use std::{env, str}; + +fn main() { + let mut args = env::args().skip(1); + let mut q = args.next().unwrap(); + q.make_ascii_uppercase(); + let q = q.as_bytes().to_owned(); + let mut r = args.next().unwrap(); + r.make_ascii_uppercase(); + let r = r.as_bytes().to_owned(); + let r_padded = PaddedBytes::from_bytes::(&r, 2048); + let q_padded = PaddedBytes::from_bytes::(&q, 2048); + let run_gaps = Gaps { open: -11, 
extend: -1 }; + + let mut bio_aligner = Aligner::with_capacity(q.len(), r.len(), -10, -1, &blosum62); + let bio_alignment = bio_aligner.global(&q, &r); + let bio_score = bio_alignment.score; + + let mut block_aligner = Block::::new(q.len(), r.len(), 256); + block_aligner.align(&q_padded, &r_padded, &BLOSUM62, run_gaps, 32..=256, 0); + let scan_score = block_aligner.res().score; + let mut scan_cigar = Cigar::new(q.len(), r.len()); + block_aligner.trace().cigar(q.len(), r.len(), &mut scan_cigar); + let (a, b) = scan_cigar.format(&q, &r); + + println!( + "bio: {}\nours: {}\nq (len = {}): {}\nr (len = {}): {}\nour trace: {}\nour pretty:\n{}\n{}\nbio pretty:\n{}", + bio_score, + scan_score, + q.len(), + str::from_utf8(&q).unwrap(), + r.len(), + str::from_utf8(&r).unwrap(), + scan_cigar, + a, + b, + bio_alignment.pretty(&q, &r, 1_000_000_000) + ); +} diff --git a/lib/block-aligner/examples/nanopore_accuracy.rs b/lib/block-aligner/examples/nanopore_accuracy.rs new file mode 100644 index 000000000..a21250dc9 --- /dev/null +++ b/lib/block-aligner/examples/nanopore_accuracy.rs @@ -0,0 +1,150 @@ +#[cfg(not(feature = "simd_avx2"))] +fn main() {} + +#[cfg(feature = "simd_avx2")] +fn test(file_name: &str, max_size: usize, name: &str, verbose: bool, writer: &mut impl std::io::Write) -> (usize, usize, usize, f64, usize, f64) { + use parasailors::{Matrix, *}; + + use rust_wfa2::aligner::*; + + use bio::alignment::distance::simd::levenshtein; + + use block_aligner::percent_len; + use block_aligner::scan_block::*; + use block_aligner::scores::*; + + use std::fs::File; + use std::io::{BufRead, BufReader}; + + let mut wrong = 0usize; + let mut min_size_wrong = 0usize; + let mut wfa_wrong = 0usize; + let mut wrong_avg = 0f64; + let mut count = 0usize; + let mut seq_id_avg = 0f64; + let reader = BufReader::new(File::open(file_name).unwrap()); + let all_lines = reader.lines().collect::>(); + + for lines in all_lines.chunks(2) { + let r = lines[0].as_ref().unwrap().to_ascii_uppercase(); + 
let q = lines[1].as_ref().unwrap().to_ascii_uppercase(); + + let correct_score; + + if r.len().max(q.len()) < 15000 { + // parasail + let matrix = Matrix::create("ACGNT", 2, -4); + let profile = parasailors::Profile::new(q.as_bytes(), &matrix); + let parasail_score = global_alignment_score(&profile, r.as_bytes(), 6, 2); + correct_score = parasail_score; + } else { + // parasail is not accurate enough, so use block aligner with large fixed block size + let len = 8192; + let r_padded = PaddedBytes::from_bytes::(r.as_bytes(), len); + let q_padded = PaddedBytes::from_bytes::(q.as_bytes(), len); + let run_gaps = Gaps { open: -6, extend: -2 }; + let matrix = NucMatrix::new_simple(2, -4); + let mut block_aligner = Block::::new(q.len(), r.len(), len); + block_aligner.align(&q_padded, &r_padded, &matrix, run_gaps, len..=len, 0); + let scan_score = block_aligner.res().score; + correct_score = scan_score; + } + + let r_padded = PaddedBytes::from_bytes::(r.as_bytes(), max_size); + let q_padded = PaddedBytes::from_bytes::(q.as_bytes(), max_size); + let run_gaps = Gaps { open: -6, extend: -2 }; + let matrix = NucMatrix::new_simple(2, -4); + + // ours + let mut block_aligner = Block::::new(q.len(), r.len(), max_size); + let max_len = q.len().max(r.len()); + block_aligner.align(&q_padded, &r_padded, &matrix, run_gaps, percent_len(max_len, 0.01)..=percent_len(max_len, 0.1), 0); + let scan_score = block_aligner.res().score; + + write!( + writer, + "{}, {}, {}, {}, {}\n", + name, + q.len(), + r.len(), + scan_score, + correct_score + ).unwrap(); + + if correct_score != scan_score { + wrong += 1; + wrong_avg += ((correct_score - scan_score) as f64) / (correct_score as f64); + + if verbose { + let edit_dist = levenshtein(q.as_bytes(), r.as_bytes()); + println!( + "parasail: {}, ours: {}, edit dist: {}\nq (len = {}): {}\nr (len = {}): {}", + correct_score, + scan_score, + edit_dist, + q.len(), + q, + r.len(), + r + ); + } + } + + block_aligner.align(&q_padded, &r_padded, &matrix, 
run_gaps, percent_len(max_len, 0.01)..=percent_len(max_len, 0.01), 0); + let min_size_score = block_aligner.res().score; + if min_size_score != correct_score { + min_size_wrong += 1; + } + + let wfa_adaptive_score = { + let mut wfa = WFAlignerGapAffine::new(4, 4, 2, AlignmentScope::Score, MemoryModel::MemoryHigh); + wfa.set_heuristic(Heuristic::WFadaptive(10, 50, 1)); + wfa.align_end_to_end(q.as_bytes(), r.as_bytes()); + wfa.score() + }; + let wfa_score = { + let mut wfa = WFAlignerGapAffine::new(4, 4, 2, AlignmentScope::Alignment, MemoryModel::MemoryHigh); + wfa.set_heuristic(Heuristic::None); + wfa.align_end_to_end(q.as_bytes(), r.as_bytes()); + let cigar = wfa.cigar(); + let matches = cigar.bytes().filter(|&c| c == b'M').count(); + let seq_id = (matches as f64) / (cigar.len() as f64); + seq_id_avg += seq_id; + wfa.score() + }; + if wfa_adaptive_score != wfa_score { + wfa_wrong += 1; + } + + count += 1; + } + + (wrong, min_size_wrong, wfa_wrong, wrong_avg / (wrong as f64), count, seq_id_avg / (count as f64)) +} + +#[cfg(feature = "simd_avx2")] +fn main() { + use std::env; + use std::fs::File; + use std::io::{Write, BufWriter}; + + let arg1 = env::args().skip(1).next(); + let verbose = arg1.is_some() && arg1.unwrap() == "-v"; + let paths = ["data/real.illumina.b10M.txt", "data/real.ont.b10M.txt", "data/seq_pairs.10kbps.5000.txt", "data/seq_pairs.50kbps.10000.txt"]; + let names = ["illumina", "nanopore 1kbp", "nanopore <10kbp", "nanopore <50kbp"]; + let max_size = [32, 128, 1024, 8192]; + + let out_file_name = "data/nanopore_accuracy.csv"; + let mut writer = BufWriter::new(File::create(out_file_name).unwrap()); + write!(writer, "dataset, query len, reference len, pred score, true score\n").unwrap(); + + println!("\ndataset, total, wrong, wrong % error, min size wrong, wfa wrong"); + + for ((path, name), &max_size) in paths.iter().zip(&names).zip(&max_size) { + let (wrong, min_size_wrong, wfa_wrong, wrong_avg, count, seq_id_avg) = test(path, max_size, name, verbose, 
&mut writer); + println!("\n{}, {}, {}, {}, {}, {}", name, count, wrong, wrong_avg, min_size_wrong, wfa_wrong); + println!("# {} seq id avg: {}", name, seq_id_avg); + } + + println!("# Done!"); +} diff --git a/lib/block-aligner/examples/nanopore_bench.rs b/lib/block-aligner/examples/nanopore_bench.rs new file mode 100644 index 000000000..b320d938e --- /dev/null +++ b/lib/block-aligner/examples/nanopore_bench.rs @@ -0,0 +1,166 @@ +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +use parasailors::{Matrix, *}; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; + +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::time::{Instant, Duration}; +use std::hint::black_box; +use std::iter; + +use simulate_seqs::*; + +static FILE_NAME: &str = "data/sequences.txt"; +const ITER: usize = 10000; +const LEN: usize = 10000; +const K: usize = 1000; + +fn get_data(file_name: Option<&str>) -> Vec<(Vec, Vec)> { + let mut rng = StdRng::seed_from_u64(1234); + + if let Some(file_name) = file_name { + let mut res = vec![]; + + let reader = BufReader::new(File::open(file_name).unwrap()); + let all_lines = reader.lines().collect::>(); + + for lines in all_lines.chunks(2) { + let r = lines[0].as_ref().unwrap().to_ascii_uppercase(); + let q = lines[1].as_ref().unwrap().to_ascii_uppercase(); + let mut r = r.as_bytes().to_owned(); + let mut q = q.as_bytes().to_owned(); + let extend_r = rand_str(100, &NUC, &mut rng); + let extend_q = rand_str(100, &NUC, &mut rng); + r.extend_from_slice(&extend_r); + q.extend_from_slice(&extend_q); + res.push((q, r)); + } + + res + } else { + let mut r = rand_str(LEN, &NUC, &mut rng); + let mut q = rand_mutate(&r, K, &NUC, &mut rng); + let extend_r = rand_str(500, &NUC, &mut rng); + let extend_q = rand_str(500, &NUC, &mut rng); + r.extend_from_slice(&extend_r); + q.extend_from_slice(&extend_q); + black_box(iter::repeat_with(|| (q.clone(), r.clone())).take(ITER).collect()) + } +} + 
+#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +#[allow(dead_code)] +fn bench_parasailors_nuc_core(file: bool, _trace: bool, _max_size: usize) -> (i32, Duration) { + let file_data = get_data(if file { Some(&FILE_NAME) } else { None }); + let matrix = Matrix::new(MatrixType::IdentityWithPenalty); + let data = file_data + .iter() + .map(|(q, r)| (parasailors::Profile::new(q, &matrix), r.to_owned())) + .collect::)>>(); + + let start = Instant::now(); + let mut temp = 0i32; + for (p, r) in &data { + temp = temp.wrapping_add(global_alignment_score(p, r, 2, 1)); + } + (temp, start.elapsed()) +} + +fn bench_scan_nuc_core(_file: bool, trace: bool, max_size: usize) -> (i32, Duration) { + let file_data = get_data(None); + let x_drop = 100; + let matrix = NucMatrix::new_simple(2, -3); + let data = file_data + .iter() + .map(|(q, r)| (PaddedBytes::from_bytes::(q, 2048), PaddedBytes::from_bytes::(r, 2048))) + .collect::>(); + let bench_gaps = Gaps { open: -5, extend: -1 }; + + let start = Instant::now(); + let mut temp = 0i32; + for (q, r) in &data { + if trace { + let mut a = Block::::new(q.len(), r.len(), max_size); + a.align(&q, &r, &matrix, bench_gaps, 32..=max_size, x_drop); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + } else { + let mut a = Block::::new(q.len(), r.len(), max_size); + a.align(&q, &r, &matrix, bench_gaps, 32..=max_size, x_drop); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + } + } + (temp, start.elapsed()) +} + +fn bench_scan_nuc_file(_file: bool, trace: bool, max_size: usize) -> (i32, Duration) { + let file_data = get_data(Some(&FILE_NAME)); + let x_drop = 50; + let data = file_data + .iter() + .map(|(q, r)| (PaddedBytes::from_bytes::(q, 2048), PaddedBytes::from_bytes::(r, 2048))) + .collect::>(); + let bench_gaps = Gaps { open: -2, extend: -1 }; + + let start = Instant::now(); + let mut temp = 0i32; + for (q, r) in &data { + if trace { + let mut a = Block::::new(q.len(), 
r.len(), max_size); + a.align(&q, &r, &NW1, bench_gaps, 32..=max_size, x_drop); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + } else { + let mut a = Block::::new(q.len(), r.len(), max_size); + a.align(&q, &r, &NW1, bench_gaps, 32..=max_size, x_drop); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + } + } + (temp, start.elapsed()) +} + +fn time(f: fn(bool, bool, usize) -> (i32, Duration), file: bool, trace: bool, max_size: usize) -> Duration { + let (temp, duration) = f(file, trace, max_size); + black_box(temp); + duration +} + +fn main() { + for _i in 0..3 { + let _d = time(bench_scan_nuc_file, true, false, 32); + } + + println!("# time (s)"); + println!("algorithm, dataset, time"); + + let d = time(bench_scan_nuc_file, true, false, 32); + let nanopore_time = d.as_secs_f64(); + println!("ours (no trace 32-32), nanopore 25kbp, {}", nanopore_time); + let d = time(bench_scan_nuc_core, false, false, 32); + let random_time = d.as_secs_f64(); + println!("ours (no trace 32-32), random, {}", random_time); + + let d = time(bench_scan_nuc_file, true, true, 32); + let nanopore_time = d.as_secs_f64(); + println!("ours (trace 32-32), nanopore 25kbp, {}", nanopore_time); + let d = time(bench_scan_nuc_core, false, true, 32); + let random_time = d.as_secs_f64(); + println!("ours (trace 32-32), random, {}", random_time); + + let d = time(bench_scan_nuc_file, true, true, 64); + let nanopore_time = d.as_secs_f64(); + println!("ours (trace 32-64), nanopore 25kbp, {}", nanopore_time); + let d = time(bench_scan_nuc_core, false, true, 64); + let random_time = d.as_secs_f64(); + println!("ours (trace 32-64), random, {}", random_time); + + /*#[cfg(not(target_arch = "wasm32"))] + { + let d = time(bench_parasailors_nuc_core, true); + let nanopore_time = d.as_secs_f64(); + println!("parasail, nanopore 25kbp, {}", nanopore_time); + let d = time(bench_parasailors_nuc_core, false); + let random_time = d.as_secs_f64(); + println!("parasail, random, 
{}", random_time); + }*/ +} diff --git a/lib/block-aligner/examples/nanopore_bench_global.rs b/lib/block-aligner/examples/nanopore_bench_global.rs new file mode 100644 index 000000000..26fc477b2 --- /dev/null +++ b/lib/block-aligner/examples/nanopore_bench_global.rs @@ -0,0 +1,217 @@ +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +use parasailors::{Matrix, *}; + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +use rust_wfa2::aligner::*; + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +use edlib_rs::edlibrs::*; + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +use ksw2_sys::*; + +use block_aligner::percent_len; +use block_aligner::scan_block::*; +use block_aligner::scores::*; + +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::time::Instant; +use std::hint::black_box; + +fn get_data(file_name: &str) -> Vec<(Vec, Vec)> { + let mut res = vec![]; + + let reader = BufReader::new(File::open(file_name).unwrap()); + let all_lines = reader.lines().collect::>(); + + for lines in all_lines.chunks(2) { + let r = lines[0].as_ref().unwrap().to_ascii_uppercase(); + let q = lines[1].as_ref().unwrap().to_ascii_uppercase(); + let r = r.as_bytes().to_owned(); + let q = q.as_bytes().to_owned(); + res.push((q, r)); + } + + res +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +fn bench_parasailors(file: &str) -> f64 { + let file_data = get_data(file); + let matrix = Matrix::create("ACGNT", 2, -4); + let data = file_data + .iter() + .map(|(q, r)| (parasailors::Profile::new(q, &matrix), r.to_owned())) + .collect::)>>(); + + let mut total_time = 0f64; + let mut temp = 0i32; + for (p, r) in &data { + let start = Instant::now(); + let res = global_alignment_score(p, r, 6, 2); + total_time += start.elapsed().as_secs_f64(); + temp = temp.wrapping_add(res); + } + black_box(temp); + 
total_time +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +fn bench_wfa2(file: &str, use_heuristic: bool) -> f64 { + let data = get_data(file); + + let mut total_time = 0f64; + let mut temp = 0i32; + for (q, r) in &data { + let mut wfa = WFAlignerGapAffine::new(4, 4, 2, AlignmentScope::Score, MemoryModel::MemoryHigh); + if use_heuristic { + wfa.set_heuristic(Heuristic::WFadaptive(10, 50, 1)); + } else { + wfa.set_heuristic(Heuristic::None); + } + let start = Instant::now(); + wfa.align_end_to_end(&q, &r); + total_time += start.elapsed().as_secs_f64(); + temp = temp.wrapping_add(wfa.score()); + } + black_box(temp); + total_time +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +fn bench_edlib(file: &str) -> f64 { + let data = get_data(file); + + let mut total_time = 0f64; + let mut temp = 0i32; + for (q, r) in &data { + let start = Instant::now(); + let res = edlibAlignRs(&q, &r, &EdlibAlignConfigRs::default()); + total_time += start.elapsed().as_secs_f64(); + temp = temp.wrapping_add(res.editDistance); + } + black_box(temp); + total_time +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +fn bench_ksw2(file: &str, band_width_percent: f32) -> f64 { + let lut = { + let mut l = [0u8; 128]; + l[b'A' as usize] = 0; + l[b'C' as usize] = 1; + l[b'G' as usize] = 2; + l[b'T' as usize] = 3; + l[b'N' as usize] = 4; + l + }; + let file_data = get_data(file); + let data = file_data + .iter() + .map(|(q, r)| (q.iter().map(|&c| lut[c as usize]).collect(), r.iter().map(|&c| lut[c as usize]).collect())) + .collect::, Vec)>>(); + let matrix = { + let mut m = [0i8; 5 * 5]; + m[0] = 2; + m[1] = -4; + m + }; + let mut res: ksw_extz_t = unsafe { std::mem::zeroed() }; + + let mut total_time = 0f64; + let mut temp = 0i32; + for (q, r) in &data { + let band_width = percent_len(q.len().max(r.len()), band_width_percent) as i32; + let start = Instant::now(); + unsafe { + 
ksw_extz2_sse(std::ptr::null_mut(), q.len() as i32, q.as_ptr(), r.len() as i32, r.as_ptr(), 5, matrix.as_ptr(), 4, 2, band_width, -1, 0, 1, &mut res); + } + total_time += start.elapsed().as_secs_f64(); + temp = temp.wrapping_add(res.score); + } + black_box(temp); + total_time +} + +fn bench_ours(file: &str, trace: bool, max_size: usize, block_grow: bool) -> f64 { + let file_data = get_data(file); + let data = file_data + .iter() + .map(|(q, r)| (PaddedBytes::from_bytes::(q, max_size), PaddedBytes::from_bytes::(r, max_size))) + .collect::>(); + let bench_gaps = Gaps { open: -6, extend: -2 }; + let matrix = NucMatrix::new_simple(2, -4); + + let mut total_time = 0f64; + let mut temp = 0i32; + for (q, r) in &data { + let max_len = q.len().max(r.len()); + let max_percent = if block_grow { 0.1 } else { 0.01 }; + + if trace { + let mut a = Block::::new(q.len(), r.len(), max_size); + let start = Instant::now(); + a.align(&q, &r, &matrix, bench_gaps, percent_len(max_len, 0.01)..=percent_len(max_len, max_percent), 0); + total_time += start.elapsed().as_secs_f64(); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + } else { + let mut a = Block::::new(q.len(), r.len(), max_size); + let start = Instant::now(); + a.align(&q, &r, &matrix, bench_gaps, percent_len(max_len, 0.01)..=percent_len(max_len, max_percent), 0); + total_time += start.elapsed().as_secs_f64(); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + } + } + black_box(temp); + total_time +} + +fn main() { + let files = ["data/real.illumina.b10M.txt", "data/real.ont.b10M.txt", "data/seq_pairs.10kbps.5000.txt", "data/seq_pairs.50kbps.10000.txt"]; + let names = ["illumina", "nanopore 1kbp", "nanopore <10kbp", "nanopore <50kbp"]; + let max_sizes = [[32, 32], [32, 128], [128, 1024], [512, 8192]]; + let band_widths = [0.01, 0.1]; + let run_parasail_arr = [true, true, true, false]; + + println!("# time (s)"); + println!("dataset, algorithm, time"); + + for (((file, name), max_size), 
&run_parasail) in files.iter().zip(&names).zip(&max_sizes).zip(&run_parasail_arr) { + for (&s, &g) in max_size.iter().zip(&[false, true]) { + let t = bench_ours(file, false, s, g); + println!("{}, ours ({}), {}", name, if g { "1%-10%" } else { "1%-1%" }, t); + } + + #[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] + { + let t = bench_edlib(file); + println!("{}, edlib, {}", name, t); + } + + #[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] + { + for &b in &band_widths { + let t = bench_ksw2(file, b); + println!("{}, ksw_extz2_sse ({}%), {}", name, (b * 100.0).round() as usize, t); + } + } + + #[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] + { + let t = bench_wfa2(file, false); + println!("{}, wfa2, {}", name, t); + + let t = bench_wfa2(file, true); + println!("{}, wfa2 adaptive, {}", name, t); + } + + if run_parasail { + #[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] + { + let t = bench_parasailors(file); + println!("{}, parasail, {}", name, t); + } + } + } +} diff --git a/lib/block-aligner/examples/profile.rs b/lib/block-aligner/examples/profile.rs new file mode 100644 index 000000000..c53c8df68 --- /dev/null +++ b/lib/block-aligner/examples/profile.rs @@ -0,0 +1,24 @@ +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use simulate_seqs::*; + +use std::hint::black_box; + +fn run(len: usize, k: usize) { + let mut rng = StdRng::seed_from_u64(1234); + let r = rand_str(len, &AMINO_ACIDS, &mut rng); + let q = rand_mutate(&r, k, &AMINO_ACIDS, &mut rng); + let r = PaddedBytes::from_bytes::(&r, 2048); + let q = PaddedBytes::from_bytes::(&q, 2048); + let run_gaps = Gaps { open: -11, extend: -1 }; + let mut a = Block::::new(q.len(), r.len(), 32); + + for _i in 0..10000 { + a.align(&q, &r, &BLOSUM62, run_gaps, 32..=32, 1000); + black_box(a.res()); + } +} + +fn main() { + run(10000, 1000); +} diff --git 
a/lib/block-aligner/examples/pssm_accuracy.rs b/lib/block-aligner/examples/pssm_accuracy.rs new file mode 100644 index 000000000..2871c8221 --- /dev/null +++ b/lib/block-aligner/examples/pssm_accuracy.rs @@ -0,0 +1,104 @@ +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use block_aligner::cigar::*; + +use std::fs::File; +use std::io::{BufRead, Write, BufReader, BufWriter}; +use std::usize; + +fn run_ours(r: &AAProfile, q: &PaddedBytes, min_size: &[usize], max_size: &[usize]) -> Vec { + let mut scores = Vec::new(); + + for i in 0..min_size.len() { + let mut block_aligner = Block::::new(q.len(), r.len(), max_size[i]); + block_aligner.align_profile(q, r, min_size[i]..=max_size[i], 0); + let score = block_aligner.res().score; + let mut cigar = Cigar::new(q.len(), r.len()); + block_aligner.trace().cigar(q.len(), r.len(), &mut cigar); + //println!("{} {}", score, cigar); + scores.push(score); + } + + scores +} + +static MAP: [u8; 20] = *b"ACDEFGHIKLMNPQRSTVWY"; + +fn test(file_name: &str, out_file_name: &str, min_size: &[usize], max_size: &[usize], padding: usize, gap_open: i8, gap_extend: i8) { + let mut reader = BufReader::new(File::open(file_name).unwrap()); + let mut writer = BufWriter::new(File::create(out_file_name).unwrap()); + let mut seq_string = String::new(); + let mut pssm_string = String::new(); + let mut matches = vec![0usize; min_size.len()]; + + println!("size, correct"); + write!(writer, "size, seq len, profile len, pred score, true score\n").unwrap(); + + loop { + seq_string.clear(); + let len = reader.read_line(&mut seq_string).unwrap(); + if len == 0 { + break; + } + let seq = seq_string.trim_end(); + pssm_string.clear(); + reader.read_line(&mut pssm_string).unwrap(); + let pssm = pssm_string.trim_end(); + let len = pssm.len() - 1; + let mut r = AAProfile::new(len, padding, gap_extend); + let q = PaddedBytes::from_str::(&seq[1..], padding); + + for i in 0..len + 1 { + pssm_string.clear(); + reader.read_line(&mut 
pssm_string).unwrap(); + let pssm = pssm_string.trim_end(); + if i == 0 { + continue; + } + + for (j, s) in pssm.split_whitespace().skip(2).enumerate() { + let c = MAP[j]; + let s = s.parse::().unwrap(); + r.set(i, c, s); + } + + r.set_gap_open_C(i, gap_open); + r.set_gap_close_C(i, 0); + r.set_gap_open_R(i, gap_open); + } + + let scores = run_ours(&r, &q, min_size, max_size); + for i in 0..matches.len() { + write!( + writer, + "{}-{}, {}, {}, {}, {}\n", + min_size[i], + max_size[i], + q.len(), + r.len(), + scores[i], + scores[scores.len() - 1] + ).unwrap(); + matches[i] += if scores[i] == scores[scores.len() - 1] { 1 } else { 0 }; + } + } + + for (i, m) in matches.iter().enumerate() { + println!("{}-{}, {}", min_size[i], max_size[i], m); + } + + println!("# compared to {}-{}", min_size[min_size.len() - 1], max_size[max_size.len() - 1]); +} + +fn main() { + let file_name = "data/scop/pairs.pssm"; + let out_file_name = "data/pssm_accuracy.csv"; + let min_sizes = [32, 32, 32, 128, 2048]; + let max_sizes = [32, 64, 128, 128, 2048]; + let gap_open = -10; + let gap_extend = -1; + + test(file_name, out_file_name, &min_sizes, &max_sizes, 2048, gap_open, gap_extend); + + println!("# Done!"); +} diff --git a/lib/block-aligner/examples/pssm_bench.rs b/lib/block-aligner/examples/pssm_bench.rs new file mode 100644 index 000000000..28fdae9c9 --- /dev/null +++ b/lib/block-aligner/examples/pssm_bench.rs @@ -0,0 +1,118 @@ +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +use parasailors; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use block_aligner::cigar::*; + +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::usize; +use std::time::{Instant, Duration}; +use std::hint::black_box; + +fn bench_ours(pairs: &[(AAProfile, PaddedBytes)], min_size: usize, max_size: usize) -> Duration { + let start = Instant::now(); + + for (r, q) in pairs { + let mut block_aligner = Block::::new(q.len(), r.len(), max_size); + 
block_aligner.align_profile(q, r, min_size..=max_size, 0); + let mut cigar = Cigar::new(q.len(), r.len()); + block_aligner.trace().cigar(q.len(), r.len(), &mut cigar); + black_box(block_aligner.res()); + black_box(cigar); + } + + start.elapsed() +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +fn bench_parasail(pairs: &[(Vec, Vec)], gap_open: i8, gap_extend: i8) -> Duration { + let matrix = parasailors::Matrix::new(parasailors::MatrixType::Blosum62); + + let start = Instant::now(); + + for (r, q) in pairs { + let p = parasailors::Profile::new(&q, &matrix); + black_box(parasailors::global_alignment_score(&p, r, -(gap_open + gap_extend) as i32, -gap_extend as i32)); + } + + start.elapsed() +} + +static MAP: [u8; 20] = *b"ACDEFGHIKLMNPQRSTVWY"; + +fn get_pairs(file_name: &str, padding: usize, gap_open: i8, gap_extend: i8) -> (Vec<(AAProfile, PaddedBytes)>, Vec<(Vec, Vec)>) { + let mut reader = BufReader::new(File::open(file_name).unwrap()); + let mut seq_string = String::new(); + let mut pssm_string = String::new(); + let mut pssm_pairs = Vec::new(); + let mut cns_pairs = Vec::new(); + + loop { + seq_string.clear(); + let len = reader.read_line(&mut seq_string).unwrap(); + if len == 0 { + break; + } + let seq = seq_string.trim_end(); + pssm_string.clear(); + reader.read_line(&mut pssm_string).unwrap(); + let pssm = pssm_string.trim_end(); + let len = pssm.len() - 1; + let cns = pssm[1..].as_bytes().to_owned(); + let mut r = AAProfile::new(len, padding, gap_extend); + let q = PaddedBytes::from_str::(&seq[1..], padding); + + for i in 0..len + 1 { + pssm_string.clear(); + reader.read_line(&mut pssm_string).unwrap(); + let pssm = pssm_string.trim_end(); + if i == 0 { + continue; + } + + for (j, s) in pssm.split_whitespace().skip(2).enumerate() { + let c = MAP[j]; + let s = s.parse::().unwrap(); + r.set(i, c, s); + } + + r.set_gap_open_C(i, gap_open); + r.set_gap_close_C(i, 0); + r.set_gap_open_R(i, gap_open); + } + + 
pssm_pairs.push((r, q)); + cns_pairs.push((cns, seq[1..].as_bytes().to_owned())); + } + + (pssm_pairs, cns_pairs) +} + +fn main() { + let file_name = "data/scop/pairs.pssm"; + let min_sizes = [32, 32, 32, 128]; + let max_sizes = [32, 64, 128, 128]; + let gap_open = -10; + let gap_extend = -1; + + let (pssm_pairs, cns_pairs) = get_pairs(file_name, 2048, gap_open, gap_extend); + + println!("size, time"); + + bench_ours(&pssm_pairs, min_sizes[0], max_sizes[0]); + + for (&min_size, &max_size) in min_sizes.iter().zip(&max_sizes) { + let duration = bench_ours(&pssm_pairs, min_size, max_size); + println!("{}-{}, {}", min_size, max_size, duration.as_secs_f64()); + } + + #[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] + { + let duration = bench_parasail(&cns_pairs, gap_open, gap_extend); + println!("parasail, {}", duration.as_secs_f64()); + } + + println!("# Done!"); +} diff --git a/lib/block-aligner/examples/uc_accuracy.rs b/lib/block-aligner/examples/uc_accuracy.rs new file mode 100644 index 000000000..e2c4950db --- /dev/null +++ b/lib/block-aligner/examples/uc_accuracy.rs @@ -0,0 +1,211 @@ +use bio::alignment::pairwise::*; +use bio::alignment::{Alignment, AlignmentOperation}; +use bio::scores::blosum62; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use block_aligner::cigar::*; + +use std::{env, cmp}; +use std::fs::File; +use std::io::{BufRead, Write, BufReader, BufWriter}; +use std::usize; + +fn test(file_name: &str, min_size: usize, max_size: usize, string: &str, verbose: bool, wrong: &mut [usize], wrong_avg: &mut [f64], count: &mut [usize], writer: &mut impl Write) -> (f64, usize, usize, f64) { + let reader = BufReader::new(File::open(file_name).unwrap()); + let mut length_sum = 0f64; + let mut length_min = usize::MAX; + let mut length_max = usize::MIN; + let mut dp_fraction = 0f64; + + for line in reader.lines() { + let line = line.unwrap(); + let mut last_two = line.split_ascii_whitespace().rev().take(2); + 
let r = last_two.next().unwrap().to_ascii_uppercase(); + let q = last_two.next().unwrap().to_ascii_uppercase(); + + // rust-bio + let mut bio_aligner = Aligner::with_capacity(q.len(), r.len(), -10, -1, &blosum62); + let bio_alignment = bio_aligner.global(q.as_bytes(), r.as_bytes()); + let bio_score = bio_alignment.score; + let seq_identity = seq_id(&bio_alignment); + let id_idx = cmp::min((seq_identity * 10.0) as usize, 9); + let indels = indels(&bio_alignment, cmp::max(q.len(), r.len())); + + let r_padded = PaddedBytes::from_bytes::(r.as_bytes(), 2048); + let q_padded = PaddedBytes::from_bytes::(q.as_bytes(), 2048); + let run_gaps = Gaps { open: -11, extend: -1 }; + + // ours + let mut block_aligner = Block::::new(q.len(), r.len(), max_size); + block_aligner.align(&q_padded, &r_padded, &BLOSUM62, run_gaps, min_size..=max_size, 0); + let scan_res = block_aligner.res(); + let scan_score = scan_res.score; + + write!( + writer, + "{}, {}-{}, {}, {}, {}, {}, {}\n", + string, + min_size, + max_size, + q.len(), + r.len(), + seq_identity, + scan_score, + bio_score + ).unwrap(); + + if bio_score != scan_score { + wrong[id_idx] += 1; + wrong_avg[id_idx] += ((bio_score - scan_score) as f64) / (bio_score as f64); + + if verbose { + let mut cigar = Cigar::new(scan_res.query_idx, scan_res.reference_idx); + block_aligner.trace().cigar(scan_res.query_idx, scan_res.reference_idx, &mut cigar); + let (a_pretty, b_pretty) = cigar.format(q.as_bytes(), r.as_bytes()); + println!( + "seq id: {}, max indel len: {}, bio: {}, ours: {}\nq (len = {}): {}\nr (len = {}): {}\nbio pretty:\n{}\nours pretty:\n{}\n{}", + seq_identity, + indels, + bio_score, + scan_score, + q.len(), + q, + r.len(), + r, + bio_alignment.pretty(q.as_bytes(), r.as_bytes(), 1_000_000_000), + a_pretty, + b_pretty + ); + } + } + + count[id_idx] += 1; + length_sum += (q.len() + r.len()) as f64; + length_min = cmp::min(length_min, cmp::min(q.len(), r.len())); + length_max = cmp::max(length_max, cmp::max(q.len(), r.len())); + 
+ let computed = block_aligner.trace().blocks().iter().map(|b| (b.width as f64) * (b.height as f64)).sum::(); + dp_fraction += computed / (((q.len() + 1) as f64) * ((r.len() + 1) as f64)); + } + + (length_sum, length_min, length_max, dp_fraction) +} + +fn indels(a: &Alignment, len: usize) -> f64 { + let mut indels = 0; + + for &op in &a.operations { + if op == AlignmentOperation::Ins + || op == AlignmentOperation::Del { + indels += 1; + } + } + (indels as f64) / (len as f64) +} + +// BLAST sequence identity +fn seq_id(a: &Alignment) -> f64 { + let mut matches = 0; + + for &op in &a.operations { + if op == AlignmentOperation::Match { + matches += 1; + } + } + + (matches as f64) / (a.operations.len() as f64) +} + +fn main() { + let arg1 = env::args().skip(1).next(); + let verbose = arg1.is_some() && arg1.unwrap() == "-v"; + let file_names_arr = [ + /*[ + "data/merged_clu_aln_30_40.m8", + "data/merged_clu_aln_40_50.m8", + "data/merged_clu_aln_50_60.m8", + "data/merged_clu_aln_60_70.m8", + "data/merged_clu_aln_70_80.m8", + "data/merged_clu_aln_80_90.m8", + "data/merged_clu_aln_90_100.m8" + ],*/ + [ + "data/uc30_0.95_30_40.m8", + "data/uc30_0.95_40_50.m8", + "data/uc30_0.95_50_60.m8", + "data/uc30_0.95_60_70.m8", + "data/uc30_0.95_70_80.m8", + "data/uc30_0.95_80_90.m8", + "data/uc30_0.95_90_100.m8" + ], + [ + "data/uc30_30_40.m8", + "data/uc30_40_50.m8", + "data/uc30_50_60.m8", + "data/uc30_60_70.m8", + "data/uc30_70_80.m8", + "data/uc30_80_90.m8", + "data/uc30_90_100.m8" + ] + ]; + let strings = [/*"merged_clu_aln", */"uc30_0.95", "uc30"]; + let min_sizes = [32, 32, 256]; + let max_sizes = [32, 256, 256]; + + let out_file_name = "data/uc_accuracy.csv"; + let mut writer = BufWriter::new(File::create(out_file_name).unwrap()); + write!(writer, "dataset, size, query len, reference len, seq id, pred score, true score\n").unwrap(); + + println!("# seq identity is lower bound (inclusive)"); + println!("dataset, size, seq identity, count, wrong, wrong % error"); + + for 
(file_names, string) in file_names_arr.iter().zip(&strings) { + for (&min_size, &max_size) in min_sizes.iter().zip(&max_sizes) { + let mut wrong = [0usize; 10]; + let mut wrong_avg = [0f64; 10]; + let mut count = [0usize; 10]; + let mut length_avg = 0f64; + let mut length_min = usize::MAX; + let mut length_max = usize::MIN; + let mut dp_fraction = 0f64; + + for file_name in file_names { + let (len_sum, len_min, len_max, dp_fract) = test(file_name, min_size, max_size, string, verbose, &mut wrong, &mut wrong_avg, &mut count, &mut writer); + length_avg += len_sum; + length_min = cmp::min(length_min, len_min); + length_max = cmp::max(length_max, len_max); + dp_fraction += dp_fract; + } + + length_avg /= (count.iter().sum::() * 2) as f64; + dp_fraction /= count.iter().sum::() as f64; + + for i in 0..10 { + println!( + "{}, {}-{}, {}, {}, {}, {}", + string, + min_size, + max_size, + (i as f64) / 10.0, + count[i], + wrong[i], + (wrong_avg[i] as f64) / (wrong[i] as f64) + ); + } + + println!( + "\n# total: {}, wrong: {}, wrong % error: {}, length avg: {}, length min: {}, length max: {}, dp fraction: {}\n", + count.iter().sum::(), + wrong.iter().sum::(), + wrong_avg.iter().sum::() / (wrong.iter().sum::() as f64), + length_avg, + length_min, + length_max, + dp_fraction + ); + } + } + + println!("# Done!"); +} diff --git a/lib/block-aligner/examples/uc_bench.rs b/lib/block-aligner/examples/uc_bench.rs new file mode 100644 index 000000000..e3fa0730f --- /dev/null +++ b/lib/block-aligner/examples/uc_bench.rs @@ -0,0 +1,161 @@ +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +use parasailors::{Matrix, *}; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use block_aligner::cigar::*; + +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::time::{Instant, Duration}; +use std::hint::black_box; + +static FILE_NAMES: [[&str; 7]; 2] = [ + [ + "data/uc30_30_40.m8", + "data/uc30_40_50.m8", + "data/uc30_50_60.m8", + 
"data/uc30_60_70.m8", + "data/uc30_70_80.m8", + "data/uc30_80_90.m8", + "data/uc30_90_100.m8" + ], + /*[ + "data/merged_clu_aln_30_40.m8", + "data/merged_clu_aln_40_50.m8", + "data/merged_clu_aln_50_60.m8", + "data/merged_clu_aln_60_70.m8", + "data/merged_clu_aln_70_80.m8", + "data/merged_clu_aln_80_90.m8", + "data/merged_clu_aln_90_100.m8" + ],*/ + [ + "data/uc30_0.95_30_40.m8", + "data/uc30_0.95_40_50.m8", + "data/uc30_0.95_50_60.m8", + "data/uc30_0.95_60_70.m8", + "data/uc30_0.95_70_80.m8", + "data/uc30_0.95_80_90.m8", + "data/uc30_0.95_90_100.m8" + ] +]; + +fn get_data(file_names: &[&str]) -> Vec<(Vec, Vec)> { + let mut res = vec![]; + + for file_name in file_names { + let reader = BufReader::new(File::open(file_name).unwrap()); + + for line in reader.lines() { + let line = line.unwrap(); + let mut last_two = line.split_ascii_whitespace().rev().take(2); + let r = last_two.next().unwrap().to_ascii_uppercase(); + let q = last_two.next().unwrap().to_ascii_uppercase(); + + res.push((q.as_bytes().to_owned(), r.as_bytes().to_owned())); + } + } + + res +} + +#[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] +fn bench_parasailors_aa_core(idx: usize, _trace: bool, _min_size: usize, _max_size: usize) -> (i32, Duration) { + let file_data = get_data(&FILE_NAMES[idx]); + let matrix = Matrix::new(MatrixType::Blosum62); + let data = file_data + .iter() + .map(|(q, r)| (parasailors::Profile::new(q, &matrix), r.to_owned())) + .collect::)>>(); + + let start = Instant::now(); + let mut temp = 0i32; + for (p, r) in &data { + temp = temp.wrapping_add(global_alignment_score(p, r, 11, 1)); + } + (temp, start.elapsed()) +} + +fn bench_scan_aa_core(idx: usize, trace: bool, min_size: usize, max_size: usize) -> (i32, Duration) { + let file_data = get_data(&FILE_NAMES[idx]); + let data = file_data + .iter() + .map(|(q, r)| (PaddedBytes::from_bytes::(q, 2048), PaddedBytes::from_bytes::(r, 2048))) + .collect::>(); + let bench_gaps = Gaps { open: -11, 
extend: -1 }; + + let start = Instant::now(); + let mut temp = 0i32; + for (q, r) in &data { + if trace { + let mut a = Block::::new(q.len(), r.len(), max_size); + a.align(&q, &r, &BLOSUM62, bench_gaps, min_size..=max_size, 0); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + let mut cigar = Cigar::new(q.len(), r.len()); + a.trace().cigar(q.len(), r.len(), &mut cigar); + temp = temp.wrapping_add(cigar.len() as i32); + } else { + let mut a = Block::::new(q.len(), r.len(), max_size); + a.align(&q, &r, &BLOSUM62, bench_gaps, min_size..=max_size, 0); + temp = temp.wrapping_add(a.res().score); // prevent optimizations + } + } + (temp, start.elapsed()) +} + +fn time(f: fn(usize, bool, usize, usize) -> (i32, Duration), idx: usize, trace: bool, min_size: usize, max_size: usize) -> Duration { + let (temp, duration) = f(idx, trace, min_size, max_size); + black_box(temp); + duration +} + +fn main() { + for _i in 0..2 { + let _d = time(bench_scan_aa_core, 1, false, 32, 32); + } + + println!("# time (s)"); + println!("algorithm, dataset, size, time"); + + let d = time(bench_scan_aa_core, 0, false, 32, 32); + let uc30_time = d.as_secs_f64(); + println!("ours (no trace), uc30, 32-32, {}", uc30_time); + let d = time(bench_scan_aa_core, 1, false, 32, 32); + let uc30_95_time = d.as_secs_f64(); + println!("ours (no trace), uc30 0.95, 32-32, {}", uc30_95_time); + + let d = time(bench_scan_aa_core, 0, false, 32, 256); + let uc30_time = d.as_secs_f64(); + println!("ours (no trace), uc30, 32-256, {}", uc30_time); + /*let d = time(bench_scan_aa_core, 1); + println!("scan merged time (s): {}", d.as_secs_f64());*/ + let d = time(bench_scan_aa_core, 1, false, 32, 256); + let uc30_95_time = d.as_secs_f64(); + println!("ours (no trace), uc30 0.95, 32-256, {}", uc30_95_time); + + let d = time(bench_scan_aa_core, 0, false, 256, 256); + let uc30_time = d.as_secs_f64(); + println!("ours (no trace), uc30, 256-256, {}", uc30_time); + let d = time(bench_scan_aa_core, 1, false, 
256, 256); + let uc30_95_time = d.as_secs_f64(); + println!("ours (no trace), uc30 0.95, 256-256, {}", uc30_95_time); + + let d = time(bench_scan_aa_core, 0, true, 32, 256); + let uc30_time = d.as_secs_f64(); + println!("ours (trace), uc30, 32-256, {}", uc30_time); + let d = time(bench_scan_aa_core, 1, true, 32, 256); + let uc30_95_time = d.as_secs_f64(); + println!("ours (trace), uc30 0.95, 32-256, {}", uc30_95_time); + + #[cfg(not(any(feature = "simd_wasm", feature = "simd_neon", feature = "no_simd")))] + { + let d = time(bench_parasailors_aa_core, 0, false, 0, 0); + let uc30_time = d.as_secs_f64(); + println!("parasail, uc30, full, {}", uc30_time); + /*let d = time(bench_parasailors_aa_core, 1); + println!("parasail merged time (s): {}", d.as_secs_f64());*/ + let d = time(bench_parasailors_aa_core, 1, false, 0, 0); + let uc30_95_time = d.as_secs_f64(); + println!("parasail, uc30 0.95, full, {}", uc30_95_time); + } +} diff --git a/lib/block-aligner/examples/verify_trace.rs b/lib/block-aligner/examples/verify_trace.rs new file mode 100644 index 000000000..85d8fe3e9 --- /dev/null +++ b/lib/block-aligner/examples/verify_trace.rs @@ -0,0 +1,98 @@ +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use block_aligner::cigar::*; +use simulate_seqs::*; + +use std::str; + +fn consistent(i: usize, j: usize, cigar: &Cigar) -> bool { + let mut curr_i = 0; + let mut curr_j = 0; + + for i in 0..cigar.len() { + let op_len = cigar.get(i); + match op_len.op { + Operation::M => { + curr_i += op_len.len; + curr_j += op_len.len; + }, + Operation::I => { + curr_i += op_len.len; + }, + _ => { + curr_j += op_len.len; + } + } + } + + curr_i == i && curr_j == j +} + +fn test(iter: usize, len: usize, k: usize, insert_len: Option) -> usize { + let mut wrong = 0usize; + let mut rng = StdRng::seed_from_u64(1234); + + for _i in 0..iter { + let r = rand_str(len, &AMINO_ACIDS, &mut rng); + let q = match insert_len { + Some(len) => rand_mutate_insert(&r, k, &AMINO_ACIDS, len, &mut 
rng), + None => rand_mutate(&r, k, &AMINO_ACIDS, &mut rng) + }; + + let r_padded = PaddedBytes::from_bytes::(&r, 2048); + let q_padded = PaddedBytes::from_bytes::(&q, 2048); + let run_gaps = Gaps { open: -11, extend: -1 }; + + let mut block_aligner = Block::::new(q.len(), r.len(), 2048); + block_aligner.align(&q_padded, &r_padded, &BLOSUM62, run_gaps, 32..=2048, 0); + let scan_score = block_aligner.res().score; + let mut scan_cigar = Cigar::new(q.len(), r.len()); + block_aligner.trace().cigar(q.len(), r.len(), &mut scan_cigar); + + if !consistent(q.len(), r.len(), &scan_cigar) { + wrong += 1; + + println!( + "score: {}\nq: {}\nr: {}\ncigar: {}", + scan_score, + str::from_utf8(&q).unwrap(), + str::from_utf8(&r).unwrap(), + scan_cigar + ); + } + } + + wrong +} + +fn main() { + let iters = [100, 100, 100]; + let lens = [10, 100, 1000]; + let rcp_ks = [10.0, 5.0, 2.0]; + let inserts = [false, true]; + + let mut total_wrong = 0usize; + let mut total = 0usize; + + for (&len, &iter) in lens.iter().zip(&iters) { + for &rcp_k in &rcp_ks { + for &insert in &inserts { + let insert_len = if insert { Some(len / 10) } else { None }; + let wrong = test(iter, len, ((len as f64) / rcp_k) as usize, insert_len); + println!( + "\nlen: {}, k: {}, insert: {}, iter: {}, wrong: {}\n", + len, + ((len as f64) / rcp_k) as usize, + insert, + iter, + wrong + ); + total_wrong += wrong; + total += iter; + } + } + } + + println!("\ntotal: {}, wrong: {}", total, total_wrong); + println!("Done!"); +} diff --git a/lib/block-aligner/examples/x_drop_accuracy.rs b/lib/block-aligner/examples/x_drop_accuracy.rs new file mode 100644 index 000000000..e53f4d8a1 --- /dev/null +++ b/lib/block-aligner/examples/x_drop_accuracy.rs @@ -0,0 +1,160 @@ +use bio::scores::blosum62; + +use block_aligner::scan_block::*; +use block_aligner::scores::*; +use simulate_seqs::*; + +use std::{env, str, cmp}; + +fn test(iter: usize, len: usize, k: usize, verbose: bool) -> (usize, f64, i32, i32, usize) { + let mut wrong = 
0usize; + let mut wrong_avg = 0f64; + let mut wrong_min = i32::MAX; + let mut wrong_max = i32::MIN; + let mut diff_idx = 0usize; + let mut rng = StdRng::seed_from_u64(1234); + + for _i in 0..iter { + let mut r = rand_str(len, &AMINO_ACIDS, &mut rng); + let q = rand_mutate_suffix(&mut r, k, &AMINO_ACIDS, 500, &mut rng); + + let r_padded = PaddedBytes::from_bytes::(&r, 2048); + let q_padded = PaddedBytes::from_bytes::(&q, 2048); + let run_gaps = Gaps { open: -11, extend: -1 }; + + let slow_res = slow_align(&q, &r, 50); + + let mut block_aligner = Block::::new(q.len(), r.len(), 64); + block_aligner.align(&q_padded, &r_padded, &BLOSUM62, run_gaps, 32..=64, 50); + let scan_res = block_aligner.res(); + + if slow_res.0 != scan_res.score { + wrong += 1; + let score_diff = slow_res.0 - scan_res.score; + wrong_avg += (score_diff as f64) / (slow_res.0 as f64); + wrong_min = cmp::min(wrong_min, score_diff); + wrong_max = cmp::max(wrong_max, score_diff); + + if verbose { + println!( + "slow: (score: {}, i: {}, j: {}),\nours: (score: {}, i: {}, j: {})\nq: {}\nr: {}", + slow_res.0, + slow_res.1, + slow_res.2, + scan_res.score, + scan_res.query_idx, + scan_res.reference_idx, + str::from_utf8(&q).unwrap(), + str::from_utf8(&r).unwrap() + ); + } + } + + if slow_res.1 != scan_res.query_idx || slow_res.2 != scan_res.reference_idx { + diff_idx += 1; + + if verbose { + println!( + "slow: (i: {}, j: {}),\nours: (i: {}, j: {})\nq: {}\nr: {}", + slow_res.1, + slow_res.2, + scan_res.query_idx, + scan_res.reference_idx, + str::from_utf8(&q).unwrap(), + str::from_utf8(&r).unwrap() + ); + } + } + } + + (wrong, wrong_avg / (wrong as f64), wrong_min, wrong_max, diff_idx) +} + +fn main() { + let arg1 = env::args().skip(1).next(); + let verbose = arg1.is_some() && arg1.unwrap() == "-v"; + let iters = [100, 100, 100]; + let lens = [10, 100, 1000]; + let rcp_ks = [10.0, 5.0, 2.0]; + + let mut total_wrong = 0usize; + let mut total = 0usize; + let mut total_diff_idx = 0usize; + + for (&len, &iter) in 
lens.iter().zip(&iters) { + for &rcp_k in &rcp_ks { + let (wrong, wrong_avg, wrong_min, wrong_max, diff_idx) = test(iter, len, ((len as f64) / rcp_k) as usize, verbose); + println!( + "\nlen: {}, k: {}, iter: {}, wrong: {}, wrong % error: {}, wrong min: {}, wrong max: {}, diff idx: {}\n", + len, + ((len as f64) / rcp_k) as usize, + iter, + wrong, + wrong_avg, + wrong_min, + wrong_max, + diff_idx + ); + total_wrong += wrong; + total += iter; + total_diff_idx += diff_idx; + } + } + + println!("\ntotal: {}, wrong: {}, diff idx: {}", total, total_wrong, total_diff_idx); + println!("Done!"); +} + +#[allow(non_snake_case)] +fn slow_align(q: &[u8], r: &[u8], x_drop: i32) -> (i32, usize, usize) { + let gap_open = -11; + let gap_extend = -1; + let idx = |i: usize, j: usize| { i + j * (q.len() + 1) }; + + let mut D = vec![i32::MIN; (q.len() + 1) * (r.len() + 1)]; + let mut R = vec![i32::MIN; (q.len() + 1) * (r.len() + 1)]; + let mut C = vec![i32::MIN; (q.len() + 1) * (r.len() + 1)]; + D[idx(0, 0)] = 0; + + let mut best_max = i32::MIN; + let mut best_i = 0; + let mut best_j = 0; + + for i in 0..=q.len() { + let mut max = i32::MIN; + let mut max_j = 0; + for j in 0..=r.len() { + if D[idx(i, j)] != i32::MIN { + continue; + } + R[idx(i, j)] = if i == 0 { i32::MIN } else { cmp::max( + R[idx(i - 1, j)].saturating_add(gap_extend), + D[idx(i - 1, j)].saturating_add(gap_open) + ) }; + C[idx(i, j)] = if j == 0 { i32::MIN } else { cmp::max( + C[idx(i, j - 1)].saturating_add(gap_extend), + D[idx(i, j - 1)].saturating_add(gap_open) + ) }; + D[idx(i, j)] = cmp::max( + if i == 0 || j == 0 || i > q.len() || j > r.len() { i32::MIN } else { + D[idx(i - 1, j - 1)].saturating_add(blosum62(q[i - 1], r[j - 1])) + }, + cmp::max(R[idx(i, j)], C[idx(i, j)]) + ); + if D[idx(i, j)] > max { + max = D[idx(i, j)]; + max_j = j; + } + } + if max > best_max { + best_max = max; + best_i = i; + best_j = max_j; + } + if max < best_max - x_drop { + break; + } + } + + (best_max, best_i, best_j) +} diff --git 
a/lib/block-aligner/ideas.md b/lib/block-aligner/ideas.md new file mode 100644 index 000000000..fe25ac5aa --- /dev/null +++ b/lib/block-aligner/ideas.md @@ -0,0 +1,37 @@ +# Old ideas and history +Originally, this project started with trying to improve Daily's prefix scan algorithm. +The goal was to make it (static) banded and also use a difference encoding like in the +Suzuki-Kasahara algorithm. Additionally, it would be nice to support protein alignment (in addition to +DNA alignment) but with very narrow lanes, for maximum SIMD parallelism. + +This plan did not really work out, so I looked into pivoting to adaptive banding methods. +Adaptive banding allows a very small band to be used, compared to the traditional static +banding approach. Part of this was to stay competitive with the recent Wavefront Alignment +algorithm in terms of speed, and also improve on it since it could not handle complex amino +acid scoring schemes that were necessary in protein alignment. + +However, with vertical or horizontal bands, I quickly realized that it was too hard to +identify the direction to shift. I also thought about using L-shaped areas and other shapes +to tile the DP matrix, but eventually I settled on square blocks due to their flexibility. + +WASM SIMD support was especially interesting since it is cross-platform and runs in the +browser. Who knows? Maybe people will start developing more bioinformatics tools that +run in the browser! + +Compared to other algorithms, block aligner is very optimistic. Try small blocks before larger blocks! + +## Some failed ideas +1. What if we took Daily's prefix scan idea and made it faster and made it banded using +ring buffers and had tons of 32-bit offsets for intervals of the band to prevent overflow? +(This actually works, but it is soooooo complex.) +2. What if we took that banded idea (a single thin vertical band) and made it adaptive? +3. What if we placed blocks like Minecraft, where there is no overlap between blocks? +4.
What if we compared the rightmost column and bottommost row in each block to decide +which direction to shift? (Surprisingly, using the first couple of values in each column +or row is better than using the whole column/row. Also, comparing the sum of scores worked +better than comparing the max. Update: revisiting this, it seems like max actually works best!) +5. Use a branch-predictor-like scheme to predict which direction to shift as a tie-breaker +when shifting right or down seem equally good. +6. ... + +Some old code is located in the `src/old` directory. diff --git a/lib/block-aligner/matrices/BLOSUM45 b/lib/block-aligner/matrices/BLOSUM45 new file mode 100644 index 000000000..974dc5838 --- /dev/null +++ b/lib/block-aligner/matrices/BLOSUM45 @@ -0,0 +1,28 @@ +[5,-1,-1,-2,-1,-2,0,-2,-1,-128,-1,-1,-1,-1,-128,-1,-1,-2,1,0,-128,0,-2,0,-2,-1,-128,-128,-128,-128,-128,-128, +-1,4,-2,5,1,-3,-1,0,-3,-128,0,-3,-2,4,-128,-2,0,-1,0,0,-128,-3,-4,-1,-2,2,-128,-128,-128,-128,-128,-128, +-1,-2,12,-3,-3,-2,-3,-3,-3,-128,-3,-2,-2,-2,-128,-4,-3,-3,-1,-1,-128,-1,-5,-2,-3,-3,-128,-128,-128,-128,-128,-128, +-2,5,-3,7,2,-4,-1,0,-4,-128,0,-3,-3,2,-128,-1,0,-1,0,-1,-128,-3,-4,-1,-2,1,-128,-128,-128,-128,-128,-128, +-1,1,-3,2,6,-3,-2,0,-3,-128,1,-2,-2,0,-128,0,2,0,0,-1,-128,-3,-3,-1,-2,4,-128,-128,-128,-128,-128,-128, +-2,-3,-2,-4,-3,8,-3,-2,0,-128,-3,1,0,-2,-128,-3,-4,-2,-2,-1,-128,0,1,-1,3,-3,-128,-128,-128,-128,-128,-128, +0,-1,-3,-1,-2,-3,7,-2,-4,-128,-2,-3,-2,0,-128,-2,-2,-2,0,-2,-128,-3,-2,-1,-3,-2,-128,-128,-128,-128,-128,-128, +-2,0,-3,0,0,-2,-2,10,-3,-128,-1,-2,0,1,-128,-2,1,0,-1,-2,-128,-3,-3,-1,2,0,-128,-128,-128,-128,-128,-128, +-1,-3,-3,-4,-3,0,-4,-3,5,-128,-3,2,2,-2,-128,-2,-2,-3,-2,-1,-128,3,-2,-1,0,-3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, 
+-1,0,-3,0,1,-3,-2,-1,-3,-128,5,-3,-1,0,-128,-1,1,3,-1,-1,-128,-2,-2,-1,-1,1,-128,-128,-128,-128,-128,-128, +-1,-3,-2,-3,-2,1,-3,-2,2,-128,-3,5,2,-3,-128,-3,-2,-2,-3,-1,-128,1,-2,-1,0,-2,-128,-128,-128,-128,-128,-128, +-1,-2,-2,-3,-2,0,-2,0,2,-128,-1,2,6,-2,-128,-2,0,-1,-2,-1,-128,1,-2,-1,0,-1,-128,-128,-128,-128,-128,-128, +-1,4,-2,2,0,-2,0,1,-2,-128,0,-3,-2,6,-128,-2,0,0,1,0,-128,-3,-4,-1,-2,0,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-2,-4,-1,0,-3,-2,-2,-2,-128,-1,-3,-2,-2,-128,9,-1,-2,-1,-1,-128,-3,-3,-1,-3,-1,-128,-128,-128,-128,-128,-128, +-1,0,-3,0,2,-4,-2,1,-2,-128,1,-2,0,0,-128,-1,6,1,0,-1,-128,-3,-2,-1,-1,4,-128,-128,-128,-128,-128,-128, +-2,-1,-3,-1,0,-2,-2,0,-3,-128,3,-2,-1,0,-128,-2,1,7,-1,-1,-128,-2,-2,-1,-1,0,-128,-128,-128,-128,-128,-128, +1,0,-1,0,0,-2,0,-1,-2,-128,-1,-3,-2,1,-128,-1,0,-1,4,2,-128,-1,-4,0,-2,0,-128,-128,-128,-128,-128,-128, +0,0,-1,-1,-1,-1,-2,-2,-1,-128,-1,-1,-1,0,-128,-1,-1,-1,2,5,-128,0,-3,0,-1,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-3,-1,-3,-3,0,-3,-3,3,-128,-2,1,1,-3,-128,-3,-3,-2,-1,0,-128,5,-3,-1,-1,-3,-128,-128,-128,-128,-128,-128, +-2,-4,-5,-4,-3,1,-2,-3,-2,-128,-2,-2,-2,-4,-128,-3,-2,-2,-4,-3,-128,-3,15,-2,3,-2,-128,-128,-128,-128,-128,-128, +0,-1,-2,-1,-1,-1,-1,-1,-1,-128,-1,-1,-1,-1,-128,-1,-1,-1,0,0,-128,-1,-2,-1,-1,-1,-128,-128,-128,-128,-128,-128, +-2,-2,-3,-2,-2,3,-3,2,0,-128,-1,0,0,-2,-128,-3,-1,-1,-2,-1,-128,-1,3,-1,8,-2,-128,-128,-128,-128,-128,-128, +-1,2,-3,1,4,-3,-2,0,-3,-128,1,-2,-1,0,-128,-1,4,0,0,-1,-128,-3,-2,-1,-2,4,-128,-128,-128,-128,-128,-128, 
+-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/BLOSUM50 b/lib/block-aligner/matrices/BLOSUM50 new file mode 100644 index 000000000..33a9e243a --- /dev/null +++ b/lib/block-aligner/matrices/BLOSUM50 @@ -0,0 +1,28 @@ +[5,-2,-1,-2,-1,-3,0,-2,-1,-128,-1,-2,-1,-1,-128,-1,-1,-2,1,0,-128,0,-3,-1,-2,-1,-128,-128,-128,-128,-128,-128, +-2,5,-3,5,1,-4,-1,0,-4,-128,0,-4,-3,4,-128,-2,0,-1,0,0,-128,-4,-5,-1,-3,2,-128,-128,-128,-128,-128,-128, +-1,-3,13,-4,-3,-2,-3,-3,-2,-128,-3,-2,-2,-2,-128,-4,-3,-4,-1,-1,-128,-1,-5,-2,-3,-3,-128,-128,-128,-128,-128,-128, +-2,5,-4,8,2,-5,-1,-1,-4,-128,-1,-4,-4,2,-128,-1,0,-2,0,-1,-128,-4,-5,-1,-3,1,-128,-128,-128,-128,-128,-128, +-1,1,-3,2,6,-3,-3,0,-4,-128,1,-3,-2,0,-128,-1,2,0,-1,-1,-128,-3,-3,-1,-2,5,-128,-128,-128,-128,-128,-128, +-3,-4,-2,-5,-3,8,-4,-1,0,-128,-4,1,0,-4,-128,-4,-4,-3,-3,-2,-128,-1,1,-2,4,-4,-128,-128,-128,-128,-128,-128, +0,-1,-3,-1,-3,-4,8,-2,-4,-128,-2,-4,-3,0,-128,-2,-2,-3,0,-2,-128,-4,-3,-2,-3,-2,-128,-128,-128,-128,-128,-128, +-2,0,-3,-1,0,-1,-2,10,-4,-128,0,-3,-1,1,-128,-2,1,0,-1,-2,-128,-4,-3,-1,2,0,-128,-128,-128,-128,-128,-128, +-1,-4,-2,-4,-4,0,-4,-4,5,-128,-3,2,2,-3,-128,-3,-3,-4,-3,-1,-128,4,-3,-1,-1,-3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,0,-3,-1,1,-4,-2,0,-3,-128,6,-3,-2,0,-128,-1,2,3,0,-1,-128,-3,-3,-1,-2,1,-128,-128,-128,-128,-128,-128, +-2,-4,-2,-4,-3,1,-4,-3,2,-128,-3,5,3,-4,-128,-4,-2,-3,-3,-1,-128,1,-2,-1,-1,-3,-128,-128,-128,-128,-128,-128, +-1,-3,-2,-4,-2,0,-3,-1,2,-128,-2,3,7,-2,-128,-3,0,-2,-2,-1,-128,1,-1,-1,0,-1,-128,-128,-128,-128,-128,-128, +-1,4,-2,2,0,-4,0,1,-3,-128,0,-4,-2,7,-128,-2,0,-1,1,0,-128,-3,-4,-1,-2,0,-128,-128,-128,-128,-128,-128, 
+-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-2,-4,-1,-1,-4,-2,-2,-3,-128,-1,-4,-3,-2,-128,10,-1,-3,-1,-1,-128,-3,-4,-2,-3,-1,-128,-128,-128,-128,-128,-128, +-1,0,-3,0,2,-4,-2,1,-3,-128,2,-2,0,0,-128,-1,7,1,0,-1,-128,-3,-1,-1,-1,4,-128,-128,-128,-128,-128,-128, +-2,-1,-4,-2,0,-3,-3,0,-4,-128,3,-3,-2,-1,-128,-3,1,7,-1,-1,-128,-3,-3,-1,-1,0,-128,-128,-128,-128,-128,-128, +1,0,-1,0,-1,-3,0,-1,-3,-128,0,-3,-2,1,-128,-1,0,-1,5,2,-128,-2,-4,-1,-2,0,-128,-128,-128,-128,-128,-128, +0,0,-1,-1,-1,-2,-2,-2,-1,-128,-1,-1,-1,0,-128,-1,-1,-1,2,5,-128,0,-3,0,-2,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-4,-1,-4,-3,-1,-4,-4,4,-128,-3,1,1,-3,-128,-3,-3,-3,-2,0,-128,5,-3,-1,-1,-3,-128,-128,-128,-128,-128,-128, +-3,-5,-5,-5,-3,1,-3,-3,-3,-128,-3,-2,-1,-4,-128,-4,-1,-3,-4,-3,-128,-3,15,-3,2,-2,-128,-128,-128,-128,-128,-128, +-1,-1,-2,-1,-1,-2,-2,-1,-1,-128,-1,-1,-1,-1,-128,-2,-1,-1,-1,0,-128,-1,-3,-1,-1,-1,-128,-128,-128,-128,-128,-128, +-2,-3,-3,-3,-2,4,-3,2,-1,-128,-2,-1,0,-2,-128,-3,-1,-1,-2,-2,-128,-1,2,-1,8,-2,-128,-128,-128,-128,-128,-128, +-1,2,-3,1,5,-4,-2,0,-3,-128,1,-3,-1,0,-128,-1,4,0,0,-1,-128,-3,-2,-1,-2,5,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/BLOSUM62 b/lib/block-aligner/matrices/BLOSUM62 new file mode 100644 index 000000000..e5a2b3fc6 --- /dev/null +++ b/lib/block-aligner/matrices/BLOSUM62 @@ -0,0 +1,28 @@ +[4,-2,0,-2,-1,-2,0,-2,-1,-128,-1,-1,-1,-2,-128,-1,-1,-1,1,0,-128,0,-3,0,-2,-1,-128,-128,-128,-128,-128,-128, 
+-2,4,-3,4,1,-3,-1,0,-3,-128,0,-4,-3,3,-128,-2,0,-1,0,-1,-128,-3,-4,-1,-3,1,-128,-128,-128,-128,-128,-128, +0,-3,9,-3,-4,-2,-3,-3,-1,-128,-3,-1,-1,-3,-128,-3,-3,-3,-1,-1,-128,-1,-2,-2,-2,-3,-128,-128,-128,-128,-128,-128, +-2,4,-3,6,2,-3,-1,-1,-3,-128,-1,-4,-3,1,-128,-1,0,-2,0,-1,-128,-3,-4,-1,-3,1,-128,-128,-128,-128,-128,-128, +-1,1,-4,2,5,-3,-2,0,-3,-128,1,-3,-2,0,-128,-1,2,0,0,-1,-128,-2,-3,-1,-2,4,-128,-128,-128,-128,-128,-128, +-2,-3,-2,-3,-3,6,-3,-1,0,-128,-3,0,0,-3,-128,-4,-3,-3,-2,-2,-128,-1,1,-1,3,-3,-128,-128,-128,-128,-128,-128, +0,-1,-3,-1,-2,-3,6,-2,-4,-128,-2,-4,-3,0,-128,-2,-2,-2,0,-2,-128,-3,-2,-1,-3,-2,-128,-128,-128,-128,-128,-128, +-2,0,-3,-1,0,-1,-2,8,-3,-128,-1,-3,-2,1,-128,-2,0,0,-1,-2,-128,-3,-2,-1,2,0,-128,-128,-128,-128,-128,-128, +-1,-3,-1,-3,-3,0,-4,-3,4,-128,-3,2,1,-3,-128,-3,-3,-3,-2,-1,-128,3,-3,-1,-1,-3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,0,-3,-1,1,-3,-2,-1,-3,-128,5,-2,-1,0,-128,-1,1,2,0,-1,-128,-2,-3,-1,-2,1,-128,-128,-128,-128,-128,-128, +-1,-4,-1,-4,-3,0,-4,-3,2,-128,-2,4,2,-3,-128,-3,-2,-2,-2,-1,-128,1,-2,-1,-1,-3,-128,-128,-128,-128,-128,-128, +-1,-3,-1,-3,-2,0,-3,-2,1,-128,-1,2,5,-2,-128,-2,0,-1,-1,-1,-128,1,-1,-1,-1,-1,-128,-128,-128,-128,-128,-128, +-2,3,-3,1,0,-3,0,1,-3,-128,0,-3,-2,6,-128,-2,0,0,1,0,-128,-3,-4,-1,-2,0,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-2,-3,-1,-1,-4,-2,-2,-3,-128,-1,-3,-2,-2,-128,7,-1,-2,-1,-1,-128,-2,-4,-2,-3,-1,-128,-128,-128,-128,-128,-128, +-1,0,-3,0,2,-3,-2,0,-3,-128,1,-2,0,0,-128,-1,5,1,0,-1,-128,-2,-2,-1,-1,3,-128,-128,-128,-128,-128,-128, +-1,-1,-3,-2,0,-3,-2,0,-3,-128,2,-2,-1,0,-128,-2,1,5,-1,-1,-128,-3,-3,-1,-2,0,-128,-128,-128,-128,-128,-128, 
+1,0,-1,0,0,-2,0,-1,-2,-128,0,-2,-1,1,-128,-1,0,-1,4,1,-128,-2,-3,0,-2,0,-128,-128,-128,-128,-128,-128, +0,-1,-1,-1,-1,-2,-2,-2,-1,-128,-1,-1,-1,0,-128,-1,-1,-1,1,5,-128,0,-2,0,-2,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-3,-1,-3,-2,-1,-3,-3,3,-128,-2,1,1,-3,-128,-2,-2,-3,-2,0,-128,4,-3,-1,-1,-2,-128,-128,-128,-128,-128,-128, +-3,-4,-2,-4,-3,1,-2,-2,-3,-128,-3,-2,-1,-4,-128,-4,-2,-3,-3,-2,-128,-3,11,-2,2,-3,-128,-128,-128,-128,-128,-128, +0,-1,-2,-1,-1,-1,-1,-1,-1,-128,-1,-1,-1,-1,-128,-2,-1,-1,0,0,-128,-1,-2,-1,-1,-1,-128,-128,-128,-128,-128,-128, +-2,-3,-2,-3,-2,3,-3,2,-1,-128,-2,-1,-1,-2,-128,-3,-1,-2,-2,-2,-128,-1,2,-1,7,-2,-128,-128,-128,-128,-128,-128, +-1,1,-3,1,4,-3,-2,0,-3,-128,1,-3,-1,0,-128,-1,3,0,0,-1,-128,-2,-3,-1,-2,4,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/BLOSUM80 b/lib/block-aligner/matrices/BLOSUM80 new file mode 100644 index 000000000..11a15562e --- /dev/null +++ b/lib/block-aligner/matrices/BLOSUM80 @@ -0,0 +1,28 @@ +[7,-3,-1,-3,-2,-4,0,-3,-3,-128,-1,-3,-2,-3,-128,-1,-2,-3,2,0,-128,-1,-5,-1,-4,-2,-128,-128,-128,-128,-128,-128, +-3,6,-6,6,1,-6,-2,-1,-6,-128,-1,-7,-5,5,-128,-4,-1,-2,0,-1,-128,-6,-8,-3,-5,0,-128,-128,-128,-128,-128,-128, +-1,-6,13,-7,-7,-4,-6,-7,-2,-128,-6,-3,-3,-5,-128,-6,-5,-6,-2,-2,-128,-2,-5,-4,-5,-7,-128,-128,-128,-128,-128,-128, +-3,6,-7,10,2,-6,-3,-2,-7,-128,-2,-7,-6,2,-128,-3,-1,-3,-1,-2,-128,-6,-8,-3,-6,1,-128,-128,-128,-128,-128,-128, +-2,1,-7,2,8,-6,-4,0,-6,-128,1,-6,-4,-1,-128,-2,3,-1,-1,-2,-128,-4,-6,-2,-5,6,-128,-128,-128,-128,-128,-128, 
+-4,-6,-4,-6,-6,10,-6,-2,-1,-128,-5,0,0,-6,-128,-6,-5,-5,-4,-4,-128,-2,0,-3,4,-6,-128,-128,-128,-128,-128,-128, +0,-2,-6,-3,-4,-6,9,-4,-7,-128,-3,-7,-5,-1,-128,-5,-4,-4,-1,-3,-128,-6,-6,-3,-6,-4,-128,-128,-128,-128,-128,-128, +-3,-1,-7,-2,0,-2,-4,12,-6,-128,-1,-5,-4,1,-128,-4,1,0,-2,-3,-128,-5,-4,-2,3,0,-128,-128,-128,-128,-128,-128, +-3,-6,-2,-7,-6,-1,-7,-6,7,-128,-5,2,2,-6,-128,-5,-5,-5,-4,-2,-128,4,-5,-2,-3,-6,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-1,-6,-2,1,-5,-3,-1,-5,-128,8,-4,-3,0,-128,-2,2,3,-1,-1,-128,-4,-6,-2,-4,1,-128,-128,-128,-128,-128,-128, +-3,-7,-3,-7,-6,0,-7,-5,2,-128,-4,6,3,-6,-128,-5,-4,-4,-4,-3,-128,1,-4,-2,-2,-5,-128,-128,-128,-128,-128,-128, +-2,-5,-3,-6,-4,0,-5,-4,2,-128,-3,3,9,-4,-128,-4,-1,-3,-3,-1,-128,1,-3,-2,-3,-3,-128,-128,-128,-128,-128,-128, +-3,5,-5,2,-1,-6,-1,1,-6,-128,0,-6,-4,9,-128,-4,0,-1,1,0,-128,-5,-7,-2,-4,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-4,-6,-3,-2,-6,-5,-4,-5,-128,-2,-5,-4,-4,-128,12,-3,-3,-2,-3,-128,-4,-7,-3,-6,-2,-128,-128,-128,-128,-128,-128, +-2,-1,-5,-1,3,-5,-4,1,-5,-128,2,-4,-1,0,-128,-3,9,1,-1,-1,-128,-4,-4,-2,-3,5,-128,-128,-128,-128,-128,-128, +-3,-2,-6,-3,-1,-5,-4,0,-5,-128,3,-4,-3,-1,-128,-3,1,9,-2,-2,-128,-4,-5,-2,-4,0,-128,-128,-128,-128,-128,-128, +2,0,-2,-1,-1,-4,-1,-2,-4,-128,-1,-4,-3,1,-128,-2,-1,-2,7,2,-128,-3,-6,-1,-3,-1,-128,-128,-128,-128,-128,-128, +0,-1,-2,-2,-2,-4,-3,-3,-2,-128,-1,-3,-1,0,-128,-3,-1,-2,2,8,-128,0,-5,-1,-3,-2,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, 
+-1,-6,-2,-6,-4,-2,-6,-5,4,-128,-4,1,1,-5,-128,-4,-4,-4,-3,0,-128,7,-5,-2,-3,-4,-128,-128,-128,-128,-128,-128, +-5,-8,-5,-8,-6,0,-6,-4,-5,-128,-6,-4,-3,-7,-128,-7,-4,-5,-6,-5,-128,-5,16,-5,3,-5,-128,-128,-128,-128,-128,-128, +-1,-3,-4,-3,-2,-3,-3,-2,-2,-128,-2,-2,-2,-2,-128,-3,-2,-2,-1,-1,-128,-2,-5,-2,-3,-1,-128,-128,-128,-128,-128,-128, +-4,-5,-5,-6,-5,4,-6,3,-3,-128,-4,-2,-3,-4,-128,-6,-3,-4,-3,-3,-128,-3,3,-3,11,-4,-128,-128,-128,-128,-128,-128, +-2,0,-7,1,6,-6,-4,0,-6,-128,1,-5,-3,-1,-128,-2,5,0,-1,-2,-128,-4,-5,-1,-4,6,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/BLOSUM90 b/lib/block-aligner/matrices/BLOSUM90 new file mode 100644 index 000000000..4862d1c82 --- /dev/null +++ b/lib/block-aligner/matrices/BLOSUM90 @@ -0,0 +1,28 @@ +[5,-2,-1,-3,-1,-3,0,-2,-2,-128,-1,-2,-2,-2,-128,-1,-1,-2,1,0,-128,-1,-4,-1,-3,-1,-128,-128,-128,-128,-128,-128, +-2,4,-4,4,0,-4,-2,-1,-5,-128,-1,-5,-4,4,-128,-3,-1,-2,0,-1,-128,-4,-6,-2,-4,0,-128,-128,-128,-128,-128,-128, +-1,-4,9,-5,-6,-3,-4,-5,-2,-128,-4,-2,-2,-4,-128,-4,-4,-5,-2,-2,-128,-2,-4,-3,-4,-5,-128,-128,-128,-128,-128,-128, +-3,4,-5,7,1,-5,-2,-2,-5,-128,-1,-5,-4,1,-128,-3,-1,-3,-1,-2,-128,-5,-6,-2,-4,0,-128,-128,-128,-128,-128,-128, +-1,0,-6,1,6,-5,-3,-1,-4,-128,0,-4,-3,-1,-128,-2,2,-1,-1,-1,-128,-3,-5,-2,-4,4,-128,-128,-128,-128,-128,-128, +-3,-4,-3,-5,-5,7,-5,-2,-1,-128,-4,0,-1,-4,-128,-4,-4,-4,-3,-3,-128,-2,0,-2,3,-4,-128,-128,-128,-128,-128,-128, +0,-2,-4,-2,-3,-5,6,-3,-5,-128,-2,-5,-4,-1,-128,-3,-3,-3,-1,-3,-128,-5,-4,-2,-5,-3,-128,-128,-128,-128,-128,-128, +-2,-1,-5,-2,-1,-2,-3,8,-4,-128,-1,-4,-3,0,-128,-3,1,0,-2,-2,-128,-4,-3,-2,1,0,-128,-128,-128,-128,-128,-128, +-2,-5,-2,-5,-4,-1,-5,-4,5,-128,-4,1,1,-4,-128,-4,-4,-4,-3,-1,-128,3,-4,-2,-2,-4,-128,-128,-128,-128,-128,-128, 
+-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-1,-4,-1,0,-4,-2,-1,-4,-128,6,-3,-2,0,-128,-2,1,2,-1,-1,-128,-3,-5,-1,-3,1,-128,-128,-128,-128,-128,-128, +-2,-5,-2,-5,-4,0,-5,-4,1,-128,-3,5,2,-4,-128,-4,-3,-3,-3,-2,-128,0,-3,-2,-2,-4,-128,-128,-128,-128,-128,-128, +-2,-4,-2,-4,-3,-1,-4,-3,1,-128,-2,2,7,-3,-128,-3,0,-2,-2,-1,-128,0,-2,-1,-2,-2,-128,-128,-128,-128,-128,-128, +-2,4,-4,1,-1,-4,-1,0,-4,-128,0,-4,-3,7,-128,-3,0,-1,0,0,-128,-4,-5,-2,-3,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-3,-4,-3,-2,-4,-3,-3,-4,-128,-2,-4,-3,-3,-128,8,-2,-3,-2,-2,-128,-3,-5,-2,-4,-2,-128,-128,-128,-128,-128,-128, +-1,-1,-4,-1,2,-4,-3,1,-4,-128,1,-3,0,0,-128,-2,7,1,-1,-1,-128,-3,-3,-1,-3,4,-128,-128,-128,-128,-128,-128, +-2,-2,-5,-3,-1,-4,-3,0,-4,-128,2,-3,-2,-1,-128,-3,1,6,-1,-2,-128,-3,-4,-2,-3,0,-128,-128,-128,-128,-128,-128, +1,0,-2,-1,-1,-3,-1,-2,-3,-128,-1,-3,-2,0,-128,-2,-1,-1,5,1,-128,-2,-4,-1,-3,-1,-128,-128,-128,-128,-128,-128, +0,-1,-2,-2,-1,-3,-3,-2,-1,-128,-1,-2,-1,0,-128,-2,-1,-2,1,6,-128,-1,-4,-1,-2,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,-4,-2,-5,-3,-2,-5,-4,3,-128,-3,0,0,-4,-128,-3,-3,-3,-2,-1,-128,5,-3,-2,-3,-3,-128,-128,-128,-128,-128,-128, +-4,-6,-4,-6,-5,0,-4,-3,-4,-128,-5,-3,-2,-5,-128,-5,-3,-4,-4,-4,-128,-3,11,-3,2,-4,-128,-128,-128,-128,-128,-128, +-1,-2,-3,-2,-2,-2,-2,-2,-2,-128,-1,-2,-1,-2,-128,-2,-1,-2,-1,-1,-128,-2,-3,-2,-2,-1,-128,-128,-128,-128,-128,-128, +-3,-4,-4,-4,-4,3,-5,1,-2,-128,-3,-2,-2,-3,-128,-4,-3,-3,-3,-2,-128,-3,2,-2,8,-3,-128,-128,-128,-128,-128,-128, 
+-1,0,-5,0,4,-4,-3,0,-4,-128,1,-4,-2,-1,-128,-2,4,0,-1,-1,-128,-3,-4,-1,-3,4,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/NW1 b/lib/block-aligner/matrices/NW1 new file mode 100644 index 000000000..69033319a --- /dev/null +++ b/lib/block-aligner/matrices/NW1 @@ -0,0 +1 @@ +[-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,1,-128,-1,-1,-128,-128,-1,-128,-128,-128,-128,-128,-128,-1,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-1,-128,1,-1,-128,-128,-1,-128,-128,-128,-128,-128,-128,-1,-128,-128,-1,-128,-1,1,-128,-128,-1,-128,-128,-128,-128,-128,-128,-1,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-1,-128,-1,-1,-128,-128,-1,-128,-128,-128,-128,-128,-128,1,-128,-128,-1,-128,-1,-1,-128,-128,1,-128,-128,-128,-128,-128,-128,-1,-128] \ No newline at end of file diff --git a/lib/block-aligner/matrices/PAM100 b/lib/block-aligner/matrices/PAM100 new file mode 100644 index 000000000..cde0b1641 --- /dev/null +++ b/lib/block-aligner/matrices/PAM100 @@ -0,0 +1,28 @@ +[4,-1,-3,-1,0,-5,1,-3,-2,-128,-3,-3,-2,-1,-128,1,-2,-3,1,1,-128,0,-7,-1,-4,-1,-128,-128,-128,-128,-128,-128, +-1,4,-6,4,3,-6,-1,1,-3,-128,0,-5,-4,4,-128,-3,0,-3,0,-1,-128,-4,-6,-2,-4,2,-128,-128,-128,-128,-128,-128, +-3,-6,9,-7,-8,-7,-5,-4,-3,-128,-8,-8,-7,-5,-128,-4,-8,-5,-1,-4,-128,-3,-9,-5,-1,-8,-128,-128,-128,-128,-128,-128, +-1,4,-7,5,4,-8,-1,-1,-4,-128,-1,-6,-5,3,-128,-3,0,-4,-1,-2,-128,-4,-9,-2,-6,3,-128,-128,-128,-128,-128,-128, +0,3,-8,4,5,-8,-1,-1,-3,-128,-1,-5,-4,1,-128,-2,2,-3,-1,-2,-128,-3,-9,-2,-5,4,-128,-128,-128,-128,-128,-128, +-5,-6,-7,-8,-8,8,-6,-3,0,-128,-7,0,-1,-5,-128,-6,-7,-6,-4,-5,-128,-3,-1,-4,4,-7,-128,-128,-128,-128,-128,-128, 
+1,-1,-5,-1,-1,-6,5,-4,-5,-128,-3,-6,-4,-1,-128,-2,-3,-5,0,-2,-128,-3,-9,-2,-7,-2,-128,-128,-128,-128,-128,-128, +-3,1,-4,-1,-1,-3,-4,7,-4,-128,-2,-3,-4,2,-128,-1,3,1,-2,-3,-128,-3,-4,-2,-1,1,-128,-128,-128,-128,-128,-128, +-2,-3,-3,-4,-3,0,-5,-4,6,-128,-3,1,1,-3,-128,-4,-4,-3,-3,0,-128,3,-7,-2,-3,-3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-3,0,-8,-1,-1,-7,-3,-2,-3,-128,5,-4,0,1,-128,-3,0,2,-1,-1,-128,-4,-6,-2,-6,-1,-128,-128,-128,-128,-128,-128, +-3,-5,-8,-6,-5,0,-6,-3,1,-128,-4,6,3,-4,-128,-4,-2,-5,-4,-3,-128,0,-3,-3,-3,-4,-128,-128,-128,-128,-128,-128, +-2,-4,-7,-5,-4,-1,-4,-4,1,-128,0,3,9,-4,-128,-4,-2,-1,-3,-1,-128,1,-6,-2,-5,-2,-128,-128,-128,-128,-128,-128, +-1,4,-5,3,1,-5,-1,2,-3,-128,1,-4,-4,5,-128,-2,-1,-2,1,0,-128,-3,-5,-1,-2,0,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +1,-3,-4,-3,-2,-6,-2,-1,-4,-128,-3,-4,-4,-2,-128,7,-1,-1,0,-1,-128,-3,-7,-2,-7,-1,-128,-128,-128,-128,-128,-128, +-2,0,-8,0,2,-7,-3,3,-4,-128,0,-2,-2,-1,-128,-1,6,1,-2,-2,-128,-3,-7,-2,-6,5,-128,-128,-128,-128,-128,-128, +-3,-3,-5,-4,-3,-6,-5,1,-3,-128,2,-5,-1,-2,-128,-1,1,7,-1,-3,-128,-4,1,-2,-6,-1,-128,-128,-128,-128,-128,-128, +1,0,-1,-1,-1,-4,0,-2,-3,-128,-1,-4,-3,1,-128,0,-2,-1,4,2,-128,-2,-3,-1,-4,-2,-128,-128,-128,-128,-128,-128, +1,-1,-4,-2,-2,-5,-2,-3,0,-128,-1,-3,-1,0,-128,-1,-2,-3,2,5,-128,0,-7,-1,-4,-2,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-4,-3,-4,-3,-3,-3,-3,3,-128,-4,0,1,-3,-128,-3,-3,-4,-2,0,-128,5,-9,-2,-4,-3,-128,-128,-128,-128,-128,-128, 
+-7,-6,-9,-9,-9,-1,-9,-4,-7,-128,-6,-3,-6,-5,-128,-7,-7,1,-3,-7,-128,-9,12,-6,-2,-8,-128,-128,-128,-128,-128,-128, +-1,-2,-5,-2,-2,-4,-2,-2,-2,-128,-2,-3,-2,-1,-128,-2,-2,-2,-1,-1,-128,-2,-6,-2,-4,-2,-128,-128,-128,-128,-128,-128, +-4,-4,-1,-6,-5,4,-7,-1,-3,-128,-6,-3,-5,-2,-128,-7,-6,-6,-4,-4,-128,-4,-2,-4,9,-6,-128,-128,-128,-128,-128,-128, +-1,2,-8,3,4,-7,-2,1,-3,-128,-1,-4,-2,0,-128,-1,5,-1,-2,-2,-128,-3,-8,-2,-6,5,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/PAM120 b/lib/block-aligner/matrices/PAM120 new file mode 100644 index 000000000..a15951e73 --- /dev/null +++ b/lib/block-aligner/matrices/PAM120 @@ -0,0 +1,28 @@ +[3,0,-3,0,0,-4,1,-3,-1,-128,-2,-3,-2,-1,-128,1,-1,-3,1,1,-128,0,-7,-1,-4,-1,-128,-128,-128,-128,-128,-128, +0,4,-6,4,3,-5,0,1,-3,-128,0,-4,-4,3,-128,-2,0,-2,0,0,-128,-3,-6,-1,-3,2,-128,-128,-128,-128,-128,-128, +-3,-6,9,-7,-7,-6,-4,-4,-3,-128,-7,-7,-6,-5,-128,-4,-7,-4,0,-3,-128,-3,-8,-4,-1,-7,-128,-128,-128,-128,-128,-128, +0,4,-7,5,3,-7,0,0,-3,-128,-1,-5,-4,2,-128,-3,1,-3,0,-1,-128,-3,-8,-2,-5,3,-128,-128,-128,-128,-128,-128, +0,3,-7,3,5,-7,-1,-1,-3,-128,-1,-4,-3,1,-128,-2,2,-3,-1,-2,-128,-3,-8,-1,-5,4,-128,-128,-128,-128,-128,-128, +-4,-5,-6,-7,-7,8,-5,-3,0,-128,-7,0,-1,-4,-128,-5,-6,-5,-3,-4,-128,-3,-1,-3,4,-6,-128,-128,-128,-128,-128,-128, +1,0,-4,0,-1,-5,5,-4,-4,-128,-3,-5,-4,0,-128,-2,-3,-4,1,-1,-128,-2,-8,-2,-6,-2,-128,-128,-128,-128,-128,-128, +-3,1,-4,0,-1,-3,-4,7,-4,-128,-2,-3,-4,2,-128,-1,3,1,-2,-3,-128,-3,-3,-2,-1,1,-128,-128,-128,-128,-128,-128, +-1,-3,-3,-3,-3,0,-4,-4,6,-128,-3,1,1,-2,-128,-3,-3,-2,-2,0,-128,3,-6,-1,-2,-3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, 
+-2,0,-7,-1,-1,-7,-3,-2,-3,-128,5,-4,0,1,-128,-2,0,2,-1,-1,-128,-4,-5,-2,-5,-1,-128,-128,-128,-128,-128,-128, +-3,-4,-7,-5,-4,0,-5,-3,1,-128,-4,5,3,-4,-128,-3,-2,-4,-4,-3,-128,1,-3,-2,-2,-3,-128,-128,-128,-128,-128,-128, +-2,-4,-6,-4,-3,-1,-4,-4,1,-128,0,3,8,-3,-128,-3,-1,-1,-2,-1,-128,1,-6,-2,-4,-2,-128,-128,-128,-128,-128,-128, +-1,3,-5,2,1,-4,0,2,-2,-128,1,-4,-3,4,-128,-2,0,-1,1,0,-128,-3,-4,-1,-2,0,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +1,-2,-4,-3,-2,-5,-2,-1,-3,-128,-2,-3,-3,-2,-128,6,0,-1,1,-1,-128,-2,-7,-2,-6,-1,-128,-128,-128,-128,-128,-128, +-1,0,-7,1,2,-6,-3,3,-3,-128,0,-2,-1,0,-128,0,6,1,-2,-2,-128,-3,-6,-1,-5,4,-128,-128,-128,-128,-128,-128, +-3,-2,-4,-3,-3,-5,-4,1,-2,-128,2,-4,-1,-1,-128,-1,1,6,-1,-2,-128,-3,1,-2,-5,-1,-128,-128,-128,-128,-128,-128, +1,0,0,0,-1,-3,1,-2,-2,-128,-1,-4,-2,1,-128,1,-2,-1,3,2,-128,-2,-2,-1,-3,-1,-128,-128,-128,-128,-128,-128, +1,0,-3,-1,-2,-4,-1,-3,0,-128,-1,-3,-1,0,-128,-1,-2,-2,2,4,-128,0,-6,-1,-3,-2,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-3,-3,-3,-3,-3,-2,-3,3,-128,-4,1,1,-3,-128,-2,-3,-3,-2,0,-128,5,-8,-1,-3,-3,-128,-128,-128,-128,-128,-128, +-7,-6,-8,-8,-8,-1,-8,-3,-6,-128,-5,-3,-6,-4,-128,-7,-6,1,-2,-6,-128,-8,12,-5,-2,-7,-128,-128,-128,-128,-128,-128, +-1,-1,-4,-2,-1,-3,-2,-2,-1,-128,-2,-2,-2,-1,-128,-2,-1,-2,-1,-1,-128,-1,-5,-2,-3,-1,-128,-128,-128,-128,-128,-128, +-4,-3,-1,-5,-5,4,-6,-1,-2,-128,-5,-2,-4,-2,-128,-6,-5,-5,-3,-3,-128,-3,-2,-3,8,-5,-128,-128,-128,-128,-128,-128, +-1,2,-7,3,4,-6,-2,1,-3,-128,-1,-3,-2,0,-128,-1,4,-1,-1,-2,-128,-3,-7,-1,-5,4,-128,-128,-128,-128,-128,-128, 
+-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/PAM160 b/lib/block-aligner/matrices/PAM160 new file mode 100644 index 000000000..df7d97682 --- /dev/null +++ b/lib/block-aligner/matrices/PAM160 @@ -0,0 +1,28 @@ +[2,0,-2,0,0,-3,1,-2,-1,-128,-2,-2,-1,0,-128,1,-1,-2,1,1,-128,0,-5,0,-3,0,-128,-128,-128,-128,-128,-128, +0,3,-4,3,2,-4,0,1,-2,-128,0,-4,-3,2,-128,-1,1,-1,0,0,-128,-2,-5,-1,-3,2,-128,-128,-128,-128,-128,-128, +-2,-4,9,-5,-5,-5,-3,-3,-2,-128,-5,-6,-5,-4,-128,-3,-5,-3,0,-2,-128,-2,-7,-3,0,-5,-128,-128,-128,-128,-128,-128, +0,3,-5,4,3,-6,0,0,-3,-128,0,-4,-3,2,-128,-2,1,-2,0,-1,-128,-3,-6,-1,-4,2,-128,-128,-128,-128,-128,-128, +0,2,-5,3,4,-5,0,0,-2,-128,-1,-3,-2,1,-128,-1,2,-2,0,-1,-128,-2,-7,-1,-4,3,-128,-128,-128,-128,-128,-128, +-3,-4,-5,-6,-5,7,-4,-2,0,-128,-5,1,0,-3,-128,-4,-5,-4,-3,-3,-128,-2,-1,-3,5,-5,-128,-128,-128,-128,-128,-128, +1,0,-3,0,0,-4,4,-3,-3,-128,-2,-4,-3,0,-128,-1,-2,-3,1,-1,-128,-2,-7,-1,-5,-1,-128,-128,-128,-128,-128,-128, +-2,1,-3,0,0,-2,-3,6,-3,-128,-1,-2,-3,2,-128,-1,2,1,-1,-2,-128,-2,-3,-1,0,1,-128,-128,-128,-128,-128,-128, +-1,-2,-2,-3,-2,0,-3,-3,5,-128,-2,2,2,-2,-128,-2,-2,-2,-2,0,-128,3,-5,-1,-2,-2,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-2,0,-5,0,-1,-5,-2,-1,-2,-128,4,-3,0,1,-128,-2,0,3,-1,0,-128,-3,-4,-1,-4,0,-128,-128,-128,-128,-128,-128, +-2,-4,-6,-4,-3,1,-4,-2,2,-128,-3,5,3,-3,-128,-3,-2,-3,-3,-2,-128,1,-2,-2,-2,-3,-128,-128,-128,-128,-128,-128, +-1,-3,-5,-3,-2,0,-3,-3,2,-128,0,3,7,-2,-128,-2,-1,-1,-2,-1,-128,1,-4,-1,-3,-2,-128,-128,-128,-128,-128,-128, +0,2,-4,2,1,-3,0,2,-2,-128,1,-3,-2,3,-128,-1,0,-1,1,0,-128,-2,-4,0,-2,1,-128,-128,-128,-128,-128,-128, 
+-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +1,-1,-3,-2,-1,-4,-1,-1,-2,-128,-2,-3,-2,-1,-128,5,0,-1,1,0,-128,-2,-5,-1,-5,-1,-128,-128,-128,-128,-128,-128, +-1,1,-5,1,2,-5,-2,2,-2,-128,0,-2,-1,0,-128,0,5,1,-1,-1,-128,-2,-5,-1,-4,3,-128,-128,-128,-128,-128,-128, +-2,-1,-3,-2,-2,-4,-3,1,-2,-128,3,-3,-1,-1,-128,-1,1,6,-1,-1,-128,-3,1,-1,-4,0,-128,-128,-128,-128,-128,-128, +1,0,0,0,0,-3,1,-1,-2,-128,-1,-3,-2,1,-128,1,-1,-1,2,1,-128,-1,-2,0,-3,-1,-128,-128,-128,-128,-128,-128, +1,0,-2,-1,-1,-3,-1,-2,0,-128,0,-2,-1,0,-128,0,-1,-1,1,3,-128,0,-5,0,-3,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-2,-2,-3,-2,-2,-2,-2,3,-128,-3,1,1,-2,-128,-2,-2,-3,-1,0,-128,4,-6,-1,-3,-2,-128,-128,-128,-128,-128,-128, +-5,-5,-7,-6,-7,-1,-7,-3,-5,-128,-4,-2,-4,-4,-128,-5,-5,1,-2,-5,-128,-6,12,-4,-1,-6,-128,-128,-128,-128,-128,-128, +0,-1,-3,-1,-1,-3,-1,-1,-1,-128,-1,-2,-1,0,-128,-1,-1,-1,0,0,-128,-1,-4,-1,-3,-1,-128,-128,-128,-128,-128,-128, +-3,-3,0,-4,-4,5,-5,0,-2,-128,-4,-2,-3,-2,-128,-5,-4,-4,-3,-3,-128,-3,-1,-3,8,-4,-128,-128,-128,-128,-128,-128, +0,2,-5,2,3,-5,-1,1,-2,-128,0,-3,-2,1,-128,-1,3,0,-1,-1,-128,-2,-6,-1,-4,3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/PAM200 b/lib/block-aligner/matrices/PAM200 new file mode 100644 index 000000000..b1ae07a1d --- /dev/null +++ b/lib/block-aligner/matrices/PAM200 @@ -0,0 +1,28 @@ +[3,0,-3,0,0,-4,1,-2,-1,-128,-2,-2,-2,0,-128,1,-1,-2,1,1,-128,0,-7,0,-4,0,-128,-128,-128,-128,-128,-128, 
+0,3,-5,4,3,-6,0,1,-3,-128,0,-4,-3,3,-128,-1,1,-1,1,0,-128,-3,-6,-1,-4,2,-128,-128,-128,-128,-128,-128, +-3,-5,12,-6,-7,-6,-4,-4,-3,-128,-7,-7,-6,-5,-128,-4,-7,-4,0,-3,-128,-2,-9,-4,0,-7,-128,-128,-128,-128,-128,-128, +0,4,-6,5,4,-7,0,0,-3,-128,0,-5,-4,3,-128,-2,2,-2,0,0,-128,-3,-8,-1,-5,3,-128,-128,-128,-128,-128,-128, +0,3,-7,4,5,-7,0,0,-3,-128,0,-4,-3,2,-128,-1,3,-2,0,-1,-128,-2,-9,-1,-5,4,-128,-128,-128,-128,-128,-128, +-4,-6,-6,-7,-7,10,-6,-2,1,-128,-7,2,0,-4,-128,-6,-6,-5,-4,-4,-128,-2,0,-3,7,-6,-128,-128,-128,-128,-128,-128, +1,0,-4,0,0,-6,6,-3,-3,-128,-2,-5,-4,0,-128,-1,-2,-4,1,0,-128,-2,-8,-1,-6,-1,-128,-128,-128,-128,-128,-128, +-2,1,-4,0,0,-2,-3,8,-3,-128,-1,-3,-3,2,-128,-1,3,2,-1,-2,-128,-3,-3,-1,0,2,-128,-128,-128,-128,-128,-128, +-1,-3,-3,-3,-3,1,-3,-3,6,-128,-2,2,2,-2,-128,-3,-3,-2,-2,0,-128,4,-6,-1,-2,-3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-2,0,-7,0,0,-7,-2,-1,-2,-128,6,-4,1,1,-128,-2,1,4,0,0,-128,-3,-4,-1,-5,0,-128,-128,-128,-128,-128,-128, +-2,-4,-7,-5,-4,2,-5,-3,2,-128,-4,7,4,-4,-128,-3,-2,-4,-4,-2,-128,2,-2,-2,-2,-3,-128,-128,-128,-128,-128,-128, +-2,-3,-6,-4,-3,0,-4,-3,2,-128,1,4,8,-2,-128,-3,-1,-1,-2,-1,-128,2,-5,-1,-3,-2,-128,-128,-128,-128,-128,-128, +0,3,-5,3,2,-4,0,2,-2,-128,1,-4,-2,3,-128,-1,1,0,1,0,-128,-2,-5,0,-2,1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +1,-1,-4,-2,-1,-6,-1,-1,-3,-128,-2,-3,-3,-1,-128,7,0,0,1,0,-128,-2,-7,-1,-6,-1,-128,-128,-128,-128,-128,-128, +-1,1,-7,2,3,-6,-2,3,-3,-128,1,-2,-1,1,-128,0,5,1,-1,-1,-128,-3,-6,-1,-5,4,-128,-128,-128,-128,-128,-128, +-2,-1,-4,-2,-2,-5,-4,2,-2,-128,4,-4,-1,0,-128,0,1,7,-1,-1,-128,-3,2,-1,-5,0,-128,-128,-128,-128,-128,-128, 
+1,1,0,0,0,-4,1,-1,-2,-128,0,-4,-2,1,-128,1,-1,-1,2,2,-128,-1,-3,0,-3,-1,-128,-128,-128,-128,-128,-128, +1,0,-3,0,-1,-4,0,-2,0,-128,0,-2,-1,0,-128,0,-1,-1,2,4,-128,0,-6,0,-3,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-3,-2,-3,-2,-2,-2,-3,4,-128,-3,2,2,-2,-128,-2,-3,-3,-1,0,-128,5,-8,-1,-3,-2,-128,-128,-128,-128,-128,-128, +-7,-6,-9,-8,-9,0,-8,-3,-6,-128,-4,-2,-5,-5,-128,-7,-6,2,-3,-6,-128,-8,18,-5,-1,-7,-128,-128,-128,-128,-128,-128, +0,-1,-4,-1,-1,-3,-1,-1,-1,-128,-1,-2,-1,0,-128,-1,-1,-1,0,0,-128,-1,-5,-1,-3,-1,-128,-128,-128,-128,-128,-128, +-4,-4,0,-5,-5,7,-6,0,-2,-128,-5,-2,-3,-2,-128,-6,-5,-5,-3,-3,-128,-3,-1,-3,11,-5,-128,-128,-128,-128,-128,-128, +0,2,-7,3,4,-6,-1,2,-3,-128,0,-3,-2,1,-128,-1,4,0,-1,-1,-128,-2,-7,-1,-5,4,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/matrices/PAM250 b/lib/block-aligner/matrices/PAM250 new file mode 100644 index 000000000..961b5d13c --- /dev/null +++ b/lib/block-aligner/matrices/PAM250 @@ -0,0 +1,28 @@ +[2,0,-2,0,0,-3,1,-1,-1,-128,-1,-2,-1,0,-128,1,0,-2,1,1,-128,0,-6,0,-3,0,-128,-128,-128,-128,-128,-128, +0,3,-4,3,3,-4,0,1,-2,-128,1,-3,-2,2,-128,-1,1,-1,0,0,-128,-2,-5,-1,-3,2,-128,-128,-128,-128,-128,-128, +-2,-4,12,-5,-5,-4,-3,-3,-2,-128,-5,-6,-5,-4,-128,-3,-5,-4,0,-2,-128,-2,-8,-3,0,-5,-128,-128,-128,-128,-128,-128, +0,3,-5,4,3,-6,1,1,-2,-128,0,-4,-3,2,-128,-1,2,-1,0,0,-128,-2,-7,-1,-4,3,-128,-128,-128,-128,-128,-128, +0,3,-5,3,4,-5,0,1,-2,-128,0,-3,-2,1,-128,-1,2,-1,0,0,-128,-2,-7,-1,-4,3,-128,-128,-128,-128,-128,-128, +-3,-4,-4,-6,-5,9,-5,-2,1,-128,-5,2,0,-3,-128,-5,-5,-4,-3,-3,-128,-1,0,-2,7,-5,-128,-128,-128,-128,-128,-128, 
+1,0,-3,1,0,-5,5,-2,-3,-128,-2,-4,-3,0,-128,0,-1,-3,1,0,-128,-1,-7,-1,-5,0,-128,-128,-128,-128,-128,-128, +-1,1,-3,1,1,-2,-2,6,-2,-128,0,-2,-2,2,-128,0,3,2,-1,-1,-128,-2,-3,-1,0,2,-128,-128,-128,-128,-128,-128, +-1,-2,-2,-2,-2,1,-3,-2,5,-128,-2,2,2,-2,-128,-2,-2,-2,-1,0,-128,4,-5,-1,-1,-2,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +-1,1,-5,0,0,-5,-2,0,-2,-128,5,-3,0,1,-128,-1,1,3,0,0,-128,-2,-3,-1,-4,0,-128,-128,-128,-128,-128,-128, +-2,-3,-6,-4,-3,2,-4,-2,2,-128,-3,6,4,-3,-128,-3,-2,-3,-3,-2,-128,2,-2,-1,-1,-3,-128,-128,-128,-128,-128,-128, +-1,-2,-5,-3,-2,0,-3,-2,2,-128,0,4,6,-2,-128,-2,-1,0,-2,-1,-128,2,-4,-1,-2,-2,-128,-128,-128,-128,-128,-128, +0,2,-4,2,1,-3,0,2,-2,-128,1,-3,-2,2,-128,0,1,0,1,0,-128,-2,-4,0,-2,1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +1,-1,-3,-1,-1,-5,0,0,-2,-128,-1,-3,-2,0,-128,6,0,0,1,0,-128,-1,-6,-1,-5,0,-128,-128,-128,-128,-128,-128, +0,1,-5,2,2,-5,-1,3,-2,-128,1,-2,-1,1,-128,0,4,1,-1,-1,-128,-2,-5,-1,-4,3,-128,-128,-128,-128,-128,-128, +-2,-1,-4,-1,-1,-4,-3,2,-2,-128,3,-3,0,0,-128,0,1,6,0,-1,-128,-2,2,-1,-4,0,-128,-128,-128,-128,-128,-128, +1,0,0,0,0,-3,1,-1,-1,-128,0,-3,-2,1,-128,1,-1,0,2,1,-128,-1,-2,0,-3,0,-128,-128,-128,-128,-128,-128, +1,0,-2,0,0,-3,0,-1,0,-128,0,-2,-1,0,-128,0,-1,-1,1,3,-128,0,-5,0,-3,-1,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +0,-2,-2,-2,-2,-1,-1,-2,4,-128,-2,2,2,-2,-128,-1,-2,-2,-1,0,-128,4,-6,-1,-2,-2,-128,-128,-128,-128,-128,-128, +-6,-5,-8,-7,-7,0,-7,-3,-5,-128,-3,-2,-4,-4,-128,-6,-5,2,-2,-5,-128,-6,17,-4,0,-6,-128,-128,-128,-128,-128,-128, 
+0,-1,-3,-1,-1,-2,-1,-1,-1,-128,-1,-1,-1,0,-128,-1,-1,-1,0,0,-128,-1,-4,-1,-2,-1,-128,-128,-128,-128,-128,-128, +-3,-3,0,-4,-4,7,-5,0,-1,-128,-4,-1,-2,-2,-128,-5,-4,-4,-3,-3,-128,-2,0,-2,10,-4,-128,-128,-128,-128,-128,-128, +0,2,-5,3,3,-5,0,2,-2,-128,0,-3,-2,1,-128,0,3,0,0,-1,-128,-2,-6,-1,-4,3,-128,-128,-128,-128,-128,-128, +-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128,-128, +] \ No newline at end of file diff --git a/lib/block-aligner/rust-toolchain b/lib/block-aligner/rust-toolchain new file mode 100644 index 000000000..bf867e0ae --- /dev/null +++ b/lib/block-aligner/rust-toolchain @@ -0,0 +1 @@ +nightly diff --git a/lib/block-aligner/scripts/accuracy_avx2.sh b/lib/block-aligner/scripts/accuracy_avx2.sh new file mode 100755 index 000000000..2b89a1b3b --- /dev/null +++ b/lib/block-aligner/scripts/accuracy_avx2.sh @@ -0,0 +1 @@ +cargo run --example accuracy --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/accuracy_wasm.sh b/lib/block-aligner/scripts/accuracy_wasm.sh new file mode 100755 index 000000000..d176e4bac --- /dev/null +++ b/lib/block-aligner/scripts/accuracy_wasm.sh @@ -0,0 +1 @@ +CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime --wasm-features simd --" cargo run --target=wasm32-wasi --example accuracy --release --features simd_wasm -- "$@" diff --git a/lib/block-aligner/scripts/bench_avx2.sh b/lib/block-aligner/scripts/bench_avx2.sh new file mode 100755 index 000000000..1abd5a2b2 --- /dev/null +++ b/lib/block-aligner/scripts/bench_avx2.sh @@ -0,0 +1 @@ +cargo bench --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/bench_wasm.sh b/lib/block-aligner/scripts/bench_wasm.sh new file mode 100755 index 000000000..28bdb9371 --- /dev/null +++ b/lib/block-aligner/scripts/bench_wasm.sh @@ -0,0 +1,19 @@ +#set -e + +#cargo clean + +# CARGO_TARGET_WASM32_WASI_RUNNER="wasmer run --native --llvm --enable-simd --" +# 
CARGO_TARGET_WASM32_WASI_RUNNER="wavm run --enable simd" + +CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime --wasm-features simd --" cargo bench --target=wasm32-wasi --features simd_wasm -- --nocapture "$@" +#RUSTFLAGS="-C target-feature=+simd128" cargo build --release --benches --target=wasm32-wasi + +# binaryen wasm-opt pass +#for f in target/wasm32-wasi/*/deps/*.wasm; do +# wasm-opt --enable-simd --enable-sign-ext -O4 --inlining-optimizing -ifwl -ocimfs 300 -fimfs 300 -aimfs 20 -o $f.opt $f +# echo $f.opt +#done + +#for f in target/wasm32-wasi/*/deps/*.wasm.opt; do +# $CARGO_TARGET_WASM32_WASI_RUNNER $f --bench "$@" +#done diff --git a/lib/block-aligner/scripts/build_ir_asm.sh b/lib/block-aligner/scripts/build_ir_asm.sh new file mode 100755 index 000000000..ae0bc28cc --- /dev/null +++ b/lib/block-aligner/scripts/build_ir_asm.sh @@ -0,0 +1,29 @@ +set -e + +LLVM_MCA=/usr/local/opt/llvm/bin/llvm-mca + +#RUSTFLAGS="-g -Z asm-comments --emit llvm-ir,asm -C llvm-args=-x86-asm-syntax=intel -C target-cpu=native" cargo build --release --example profile --features mca +RUSTFLAGS="-Z asm-comments --emit llvm-ir,asm -C llvm-args=-x86-asm-syntax=intel" cargo build --release --example profile --features mca,simd_avx2 + +# demangle symbols +#for f in target/release/examples/*.{s,ll}; do +# rustfilt -i $f > $f.filt +# echo "$f.filt" +#done + +for f in target/release/examples/*.{s,ll}; do + echo "$f" +done + +# run llvm-mca +for f in target/release/examples/*.s; do + $LLVM_MCA -output-asm-variant=1 -all-views $f > $f.mca + echo "$f.mca" +done + +# also create source/asm interleaved version with objdump +#shopt -s extglob +#for f in target/*/deps/!(*.*); do +# objdump -drwSl -x86-asm-syntax=intel $f | rustfilt -o $f.objdump +# echo "$f.objdump" +#done diff --git a/lib/block-aligner/scripts/build_wat.sh b/lib/block-aligner/scripts/build_wat.sh new file mode 100755 index 000000000..84efd164a --- /dev/null +++ b/lib/block-aligner/scripts/build_wat.sh @@ -0,0 +1,25 @@ +set -e + +cargo 
clean +cargo build --release --benches --target wasm32-wasi --features simd_wasm + +# binaryen wasm-opt pass +for f in target/wasm32-wasi/*/deps/*.wasm; do + # extreme inlining + wasm-opt --enable-simd --enable-sign-ext -O4 --inlining-optimizing -ifwl -ocimfs 300 -fimfs 300 -aimfs 20 -o $f.opt $f + echo $f.opt +done + +# demangle symbols +cargo install rustfilt +for f in target/wasm32-wasi/*/deps/*.wasm.opt; do + wasm2wat --enable-simd $f | rustfilt -o $f.wat + echo "$f.wat" +done + +# disassemble wasmtime generated object files with objdump +for f in target/wasm32-wasi/*/deps/*.wasm.opt; do + wasmtime wasm2obj --wasm-features simd $f $f.o + objdump -drwSl -x86-asm-syntax=intel $f.o | rustfilt -o $f.objdump + echo "$f.objdump" +done diff --git a/lib/block-aligner/scripts/compare_avx2.sh b/lib/block-aligner/scripts/compare_avx2.sh new file mode 100755 index 000000000..a2ccd0448 --- /dev/null +++ b/lib/block-aligner/scripts/compare_avx2.sh @@ -0,0 +1 @@ +cargo run --example compare --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/create_matrix_aa.py b/lib/block-aligner/scripts/create_matrix_aa.py new file mode 100644 index 000000000..5345f034f --- /dev/null +++ b/lib/block-aligner/scripts/create_matrix_aa.py @@ -0,0 +1,57 @@ +import os + +import pandas as pd + +base_remote_path = "ftp://ftp.ncbi.nlm.nih.gov/blast/matrices/" +base_local_path = "../matrices/" +names = [ + "BLOSUM45", + "BLOSUM50", + "BLOSUM62", + "BLOSUM80", + "BLOSUM90", + "PAM100", + "PAM120", + "PAM160", + "PAM200", + "PAM250" +] + +os.makedirs(base_local_path, exist_ok = True) + +for name in names: + path = base_remote_path + name + df = pd.read_csv(path, delim_whitespace = True, comment = "#") + df = df.drop(index = "*") + df = df.drop(columns = "*") + + min_val = -128 + + # note: '[' = 'A' + 26 + for i in range(27): + c = chr(ord("A") + i) + if not c in df.index: + df.loc[c, :] = min_val + + for i in range(32): + c = chr(ord("A") + i) + if not c in df.columns: + 
df.loc[:, c] = min_val + + # alphabetically sort the amino acids + df = df.sort_index(axis = 0) + df = df.sort_index(axis = 1) + + for col in df.columns: + df[col] = df[col].astype(int) + + res = "[" + df.to_csv(index = False, header = False) + "]" + res = res.replace("\n", ",\n") + res_path = base_local_path + name + + print(name) + print(df) + print() + + with open(res_path, "w") as f: + f.write(res) diff --git a/lib/block-aligner/scripts/create_matrix_nuc.py b/lib/block-aligner/scripts/create_matrix_nuc.py new file mode 100644 index 000000000..fc42d3dbc --- /dev/null +++ b/lib/block-aligner/scripts/create_matrix_nuc.py @@ -0,0 +1,51 @@ +import os +import sys + +import numpy as np + +matrices = [ + { + "name": "NW1", + "scores": { + ("A", "A"): 1, + ("A", "C"): -1, + ("A", "G"): -1, + ("A", "N"): -1, + ("A", "T"): -1, + ("C", "C"): 1, + ("C", "G"): -1, + ("C", "N"): -1, + ("C", "T"): -1, + ("G", "G"): 1, + ("G", "N"): -1, + ("G", "T"): -1, + ("N", "N"): 1, + ("N", "T"): -1, + ("T", "T"): 1 + } + }, +] + +base_local_path = "../matrices/" + +os.makedirs(base_local_path, exist_ok = True) + +for matrix in matrices: + res = np.full((8, 16), -128) + + for key, val in matrix["scores"].items(): + res[ord(key[0]) & 0b111][ord(key[1]) & 0b1111] = val + + if key[0] != key[1]: + res[ord(key[1]) & 0b111][ord(key[0]) & 0b1111] = val + + res = str(res.flatten().tolist()).replace(" ", "") + + res_path = base_local_path + matrix["name"] + + print(matrix["name"]) + print(res) + print() + + with open(res_path, "w") as f: + f.write(res) diff --git a/lib/block-aligner/scripts/debug_avx2.sh b/lib/block-aligner/scripts/debug_avx2.sh new file mode 100755 index 000000000..6266db399 --- /dev/null +++ b/lib/block-aligner/scripts/debug_avx2.sh @@ -0,0 +1 @@ +cargo run --example debug --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/doc_avx2.sh b/lib/block-aligner/scripts/doc_avx2.sh new file mode 100755 index 000000000..40a8cb891 --- /dev/null +++ 
b/lib/block-aligner/scripts/doc_avx2.sh @@ -0,0 +1 @@ +cargo doc --features simd_avx2 --no-deps --open diff --git a/lib/block-aligner/scripts/doc_wasm.sh b/lib/block-aligner/scripts/doc_wasm.sh new file mode 100755 index 000000000..535b02a4b --- /dev/null +++ b/lib/block-aligner/scripts/doc_wasm.sh @@ -0,0 +1 @@ +cargo doc --target=wasm32-wasi --features simd_wasm --no-deps --open diff --git a/lib/block-aligner/scripts/nanopore_accuracy_avx2.sh b/lib/block-aligner/scripts/nanopore_accuracy_avx2.sh new file mode 100755 index 000000000..bb3b6554b --- /dev/null +++ b/lib/block-aligner/scripts/nanopore_accuracy_avx2.sh @@ -0,0 +1 @@ +cargo run --example nanopore_accuracy --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/nanopore_bench_avx2.sh b/lib/block-aligner/scripts/nanopore_bench_avx2.sh new file mode 100755 index 000000000..9050f4ac0 --- /dev/null +++ b/lib/block-aligner/scripts/nanopore_bench_avx2.sh @@ -0,0 +1 @@ +cargo run --example nanopore_bench --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/nanopore_bench_global_avx2.sh b/lib/block-aligner/scripts/nanopore_bench_global_avx2.sh new file mode 100755 index 000000000..8806dd031 --- /dev/null +++ b/lib/block-aligner/scripts/nanopore_bench_global_avx2.sh @@ -0,0 +1 @@ +CC=/usr/local/opt/llvm/bin/clang cargo run --example nanopore_bench_global --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/pssm_accuracy_avx2.sh b/lib/block-aligner/scripts/pssm_accuracy_avx2.sh new file mode 100755 index 000000000..0e3a92318 --- /dev/null +++ b/lib/block-aligner/scripts/pssm_accuracy_avx2.sh @@ -0,0 +1 @@ +cargo run --example pssm_accuracy --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/pssm_bench_avx2.sh b/lib/block-aligner/scripts/pssm_bench_avx2.sh new file mode 100755 index 000000000..00c9be5fc --- /dev/null +++ b/lib/block-aligner/scripts/pssm_bench_avx2.sh @@ -0,0 +1 @@ +cargo run --example pssm_bench 
--release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/scop_seq_profile_pairs.py b/lib/block-aligner/scripts/scop_seq_profile_pairs.py new file mode 100644 index 000000000..0cf3a82bd --- /dev/null +++ b/lib/block-aligner/scripts/scop_seq_profile_pairs.py @@ -0,0 +1,72 @@ +import random + +lookup_path = "../data/scop/scop_lookup.fix.tsv" +pssm_path = "../data/scop/scop_mmseqs_pssm.pssm" +seq_path = "../data/scop/scop.fasta" +res_path = "../data/scop/pairs.pssm" + +seq_to_scop = {} +families = {} + +def process_scop_id(scop_id): + #return scop_id[:scop_id.rindex(".")] + return scop_id + +with open(lookup_path) as f: + for line in f: + seq_id, scop_id = line.strip().split() + scop_id = process_scop_id(scop_id) + seq_to_scop[seq_id] = scop_id + families[scop_id] = ([], []) + +seq_lines = {} +seq_seqs = {} + +with open(pssm_path) as f: + curr_seq = None + for line in f: + if line.startswith("#"): + curr_seq = line[1:].strip() + seq_lines[curr_seq] = [] + else: + seq_lines[curr_seq].append(line.strip()) + +with open(seq_path) as f: + curr_seq = None + for line in f: + if line.startswith(">"): + curr_seq = line[1:].strip() + seq_seqs[curr_seq] = "" + else: + seq_seqs[curr_seq] += line.strip() + +for seq_id, pssm in seq_lines.items(): + scop_id = seq_to_scop[seq_id] + families[scop_id][0].append(pssm) + +for seq_id, seq in seq_seqs.items(): + if not seq_id in seq_to_scop: + continue + scop_id = seq_to_scop[seq_id] + families[scop_id][1].append(seq) + +seq_pssm_pairs = [] + +def consensus_seq(lines): + return "".join([s.split()[1] for s in lines[1:]]) + +for _, (pssm_family, seq_family) in families.items(): + random.shuffle(pssm_family) + random.shuffle(seq_family) + for i in range(min(len(pssm_family), len(seq_family))): + pssm = pssm_family[i] + seq = seq_family[i] + seq_pssm_pairs.append((seq, consensus_seq(pssm), pssm)) + +print("Number of seq-pssm pairs:", len(seq_pssm_pairs)) + +with open(res_path, "w") as f: + for seq, cns, pssm in 
import numpy as np
import matplotlib.pyplot as plt

#I_min = 512 + 256
I_min = 512
I_max = 1024
#I_max = 1024 + 256
L = 16
K_min = I_max
K_max = 1 << 20

# Candidate values of I, and how often progress is reported.
candidate_Is = range(I_min, I_max + 1, L)
report_every = K_max // 8

overshoots = []
I_list = []

for K in range(K_min, K_max + 1, L):
    # (-K) % I equals ceil(K / I) * I - K, i.e. how far K must be rounded up
    # to reach a multiple of I. Pairing it with -I makes min() pick the
    # largest I among equal overshoots, matching a `<=` scan over ascending I.
    min_overshoot, neg_best_I = min(((-K) % I, -I) for I in candidate_Is)

    overshoots.append(min_overshoot)
    I_list.append(-neg_best_I)

    if K % report_every == 0:
        print("K: %d" % K)

overshoots = np.array(overshoots)
I_list = np.array(I_list)
print("Average overshoot: %f" % np.mean(overshoots))
print("Max overshoot: %d" % np.max(overshoots))
print("Average I: %f" % np.mean(I_list))

plt.subplot(121)
plt.hist(overshoots, bins = "auto")
plt.title("Overshoots")

plt.subplot(122)
plt.hist(I_list, bins = "auto")
plt.title("I")

plt.show()
a/lib/block-aligner/scripts/uc_accuracy_avx2.sh b/lib/block-aligner/scripts/uc_accuracy_avx2.sh new file mode 100755 index 000000000..483996d36 --- /dev/null +++ b/lib/block-aligner/scripts/uc_accuracy_avx2.sh @@ -0,0 +1 @@ +cargo run --example uc_accuracy --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/uc_bench_avx2.sh b/lib/block-aligner/scripts/uc_bench_avx2.sh new file mode 100755 index 000000000..fa05e3443 --- /dev/null +++ b/lib/block-aligner/scripts/uc_bench_avx2.sh @@ -0,0 +1 @@ +cargo run --example uc_bench --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/scripts/x_drop_accuracy_avx2.sh b/lib/block-aligner/scripts/x_drop_accuracy_avx2.sh new file mode 100755 index 000000000..06954b26b --- /dev/null +++ b/lib/block-aligner/scripts/x_drop_accuracy_avx2.sh @@ -0,0 +1 @@ +cargo run --example x_drop_accuracy --release --features simd_avx2 -- "$@" diff --git a/lib/block-aligner/src/avx2.rs b/lib/block-aligner/src/avx2.rs new file mode 100644 index 000000000..10e9d5288 --- /dev/null +++ b/lib/block-aligner/src/avx2.rs @@ -0,0 +1,490 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +pub type Simd = __m256i; // use for storing DP scores +pub type HalfSimd = __m128i; // used for storing bytes (sequence or scoring matrix) +pub type LutSimd = __m128i; // used for storing a row in a scoring matrix (always 128 bits) +pub type TraceType = i32; +/// Number of 16-bit lanes in a SIMD vector. +pub const L: usize = 16; +pub const L_BYTES: usize = L * 2; +pub const HALFSIMD_MUL: usize = 1; +// using min = 0 is faster, but restricts range of scores (and restricts the max block size) +pub const ZERO: i16 = 1 << 14; +pub const MIN: i16 = 0; + +// Non-temporal store to avoid cluttering cache with traces +// Actually, non-temporal stores are slower in benchmarks! 
/// Writes one trace word through `ptr` using an ordinary store.
///
/// The trailing comment records the non-temporal alternative that was tried.
///
/// # Safety
/// `ptr` is dereferenced unchecked, so it must be valid and aligned for a
/// write of `TraceType`.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn store_trace(ptr: *mut TraceType, trace: TraceType) { *ptr = trace; } // _mm_stream_si32(ptr, trace);

/// Lane-wise saturating addition of signed 16-bit lanes (`_mm256_adds_epi16`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_adds_i16(a: Simd, b: Simd) -> Simd { _mm256_adds_epi16(a, b) }

/// Lane-wise saturating subtraction `a - b` of signed 16-bit lanes (`_mm256_subs_epi16`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_subs_i16(a: Simd, b: Simd) -> Simd { _mm256_subs_epi16(a, b) }

/// Lane-wise maximum of signed 16-bit lanes (`_mm256_max_epi16`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_max_i16(a: Simd, b: Simd) -> Simd { _mm256_max_epi16(a, b) }

/// Lane-wise equality compare of 16-bit lanes; equal lanes become all ones,
/// others zero (`_mm256_cmpeq_epi16`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_cmpeq_i16(a: Simd, b: Simd) -> Simd { _mm256_cmpeq_epi16(a, b) }

/// Lane-wise signed `a > b` compare of 16-bit lanes; true lanes become all
/// ones, others zero (`_mm256_cmpgt_epi16`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_cmpgt_i16(a: Simd, b: Simd) -> Simd { _mm256_cmpgt_epi16(a, b) }

/// Byte-wise select: takes the byte from `b` where the corresponding `mask`
/// byte has its top bit set, otherwise from `a` (`_mm256_blendv_epi8`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_blend_i8(a: Simd, b: Simd, mask: Simd) -> Simd { _mm256_blendv_epi8(a, b, mask) }

/// Aligned 256-bit load (`_mm256_load_si256`).
///
/// # Safety
/// `ptr` must be valid for reads and 32-byte aligned.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_load(ptr: *const Simd) -> Simd { _mm256_load_si256(ptr) }

/// Unaligned 256-bit load (`_mm256_loadu_si256`).
///
/// # Safety
/// `ptr` must be valid for a 32-byte read; no alignment is required.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_loadu(ptr: *const Simd) -> Simd { _mm256_loadu_si256(ptr) }

/// Aligned 256-bit store (`_mm256_store_si256`).
///
/// # Safety
/// `ptr` must be valid for writes and 32-byte aligned.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_store(ptr: *mut Simd, a: Simd) { _mm256_store_si256(ptr, a) }

/// Broadcasts `v` into every 16-bit lane (`_mm256_set1_epi16`).
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_set1_i16(v: i16) -> Simd { _mm256_set1_epi16(v) }

/// Extracts 16-bit lane `$num` of `$a` as an `i16`.
///
/// `$num` is forwarded to `_mm256_extract_epi16`, so it has to be a
/// compile-time constant; it is range-checked against `L` in debug builds only.
/// The expansion refers to `L` unqualified, so `L` must be in scope at the call site.
#[macro_export]
#[doc(hidden)]
macro_rules! simd_extract_i16 {
    ($a:expr, $num:expr) => {
        {
            debug_assert!($num < L);
            // Bring the intrinsics into scope at the expansion site.
            #[cfg(target_arch = "x86")]
            use std::arch::x86::*;
            #[cfg(target_arch = "x86_64")]
            use std::arch::x86_64::*;
            _mm256_extract_epi16($a, $num as i32) as i16
        }
    };
}
simd_insert_i16 { + ($a:expr, $v:expr, $num:expr) => { + { + debug_assert!($num < L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + _mm256_insert_epi16($a, $v, $num as i32) + } + }; +} + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn simd_movemask_i8(a: Simd) -> u32 { _mm256_movemask_epi8(a) as u32 } + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_sl_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!(2 * $num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + if $num == L / 2 { + _mm256_permute2x128_si256($a, $b, 0x03) + } else { + _mm256_alignr_epi8($a, _mm256_permute2x128_si256($a, $b, 0x03), (L - (2 * $num)) as i32) + } + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_sr_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!(2 * $num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + if $num == L / 2 { + _mm256_permute2x128_si256($a, $b, 0x03) + } else { + _mm256_alignr_epi8(_mm256_permute2x128_si256($a, $b, 0x03), $b, (2 * $num) as i32) + } + } + }; +} + +// hardcoded to STEP = 8 +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn simd_step(a: Simd, b: Simd) -> Simd { + _mm256_permute2x128_si256(a, b, 0x03) +} + +#[target_feature(enable = "avx2")] +#[inline] +unsafe fn simd_sl_i128(a: Simd, b: Simd) -> Simd { + _mm256_permute2x128_si256(a, b, 0x03) +} + +// shift in zeros +macro_rules! 
// broadcast last 16-bit element to the whole vector
/// Returns a vector whose every 16-bit lane equals the highest lane (lane 15) of `v`.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_broadcasthi_i16(v: Simd) -> Simd {
    // Replicate the top word across the upper four words of each 128-bit half...
    let v = _mm256_shufflehi_epi16(v, 0b11111111);
    // ...then replicate the topmost 64-bit chunk across the whole register.
    _mm256_permute4x64_epi64(v, 0b11111111)
}

/// Extracts lane `i` of `v` where `i` is only known at run time.
///
/// Spills the vector to an aligned stack array and reads the element back,
/// hence "slow" compared to `simd_extract_i16!` with a constant index.
///
/// # Safety
/// `i` is range-checked in debug builds only; with `i >= L` in a release
/// build the read is out of bounds.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_slow_extract_i16(v: Simd, i: usize) -> i16 {
    debug_assert!(i < L);

    // 32-byte alignment is required by the aligned store in `simd_store`.
    #[repr(align(32))]
    struct A([i16; L]);

    let mut a = A([0i16; L]);
    simd_store(a.0.as_mut_ptr() as *mut Simd, v);
    *a.0.as_ptr().add(i)
}

/// Horizontal maximum over all 16-bit lanes of `v`.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn simd_hmax_i16(v: Simd) -> i16 {
    // Log-step reduction inside each 128-bit half: after shifts of 1, 2 and 4
    // lanes, lane 0 of each half holds that half's maximum.
    let mut v2 = _mm256_max_epi16(v, _mm256_srli_si256(v, 2));
    v2 = _mm256_max_epi16(v2, _mm256_srli_si256(v2, 4));
    v2 = _mm256_max_epi16(v2, _mm256_srli_si256(v2, 8));
    // Swap the two halves and combine, so lane 0 covers the whole vector.
    v2 = _mm256_max_epi16(v2, simd_sl_i128(v2, v2));
    simd_extract_i16!(v2, 0)
}
/// Precomputed per-lane gap costs consumed by `simd_prefix_scan_i16`.
pub type PrefixScanConsts = Simd;

/// Builds the cumulative gap-cost vectors used by the prefix scan.
///
/// Returns `(correct1, shift4)`:
/// * `shift4` is a running (saturating) sum of `gap` computed independently
///   inside each 128-bit half.
/// * `correct1` is `shift4` with the low half's final running sum added to
///   every lane of the high half, i.e. a running sum across the whole vector.
///
/// NOTE(review): when every lane of `gap` holds the same value `g`, this works
/// out to `(k + 1) * g` per lane, with `k` counted per half for `shift4` and
/// across the vector for `correct1` — confirm against callers.
#[target_feature(enable = "avx2")]
#[inline]
pub unsafe fn get_prefix_scan_consts(gap: Simd) -> (Simd, PrefixScanConsts) {
    // Log-step running sum; `simd_sllz_i16!` shifts zeros in per 128-bit half.
    let mut shift1 = simd_sllz_i16!(gap, 1);
    shift1 = _mm256_adds_epi16(shift1, gap);
    let mut shift2 = simd_sllz_i16!(shift1, 2);
    shift2 = _mm256_adds_epi16(shift2, shift1);
    let mut shift4 = simd_sllz_i16!(shift2, 4);
    shift4 = _mm256_adds_epi16(shift4, shift2);

    // Take the last running sum of each half, keep only the low half's copy
    // (moved into the high half, zeros in the low half), and add it on.
    let mut correct1 = _mm256_srli_si256(_mm256_shufflehi_epi16(shift4, 0b11111111), 8);
    correct1 = _mm256_permute4x64_epi64(correct1, 0b00000101);
    correct1 = _mm256_adds_epi16(correct1, shift4);

    (correct1, shift4)
}

/// Prefix "add gap, then max" scan over the 16 lanes of `R_max`.
///
/// Each output lane is the maximum of its own input and the earlier lanes'
/// inputs after `gap_cost` has been added once per lane of distance. The unit
/// test in this module shows the effect: with a gap of `-1`, a 15 in lane 11
/// becomes 14, 13 in the next lanes where that beats the lane's own value.
///
/// `gap_cost_lane` is the second value returned by `get_prefix_scan_consts`
/// for the same gap.
#[target_feature(enable = "avx2")]
#[inline]
#[allow(non_snake_case)]
pub unsafe fn simd_prefix_scan_i16(R_max: Simd, gap_cost: Simd, gap_cost_lane: PrefixScanConsts) -> Simd {
    // Optimized prefix add and max for every eight elements
    // Note: be very careful to avoid lane-crossing which has a large penalty.
    // Also, make sure to use as little registers as possible to avoid
    // memory loads (latencies really matter since this is critical path).
    // Keep the CPU busy with instructions!
    // Note: relies on min score = 0 for speed!
    // Distance-1 step: compare each lane with its left neighbour plus one gap.
    let mut shift1 = simd_sllz_i16!(R_max, 1);
    shift1 = _mm256_adds_epi16(shift1, gap_cost);
    shift1 = _mm256_max_epi16(R_max, shift1);
    // Distance-2 step: the gap cost is doubled with a per-lane left shift.
    let mut shift2 = simd_sllz_i16!(shift1, 2);
    shift2 = _mm256_adds_epi16(shift2, _mm256_slli_epi16(gap_cost, 1));
    shift2 = _mm256_max_epi16(shift1, shift2);
    // Distance-4 step: the gap cost is quadrupled.
    let mut shift4 = simd_sllz_i16!(shift2, 4);
    shift4 = _mm256_adds_epi16(shift4, _mm256_slli_epi16(gap_cost, 2));
    shift4 = _mm256_max_epi16(shift2, shift4);

    // Correct the upper lane using the last element of the lower lane
    // Make sure that the operation on the bottom lane is essentially nop
    let mut correct1 = _mm256_shufflehi_epi16(shift4, 0b11111111);
    correct1 = _mm256_permute4x64_epi64(correct1, 0b01010000);
    correct1 = _mm256_adds_epi16(correct1, gap_cost_lane);
    _mm256_max_epi16(shift4, correct1)
}
for blendv + let mask = _mm_slli_epi16(v, 3); + let c = _mm_blendv_epi8(a, b, mask); + _mm256_cvtepi8_epi16(c) +} + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_lookup1_i16(lut: LutSimd, v: HalfSimd) -> Simd { + _mm256_cvtepi8_epi16(_mm_shuffle_epi8(lut, v)) +} + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_lookup_bytes_i16(match_scores: HalfSimd, mismatch_scores: HalfSimd, a: HalfSimd, b: HalfSimd) -> Simd { + let mask = _mm_cmpeq_epi8(a, b); + let c = _mm_blendv_epi8(mismatch_scores, match_scores, mask); + _mm256_cvtepi8_epi16(c) +} + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_load(ptr: *const HalfSimd) -> HalfSimd { _mm_load_si128(ptr) } + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_loadu(ptr: *const HalfSimd) -> HalfSimd { _mm_loadu_si128(ptr) } + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn lutsimd_load(ptr: *const LutSimd) -> LutSimd { _mm_load_si128(ptr) } + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn lutsimd_loadu(ptr: *const LutSimd) -> LutSimd { _mm_loadu_si128(ptr) } + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_store(ptr: *mut HalfSimd, a: HalfSimd) { _mm_store_si128(ptr, a) } + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_sub_i8(a: HalfSimd, b: HalfSimd) -> HalfSimd { _mm_sub_epi8(a, b) } + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_set1_i8(v: i8) -> HalfSimd { _mm_set1_epi8(v) } + +#[target_feature(enable = "avx2")] +#[inline] +pub unsafe fn halfsimd_get_idx(i: usize) -> usize { i } + +#[macro_export] +#[doc(hidden)] +macro_rules! 
halfsimd_sr_i8 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + _mm_alignr_epi8($a, $b, $num as i32) + } + }; +} + +#[target_feature(enable = "avx2")] +#[allow(dead_code)] +pub unsafe fn simd_dbg_i16(v: Simd) { + #[repr(align(32))] + struct A([i16; L]); + + let mut a = A([0i16; L]); + simd_store(a.0.as_mut_ptr() as *mut Simd, v); + + for i in (0..a.0.len()).rev() { + print!("{:6} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "avx2")] +#[allow(dead_code)] +pub unsafe fn halfsimd_dbg_i8(v: HalfSimd) { + #[repr(align(16))] + struct A([i8; L]); + + let mut a = A([0i8; L]); + halfsimd_store(a.0.as_mut_ptr() as *mut HalfSimd, v); + + for i in (0..a.0.len()).rev() { + print!("{:3} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "avx2")] +#[allow(dead_code)] +pub unsafe fn simd_assert_vec_eq(a: Simd, b: [i16; L]) { + #[repr(align(32))] + struct A([i16; L]); + + let mut arr = A([0i16; L]); + simd_store(arr.0.as_mut_ptr() as *mut Simd, a); + assert_eq!(arr.0, b); +} + +#[target_feature(enable = "avx2")] +#[allow(dead_code)] +pub unsafe fn halfsimd_assert_vec_eq(a: HalfSimd, b: [i8; L]) { + #[repr(align(32))] + struct A([i8; L]); + + let mut arr = A([0i8; L]); + halfsimd_store(arr.0.as_mut_ptr() as *mut HalfSimd, a); + assert_eq!(arr.0, b); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_prefix_scan() { + #[target_feature(enable = "avx2")] + unsafe fn inner() { + #[repr(align(32))] + struct A([i16; L]); + + let vec = A([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 12, 13, 14, 11]); + let gap = simd_set1_i16(0); + let (_, consts) = get_prefix_scan_consts(gap); + let res = simd_prefix_scan_i16(simd_load(vec.0.as_ptr() as *const Simd), gap, consts); + simd_assert_vec_eq(res, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 15, 15, 15, 15]); + + let vec = A([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 12, 13, 14, 
/// A match/mismatch, insertion, or deletion operation.
///
/// When aligning `q` against `r`, this represents the edit operations to get from `r` to `q`.
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(u8)]
pub enum Operation {
    /// Placeholder variant.
    Sentinel = 0u8,
    /// Match or mismatch.
    ///
    /// This is a diagonal transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns.
    M = 1u8,
    /// Match.
    Eq = 2u8,
    /// Mismatch.
    X = 3u8,
    /// Insertion.
    ///
    /// When aligning sequences `q` against `r`, this is a gap in `r`.
    /// This is a row transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns.
    I = 4u8,
    /// Deletion.
    ///
    /// When aligning sequences `q` against `r`, this is a gap in `q`.
    /// This is a column transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns.
    D = 5u8
}

/// An operation and how many times that operation is repeated.
#[derive(Debug, Copy, Clone)]
#[repr(C)]
pub struct OpLen {
    pub op: Operation,
    pub len: usize
}

/// A CIGAR string that holds a list of operations.
///
/// Operations are stored run-length encoded and in reverse order (they are
/// appended during traceback); slot 0 always holds a sentinel. The accessors
/// present them in forward order.
pub struct Cigar {
    s: Vec<OpLen>,
    idx: usize
}

impl Cigar {
    /// Create a new CIGAR string with certain maximum lengths for the aligned sequences.
    pub fn new(query_len: usize, reference_len: usize) -> Self {
        let s = vec![OpLen { op: Operation::Sentinel, len: 0 }; query_len + reference_len + 5];
        // first element should always be a sentinel
        let idx = 1;
        Cigar { s, idx }
    }

    /// Clear this CIGAR string.
    ///
    /// # Panics
    /// Panics if `query_len + reference_len + 5` exceeds the capacity this
    /// CIGAR string was created with.
    #[allow(dead_code)]
    pub(crate) fn clear(&mut self, query_len: usize, reference_len: usize) {
        self.s[..query_len + reference_len + 5].fill(OpLen { op: Operation::Sentinel, len: 0 });
        self.idx = 1;
    }

    /// Branchlessly add a new operation (in reverse order).
    ///
    /// Other methods should allow the CIGAR string to be viewed
    /// in the correct (not reversed) order.
    ///
    /// The total number of added operations must not exceed the
    /// maximum length the CIGAR string was created with.
    ///
    /// # Safety
    /// The capacity is only checked in debug builds; exceeding it in a
    /// release build writes out of bounds.
    #[allow(dead_code)]
    pub(crate) unsafe fn add(&mut self, op: Operation) {
        debug_assert!(self.idx < self.s.len());
        // branchlessly append one operation
        // make sure that contiguous operations are run-length encoded
        let add = (op != (*self.s.as_ptr().add(self.idx - 1)).op) as usize;
        self.idx += add;
        (*self.s.as_mut_ptr().add(self.idx - 1)).op = op;
        (*self.s.as_mut_ptr().add(self.idx - 1)).len += 1;
    }

    /// Length of the CIGAR string, not including the first sentinel.
    pub fn len(&self) -> usize {
        self.idx - 1
    }

    /// Whether the CIGAR string holds no operations.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Get a certain operation in the CIGAR string.
    ///
    /// Index `0` is the first run in forward order. `i == len()` yields the
    /// sentinel.
    ///
    /// # Panics
    /// Panics if `i > len()`.
    pub fn get(&self, i: usize) -> OpLen {
        self.s[self.idx - 1 - i]
    }

    /// Generate two strings to visualize the edit operations.
    ///
    /// The first string is `q` with `-` for deletions, the second is `r`
    /// with `-` for insertions.
    ///
    /// # Panics
    /// Panics if the operations consume more bytes than `q` or `r` contain.
    pub fn format(&self, q: &[u8], r: &[u8]) -> (String, String) {
        let mut a = String::with_capacity(self.idx);
        let mut b = String::with_capacity(self.idx);
        let mut i = 0;
        let mut j = 0;

        for &op_len in self.s[1..self.idx].iter().rev() {
            match op_len.op {
                Operation::M | Operation::Eq | Operation::X => {
                    for _k in 0..op_len.len {
                        a.push(q[i] as char);
                        b.push(r[j] as char);
                        i += 1;
                        j += 1;
                    }
                },
                Operation::I => {
                    for _k in 0..op_len.len {
                        a.push(q[i] as char);
                        b.push('-');
                        i += 1;
                    }
                },
                Operation::D => {
                    for _k in 0..op_len.len {
                        a.push('-');
                        b.push(r[j] as char);
                        j += 1;
                    }
                },
                _ => continue
            }
        }

        (a, b)
    }

    /// Create a copy of the operations in the CIGAR string and
    /// ensure that the vector is provided in the correct order.
    ///
    /// Sentinels are removed.
    pub fn to_vec(&self) -> Vec<OpLen> {
        self.s[1..self.idx].iter().rev().copied().collect()
    }
}

impl fmt::Display for Cigar {
    /// Print a CIGAR string in standard CIGAR format.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for &op_len in self.s[1..self.idx].iter().rev() {
            let c = match op_len.op {
                Operation::M => 'M',
                Operation::Eq => '=',
                Operation::X => 'X',
                Operation::I => 'I',
                Operation::D => 'D',
                _ => continue
            };
            write!(f, "{}{}", op_len.len, c)?;
        }
        Ok(())
    }
}
+pub const L: usize = 8; +pub const L_BYTES: usize = L * 2; +pub const HALFSIMD_MUL: usize = 2; +// using min = 0 is faster, but restricts range of scores (and restricts the max block size) +pub const ZERO: i16 = 1 << 14; +pub const MIN: i16 = 0; + +#[inline] +pub fn to_i16x8(x: Simd) -> [i16; L] { + let b = x.to_le_bytes(); + let mut a = [0i16; L]; + let mut i = 0; + while i < L { + a[i] = i16::from_le_bytes([b[2 * i], b[2 * i + 1]]); + i += 1; + } + a +} + +#[inline] +pub fn from_i16x8(a: [i16; L]) -> Simd { + let mut b = [0u8; 16]; + let mut i = 0; + while i < L { + let bytes = a[i].to_le_bytes(); + b[2 * i] = bytes[0]; + b[2 * i + 1] = bytes[1]; + i += 1; + } + Simd::from_le_bytes(b) +} + +#[inline] +pub fn to_u8x16(x: u128) -> [u8; 16] { + x.to_le_bytes() +} + +#[inline] +pub fn from_u8x16(a: [u8; 16]) -> u128 { + u128::from_le_bytes(a) +} + +#[inline] +pub fn bitselect(a: u128, b: u128, mask: u128) -> u128 { + (a & !mask) | (b & mask) +} + +#[inline] +pub fn i16_shl_wrap(v: i16, n: u32) -> i16 { + (((v as u16) << n) as u16) as i16 +} + +#[inline] +pub unsafe fn store_trace(ptr_: *mut TraceType, trace: TraceType) { *ptr_ = trace; } + +#[inline] +pub unsafe fn simd_load(ptr_: *const Simd) -> Simd { ptr::read(ptr_) } + +#[inline] +pub unsafe fn simd_loadu(ptr_: *const Simd) -> Simd { ptr::read_unaligned(ptr_) } + +#[inline] +pub unsafe fn simd_store(ptr_: *mut Simd, a: Simd) { ptr::write(ptr_, a) } + +#[inline] +pub unsafe fn halfsimd_load(ptr_: *const HalfSimd) -> HalfSimd { ptr::read(ptr_) } + +#[inline] +pub unsafe fn halfsimd_loadu(ptr_: *const HalfSimd) -> HalfSimd { ptr::read_unaligned(ptr_) } + +#[inline] +pub unsafe fn lutsimd_load(ptr_: *const LutSimd) -> LutSimd { ptr::read(ptr_) } + +#[inline] +pub unsafe fn lutsimd_loadu(ptr_: *const LutSimd) -> LutSimd { ptr::read_unaligned(ptr_) } + +#[inline] +pub unsafe fn halfsimd_store(ptr_: *mut HalfSimd, a: HalfSimd) { ptr::write(ptr_, a) } + +#[inline] +pub unsafe fn simd_set1_i16(v: i16) -> Simd { + 
// Shift left by lanes; bring in from b's high end
/// Shifts `$a` towards higher lanes by `$num` lanes; the vacated low lanes are
/// filled with the top `$num` lanes of `$b`.
///
/// Helper paths go through `$crate` so the exported macro resolves them in
/// this crate regardless of where it is expanded, matching the other macros
/// in this module. `$num` is evaluated once and checked against `L` in debug
/// builds only. `L` itself is taken from the call site's scope.
#[macro_export]
#[doc(hidden)]
macro_rules! simd_sl_i16 {
    ($a:expr, $b:expr, $num:expr) => {{
        let shift: usize = $num;
        debug_assert!(shift <= L);
        let aa = $crate::fallback::to_i16x8($a);
        let bb = $crate::fallback::to_i16x8($b);
        let mut r = [0i16; L];
        let mut i = 0usize;
        while i < L {
            r[i] = if i < shift { bb[L - shift + i] } else { aa[i - shift] };
            i += 1;
        }
        $crate::fallback::from_i16x8(r)
    }};
}

// Shift right by lanes; bring in from b's low end
/// Shifts `$a` towards lower lanes by `$num` lanes; the vacated high lanes are
/// filled with the bottom `$num` lanes of `$b`.
///
/// Same path and evaluation rules as `simd_sl_i16!`.
#[macro_export]
#[doc(hidden)]
macro_rules! simd_sr_i16 {
    ($a:expr, $b:expr, $num:expr) => {{
        let shift: usize = $num;
        debug_assert!(shift <= L);
        let aa = $crate::fallback::to_i16x8($a);
        let bb = $crate::fallback::to_i16x8($b);
        let mut r = [0i16; L];
        let mut i = 0usize;
        while i < L {
            r[i] = if i + shift < L { aa[i + shift] } else { bb[i + shift - L] };
            i += 1;
        }
        $crate::fallback::from_i16x8(r)
    }};
}
simd_prefix_hadd_i16 { + ($a:expr, $num:expr) => {{ + debug_assert!($num <= L); + let mut s: i32 = 0; + let aa = $crate::fallback::to_i16x8($a); + let mut i = 0usize; + while i < $num { + s += (aa[i] as i32) - (ZERO as i32); + i += 1; + } + s.clamp(i16::MIN as i32, i16::MAX as i32) as i16 + }}; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_prefix_hmax_i16 { + ($a:expr, $num:expr) => {{ + debug_assert!($num <= L); + let aa = $crate::fallback::to_i16x8($a); + let mut m = aa[0]; + let mut i = 1usize; + while i < $num { + if aa[i] > m { m = aa[i]; } + i += 1; + } + m + }}; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_suffix_hmax_i16 { + ($a:expr, $num:expr) => {{ + debug_assert!($num <= L); + let aa = $crate::fallback::to_i16x8($a); + let start = L - $num; + let mut m = aa[start]; + let mut i = start + 1; + while i < L { + if aa[i] > m { m = aa[i]; } + i += 1; + } + m + }}; +} + +#[inline] +pub unsafe fn simd_hargmax_i16(v: Simd, max: i16) -> usize { + let a = to_i16x8(v); + let mut i = 0usize; + while i < L { + if a[i] == max { return i; } + i += 1; + } + L +} + +pub type PrefixScanConsts = (); + +#[inline] +pub unsafe fn get_prefix_scan_consts(gap: Simd) -> (Simd, PrefixScanConsts) { + let mut shift1 = simd_sllz_i16!(gap, 1); + shift1 = simd_adds_i16(shift1, gap); + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = simd_adds_i16(shift2, shift1); + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = simd_adds_i16(shift4, shift2); + (shift4, ()) +} + +#[inline] +pub unsafe fn simd_naive_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _gap_cost_lane: PrefixScanConsts) -> Simd { + let mut curr = R_max; + let mut i = 0usize; + while i < (L - 1) { + let prev = curr; + curr = simd_sllz_i16!(curr, 1); + curr = simd_adds_i16(curr, gap_cost); + curr = simd_max_i16(curr, prev); + i += 1; + } + curr +} + +#[inline] +pub unsafe fn simd_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _c: PrefixScanConsts) -> Simd { + let mut shift1 = simd_sllz_i16!(R_max, 
1); + shift1 = simd_adds_i16(shift1, gap_cost); + shift1 = simd_max_i16(shift1, R_max); + + let gc_arr = to_i16x8(gap_cost); + let mut gc_shl1 = [0i16; L]; + let mut gc_shl2 = [0i16; L]; + let mut i = 0usize; + while i < L { + gc_shl1[i] = i16_shl_wrap(gc_arr[i], 1); + gc_shl2[i] = i16_shl_wrap(gc_arr[i], 2); + i += 1; + } + let gc_shl1 = from_i16x8(gc_shl1); + let gc_shl2 = from_i16x8(gc_shl2); + + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = simd_adds_i16(shift2, gc_shl1); + shift2 = simd_max_i16(shift2, shift1); + + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = simd_adds_i16(shift4, gc_shl2); + shift4 = simd_max_i16(shift4, shift2); + + shift4 +} + +#[inline] +pub unsafe fn halfsimd_sub_i8(a: HalfSimd, b: HalfSimd) -> HalfSimd { + let aa = to_u8x16(a); + let bb = to_u8x16(b); + let mut r = [0u8; 16]; + let mut i = 0usize; + while i < 16 { + r[i] = aa[i].wrapping_sub(bb[i]); + i += 1; + } + from_u8x16(r) +} + +#[inline] +pub unsafe fn halfsimd_set1_i8(v: i8) -> HalfSimd { + let b = v as u8; + from_u8x16([b; 16]) +} + +#[inline] +pub unsafe fn halfsimd_get_idx(i: usize) -> usize { i + i / L * L } + +#[macro_export] +#[doc(hidden)] +macro_rules! 
halfsimd_sr_i8 { + ($a:expr, $b:expr, $num:expr) => {{ + debug_assert!($num <= L); + let aa = $crate::to_u8x16($a); + let bb = $crate::to_u8x16($b); + let mut r = [0u8; 16]; + let mut i = 0usize; + while i < L { + r[i] = if i + $num < L { aa[i + $num] } else { bb[i + $num - L] }; + i += 1; + } + $crate::from_u8x16(r) + }}; +} + +#[inline] +pub unsafe fn halfsimd_lookup2_i16(lut1: LutSimd, lut2: LutSimd, v: HalfSimd) -> Simd { + let idx = to_u8x16(v); + let t1 = to_u8x16(lut1); + let t2 = to_u8x16(lut2); + let mut table = [0u8; 32]; + table[..16].copy_from_slice(&t1); + table[16..].copy_from_slice(&t2); + + let mut out = [0i16; L]; + let mut i = 0usize; + while i < L { + let j = idx[i] as usize; + out[i] = table[j] as i8 as i16; + i += 1; + } + from_i16x8(out) +} + +#[inline] +pub unsafe fn halfsimd_lookup1_i16(lut: LutSimd, v: HalfSimd) -> Simd { + let idx = to_u8x16(v); + let t = to_u8x16(lut); + let mut out = [0i16; L]; + let mut i = 0usize; + while i < L { + out[i] = t[(idx[i] & 0x0F) as usize] as i8 as i16; + i += 1; + } + from_i16x8(out) +} + +#[inline] +pub unsafe fn halfsimd_lookup_bytes_i16( + match_scores: HalfSimd, + mismatch_scores: HalfSimd, + a: HalfSimd, + b: HalfSimd +) -> Simd { + let aa = to_u8x16(a); + let bb = to_u8x16(b); + let ms = to_u8x16(match_scores); + let mms = to_u8x16(mismatch_scores); + let mut c = [0u8; 16]; + let mut i = 0usize; + while i < 16 { + c[i] = if aa[i] == bb[i] { ms[i] } else { mms[i] }; + i += 1; + } + let mut out = [0i16; L]; + i = 0; + while i < L { + out[i] = c[i] as i8 as i16; + i += 1; + } + from_i16x8(out) +} + +#[inline] +pub unsafe fn simd_dbg_i16(v: Simd) { + let a = to_i16x8(v); + for i in (0..a.len()).rev() { + print!("{:6} ", a[i]); + } + println!(); +} + +#[inline] +pub unsafe fn halfsimd_dbg_i8(v: HalfSimd) { + let a = to_u8x16(v); + for i in (0..a.len()).rev() { + print!("{:3} ", a[i] as i8); + } + println!(); +} + +#[inline] +pub unsafe fn simd_assert_vec_eq(a: Simd, b: [i16; L]) { + 
assert_eq!(to_i16x8(a), b); +} + +#[inline] +pub unsafe fn halfsimd_assert_vec_eq(a: HalfSimd, b: [i8; L]) { + let arr = to_u8x16(a); + let mut low = [0i8; L]; + let mut i = 0usize; + while i < L { + low[i] = arr[i] as i8; + i += 1; + } + assert_eq!(low, b); +} diff --git a/lib/block-aligner/src/ffi.rs b/lib/block-aligner/src/ffi.rs new file mode 100644 index 000000000..0676c8e89 --- /dev/null +++ b/lib/block-aligner/src/ffi.rs @@ -0,0 +1,502 @@ +//! C bindings for block aligner. +//! +//! Generics are monomorphised manually. +//! +//! Nucleotide and arbitrary byte alignment do not have bindings yet. + +use std::ffi::c_void; + +use crate::scan_block::*; +use crate::scores::*; +use crate::cigar::*; + +// avoid generics by using void pointer and monomorphism +/// A handle for a block in block aligner. +pub type BlockHandle = *mut c_void; + +/// Represents a range that has inclusive lower and upper bounds. +#[derive(Copy, Clone, PartialEq)] +#[repr(C)] +pub struct SizeRange { + pub min: usize, + pub max: usize +} + + +// AAMatrix + +/// Create a new simple AAMatrix with custom match and mismatch scores. +/// +/// Note that the match score must be positive and the mismatch score must be negative. +#[no_mangle] +pub unsafe extern fn block_new_simple_aamatrix(match_score: i8, mismatch_score: i8) -> *mut AAMatrix { + let matrix = Box::new(AAMatrix::new_simple(match_score, mismatch_score)); + Box::into_raw(matrix) +} + +/// Set an entry in the AAMatrix. +#[no_mangle] +pub unsafe extern fn block_set_aamatrix(matrix: *mut AAMatrix, a: u8, b: u8, score: i8) { + let matrix = &mut *matrix; + matrix.set(a, b, score); +} + +/// Set an entry in the AAMatrix. +#[no_mangle] +pub unsafe extern fn block_set_aamatrix_num(matrix: *mut AAMatrix, a: u8, b: u8, score: i8) { + let matrix = &mut *matrix; + matrix.set_num(a, b, score); +} + +/// Frees an AAMatrix. 
+#[no_mangle] +pub unsafe extern fn block_free_aamatrix(matrix: *mut AAMatrix) { + drop(Box::from_raw(matrix)); +} + + +// AAProfile + +/// Create a new profile of a specific length, with default (large negative) values. +/// +/// Note that internally, the created profile is longer than a conventional position-specific scoring +/// matrix (and `str_len`) by 1, so the profile will have the same length as the number of +/// columns in the DP matrix. +/// The first column of scores in the profile should be large negative values (padding). +/// This allows gap open costs to be specified for the first column of the DP matrix. +#[no_mangle] +pub unsafe extern fn block_new_aaprofile(str_len: usize, block_size: usize, gap_extend: i8) -> *mut AAProfile { + let profile = Box::new(AAProfile::new(str_len, block_size, gap_extend)); + Box::into_raw(profile) +} + +/// Get the length of the profile. +#[no_mangle] +pub unsafe extern fn block_len_aaprofile(profile: *const AAProfile) -> usize { + let profile = &*profile; + profile.len() +} + +/// Clear the profile so it can be used for profile lengths less than or equal +/// to the length this struct was created with. +#[no_mangle] +pub unsafe extern fn block_clear_aaprofile(profile: *mut AAProfile, str_len: usize, block_size: usize) { + let profile = &mut *profile; + profile.clear(str_len, block_size); +} + +/// Set the score for a position and byte. +/// +/// The first column (`i = 0`) should be padded with large negative values. +/// Therefore, set values starting from `i = 1`. +#[no_mangle] +pub unsafe extern fn block_set_aaprofile(profile: *mut AAProfile, i: usize, b: u8, score: i8) { + let profile = &mut *profile; + profile.set(i, b, score); +} + + +/// Set the scores for all positions in the position specific scoring matrix. +/// +/// The profile should be first `clear`ed before it is reused with different lengths. +/// +/// Use `order` to specify the order of bytes that is used in the `scores` matrix. 
+/// Scores (in `scores`) should be stored in row-major order, where each row is a different position +/// and each column is a different byte. +#[no_mangle] +pub unsafe extern fn block_set_all_aaprofile(profile: *mut AAProfile, order: *const u8, order_len: usize, scores: *const i8, scores_len: usize, left_shift: usize, right_shift: usize) { + let profile = &mut *profile; + let order = std::slice::from_raw_parts(order, order_len); + let scores = std::slice::from_raw_parts(scores, scores_len); + profile.set_all(order, scores, left_shift, right_shift); +} + +/// Set the scores for all positions in reverse in the position specific scoring matrix. +/// +/// The profile should be first `clear`ed before it is reused with different lengths. +/// +/// Use `order` to specify the order of bytes that is used in the `scores` matrix. +/// Scores (in `scores`) should be stored in row-major order, where each row is a different position +/// and each column is a different byte. +#[no_mangle] +pub unsafe extern fn block_set_all_rev_aaprofile(profile: *mut AAProfile, order: *const u8, order_len: usize, scores: *const i8, scores_len: usize, left_shift: usize, right_shift: usize) { + let profile = &mut *profile; + let order = std::slice::from_raw_parts(order, order_len); + let scores = std::slice::from_raw_parts(scores, scores_len); + profile.set_all_rev(order, scores, left_shift, right_shift); +} + +#[no_mangle] +pub unsafe fn aaprofile_pos_aa(profile: *mut AAProfile) -> *mut i8 { + let profile = &mut *profile; + profile.pos_aa_mut_ptr() +} + +#[no_mangle] +pub unsafe fn aaprofile_aa_pos(profile: &mut AAProfile) -> *mut i16 { + let profile = &mut *profile; + profile.aa_pos_mut_ptr() +} + +/// Set the gap open cost for a column. +/// +/// When aligning a sequence `q` to a profile `r`, this is the gap open cost at column `i` for a +/// column transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. +/// This represents starting a gap in `q`. 
+#[no_mangle] +pub unsafe extern fn block_set_gap_open_C_aaprofile(profile: *mut AAProfile, i: usize, gap: i8) { + let profile = &mut *profile; + profile.set_gap_open_C(i, gap); +} + +/// Set the gap close cost for a column. +/// +/// When aligning a sequence `q` to a profile `r`, this is the gap close cost at column `i` for +/// ending column transitions in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. +/// This represents ending a gap in `q`. +#[no_mangle] +pub unsafe extern fn block_set_gap_close_C_aaprofile(profile: *mut AAProfile, i: usize, gap: i8) { + let profile = &mut *profile; + profile.set_gap_close_C(i, gap); +} + +/// Set the gap open cost for a row. +/// +/// When aligning a sequence `q` to a profile `r`, this is the gap open cost at column `i` for +/// a row transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. +/// This represents starting a gap in `r`. +#[no_mangle] +pub unsafe extern fn block_set_gap_open_R_aaprofile(profile: *mut AAProfile, i: usize, gap: i8) { + let profile = &mut *profile; + profile.set_gap_open_R(i, gap); +} + +/// Set the gap open cost for all column transitions. +#[no_mangle] +pub unsafe extern fn block_set_all_gap_open_C_aaprofile(profile: *mut AAProfile, gap: i8) { + let profile = &mut *profile; + profile.set_all_gap_open_C(gap); +} + +/// Set the gap close cost for all column transitions. +#[no_mangle] +pub unsafe extern fn block_set_all_gap_close_C_aaprofile(profile: *mut AAProfile, gap: i8) { + let profile = &mut *profile; + profile.set_all_gap_close_C(gap); +} + +/// Set the gap open cost for all row transitions. +#[no_mangle] +pub unsafe extern fn block_set_all_gap_open_R_aaprofile(profile: *mut AAProfile, gap: i8) { + let profile = &mut *profile; + profile.set_all_gap_open_R(gap); +} + +/// Get the score for a position and byte. 
+#[no_mangle] +pub unsafe extern fn block_get_aaprofile(profile: *const AAProfile, i: usize, b: u8) -> i8 { + let profile = &*profile; + profile.get(i, b) +} + +/// Get the gap extend cost. +#[no_mangle] +pub unsafe extern fn block_get_gap_extend_aaprofile(profile: *const AAProfile) -> i8 { + let profile = &*profile; + profile.get_gap_extend() +} + +#[no_mangle] +pub unsafe extern fn block_get_curr_len_aaprofile(profile: *const AAProfile) -> usize { + let profile = &*profile; + profile.get_curr_len() +} +/// Frees an AAProfile. +#[no_mangle] +pub unsafe extern fn block_free_aaprofile(profile: *mut AAProfile) { + drop(Box::from_raw(profile)); +} + + +// CIGAR + +/// Create a new empty CIGAR string. +#[no_mangle] +pub unsafe extern fn block_new_cigar(query_len: usize, reference_len: usize) -> *mut Cigar { + let cigar = Box::new(Cigar::new(query_len, reference_len)); + Box::into_raw(cigar) +} + +/// Get the operation at a certain index in a CIGAR string. +#[no_mangle] +pub unsafe extern fn block_get_cigar(cigar: *const Cigar, i: usize) -> OpLen { + let cigar_str = &*cigar; + cigar_str.get(i) +} + +/// Get the length of a CIGAR string. +#[no_mangle] +pub unsafe extern fn block_len_cigar(cigar: *const Cigar) -> usize { + let cigar_str = &*cigar; + cigar_str.len() +} + +/// Frees a CIGAR string. +#[no_mangle] +pub unsafe extern fn block_free_cigar(cigar: *mut Cigar) { + drop(Box::from_raw(cigar)); +} + + +// PaddedBytes + +/// Create a new empty padded amino acid string. +#[no_mangle] +pub unsafe extern fn block_new_padded_aa(len: usize, max_size: usize) -> *mut PaddedBytes { + let padded_bytes = Box::new(PaddedBytes::new::(len, max_size)); + Box::into_raw(padded_bytes) +} + +/// Write to a padded amino acid string. 
+#[no_mangle] +pub unsafe extern fn block_set_bytes_padded_aa(padded: *mut PaddedBytes, s: *const u8, len: usize, max_size: usize) { + let bytes = std::slice::from_raw_parts(s, len); + let padded_bytes = &mut *padded; + padded_bytes.set_bytes::(bytes, max_size); +} + +/// Write to a padded amino acid string. +#[no_mangle] +pub unsafe extern fn block_set_bytes_padded_aa_numsequence(padded: *mut PaddedBytes, s: *const u8, len: usize, max_size: usize) { + let bytes = std::slice::from_raw_parts(s, len); + let padded_bytes = &mut *padded; + padded_bytes.set_bytes_num::(bytes, max_size); +} + +/// Frees a padded amino acid string. +#[no_mangle] +pub unsafe extern fn block_free_padded_aa(padded: *mut PaddedBytes) { + drop(Box::from_raw(padded)); +} + + +// PosBias + +/// Create a new zero initialized positional score bias vector. +#[no_mangle] +pub unsafe extern fn block_new_pos_bias(len: usize, max_size: usize) -> *mut PosBias { + let pos_bias = Box::new(PosBias::new(len, max_size)); + Box::into_raw(pos_bias) +} + +/// Write to the positional score bias vector. +#[no_mangle] +pub unsafe extern fn block_set_pos_bias(bias: *mut PosBias, b: *const i16, len: usize) { + let biases = std::slice::from_raw_parts(b, len); + let pos_bias = &mut *bias; + pos_bias.set_biases(biases); +} + +/// Frees the positional score bias vector. +#[no_mangle] +pub unsafe extern fn block_free_pos_bias(bias: *mut PosBias) { + drop(Box::from_raw(bias)); +} + + +// Block + +macro_rules! 
gen_functions { + ($new_name:ident, $new_doc:expr, + $align_name:ident, $align_doc:expr, + $align_profile_name:ident, $align_profile_doc:expr, + $align_aa_name:ident, $align_aa_doc:expr, + $align_3di_name:ident, $align_3di_doc:expr, + $res_name:ident, $res_doc:expr, + $trace_name:ident, $trace_doc:expr, + $trace_eq_name:ident, $trace_eq_doc:expr, + $free_name:ident, $free_doc:expr, + $matrix:ty, $profile:ty, $trace:literal, $x_drop:literal) => { + #[doc = $new_doc] + #[no_mangle] + pub unsafe extern fn $new_name(query_len: usize, + reference_len: usize, + max_size: usize) -> BlockHandle { + let aligner = Box::new(Block::<$trace, $x_drop>::new(query_len, reference_len, max_size)); + Box::into_raw(aligner) as BlockHandle + } + + #[doc = $align_doc] + #[no_mangle] + pub unsafe extern fn $align_name(b: BlockHandle, + q: *const PaddedBytes, + r: *const PaddedBytes, + m: *const $matrix, + g: Gaps, + s: SizeRange, + x: i32) { + let aligner = &mut *(b as *mut Block<$trace, $x_drop>); + aligner.align(&*q, &*r, &*m, g, s.min..=s.max, x); + } + + #[doc = $align_profile_doc] + #[no_mangle] + pub unsafe extern fn $align_profile_name(b: BlockHandle, + q: *const PaddedBytes, + r: *const $profile, + s: SizeRange, + x: i32) { + let aligner = &mut *(b as *mut Block<$trace, $x_drop>); + aligner.align_profile(&*q, &*r, s.min..=s.max, x); + } + + #[doc = $align_aa_doc] + #[no_mangle] + pub unsafe extern fn $align_aa_name(b: BlockHandle, + q: *const PaddedBytes, + q_bias: *const PosBias, + r: *const PaddedBytes, + r_bias: *const PosBias, + m: *const $matrix, + g: Gaps, + s: SizeRange, + x: i32) { + let aligner = &mut *(b as *mut Block<$trace, $x_drop>); + aligner.align_aa(&*q, &*q_bias, &*r, &*r_bias, &*m, g, s.min..=s.max, x); + } + + + #[doc = $align_3di_doc] + #[no_mangle] + pub unsafe extern fn $align_3di_name(b: BlockHandle, + q: *const PaddedBytes, + q_3di: *const PaddedBytes, + q_bias: *const PosBias, + r: *const PaddedBytes, + r_3di: *const PaddedBytes, + r_bias: *const PosBias, 
+ m: *const $matrix, + m_3di: *const $matrix, + g: Gaps, + s: SizeRange, + x: i32) { + let aligner = &mut *(b as *mut Block<$trace, $x_drop>); + aligner.align_3di(&*q, &*q_3di, &*q_bias, &*r, &*r_3di, &*r_bias, &*m, &*m_3di, g, s.min..=s.max, x); + } + + #[doc = $res_doc] + #[no_mangle] + pub unsafe extern fn $res_name(b: BlockHandle) -> AlignResult { + let aligner = &*(b as *const Block<$trace, $x_drop>); + aligner.res() + } + + #[doc = $trace_doc] + #[no_mangle] + pub unsafe extern fn $trace_name(b: BlockHandle, query_idx: usize, reference_idx: usize, cigar: *mut Cigar) { + let aligner = &*(b as *const Block<$trace, $x_drop>); + aligner.trace().cigar(query_idx, reference_idx, &mut *cigar); + } + + #[doc = $trace_eq_doc] + #[no_mangle] + pub unsafe extern fn $trace_eq_name(b: BlockHandle, q: *const PaddedBytes, r: *const PaddedBytes, query_idx: usize, reference_idx: usize, cigar: *mut Cigar) { + let aligner = &*(b as *const Block<$trace, $x_drop>); + aligner.trace().cigar_eq(&*q, &*r, query_idx, reference_idx, &mut *cigar); + } + + #[doc = $free_doc] + #[no_mangle] + pub unsafe extern fn $free_name(b: BlockHandle) { + drop(Box::from_raw(b as *mut Block<$trace, $x_drop>)); + } + }; +} + +gen_functions!( + block_new_aa, + "Create a new block aligner instance for global alignment of amino acid strings (no traceback).", + block_align_aa, + "Global alignment of two amino acid strings (no traceback).", + block_align_profile_aa, + "Global alignment of an amino acid sequence to a profile (no traceback).", + block_align_aa_posbias, + "Global alignment of two amino acid strings with posbias (no traceback).", + block_align_3di_aa, + "Global alignment of two amino acid strings with 3di (no traceback).", + block_res_aa, + "Retrieves the result of global alignment of two amino acid strings (no traceback).", + _block_cigar_aa, + "Don't use.", + _block_cigar_eq_aa, + "Don't use.", + block_free_aa, + "Frees the block used for global alignment of two amino acid strings (no 
traceback).", + AAMatrix, AAProfile, false, false +); + +gen_functions!( + block_new_aa_xdrop, + "Create a new block aligner instance for X-drop alignment of amino acid strings (no traceback).", + block_align_aa_xdrop, + "X-drop alignment of two amino acid strings (no traceback).", + block_align_profile_aa_xdrop, + "X-drop alignment of an amino acid sequence to a profile (no traceback).", + block_align_aa_xdrop_posbias, + "X-drop alignment of two amino acid strings with posbias (no traceback).", + block_align_3di_aa_xdrop, + "X-drop alignment of two amino acid strings with 3di (no traceback).", + block_res_aa_xdrop, + "Retrieves the result of X-drop alignment of two amino acid strings (no traceback).", + _block_cigar_aa_xdrop, + "Don't use.", + _block_cigar_eq_aa_xdrop, + "Don't use.", + block_free_aa_xdrop, + "Frees the block used for X-drop alignment of two amino acid strings (no traceback).", + AAMatrix, AAProfile, false, true +); + +gen_functions!( + block_new_aa_trace, + "Create a new block aligner instance for global alignment of amino acid strings, with traceback.", + block_align_aa_trace, + "Global alignment of two amino acid strings, with traceback.", + block_align_profile_aa_trace, + "Global alignment of an amino acid sequence to a profile, with traceback.", + block_align_aa_trace_posbias, + "Global alignment of two amino acid strings, with traceback.", + block_align_3di_aa_trace, + "Global alignment of two amino acid strings with 3di, with traceback.", + block_res_aa_trace, + "Retrieves the result of global alignment of two amino acid strings, with traceback.", + block_cigar_aa_trace, + "Retrieves the resulting CIGAR string from global alignment of two amino acid strings, with traceback.", + block_cigar_eq_aa_trace, + "Retrieves the resulting CIGAR string from global alignment of two amino acid strings, with traceback containing =/X.", + block_free_aa_trace, + "Frees the block used for global alignment of two amino acid strings, with traceback.", + 
AAMatrix, AAProfile, true, false +); + +gen_functions!( + block_new_aa_trace_xdrop, + "Create a new block aligner instance for X-drop alignment of amino acid strings, with traceback.", + block_align_aa_trace_xdrop, + "X-drop alignment of two amino acid strings, with traceback.", + block_align_profile_aa_trace_xdrop, + "X-drop alignment of an amino acid sequence to a profile, with traceback.", + block_align_aa_trace_xdrop_posbias, + "X-drop alignment of two amino acid strings with traceback with posbias.", + block_align_3di_aa_trace_xdrop, + "X-drop alignment of two amino acid strings with 3di, with traceback.", + block_res_aa_trace_xdrop, + "Retrieves the result of X-drop alignment of two amino acid strings, with traceback.", + block_cigar_aa_trace_xdrop, + "Retrieves the resulting CIGAR string from X-drop alignment of two amino acid strings, with traceback.", + block_cigar_eq_aa_trace_xdrop, + "Retrieves the resulting CIGAR string from X-drop alignment of two amino acid strings, with traceback containing =/X.", + block_free_aa_trace_xdrop, + "Frees the block used for X-drop alignment of two amino acid strings, with traceback.", + AAMatrix, AAProfile, true, true +); diff --git a/lib/block-aligner/src/lib.rs b/lib/block-aligner/src/lib.rs new file mode 100644 index 000000000..ccf24c770 --- /dev/null +++ b/lib/block-aligner/src/lib.rs @@ -0,0 +1,117 @@ +//! SIMD-accelerated library for computing global and X-drop affine +//! gap penalty sequence-to-sequence or sequence-to-profile alignments +//! using an adaptive block-based algorithm. +//! +//! Currently, SSE2, AVX2, Neon, and WASM SIMD are supported. +//! +//! ## Example +//! ``` +//! use block_aligner::{cigar::*, scan_block::*, scores::*}; +//! +//! let min_block_size = 32; +//! let max_block_size = 256; +//! +//! // A gap of length n will cost: open + extend * (n - 1) +//! let gaps = Gaps { open: -2, extend: -1 }; +//! +//! // Note that PaddedBytes, Block, and Cigar can be initialized with sequence length +//! 
// and block size upper bounds and be reused later for shorter sequences, to avoid +//! // repeated allocations. +//! let r = PaddedBytes::from_bytes::<NucMatrix>(b"TTAAAAAAATTTTTTTTTTTT", max_block_size); +//! let q = PaddedBytes::from_bytes::<NucMatrix>(b"TTTTTTTTAAAAAAATTTTTTTTT", max_block_size); +//! +//! // Align with traceback, but no X-drop threshold (global alignment). +//! let mut a = Block::<true, false>::new(q.len(), r.len(), max_block_size); +//! a.align(&q, &r, &NW1, gaps, min_block_size..=max_block_size, 0); +//! let res = a.res(); +//! +//! assert_eq!(res, AlignResult { score: 7, query_idx: 24, reference_idx: 21 }); +//! +//! let mut cigar = Cigar::new(res.query_idx, res.reference_idx); +//! // Compute traceback and resolve =/X (matches/mismatches). +//! a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar); +//! +//! assert_eq!(cigar.to_string(), "2=6I16=3D"); +//! ``` +//! +//! ## Tuning block sizes +//! +//! For long, noisy Nanopore reads, a min block size of ~1% sequence length and a max block size +//! of ~10% sequence length perform well (tested with reads up to ~50 kbp). +//! For proteins, a min block size of 32 and a max block size of 256 perform well. +//! Using a minimum block size that is at least 32 is recommended for most applications. +//! Using a maximum block size greater than `2^14 = 16384` is not recommended. +//! If the alignment scores are saturating (score too large), then use a smaller block size. +//! Let me know how block aligner performs on your data! +//! +//! When building your code that uses this library, it is important to specify the +//! correct feature flag: `simd_sse2`, `simd_avx2`, `simd_neon`, `simd_wasm`, or `no_simd` +//! (the non-SIMD fallback). +//! More information on specifying different features for different platforms +//! with the same dependency [here](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#platform-specific-dependencies). 
+ +// special SIMD instruction set modules adapted for this library +// their types and lengths are abstracted out + +#[cfg(feature = "simd_sse2")] +#[macro_use] +#[doc(hidden)] +/// cbindgen:ignore +pub mod sse2; + +#[cfg(feature = "simd_sse2")] +pub use sse2::L; + +#[cfg(feature = "simd_avx2")] +#[macro_use] +#[doc(hidden)] +/// cbindgen:ignore +pub mod avx2; + +#[cfg(feature = "simd_avx2")] +pub use avx2::L; + +#[cfg(feature = "simd_wasm")] +#[macro_use] +#[doc(hidden)] +/// cbindgen:ignore +pub mod simd128; + +#[cfg(feature = "simd_wasm")] +pub use simd128::L; + +#[cfg(feature = "simd_neon")] +#[macro_use] +#[doc(hidden)] +/// cbindgen:ignore +pub mod neon; + +#[cfg(feature = "simd_neon")] +pub use neon::L; + +#[cfg(feature = "no_simd")] +#[macro_use] +#[doc(hidden)] +/// cbindgen:ignore +pub mod fallback; + +#[cfg(feature = "no_simd")] +pub use fallback::L; + +#[cfg(any(feature = "no_simd", feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))] +pub mod scan_block; +#[cfg(any(feature = "no_simd", feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))] +pub mod scores; +#[cfg(any(feature = "no_simd", feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))] +pub mod cigar; + +#[cfg(any(feature = "no_simd", feature = "simd_sse2", feature = "simd_avx2", feature = "simd_wasm", feature = "simd_neon"))] +#[doc(hidden)] +pub mod ffi; + +/// Calculate the percentage of a length, rounded to the next power of two. +/// +/// This is useful for computing the min and max block sizes for sequences of a certain +/// length by using percentages. The returned value is at least 32. 
+pub fn percent_len(len: usize, p: f32) -> usize { + ((p * (len as f32)).round() as usize).max(32).next_power_of_two() +} diff --git a/lib/block-aligner/src/neon.rs b/lib/block-aligner/src/neon.rs new file mode 100644 index 000000000..c604c3f79 --- /dev/null +++ b/lib/block-aligner/src/neon.rs @@ -0,0 +1,515 @@ +use std::arch::aarch64::*; + +pub type Simd = int16x8_t; +pub type HalfSimd = int8x8_t; +pub type LutSimd = int8x16_t; +pub type TraceType = i16; +/// Number of 16-bit lanes in a SIMD vector. +pub const L: usize = 8; +pub const L_BYTES: usize = L * 2; +pub const HALFSIMD_MUL: usize = 1; +// using min = 0 is faster, but restricts range of scores (and restricts the max block size) +pub const ZERO: i16 = 1 << 14; +pub const MIN: i16 = 0; + +// No non-temporal store in Neon +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn store_trace(ptr: *mut TraceType, trace: TraceType) { *ptr = trace; } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_adds_i16(a: Simd, b: Simd) -> Simd { vqaddq_s16(a, b) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_subs_i16(a: Simd, b: Simd) -> Simd { vqsubq_s16(a, b) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_max_i16(a: Simd, b: Simd) -> Simd { vmaxq_s16(a, b) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_cmpeq_i16(a: Simd, b: Simd) -> Simd { vreinterpretq_s16_u16(vceqq_s16(a, b)) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_cmpgt_i16(a: Simd, b: Simd) -> Simd { vreinterpretq_s16_u16(vcgtq_s16(a, b)) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_blend_i8(a: Simd, b: Simd, mask: Simd) -> Simd { + // assume that each element in mask is either 0 or -1 + vbslq_s16(vreinterpretq_u16_s16(mask), b, a) +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_load(ptr: *const Simd) -> Simd { vld1q_s16(ptr as *const i16) } + +#[target_feature(enable = "neon")] +#[inline] +pub 
unsafe fn simd_loadu(ptr: *const Simd) -> Simd { vld1q_s16(ptr as *const i16) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_store(ptr: *mut Simd, a: Simd) { vst1q_s16(ptr as *mut i16, a) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_set1_i16(v: i16) -> Simd { vdupq_n_s16(v) } + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_extract_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num < L); + #[cfg(target_arch = "aarch64")] + use std::arch::aarch64::*; + vgetq_lane_s16($a, $num as i32) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_insert_i16 { + ($a:expr, $v:expr, $num:expr) => { + { + debug_assert!($num < L); + #[cfg(target_arch = "aarch64")] + use std::arch::aarch64::*; + vsetq_lane_s16($v, $a, $num as i32) + } + }; +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_movemask_i8(a: Simd) -> u16 { + // assume that each byte is either 0 or -1 + static POW2: [u8; 16] = [ + 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 + ]; + let mask = vld1q_u8(POW2.as_ptr()); + let masked = vandq_u8(vreinterpretq_u8_s16(a), mask); + let lo = vaddv_u8(vget_low_u8(masked)) as u16; + let hi = vaddv_u8(vget_high_u8(masked)) as u16; + (hi << 8) | lo +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_sl_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "aarch64")] + use std::arch::aarch64::*; + vextq_s16($b, $a, (L - $num) as i32) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
simd_sr_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "aarch64")] + use std::arch::aarch64::*; + if $num == L { + $a + } else { + vextq_s16($b, $a, $num as i32) + } + } + }; +} + +// hardcoded to STEP = 8 +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_step(a: Simd, b: Simd) -> Simd { + a +} + +macro_rules! simd_sllz_i16 { + ($a:expr, $num:expr) => { + { + simd_sl_i16!($a, simd_set1_i16(0), $num) + } + }; +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_broadcasthi_i16(v: Simd) -> Simd { + vdupq_laneq_s16(v, 7) +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_slow_extract_i16(v: Simd, i: usize) -> i16 { + debug_assert!(i < L); + + #[repr(align(16))] + struct A([i16; L]); + + let mut a = A([0i16; L]); + simd_store(a.0.as_mut_ptr() as *mut Simd, v); + *a.0.as_ptr().add(i) +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_hmax_i16(v: Simd) -> i16 { vmaxvq_s16(v) } + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_prefix_hadd_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + let mut v = simd_subs_i16($a, simd_set1_i16(ZERO)); + if $num > 4 { + v = simd_adds_i16(v, simd_sr_i16!(v, v, 4)); + } + if $num > 2 { + v = simd_adds_i16(v, simd_sr_i16!(v, v, 2)); + } + if $num > 1 { + v = simd_adds_i16(v, simd_sr_i16!(v, v, 1)); + } + simd_extract_i16!(v, 0) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_prefix_hmax_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + let mut v = $a; + if $num > 4 { + v = simd_max_i16(v, simd_sr_i16!(v, v, 4)); + } + if $num > 2 { + v = simd_max_i16(v, simd_sr_i16!(v, v, 2)); + } + if $num > 1 { + v = simd_max_i16(v, simd_sr_i16!(v, v, 1)); + } + simd_extract_i16!(v, 0) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
simd_suffix_hmax_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + let mut v = $a; + if $num > 4 { + v = simd_max_i16(v, simd_sl_i16!(v, v, 4)); + } + if $num > 2 { + v = simd_max_i16(v, simd_sl_i16!(v, v, 2)); + } + if $num > 1 { + v = simd_max_i16(v, simd_sl_i16!(v, v, 1)); + } + simd_extract_i16!(v, 7) + } + }; +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn simd_hargmax_i16(v: Simd, max: i16) -> usize { + let v2 = simd_cmpeq_i16(v, simd_set1_i16(max)); + (simd_movemask_i8(v2).trailing_zeros() as usize) / 2 +} + +#[target_feature(enable = "neon")] +#[inline] +#[allow(non_snake_case)] +#[allow(dead_code)] +pub unsafe fn simd_naive_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _gap_cost_lane: PrefixScanConsts) -> Simd { + let mut curr = R_max; + + for _i in 0..(L - 1) { + let prev = curr; + curr = simd_sllz_i16!(curr, 1); + curr = simd_adds_i16(curr, gap_cost); + curr = simd_max_i16(curr, prev); + } + + curr +} + +pub type PrefixScanConsts = (); + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn get_prefix_scan_consts(gap: Simd) -> (Simd, PrefixScanConsts) { + let mut shift1 = simd_sllz_i16!(gap, 1); + shift1 = simd_adds_i16(shift1, gap); + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = simd_adds_i16(shift2, shift1); + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = simd_adds_i16(shift4, shift2); + + (shift4, ()) +} + +#[target_feature(enable = "neon")] +#[inline] +#[allow(non_snake_case)] +pub unsafe fn simd_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _gap_cost_lane: PrefixScanConsts) -> Simd { + let mut shift1 = simd_sllz_i16!(R_max, 1); + shift1 = simd_adds_i16(shift1, gap_cost); + shift1 = simd_max_i16(shift1, R_max); + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = simd_adds_i16(shift2, vshlq_n_s16(gap_cost, 1)); + shift2 = simd_max_i16(shift1, shift2); + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = simd_adds_i16(shift4, vshlq_n_s16(gap_cost, 2)); + shift4 = 
simd_max_i16(shift2, shift4); + + shift4 +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_lookup2_i16(lut1: LutSimd, lut2: LutSimd, v: HalfSimd) -> Simd { + let v2 = vcombine_u8(vreinterpret_u8_s8(v), vdup_n_u8(0)); + let c = vget_low_s8(vqtbl2q_s8(int8x16x2_t(lut1, lut2), v2)); + vmovl_s8(c) +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_lookup1_i16(lut: LutSimd, v: HalfSimd) -> Simd { + let v2 = vcombine_u8(vand_u8(vreinterpret_u8_s8(v), vdup_n_u8(0b1111)), vdup_n_u8(0)); + let c = vget_low_s8(vqtbl1q_s8(lut, v2)); + vmovl_s8(c) +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_lookup_bytes_i16(match_scores: HalfSimd, mismatch_scores: HalfSimd, a: HalfSimd, b: HalfSimd) -> Simd { + let mask = vceq_s8(a, b); + let c = vbsl_s8(mask, match_scores, mismatch_scores); + vmovl_s8(c) +} + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_load(ptr: *const HalfSimd) -> HalfSimd { vld1_s8(ptr as *const i8) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_loadu(ptr: *const HalfSimd) -> HalfSimd { vld1_s8(ptr as *const i8) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn lutsimd_load(ptr: *const LutSimd) -> LutSimd { vld1q_s8(ptr as *const i8) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn lutsimd_loadu(ptr: *const LutSimd) -> LutSimd { vld1q_s8(ptr as *const i8) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_store(ptr: *mut HalfSimd, a: HalfSimd) { vst1_s8(ptr as *mut i8, a) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_sub_i8(a: HalfSimd, b: HalfSimd) -> HalfSimd { vsub_s8(a, b) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_set1_i8(v: i8) -> HalfSimd { vdup_n_s8(v) } + +#[target_feature(enable = "neon")] +#[inline] +pub unsafe fn halfsimd_get_idx(i: usize) -> usize { i } + +#[macro_export] +#[doc(hidden)] +macro_rules! 
halfsimd_sr_i8 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "aarch64")] + use std::arch::aarch64::*; + vext_s8($b, $a, $num as i32) + } + }; +} + +#[target_feature(enable = "neon")] +#[allow(dead_code)] +pub unsafe fn simd_dbg_i16(v: Simd) { + #[repr(align(16))] + struct A([i16; L]); + + let mut a = A([0i16; L]); + simd_store(a.0.as_mut_ptr() as *mut Simd, v); + + for i in (0..a.0.len()).rev() { + print!("{:6} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "neon")] +#[allow(dead_code)] +pub unsafe fn halfsimd_dbg_i8(v: HalfSimd) { + #[repr(align(8))] + struct A([i8; L]); + + let mut a = A([0i8; L]); + halfsimd_store(a.0.as_mut_ptr() as *mut HalfSimd, v); + + for i in (0..a.0.len()).rev() { + print!("{:3} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "neon")] +#[allow(dead_code)] +pub unsafe fn simd_assert_vec_eq(a: Simd, b: [i16; L]) { + #[repr(align(16))] + struct A([i16; L]); + + let mut arr = A([0i16; L]); + simd_store(arr.0.as_mut_ptr() as *mut Simd, a); + assert_eq!(arr.0, b); +} + +#[target_feature(enable = "neon")] +#[allow(dead_code)] +pub unsafe fn halfsimd_assert_vec_eq(a: HalfSimd, b: [i8; L]) { + #[repr(align(8))] + struct A([i8; L]); + + let mut arr = A([0i8; L]); + halfsimd_store(arr.0.as_mut_ptr() as *mut HalfSimd, a); + assert_eq!(arr.0, b); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_smoke() { + #[target_feature(enable = "neon")] + unsafe fn inner() { + #[repr(align(16))] + struct A([i16; L]); + + let test = A([1, 2, 3, 4, 5, 6, 7, 8]); + let test_rev = A([8, 7, 6, 5, 4, 3, 2, 1]); + let test_mask = A([0, -1, 0, -1, 0, -1, 0, -1]); + let vec0 = simd_load(test.0.as_ptr() as *const Simd); + let vec0_rev = simd_load(test_rev.0.as_ptr() as *const Simd); + let vec0_mask = simd_load(test_mask.0.as_ptr() as *const Simd); + + let mut vec1 = simd_sl_i16!(vec0, vec0, 1); + simd_assert_vec_eq(vec1, [8, 1, 2, 3, 4, 5, 6, 7]); + + vec1 = simd_sr_i16!(vec0, 
vec0, 1); + simd_assert_vec_eq(vec1, [2, 3, 4, 5, 6, 7, 8, 1]); + + vec1 = simd_adds_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [2, 4, 6, 8, 10, 12, 14, 16]); + + vec1 = simd_subs_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, 0, 0, 0, 0]); + + vec1 = simd_max_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [8, 7, 6, 5, 5, 6, 7, 8]); + + vec1 = simd_cmpeq_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, 0, 0, 0, 0]); + + vec1 = simd_cmpeq_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [-1, -1, -1, -1, -1, -1, -1, -1]); + + vec1 = simd_cmpgt_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, -1, -1, -1, -1]); + + vec1 = simd_blend_i8(vec0, vec0_rev, vec0_mask); + simd_assert_vec_eq(vec1, [1, 7, 3, 5, 5, 3, 7, 1]); + + let mut val = simd_extract_i16!(vec0, 0); + assert_eq!(val, 1); + + val = simd_slow_extract_i16(vec0, 0); + assert_eq!(val, 1); + + vec1 = simd_insert_i16!(vec0, 0, 2); + simd_assert_vec_eq(vec1, [1, 2, 0, 4, 5, 6, 7, 8]); + + let val1 = simd_movemask_i8(vec0_mask); + assert_eq!(val1, 0b1100110011001100); + + vec1 = simd_sllz_i16!(vec0, 1); + simd_assert_vec_eq(vec1, [0, 1, 2, 3, 4, 5, 6, 7]); + + vec1 = simd_broadcasthi_i16(vec0); + simd_assert_vec_eq(vec1, [8, 8, 8, 8, 8, 8, 8, 8]); + + val = simd_hmax_i16(vec0); + assert_eq!(val, 8); + + let zeros = simd_set1_i16(ZERO); + val = simd_prefix_hadd_i16!(simd_adds_i16(vec0, zeros), 4); + assert_eq!(val, 10); + + val = simd_prefix_hmax_i16!(vec0, 4); + assert_eq!(val, 4); + + val = simd_suffix_hmax_i16!(vec0, 4); + assert_eq!(val, 8); + + let val2 = simd_hargmax_i16(vec0, 4); + assert_eq!(val2, 3); + } + unsafe { inner(); } + } + + #[test] + fn test_prefix_scan() { + #[target_feature(enable = "neon")] + unsafe fn inner() { + #[repr(align(16))] + struct A([i16; L]); + + let vec = A([8, 9, 10, 15, 12, 13, 14, 11]); + let gap = simd_set1_i16(0); + let (_, consts) = get_prefix_scan_consts(gap); + let res = simd_prefix_scan_i16(simd_load(vec.0.as_ptr() as *const Simd), gap, 
consts); + simd_assert_vec_eq(res, [8, 9, 10, 15, 15, 15, 15, 15]); + + let vec = A([8, 9, 10, 15, 12, 13, 14, 11]); + let gap = simd_set1_i16(-1); + let (_, consts) = get_prefix_scan_consts(gap); + let res = simd_prefix_scan_i16(simd_load(vec.0.as_ptr() as *const Simd), gap, consts); + simd_assert_vec_eq(res, [8, 9, 10, 15, 14, 13, 14, 13]); + } + unsafe { inner(); } + } +} diff --git a/lib/block-aligner/src/old/scan_band.rs b/lib/block-aligner/src/old/scan_band.rs new file mode 100644 index 000000000..b9a03644b --- /dev/null +++ b/lib/block-aligner/src/old/scan_band.rs @@ -0,0 +1,590 @@ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::avx2::*; + +#[cfg(target_arch = "wasm32")] +use crate::simd128::*; + +use crate::scores::*; + +use std::{alloc, cmp, ptr, i16}; +use std::marker::PhantomData; + +const NULL: u8 = b'A' + 26u8; // this null byte value works for both amino acids and nucleotides + +#[inline] +unsafe fn convert_char(c: u8, nuc: bool) -> u8 { + debug_assert!(c >= b'A' && c <= NULL); + if nuc { c } else { c - b'A' } +} + +#[inline] +unsafe fn clamp(x: i32) -> i16 { + cmp::min(cmp::max(x, i16::MIN as i32), i16::MAX as i32) as i16 +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct EndIndex { + pub query_idx: usize, + pub ref_idx: usize +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum Direction { + Right, + Down +} + +// BLOSUM62 matrix max = 11, min = -4; gap open = -11 (includes extension), gap extend = -1 +// +// R[i][j] = max(R[i - 1][j] + gap_extend, D[i - 1][j] + gap_open) +// C[i][j] = max(C[i][j - 1] + gap_extend, D[i][j - 1] + gap_open) +// D[i][j] = max(D[i - 1][j - 1] + matrix[query[i]][reference[j]], R[i][j], C[i][j]) +// +// indexing (we want to calculate D11): +// x0 x1 +// +-------- +// 0x | 00 01 +// 1x | 10 11 +// +// A band consists of multiple intervals. Each interval is made up of strided vectors. 
+// +// TODO: update stuff to match later adaptive banding code + +#[allow(non_snake_case)] +pub struct ScanAligner<'a, P: ScoreParams, M: 'a + Matrix, const K_HALF: usize, const TRACE: bool, const X_DROP: bool> { + query_buf_layout: alloc::Layout, + query_buf_ptr: *mut HalfSimd, + delta_Dx0_layout: alloc::Layout, + delta_Dx0_ptr: *mut Simd, + delta_Cx0_layout: alloc::Layout, + delta_Cx0_ptr: *mut Simd, + abs_Ax0_layout: alloc::Layout, + abs_Ax0_ptr: *mut i32, + + trace: Vec, + + query_idx: usize, + shift_idx: isize, + ring_buf_idx: usize, + ref_idx: usize, + + best_max: i32, + best_argmax_i: isize, + best_argmax_j: usize, + + shift_dir: Direction, + + query: &'a [u8], + matrix: &'a M, + + _phantom: PhantomData

+} + +impl<'a, P: ScoreParams, M: 'a + Matrix, const K_HALF: usize, const TRACE: bool, const X_DROP: bool> ScanAligner<'a, P, M, { K_HALF }, { TRACE }, { X_DROP }> { + const K: usize = K_HALF * 2 + 1; + const CEIL_K: usize = ((Self::K + L - 1) / L) * L; // round up to multiple of L + const NUM_INTERVALS: usize = (Self::CEIL_K + P::I - 1) / P::I; + + // Use precomputed strides so compiler can avoid division/modulo instructions + const STRIDE_I: usize = P::I / L; + const STRIDE_LAST: usize = (Self::CEIL_K - ((Self::CEIL_K - 1) / P::I) * P::I) / L; + + const EVEN_BITS: u32 = 0x55555555u32; + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + pub unsafe fn new(query: &'a [u8], matrix: &'a M) -> Self { + assert!(P::GAP_OPEN <= P::GAP_EXTEND); + assert!(P::I % L == 0); + + // These chunks of memory are contiguous ring buffers that represent every interval in the current band + let query_buf_layout = alloc::Layout::from_size_align_unchecked(Self::CEIL_K * HALFSIMD_MUL, L_BYTES); + let query_buf_ptr = alloc::alloc(query_buf_layout) as *mut u8; + + let delta_Dx0_layout = alloc::Layout::from_size_align_unchecked(Self::CEIL_K * 2, L_BYTES); + let delta_Dx0_ptr = alloc::alloc(delta_Dx0_layout) as *mut i16; + + let delta_Cx0_layout = alloc::Layout::from_size_align_unchecked(Self::CEIL_K * 2, L_BYTES); + let delta_Cx0_ptr = alloc::alloc(delta_Cx0_layout) as *mut i16; + + // 32-bit absolute values + let abs_Ax0_layout = alloc::Layout::array::(Self::NUM_INTERVALS).unwrap(); + let abs_Ax0_ptr = alloc::alloc(abs_Ax0_layout) as *mut i32; + + // Initialize DP columns + // Not extremely optimized, since it only runs once + { + let mut abs_prev = 0; + + for idx in 0..Self::CEIL_K { + let i = (idx as isize) - (K_HALF as isize); + let interval_idx = idx / P::I; + let stride = cmp::min(P::I, Self::CEIL_K - interval_idx * P::I) / L; + let 
buf_idx = interval_idx * P::I + (((idx % P::I) % stride) * L + (idx % P::I) / stride); + debug_assert!(buf_idx < Self::CEIL_K); + + if i >= 0 && i <= query.len() as isize { + ptr::write(query_buf_ptr.add(halfsimd_get_idx(buf_idx)), convert_char(if i > 0 { + *query.get_unchecked(i as usize - 1) } else { NULL }, M::NUC)); + + let val = if i > 0 { + (P::GAP_OPEN as i32) + ((i as i32) - 1) * (P::GAP_EXTEND as i32) + } else { + 0 + }; + + if idx % P::I == 0 { + ptr::write(abs_Ax0_ptr.add(interval_idx), val); + abs_prev = val; + } + + ptr::write(delta_Dx0_ptr.add(buf_idx), (val - abs_prev) as i16); + } else { + if idx % P::I == 0 { + ptr::write(abs_Ax0_ptr.add(interval_idx), 0); + } + + ptr::write(query_buf_ptr.add(halfsimd_get_idx(buf_idx)), convert_char(NULL, M::NUC)); + ptr::write(delta_Dx0_ptr.add(buf_idx), i16::MIN); + } + + ptr::write(delta_Cx0_ptr.add(buf_idx), i16::MIN); + } + } + + Self { + query_buf_layout, + query_buf_ptr: query_buf_ptr as *mut HalfSimd, + delta_Dx0_layout, + delta_Dx0_ptr: delta_Dx0_ptr as *mut Simd, + delta_Cx0_layout, + delta_Cx0_ptr: delta_Cx0_ptr as *mut Simd, + abs_Ax0_layout, + abs_Ax0_ptr, + + trace: vec![], + + query_idx: Self::CEIL_K - K_HALF - 1, + shift_idx: -(K_HALF as isize), + ring_buf_idx: 0, + ref_idx: 0, + + best_max: 0, // max of first column + best_argmax_i: 0, + best_argmax_j: 0, + + shift_dir: Direction::Right, + + query, + matrix, + + _phantom: PhantomData + } + } + + /// Banded alignment. + /// + /// Limitations: + /// 1. Requires x86 AVX2 or WASM SIMD support. + /// 2. The reference and the query can only contain uppercase alphabetical characters. + /// 3. The actual size of the band is K_HALF * 2 + 1 rounded up to the next multiple of the + /// vector length of 16 (for x86 AVX2) or 8 (for WASM SIMD). 
+ #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + pub unsafe fn align(&mut self, reference: &[u8], x_drop: i32) { + if X_DROP { + assert!(x_drop >= 0); + } + + // optional 32-bit traceback + // 0b00 = up and left, 0b10 or 0b11 = up, 0b01 = left + if TRACE { + self.trace.resize(self.trace.len() + (reference.len() + 1) * Self::CEIL_K / L, Self::EVEN_BITS << 1); + } + + let gap_open = simd_set1_i16(P::GAP_OPEN as i16); + let gap_extend = simd_set1_i16(P::GAP_EXTEND as i16); + let neg_inf = simd_set1_i16(i16::MIN); + + let stride_gap_I_scalar = (Self::STRIDE_I as i16) * (P::GAP_EXTEND as i16); + let stride_gap_I = simd_set1_i16(stride_gap_I_scalar); + let stride_gap_last_scalar = (Self::STRIDE_LAST as i16) * (P::GAP_EXTEND as i16); + let stride_gap_last = simd_set1_i16(stride_gap_last_scalar); + let stride_gap1234_I = simd_set4_i16(stride_gap_I_scalar * 4, + stride_gap_I_scalar * 3, + stride_gap_I_scalar * 2, + stride_gap_I_scalar * 1); + let stride_gap1234_last = simd_set4_i16(stride_gap_last_scalar * 4, + stride_gap_last_scalar * 3, + stride_gap_last_scalar * 2, + stride_gap_last_scalar * 1); + for j in 0..reference.len() { + // Load scores for the current reference character + let matrix_ptr = self.matrix.as_ptr(convert_char(*reference.get_unchecked(j), M::NUC) as usize); + let scores1 = halfsimd_load(matrix_ptr as *const HalfSimd); + let scores2 = if M::NUC { + halfsimd_set1_i8(0) // unused, should be optimized out + } else { + halfsimd_load((matrix_ptr as *const HalfSimd).add(1)) + }; + + let mut band_idx = 0usize; + let mut abs_R_interval = i16::MIN as i32; + let mut abs_D_interval = i16::MIN as i32; + let mut abs_D_max = i32::MIN; + let mut abs_D_argmax = 0isize; + + while band_idx < Self::CEIL_K { + let last_interval = (band_idx + P::I) >= Self::CEIL_K; + let stride = if last_interval { Self::STRIDE_LAST } else { 
Self::STRIDE_I }; + let stride_gap = if last_interval { stride_gap_last } else { stride_gap_I }; + let mut delta_D00; + let mut abs_interval = *self.abs_Ax0_ptr.add(band_idx / P::I); + + // Update ring buffers to slide current band down + { + let idx = band_idx / L + if last_interval { + self.ring_buf_idx % Self::STRIDE_LAST + } else { + self.ring_buf_idx % Self::STRIDE_I + }; + let delta_Dx0_idx = self.delta_Dx0_ptr.add(idx); + // Save first vector of the previous interval before it is replaced + delta_D00 = simd_load(delta_Dx0_idx); + + if self.shift_idx + (band_idx as isize) >= 0 { + abs_interval = abs_interval.saturating_add(simd_extract_i16::<0>(delta_D00) as i32); + } + + let query_buf_idx = self.query_buf_ptr.add(idx); + let delta_Cx0_idx = self.delta_Cx0_ptr.add(idx); + + if last_interval { + // This must be the last interval + let c = if self.query_idx < self.query.len() { + *self.query.get_unchecked(self.query_idx) + } else { + NULL + }; + let query_insert = halfsimd_set1_i8(convert_char(c, M::NUC) as i8); + + // Now shift in new values for each interval + halfsimd_store(query_buf_idx, halfsimd_sr_i8!(query_insert, halfsimd_load(query_buf_idx), 1)); + simd_store(delta_Dx0_idx, simd_sr_i16!(neg_inf, delta_D00, 1)); + simd_store(delta_Cx0_idx, simd_sr_i16!(neg_inf, simd_load(delta_Cx0_idx), 1)); + } else { + // Not the last interval; need to shift in a value from the next interval + let next_band_idx = band_idx + P::I; + let next_last_interval = (next_band_idx + P::I) >= Self::CEIL_K; + let next_idx = next_band_idx / L + if next_last_interval { + self.ring_buf_idx % Self::STRIDE_LAST + } else { + self.ring_buf_idx % Self::STRIDE_I + }; + let next_abs_interval = *self.abs_Ax0_ptr.add(next_band_idx / P::I); + let abs_offset = simd_set1_i16(clamp(next_abs_interval - abs_interval)); + debug_assert!(next_idx < Self::CEIL_K / L); + + let query_insert = halfsimd_load(self.query_buf_ptr.add(next_idx)); + let delta_Dx0_insert = 
simd_adds_i16(simd_load(self.delta_Dx0_ptr.add(next_idx)), abs_offset); + let delta_Cx0_insert = simd_adds_i16(simd_load(self.delta_Cx0_ptr.add(next_idx)), abs_offset); + + // Now shift in new values for each interval + halfsimd_store(query_buf_idx, halfsimd_sr_i8!(query_insert, halfsimd_load(query_buf_idx), 1)); + simd_store(delta_Dx0_idx, simd_sr_i16!(delta_Dx0_insert, delta_D00, 1)); + simd_store(delta_Cx0_idx, simd_sr_i16!(delta_Cx0_insert, simd_load(delta_Cx0_idx), 1)); + } + } + + // Vector for prefix scan calculations + let mut delta_R_max = neg_inf; + let abs_offset = simd_set1_i16(clamp(*self.abs_Ax0_ptr.add(band_idx / P::I) - abs_interval)); + delta_D00 = simd_adds_i16(delta_D00, abs_offset); + + // Begin initial pass + { + let mut extend_to_end = stride_gap; + + for i in 0..stride { + let idx = { + let mut idx = self.ring_buf_idx + 1 + i; + idx = if last_interval { idx % Self::STRIDE_LAST } else { idx % Self::STRIDE_I }; + band_idx / L + idx + }; + debug_assert!(idx < Self::CEIL_K / L); + + let scores = if M::NUC { + halfsimd_lookup1_i16(scores1, halfsimd_load(self.query_buf_ptr.add(idx))) + } else { + halfsimd_lookup2_i16(scores1, scores2, halfsimd_load(self.query_buf_ptr.add(idx))) + }; + + let mut delta_D11 = simd_adds_i16(delta_D00, scores); + + let delta_D10 = simd_adds_i16(simd_load(self.delta_Dx0_ptr.add(idx)), abs_offset); + let delta_C10 = simd_adds_i16(simd_load(self.delta_Cx0_ptr.add(idx)), abs_offset); + let delta_C11 = simd_max_i16( + simd_adds_i16(delta_C10, gap_extend), simd_adds_i16(delta_D10, gap_open)); + + delta_D11 = simd_max_i16(delta_D11, delta_C11); + + if TRACE { + let trace_idx = (Self::CEIL_K / L) * (j + 1) + band_idx / L + i; + debug_assert!(trace_idx < self.trace.len()); + *self.trace.get_unchecked_mut(trace_idx) = + simd_movemask_i8(simd_cmpeq_i16(delta_C11, delta_D11)); + } + + extend_to_end = simd_subs_i16(extend_to_end, gap_extend); + delta_R_max = simd_max_i16(delta_R_max, simd_adds_i16(delta_D11, extend_to_end)); + + // 
Slide band right by directly overwriting the previous band + simd_store(self.delta_Dx0_ptr.add(idx), delta_D11); + simd_store(self.delta_Cx0_ptr.add(idx), delta_C11); + + delta_D00 = delta_D10; + } + } + // End initial pass + + // Begin prefix scan + { + let prev_delta_R_max_last = simd_extract_i16::<{ L - 1 }>(delta_R_max) as i32; + delta_R_max = simd_sl_i16!(delta_R_max, neg_inf, 1); + delta_R_max = simd_insert_i16::<0>(delta_R_max, clamp(abs_R_interval - abs_interval)); + + let stride_gap1234 = if last_interval { stride_gap1234_last } else { stride_gap1234_I }; + delta_R_max = simd_prefix_scan_i16(delta_R_max, stride_gap, stride_gap1234, neg_inf); + + let curr_delta_R_max_last = simd_extract_i16::<{ L - 1 }>(simd_adds_i16(delta_R_max, stride_gap)) as i32; + abs_R_interval = abs_interval.saturating_add(cmp::max(prev_delta_R_max_last, curr_delta_R_max_last)); + } + // End prefix scan + + let mut delta_D_max = neg_inf; + let mut delta_D_argmax = simd_set1_i16(0); + + // Begin final pass + { + let mut delta_R01 = simd_adds_i16(simd_subs_i16(delta_R_max, gap_extend), gap_open); + let mut delta_D01 = simd_insert_i16::<0>(neg_inf, clamp(abs_D_interval - abs_interval)); + let mut curr_i = simd_set1_i16(0); + + for i in 0..stride { + let idx = { + let mut idx = self.ring_buf_idx + 1 + i; + idx = if last_interval { idx % Self::STRIDE_LAST } else { idx % Self::STRIDE_I }; + band_idx / L + idx + }; + debug_assert!(idx < Self::CEIL_K / L); + + let delta_R11 = simd_max_i16( + simd_adds_i16(delta_R01, gap_extend), simd_adds_i16(delta_D01, gap_open)); + let mut delta_D11 = simd_load(self.delta_Dx0_ptr.add(idx)); + delta_D11 = simd_max_i16(delta_D11, delta_R11); + + if TRACE { + let trace_idx = (Self::CEIL_K / L) * (j + 1) + band_idx / L + i; + debug_assert!(trace_idx < self.trace.len()); + let prev_trace = *self.trace.get_unchecked(trace_idx); + let curr_trace = simd_movemask_i8(simd_cmpeq_i16(delta_R11, delta_D11)); + *self.trace.get_unchecked_mut(trace_idx) = + (prev_trace & 
Self::EVEN_BITS) | ((curr_trace & Self::EVEN_BITS) << 1); + } + + if X_DROP { + delta_D_max = simd_max_i16(delta_D_max, delta_D11); + let mask = simd_cmpeq_i16(delta_D_max, delta_D11); + delta_D_argmax = simd_blend_i8(delta_D_argmax, curr_i, mask); + curr_i = simd_adds_i16(curr_i, simd_set1_i16(1)); + } + + simd_store(self.delta_Dx0_ptr.add(idx), delta_D11); + + delta_D01 = delta_D11; + delta_R01 = delta_R11; + } + + abs_D_interval = abs_interval.saturating_add(simd_extract_i16::<{ L - 1 }>(delta_D01) as i32); + } + // End final pass + + if X_DROP { + let (max, lane_idx) = simd_hmax_i16(delta_D_max); + let max = (max as i32).saturating_add(abs_interval); + let stride_idx = simd_slow_extract_i16(delta_D_argmax, lane_idx) as u16 as usize; + let max_idx = stride_idx + lane_idx * stride + band_idx; + + if max > abs_D_max { + abs_D_max = max; + abs_D_argmax = max_idx as isize; + } + } + + debug_assert!(band_idx / P::I < Self::NUM_INTERVALS); + *self.abs_Ax0_ptr.add(band_idx / P::I) = abs_interval; + band_idx += P::I; + } + + self.ring_buf_idx += 1; + self.query_idx += 1; + self.shift_idx += 1; + + if X_DROP { + if abs_D_max < self.best_max - x_drop { + break; + } else if abs_D_max > self.best_max { + self.best_max = abs_D_max; + self.best_argmax_i = abs_D_argmax + self.shift_idx; + self.best_argmax_j = j + self.ref_idx + 1; + } + + self.shift_dir = if abs_D_argmax > K_HALF { Direction::Down } else { Direction::Right }; + } + } + + self.ref_idx += reference.len(); + } + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + pub unsafe fn score(&self) -> i32 { + if X_DROP { + self.best_max + } else { + // Extract the score from the last band + assert!((self.query.len() as isize) - self.shift_idx >= 0); + let res_i = ((self.query.len() as isize) - self.shift_idx) as usize; + let band_idx = (res_i / P::I) * P::I; + let stride = cmp::min(P::I, Self::CEIL_K - 
band_idx) / L; + let idx = band_idx / L + (self.ring_buf_idx + (res_i % P::I)) % stride; + debug_assert!(idx < Self::CEIL_K / L); + + let delta = simd_slow_extract_i16(simd_load(self.delta_Dx0_ptr.add(idx)), (res_i % P::I) / stride) as i32; + let abs = *self.abs_Ax0_ptr.add(res_i / P::I); + + delta + abs + } + } + + pub unsafe fn end_idx(&self) -> EndIndex { + if X_DROP { + assert!(self.best_argmax_i >= 0); + EndIndex { + query_idx: self.best_argmax_i as usize, + ref_idx: self.best_argmax_j + } + } else { + EndIndex { + query_idx: self.query.len(), + ref_idx: self.ref_idx + } + } + } + + pub fn raw_trace(&self) -> &[u32] { + assert!(TRACE); + &self.trace + } +} + +impl<'a, P: ScoreParams, M: 'a + Matrix, const K_HALF: usize, const TRACE: bool, const X_DROP: bool> Drop for ScanAligner<'a, P, M, { K_HALF }, { TRACE }, { X_DROP }> { + fn drop(&mut self) { + unsafe { + alloc::dealloc(self.query_buf_ptr as *mut u8, self.query_buf_layout); + alloc::dealloc(self.delta_Dx0_ptr as *mut u8, self.delta_Dx0_layout); + alloc::dealloc(self.delta_Cx0_ptr as *mut u8, self.delta_Cx0_layout); + alloc::dealloc(self.abs_Ax0_ptr as *mut u8, self.abs_Ax0_layout); + } + } +} + +#[cfg(test)] +mod tests { + use crate::scores::*; + + use super::*; + + #[test] + fn test_scan_align() { + type TestParams = Params<-11, -1, 1024>; + + unsafe { + let r = b"AAAA"; + let q = b"AARA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 11); + + let r = b"AAAA"; + let q = b"AARA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 11); + + let r = b"AAAA"; + let q = b"AAAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 16); + + let r = b"AAAA"; + let q = b"AARA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 11); + + let r = b"AAAA"; + let q = b"RRRR"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 
-4); + + let r = b"AAAA"; + let q = b"AAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 1); + + type TestParams2 = Params<-1, -1, 2048>; + + let r = b"AAAN"; + let q = b"ATAA"; + let mut a = ScanAligner::::new(q, &NW1); + a.align(r, 0); + assert_eq!(a.score(), 1); + + let r = b"AAAA"; + let q = b"C"; + let mut a = ScanAligner::::new(q, &NW1); + a.align(r, 0); + assert_eq!(a.score(), -4); + let mut a = ScanAligner::::new(r, &NW1); + a.align(q, 0); + assert_eq!(a.score(), -4); + } + } + + #[test] + fn test_x_drop() { + type TestParams = Params<-11, -1, 1024>; + + unsafe { + let r = b"AAARRA"; + let q = b"AAAAAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 1); + assert_eq!(a.score(), 12); + assert_eq!(a.end_idx(), EndIndex { query_idx: 3, ref_idx: 3 }); + + let r = b"AAARRA"; + let q = b"AAAAAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 1); + assert_eq!(a.score(), 12); + assert_eq!(a.end_idx(), EndIndex { query_idx: 3, ref_idx: 3 }); + } + } +} diff --git a/lib/block-aligner/src/old/scan_block_old.rs b/lib/block-aligner/src/old/scan_block_old.rs new file mode 100644 index 000000000..196dcffff --- /dev/null +++ b/lib/block-aligner/src/old/scan_block_old.rs @@ -0,0 +1,721 @@ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::avx2::*; + +#[cfg(target_arch = "wasm32")] +use crate::simd128::*; + +use crate::scores::*; + +use std::{cmp, ptr, i16, alloc}; +use std::marker::PhantomData; + +const NULL: u8 = b'A' + 26u8; // this null byte value works for both amino acids and nucleotides + +// Notes: +// +// BLOSUM62 matrix max = 11, min = -4; gap open = -11 (includes extension), gap extend = -1 +// +// R[i][j] = max(R[i - 1][j] + gap_extend, D[i - 1][j] + gap_open) +// C[i][j] = max(C[i][j - 1] + gap_extend, D[i][j - 1] + gap_open) +// D[i][j] = max(D[i - 1][j - 1] + matrix[query[i]][reference[j]], R[i][j], C[i][j]) +// +// indexing (we want to calculate D11): +// x0 x1 +// 
+-------- +// 0x | 00 01 +// 1x | 10 11 +// +// note that 'x' represents any bit +// +// Each block is made up of vertical SIMD vectors of length 8 or 16 16-bit integers. + +// TODO: create matrices with const fn + +pub struct Block<'a, P: ScoreParams, M: 'a + Matrix, const B: usize, const TRACE: bool, const X_DROP: bool> { + res: AlignResult, + trace: Trace, + query: &'a PaddedBytes, + i: usize, + reference: &'a PaddedBytes, + j: usize, + matrix: &'a M, + x_drop: i32, + _phantom: PhantomData

+} + +impl<'a, P: ScoreParams, M: 'a + Matrix, const B: usize, const TRACE: bool, const X_DROP: bool> Block<'a, P, M, { B }, { TRACE }, { X_DROP }> { + const EVEN_BITS: u32 = 0x55555555u32; + + /// Adaptive banded alignment. + /// + /// The x drop option indicates whether to terminate the alignment process early when + /// the max score in the current band drops below the max score encountered so far. If + /// x drop is not enabled, then the band will keep shifting until the end of the reference + /// string is reached. + /// + /// Limitations: + /// 1. Requires x86 AVX2 or WASM SIMD support. + /// 2. The reference and the query can only contain uppercase alphabetical characters. + /// 3. The actual size of the band is K + 1 rounded up to the next multiple of the + /// vector length of 16 (for x86 AVX2) or 8 (for WASM SIMD). + pub fn align(query: &'a PaddedBytes, reference: &'a PaddedBytes, matrix: &'a M, x_drop: i32) -> Self { + assert!(P::GAP_OPEN <= P::GAP_EXTEND); + + if X_DROP { + assert!(x_drop >= 0); + } + + let mut a = Self { + res: AlignResult { score: 0, query_idx: 0, reference_idx: 0 }, + trace: if TRACE { Trace::new(query.len(), reference.len()) } else { Trace::new(0, 0) }, + query, + i: 0, + reference, + j: 0, + matrix, + x_drop, + _phantom: PhantomData + }; + + unsafe { a.align_core(); } + a + } + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + unsafe fn align_core(&mut self) { + let neg_inf = simd_set1_i16(i16::MIN); + + let mut best_max = i32::MIN; + let mut best_argmax_i = 0usize; + let mut best_argmax_j = 0usize; + + let mut dir = Direction::Down; + + let mut off = 0i32; + let mut prev_off; + + let mut D10_buf = Aligned::new(B); + let mut C10_buf = Aligned::new(B); + + let mut D_buf = Aligned::new(B); + let mut R_buf = Aligned::new(B); + for j in 0..B * L { + let D_insert = if j == 0 { 0 } else { 
(P::GAP_OPEN as i16) + ((j - 1) as i16) * (P::GAP_EXTEND as i16) }; + D_buf.set(j, D_insert); + R_buf.set(j, D_insert + (P::GAP_OPEN as i16)); + } + self.i += 1; + + let mut start = 0usize; + + let mut temp_buf1 = Aligned::new(1); + let mut temp_buf2 = Aligned::new(1); + + loop { + prev_off = off; + let mut D_max = neg_inf; + let mut D_argmax = simd_set1_i16(0); + let mut curr_right_max = neg_inf; + let mut curr_down_max = neg_inf; + + #[cfg(feature = "debug")] + { + println!("i: {}", self.i); + println!("j: {}", self.j); + println!("start: {}", start); + println!("{:?}", dir); + println!("off: {}", off); + } + + match dir { + Direction::Right => { + off += D10_buf.get(0) as i32; + let off_add = simd_set1_i16(clamp(prev_off - off)); + // don't care about previous corner cell, even though it might have been computed + // "cutting corners" in the implementation + let mut corner = i16::MIN; + let mut curr_i = simd_set1_i16(0); + + for i in (0..B * L).step_by(L) { + let next_corner = clamp((D10_buf.get(i + L - 1) as i32) + prev_off - off); + + let D10_buf_ptr = D10_buf.as_mut_ptr().add(i); + simd_store(D10_buf_ptr as _, simd_adds_i16(simd_load(D10_buf_ptr as _), off_add)); + let C10_buf_ptr = C10_buf.as_mut_ptr().add(i); + simd_store(C10_buf_ptr as _, simd_adds_i16(simd_load(C10_buf_ptr as _), off_add)); + + let (early_exit, curr_D_max, curr_D_argmax) = self.place_block( + self.i + i, + self.j + (B - 1) * L, + D10_buf_ptr, + C10_buf_ptr, + corner, + temp_buf1.as_mut_ptr(), + temp_buf2.as_mut_ptr() + ); + + corner = next_corner; + curr_right_max = simd_max_i16(curr_right_max, simd_load(D10_buf_ptr as _)); + + if X_DROP { + D_max = simd_max_i16(D_max, curr_D_max); + let mask = simd_cmpeq_i16(D_max, curr_D_max); + D_argmax = simd_blend_i8(D_argmax, simd_adds_i16(curr_D_argmax, curr_i), mask); + curr_i = simd_adds_i16(curr_i, simd_set1_i16(L as i16)); + } + + if early_exit { + break; + } + } + + // shift and offset bottom row + curr_down_max = self.shift_and_offset( + 
D_buf.as_mut_ptr(), + R_buf.as_mut_ptr(), + temp_buf1.as_mut_ptr(), + temp_buf2.as_mut_ptr(), + off_add + ); + }, + Direction::Down => { + off += D_buf.get(0) as i32; + let off_add = simd_set1_i16(clamp(prev_off - off)); + // don't care about previous corner cell, even though it might have been computed + // "cutting corners" in the implementation + let mut corner = i16::MIN; + let mut curr_j = simd_set1_i16(0); + + for j in (0..B * L).step_by(L) { + let next_corner = clamp((D_buf.get(j + L - 1) as i32) + prev_off - off); + + let D_buf_ptr = D_buf.as_mut_ptr().add(j); + simd_store(D_buf_ptr as _, simd_adds_i16(simd_load(D_buf_ptr as _), off_add)); + let R_buf_ptr = R_buf.as_mut_ptr().add(j); + simd_store(R_buf_ptr as _, simd_adds_i16(simd_load(R_buf_ptr as _), off_add)); + + let (early_exit, curr_D_max, curr_D_argmax) = self.place_block( + self.i + start * L, + self.j + j, + temp_buf1.as_mut_ptr(), + temp_buf2.as_mut_ptr(), + corner, + D_buf_ptr, + R_buf_ptr + ); + + corner = next_corner; + curr_down_max = simd_max_i16(curr_down_max, simd_load(D_buf_ptr as _)); + + if X_DROP { + D_max = simd_max_i16(D_max, curr_D_max); + let mask = simd_cmpeq_i16(D_max, curr_D_max); + D_argmax = simd_blend_i8(D_argmax, simd_adds_i16(curr_D_argmax, curr_j), mask); + curr_j = simd_adds_i16(curr_j, simd_set1_i16(L as i16)); + } + + if early_exit { + break; + } + } + + // shift and offset right column + curr_right_max = self.shift_and_offset( + D10_buf.as_mut_ptr(), + C10_buf.as_mut_ptr(), + temp_buf1.as_mut_ptr(), + temp_buf2.as_mut_ptr(), + off_add + ); + } + } + + let right_max = simd_hmax_i16(curr_right_max); + let down_max = simd_hmax_i16(curr_down_max); + + if X_DROP { + let max = simd_hmax_i16(D_max); + + if off + (max as i32) > best_max { + let lane_idx = (simd_movemask_i8( + simd_cmpeq_i16(D_max, simd_set1_i16(max))).trailing_zeros() / 2) as usize; + let idx = simd_slow_extract_i16(D_argmax, lane_idx) as usize; + match dir { + Direction::Right => { + best_argmax_i = self.i + 
(idx / L) * L + lane_idx; + best_argmax_j = self.j + (B - 1) * L + idx % L; + }, + Direction::Down => { + best_argmax_i = self.i + start * L + lane_idx; + best_argmax_j = self.j + idx; + } + } + best_max = off + max as i32; + } + + if off + (cmp::max(right_max, down_max) as i32) < best_max - self.x_drop { + // x drop termination + break; + } + } + + // first check if the shift direction is "forced" + if self.i + (start + 1) * L > self.query.len() && self.j + B * L > self.reference.len() { + // reached the end of the strings + break; + } else if start < B - 1 { + start += 1; + } else if self.j + B * L > self.reference.len() { + self.i += L; + dir = Direction::Down; + } else if self.i + B * L > self.query.len() { + self.j += L; + dir = Direction::Right; + } else { + // move according to max + if down_max > right_max { + self.i += L; + dir = Direction::Down; + } else if right_max > down_max { + self.j += L; + dir = Direction::Right; + } else { + // arbitrary + self.j += L; + dir = Direction::Right; + } + } + } + + self.res = if X_DROP { + AlignResult { + score: best_max, + query_idx: best_argmax_i, + reference_idx: best_argmax_j + } + } else { + debug_assert!(self.i <= self.query.len()); + debug_assert!(self.query.len() - self.i < B * L); + AlignResult { + score: off + D10_buf.get(self.query.len() - self.i + (B - 1 - start) * L) as i32, + query_idx: self.query.len(), + reference_idx: self.reference.len() + } + }; + } + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + #[inline] + unsafe fn shift_and_offset(&self, buf1: *mut i16, buf2: *mut i16, temp_buf1: *mut i16, temp_buf2: *mut i16, off_add: Simd) -> Simd { + let neg_inf = simd_set1_i16(i16::MIN); + let mut curr_max = neg_inf; + + for i in (0..(B - 1) * L).step_by(L) { + let next1 = simd_adds_i16(simd_load(buf1.add(i + L) as _), off_add); + let next2 = 
simd_adds_i16(simd_load(buf2.add(i + L) as _), off_add); + simd_store(buf1.add(i) as _, next1); + simd_store(buf2.add(i) as _, next2); + curr_max = simd_max_i16(curr_max, next1); + } + + let next1 = simd_load(temp_buf1 as _); + let next2 = simd_load(temp_buf2 as _); + simd_store(buf1.add((B - 1) * L) as _, next1); + simd_store(buf2.add((B - 1) * L) as _, next2); + simd_store(temp_buf1 as _, neg_inf); + simd_store(temp_buf2 as _, neg_inf); + simd_max_i16(curr_max, next1) + } + + // Place block right or down. + // + // Assumes all inputs are already relative to the current offset. + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + #[inline] + unsafe fn place_block(&mut self, + start_i: usize, + start_j: usize, + D10_buf: *mut i16, + C10_buf: *mut i16, + corner: i16, + D_buf: *mut i16, + R_buf: *mut i16) -> (bool, Simd, Simd) { + let (neg_inf, gap_open, gap_extend) = self.get_const_simd(); + let query = halfsimd_loadu(self.query.as_ptr(start_i) as _); + let mut D10 = simd_load(D10_buf as _); + let mut C10 = simd_load(C10_buf as _); + let mut D00 = simd_sl_i16!(D10, simd_set1_i16(corner), 1); + let mut D_max = neg_inf; + let mut D_argmax = simd_set1_i16(0); + let mut curr_i = simd_set1_i16(0); + let mut early_exit = false; + + // TODO: trace direction + + // hottest loop in the whole program + for j in 0..L { + // efficiently lookup scores for each query character + let matrix_ptr = self.matrix.as_ptr(self.reference.get(start_j + j) as usize); + let scores1 = halfsimd_load(matrix_ptr as *const HalfSimd); + let scores2 = if M::NUC { + halfsimd_set1_i8(0) // unused, should be optimized out + } else { + halfsimd_load((matrix_ptr as *const HalfSimd).add(1)) + }; + let scores = if M::NUC { + halfsimd_lookup1_i16(scores1, query) + } else { + halfsimd_lookup2_i16(scores1, scores2, query) + }; + + let mut D11 = simd_adds_i16(D00, 
scores); + let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), simd_adds_i16(D10, gap_open)); + D11 = simd_max_i16(D11, C11); + + let trace_D_C = if TRACE { + simd_movemask_i8(simd_cmpeq_i16(D11, C11)) + } else { + 0 // should be optimized out + }; + + let D11_open = simd_adds_i16(D11, gap_open); + let R_insert = simd_set1_i16(*R_buf.add(j)); + let mut R11 = simd_sl_i16!(D11_open, R_insert, 1); + // avoid doing prefix scan if possible! + if simd_movemask_i8(simd_cmpgt_i16(R11, D11_open)) != 0 { + R11 = simd_prefix_scan_i16(R11, P::GAP_EXTEND as i16); + D11 = simd_max_i16(D11, R11); + } + + if TRACE { + let trace_D_R = simd_movemask_i8(simd_cmpeq_i16(D11, R11)); + self.trace.add(((trace_D_R & Self::EVEN_BITS) << 1) | (trace_D_C & Self::EVEN_BITS)); + } + + if X_DROP { + D_max = simd_max_i16(D_max, D11); + let mask = simd_cmpeq_i16(D_max, D11); + D_argmax = simd_blend_i8(D_argmax, curr_i, mask); + curr_i = simd_adds_i16(curr_i, simd_set1_i16(1)); + } + + #[cfg(feature = "debug")] + { + print!("s: "); + simd_dbg_i16(scores); + print!("C11: "); + simd_dbg_i16(C11); + print!("R11: "); + simd_dbg_i16(R11); + print!("D11: "); + simd_dbg_i16(D11); + } + + let D_insert = simd_set1_i16(*D_buf.add(j)); + D00 = simd_sl_i16!(D11, D_insert, 1); + + ptr::write(D_buf.add(j), simd_extract_i16::<{ L - 1 }>(D11)); + let R_buf_val = { + let R_last = simd_max_i16(D11_open, simd_adds_i16(R11, gap_extend)); + simd_extract_i16::<{ L - 1 }>(R_last) + }; + ptr::write(R_buf.add(j), R_buf_val); + + D10 = D11; + C10 = C11; + + if !X_DROP && start_i + L > self.query.len() + && start_j + j >= self.reference.len() { + early_exit = true; + break; + } + } + + simd_store(D10_buf as _, D10); + simd_store(C10_buf as _, C10); + (early_exit, D_max, D_argmax) + } + + #[inline(always)] + pub fn res(&self) -> AlignResult { + self.res + } + + #[inline(always)] + pub fn trace(&self) -> &Trace { + assert!(TRACE); + &self.trace + } + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), 
target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[inline] + unsafe fn get_const_simd(&self) -> (Simd, Simd, Simd) { + // some useful constant simd vectors + let neg_inf = simd_set1_i16(i16::MIN); + let gap_open = simd_set1_i16(P::GAP_OPEN as i16); + let gap_extend = simd_set1_i16(P::GAP_EXTEND as i16); + (neg_inf, gap_open, gap_extend) + } +} + +#[inline(always)] +fn convert_char(c: u8, nuc: bool) -> u8 { + let c = c.to_ascii_uppercase(); + debug_assert!(c >= b'A' && c <= NULL); + if nuc { c } else { c - b'A' } +} + +#[inline(always)] +fn clamp(x: i32) -> i16 { + cmp::min(cmp::max(x, i16::MIN as i32), i16::MAX as i32) as i16 +} + +#[inline(always)] +fn div_ceil(n: usize, d: usize) -> usize { + (n + d - 1) / d +} + +pub struct Aligned { + layout: alloc::Layout, + ptr: *const i16 +} + +impl Aligned { + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[inline] + pub unsafe fn new(blocks: usize) -> Self { + let layout = alloc::Layout::from_size_align_unchecked(blocks * L * 2, L_BYTES); + let ptr = alloc::alloc(layout) as *const i16; + let neg_inf = simd_set1_i16(i16::MIN); + for i in (0..blocks * L).step_by(L) { + simd_store(ptr.add(i) as _, neg_inf); + } + Self { layout, ptr } + } + + #[inline(always)] + pub fn get(&self, i: usize) -> i16 { + unsafe { *self.ptr.add(i) } + } + + #[inline(always)] + pub fn set(&mut self, i: usize, v: i16) { + unsafe { ptr::write(self.ptr.add(i) as _, v); } + } + + #[inline(always)] + pub fn as_mut_ptr(&mut self) -> *mut i16 { + self.ptr as _ + } + + #[inline(always)] + pub fn as_ptr(&self) -> *const i16 { + self.ptr + } +} + +impl Drop for Aligned { + fn drop(&mut self) { + unsafe { alloc::dealloc(self.ptr as _, self.layout); } + } +} + +#[derive(Clone, PartialEq, Debug)] +pub struct PaddedBytes { + s: Vec, + len: usize +} + +impl PaddedBytes { + 
#[inline(always)] + pub fn from_bytes(b: &[u8], blocks: usize, nuc: bool) -> Self { + let mut v = b.to_owned(); + let len = v.len(); + v.insert(0, NULL); + v.resize(v.len() + blocks * L, NULL); + v.iter_mut().for_each(|c| *c = convert_char(*c, nuc)); + Self { s: v, len } + } + + #[inline(always)] + pub fn from_str(s: &str, blocks: usize, nuc: bool) -> Self { + Self::from_bytes(s.as_bytes(), blocks, nuc) + } + + #[inline(always)] + pub fn from_string(s: String, blocks: usize, nuc: bool) -> Self { + let mut v = s.into_bytes(); + let len = v.len(); + v.insert(0, NULL); + v.resize(v.len() + blocks * L, NULL); + v.iter_mut().for_each(|c| *c = convert_char(*c, nuc)); + Self { s: v, len } + } + + #[inline(always)] + pub fn get(&self, i: usize) -> u8 { + unsafe { *self.s.get_unchecked(i) } + } + + #[inline(always)] + pub fn set(&mut self, i: usize, c: u8) { + unsafe { *self.s.get_unchecked_mut(i) = c; } + } + + #[inline(always)] + pub fn as_ptr(&self, i: usize) -> *const u8 { + unsafe { self.s.as_ptr().add(i) } + } + + #[inline(always)] + pub fn len(&self) -> usize { + self.len + } +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct AlignResult { + pub score: i32, + pub query_idx: usize, + pub reference_idx: usize +} + +#[derive(Copy, Clone, PartialEq, Debug)] +enum Direction { + Right, + Down +} + +#[derive(Clone)] +pub struct Trace { + trace: Vec, + shift_dir: Vec, + idx: usize +} + +impl Trace { + #[inline(always)] + pub fn new(query_len: usize, reference_len: usize) -> Self { + let len = query_len + reference_len; + Self { + trace: vec![0; div_ceil(len, 16)], + shift_dir: vec![0; div_ceil(div_ceil(len, L), 16)], + idx: 0 + } + } + + #[inline(always)] + pub fn add(&mut self, t: u32) { + unsafe { *self.trace.get_unchecked_mut(self.idx) = t; } + self.idx += 1; + } + + #[inline(always)] + pub fn dir(&mut self, d: u32) { + let i = self.idx / L; + unsafe { + *self.shift_dir.get_unchecked_mut(i / 16) |= d << (i % 16); + } + } + + #[inline(always)] + pub fn clear(&mut 
self) { + self.trace.fill(0); + self.shift_dir.fill(0); + self.idx = 0; + } +} + +#[cfg(test)] +mod tests { + use crate::scores::*; + + use super::*; + + #[test] + fn test_no_x_drop() { + type TestParams = GapParams<-11, -1>; + + let r = PaddedBytes::from_bytes(b"AAAA", 2, false); + let q = PaddedBytes::from_bytes(b"AARA", 2, false); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 11); + + let r = PaddedBytes::from_bytes(b"AAAA", 2, false); + let q = PaddedBytes::from_bytes(b"AAAA", 2, false); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 16); + + let r = PaddedBytes::from_bytes(b"AAAA", 2, false); + let q = PaddedBytes::from_bytes(b"AARA", 2, false); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 11); + + let r = PaddedBytes::from_bytes(b"AAAA", 2, false); + let q = PaddedBytes::from_bytes(b"RRRR", 2, false); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, -4); + + let r = PaddedBytes::from_bytes(b"AAAA", 2, false); + let q = PaddedBytes::from_bytes(b"AAA", 2, false); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 1); + + type TestParams2 = GapParams<-1, -1>; + + let r = PaddedBytes::from_bytes(b"AAAN", 2, true); + let q = PaddedBytes::from_bytes(b"ATAA", 2, true); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 1); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 2, true); + let q = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 2, true); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 32); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 2, true); + let q = PaddedBytes::from_bytes(b"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", 2, true); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, -32); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 2, true); + let q = 
PaddedBytes::from_bytes(b"TATATATATATATATATATATATATATATATA", 2, true); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 0); + + let r = PaddedBytes::from_bytes(b"TTAAAAAAATTTTTTTTTTTT", 2, true); + let q = PaddedBytes::from_bytes(b"TTTTTTTTAAAAAAATTTTTTTTT", 2, true); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 9); + + let r = PaddedBytes::from_bytes(b"AAAA", 2, true); + let q = PaddedBytes::from_bytes(b"C", 2, true); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, -4); + let a = Block::::align(&r, &q, &NW1, 0); + assert_eq!(a.res().score, -4); + } + + #[test] + fn test_x_drop() { + type TestParams = GapParams<-11, -1>; + + let r = PaddedBytes::from_bytes(b"AAARRA", 2, false); + let q = PaddedBytes::from_bytes(b"AAAAAA", 2, false); + let a = Block::::align(&q, &r, &BLOSUM62, 1); + assert_eq!(a.res(), AlignResult { score: 14, query_idx: 6, reference_idx: 6 }); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAARRRRRRRRRRRRRRRRAAAAAAAAAAAAA", 2, false); + let q = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 2, false); + let a = Block::::align(&q, &r, &BLOSUM62, 1); + assert_eq!(a.res(), AlignResult { score: 60, query_idx: 15, reference_idx: 15 }); + } +} diff --git a/lib/block-aligner/src/old/scan_minecraft.rs b/lib/block-aligner/src/old/scan_minecraft.rs new file mode 100644 index 000000000..7de62c890 --- /dev/null +++ b/lib/block-aligner/src/old/scan_minecraft.rs @@ -0,0 +1,690 @@ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::avx2::*; + +#[cfg(target_arch = "wasm32")] +use crate::simd128::*; + +use crate::scores::*; + +use std::{cmp, ptr, i16}; +use std::marker::PhantomData; + +const NULL: u8 = b'A' + 26u8; // this null byte value works for both amino acids and nucleotides + +// Notes: +// +// BLOSUM62 matrix max = 11, min = -4; gap open = -11 (includes extension), gap extend = -1 +// +// R[i][j] = max(R[i - 1][j] + gap_extend, D[i - 1][j] + 
gap_open) +// C[i][j] = max(C[i][j - 1] + gap_extend, D[i][j - 1] + gap_open) +// D[i][j] = max(D[i - 1][j - 1] + matrix[query[i]][reference[j]], R[i][j], C[i][j]) +// +// indexing (we want to calculate D11): +// x0 x1 +// +-------- +// 0x | 00 01 +// 1x | 10 11 +// +// note that 'x' represents any bit +// +// Each block is made up of vertical SIMD vectors of length 8 or 16 16-bit integers. + +// TODO: create matrices with const fn + +pub struct Block<'a, P: ScoreParams, M: 'a + Matrix, const TRACE: bool, const X_DROP: bool> { + res: AlignResult, + trace: Trace, + query: &'a PaddedBytes, + i: usize, + reference: &'a PaddedBytes, + j: usize, + matrix: &'a M, + x_drop: i32, + _phantom: PhantomData

+} + +impl<'a, P: ScoreParams, M: 'a + Matrix, const TRACE: bool, const X_DROP: bool> Block<'a, P, M, { TRACE }, { X_DROP }> { + const EVEN_BITS: u32 = 0x55555555u32; + + /// Adaptive banded alignment. + /// + /// The x drop option indicates whether to terminate the alignment process early when + /// the max score in the current band drops below the max score encountered so far. If + /// x drop is not enabled, then the band will keep shifting until the end of the reference + /// string is reached. + /// + /// Limitations: + /// 1. Requires x86 AVX2 or WASM SIMD support. + /// 2. The reference and the query can only contain uppercase alphabetical characters. + /// 3. The actual size of the band is K + 1 rounded up to the next multiple of the + /// vector length of 16 (for x86 AVX2) or 8 (for WASM SIMD). + pub fn align(query: &'a PaddedBytes, reference: &'a PaddedBytes, matrix: &'a M, x_drop: i32) -> Self { + assert!(P::GAP_OPEN <= P::GAP_EXTEND); + + if X_DROP { + assert!(x_drop >= 0); + } + + let mut a = Self { + res: AlignResult { score: 0, query_idx: 0, reference_idx: 0 }, + trace: if TRACE { Trace::new(query.len(), reference.len()) } else { Trace::new(0, 0) }, + query, + i: 0, + reference, + j: 0, + matrix, + x_drop, + _phantom: PhantomData + }; + + unsafe { a.align_core(); } + a + } + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + unsafe fn align_core(&mut self) { + let neg_inf = simd_set1_i16(i16::MIN); + + let mut best_max = i32::MIN; + let mut best_argmax_i = 0usize; + let mut best_argmax_j = 0usize; + + let mut dir = Direction::Diagonal; + let mut prev_dir = Direction::Diagonal; + + let mut off = 0i32; + let mut prev_off = 0i32; + + let mut corner1 = i16::MIN as i32; + let mut corner2 = 0i32; + + let mut D = simd_insert_i16::<{ L - 1 }>(neg_inf, 0i16); + let mut C = neg_inf; + + let mut D_buf = 
Aligned([i16::MIN; L]); + D_buf.0[L - 1] = 0; + let mut R_buf = Aligned([i16::MIN; L]); + + loop { + let off_add = simd_set1_i16(clamp(prev_off - off)); + + #[cfg(feature = "debug")] + { + println!("i: {}", self.i); + println!("j: {}", self.j); + println!("{:?}", dir); + println!("off: {}", off); + } + + let (new_D, new_C, D_max, D_argmax) = match dir { + Direction::Diagonal => { + let off_add = prev_off - off; + + self.place_block_diag( + clamp(corner2 - off), + clamp((D_buf.0[L - 2] as i32) + off_add), + clamp((simd_extract_i16::<{ L - 2 }>(D) as i32) + off_add), + clamp((R_buf.0[L - 1] as i32) + off_add), + clamp((simd_extract_i16::<{ L - 1 }>(C) as i32) + off_add), + D_buf.0.as_mut_ptr(), + R_buf.0.as_mut_ptr() + ) + }, + Direction::Right => { + let corner = if prev_dir == Direction::Down { clamp(corner1 - off) } else { i16::MIN }; + + self.place_block_rd::( + simd_adds_i16(D, off_add), + simd_adds_i16(C, off_add), + corner, + D_buf.0.as_mut_ptr(), + R_buf.0.as_mut_ptr() + ) + }, + Direction::Down => { + let corner = if prev_dir == Direction::Right { clamp(corner1 - off) } else { i16::MIN }; + let D_buf_ptr = D_buf.0.as_mut_ptr(); + let C_buf_ptr = R_buf.0.as_mut_ptr(); + simd_store(D_buf_ptr as _, simd_adds_i16(simd_load(D_buf_ptr as _), off_add)); + simd_store(C_buf_ptr as _, simd_adds_i16(simd_load(C_buf_ptr as _), off_add)); + + self.place_block_rd::( + neg_inf, + neg_inf, + corner, + D_buf.0.as_mut_ptr(), + R_buf.0.as_mut_ptr() + ) + } + }; + D = new_D; + C = new_C; + + let right_max = simd_hmax_i16(D); + let down_max = simd_hmax_i16(simd_load(D_buf.0.as_ptr() as _)); + prev_dir = dir; + + if X_DROP { + let max = simd_hmax_i16(D_max); + + if off + (max as i32) > best_max { + let lane_idx = (simd_movemask_i8( + simd_cmpeq_i16(D_max, simd_set1_i16(max))).trailing_zeros() / 2) as usize; + best_argmax_i = self.i + lane_idx; + best_argmax_j = self.j + simd_slow_extract_i16(D_argmax, lane_idx) as usize; + best_max = off + max as i32; + } + + if off + 
(cmp::max(right_max, down_max) as i32) < best_max - self.x_drop { + // x drop termination + break; + } + } + + // first check if the shift direction is "forced" + if self.i + L > self.query.len() && self.j + L > self.reference.len() { + // reached the end of the strings + break; + } else if self.j + L > self.reference.len() { + self.i += L; + dir = Direction::Down; + } else if self.i + L > self.query.len() { + self.j += L; + dir = Direction::Right; + } else { + // move according to max + if down_max > right_max { + self.i += L; + dir = Direction::Down; + } else if right_max > down_max { + self.j += L; + dir = Direction::Right; + } else if right_max == down_max && down_max == D_buf.0[L - 1] { + self.i += L - 1; + self.j += L - 1; + dir = Direction::Diagonal; + } else { + // arbitrary + self.j += L; + dir = Direction::Right; + } + } + + corner1 = corner2; + corner2 = off + D_buf.0[L - 1] as i32; + prev_off = off; + off += simd_extract_i16::<0>(D) as i32; + } + + self.res = if X_DROP { + AlignResult { + score: best_max, + query_idx: best_argmax_i, + reference_idx: best_argmax_j + } + } else { + debug_assert!(self.i <= self.query.len()); + AlignResult { + score: off + simd_slow_extract_i16(D, self.query.len() - self.i) as i32, + query_idx: self.query.len(), + reference_idx: self.reference.len() + } + }; + } + + // Place block diagonally, overlapping the previous block's lower right corner element. + // + // Assumes all inputs are already relative to the current offset. 
+ #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + #[cold] + unsafe fn place_block_diag(&mut self, + corner11: i16, + corner10: i16, + corner01: i16, + R_corner: i16, + C_corner: i16, + D_buf: *mut i16, + R_buf: *mut i16) -> (Simd, Simd, Simd, Simd) { + let (neg_inf, gap_open, gap_extend) = self.get_const_simd(); + let query = halfsimd_convert_char(halfsimd_loadu(self.query.as_ptr(self.i) as _), M::NUC); + let mut D00 = simd_sl_i16!(neg_inf, simd_set1_i16(corner10), 2); + let mut D10 = neg_inf; + let mut C10 = neg_inf; + let mut R_insert = simd_set1_i16(R_corner); + let mut D_insert = simd_set1_i16(corner01); + let mut D_max = neg_inf; + let mut D_argmax = simd_set1_i16(0); + let mut curr_i = simd_set1_i16(0); + + if TRACE { + self.trace.dir(0b00); + } + + for i in 0..L { + let matrix_ptr = self.matrix.as_ptr(convert_char(self.reference.get(self.j + i), M::NUC) as usize); + let scores1 = halfsimd_load(matrix_ptr as *const HalfSimd); + let scores2 = if M::NUC { + halfsimd_set1_i8(0) // unused, should be optimized out + } else { + halfsimd_load((matrix_ptr as *const HalfSimd).add(1)) + }; + + // efficiently lookup scores for each query character + let scores = if M::NUC { + halfsimd_lookup1_i16(scores1, query) + } else { + halfsimd_lookup2_i16(scores1, scores2, query) + }; + + let mut D11 = simd_adds_i16(D00, scores); + let mut C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), simd_adds_i16(D10, gap_open)); + D11 = simd_max_i16(D11, C11); + + if i == 0 { + D11 = simd_insert_i16::<0>(D11, corner11); + C11 = simd_insert_i16::<0>(C11, C_corner); + } + + let trace_D_C = if TRACE { + simd_movemask_i8(simd_cmpeq_i16(D11, C11)) + } else { + 0 // should be optimized out + }; + + let D11_open = simd_adds_i16(D11, gap_open); + let mut R11 = simd_sl_i16!(D11_open, R_insert, 1); + R_insert = neg_inf; + // avoid doing prefix scan if 
possible! + if simd_movemask_i8(simd_cmpgt_i16(R11, D11_open)) != 0 { + R11 = simd_prefix_scan_i16(R11, P::GAP_EXTEND as i16); + D11 = simd_max_i16(D11, R11); + } + + if TRACE { + let trace_D_R = simd_movemask_i8(simd_cmpeq_i16(D11, R11)); + self.trace.add(((trace_D_R & Self::EVEN_BITS) << 1) | (trace_D_C & Self::EVEN_BITS)); + } + + if X_DROP { + D_max = simd_max_i16(D_max, D11); + let mask = simd_cmpeq_i16(D_max, D11); + D_argmax = simd_blend_i8(D_argmax, curr_i, mask); + curr_i = simd_adds_i16(curr_i, simd_set1_i16(1)); + } + + #[cfg(feature = "debug")] + { + print!("s: "); + simd_dbg_i16(scores); + print!("C11: "); + simd_dbg_i16(C11); + print!("R11: "); + simd_dbg_i16(R11); + print!("D11: "); + simd_dbg_i16(D11); + } + + D00 = simd_sl_i16!(D11, D_insert, 1); + D_insert = neg_inf; + + ptr::write(D_buf.add(i), simd_extract_i16::<{ L - 1 }>(D11)); + let R_buf_val = { + let R_last = simd_max_i16(D11_open, simd_adds_i16(R11, gap_extend)); + simd_extract_i16::<{ L - 1 }>(R_last) + }; + ptr::write(R_buf.add(i), R_buf_val); + + D10 = D11; + C10 = C11; + + if !X_DROP && self.i + L > self.query.len() + && self.j + i >= self.reference.len() { + break; + } + } + + (D10, C10, D_max, D_argmax) + } + + // Place block right or down. + // + // Assumes all inputs are already relative to the current offset. 
+ #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + #[inline] + unsafe fn place_block_rd(&mut self, + mut D10: Simd, + mut C10: Simd, + corner: i16, + D_buf: *mut i16, + R_buf: *mut i16) -> (Simd, Simd, Simd, Simd) { + let (neg_inf, gap_open, gap_extend) = self.get_const_simd(); + let query = halfsimd_convert_char(halfsimd_loadu(self.query.as_ptr(self.i) as _), M::NUC); + let mut D00 = simd_sl_i16!(D10, simd_set1_i16(corner), 1); + let mut D_max = neg_inf; + let mut D_argmax = simd_set1_i16(0); + let mut curr_i = simd_set1_i16(0); + + if TRACE { + self.trace.dir(if RIGHT { 0b01 } else { 0b10 }); + } + + for i in 0..L { + let matrix_ptr = self.matrix.as_ptr(convert_char(self.reference.get(self.j + i), M::NUC) as usize); + let scores1 = halfsimd_load(matrix_ptr as *const HalfSimd); + let scores2 = if M::NUC { + halfsimd_set1_i8(0) // unused, should be optimized out + } else { + halfsimd_load((matrix_ptr as *const HalfSimd).add(1)) + }; + + // efficiently lookup scores for each query character + let scores = if M::NUC { + halfsimd_lookup1_i16(scores1, query) + } else { + halfsimd_lookup2_i16(scores1, scores2, query) + }; + + let mut D11 = simd_adds_i16(D00, scores); + let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), simd_adds_i16(D10, gap_open)); + D11 = simd_max_i16(D11, C11); + + let trace_D_C = if TRACE { + simd_movemask_i8(simd_cmpeq_i16(D11, C11)) + } else { + 0 // should be optimized out + }; + + let D11_open = simd_adds_i16(D11, gap_open); + let R_insert = if RIGHT { neg_inf } else { simd_set1_i16(*R_buf.add(i)) }; + let mut R11 = simd_sl_i16!(D11_open, R_insert, 1); + // avoid doing prefix scan if possible! 
+ if simd_movemask_i8(simd_cmpgt_i16(R11, D11_open)) != 0 { + R11 = simd_prefix_scan_i16(R11, P::GAP_EXTEND as i16); + D11 = simd_max_i16(D11, R11); + } + + if TRACE { + let trace_D_R = simd_movemask_i8(simd_cmpeq_i16(D11, R11)); + self.trace.add(((trace_D_R & Self::EVEN_BITS) << 1) | (trace_D_C & Self::EVEN_BITS)); + } + + if X_DROP { + D_max = simd_max_i16(D_max, D11); + let mask = simd_cmpeq_i16(D_max, D11); + D_argmax = simd_blend_i8(D_argmax, curr_i, mask); + curr_i = simd_adds_i16(curr_i, simd_set1_i16(1)); + } + + #[cfg(feature = "debug")] + { + print!("s: "); + simd_dbg_i16(scores); + print!("C11: "); + simd_dbg_i16(C11); + print!("R11: "); + simd_dbg_i16(R11); + print!("D11: "); + simd_dbg_i16(D11); + } + + let D_insert = if RIGHT { neg_inf } else { simd_set1_i16(*D_buf.add(i)) }; + D00 = simd_sl_i16!(D11, D_insert, 1); + + ptr::write(D_buf.add(i), simd_extract_i16::<{ L - 1 }>(D11)); + let R_buf_val = { + let R_last = simd_max_i16(D11_open, simd_adds_i16(R11, gap_extend)); + simd_extract_i16::<{ L - 1 }>(R_last) + }; + ptr::write(R_buf.add(i), R_buf_val); + + D10 = D11; + C10 = C11; + + if !X_DROP && self.i + L > self.query.len() + && self.j + i >= self.reference.len() { + break; + } + } + + (D10, C10, D_max, D_argmax) + } + + #[inline(always)] + pub fn res(&self) -> AlignResult { + self.res + } + + #[inline(always)] + pub fn trace(&self) -> &Trace { + assert!(TRACE); + &self.trace + } + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[inline] + unsafe fn get_const_simd(&self) -> (Simd, Simd, Simd) { + // some useful constant simd vectors + let neg_inf = simd_set1_i16(i16::MIN); + let gap_open = simd_set1_i16(P::GAP_OPEN as i16); + let gap_extend = simd_set1_i16(P::GAP_EXTEND as i16); + (neg_inf, gap_open, gap_extend) + } +} + +#[inline(always)] +fn convert_char(c: u8, nuc: bool) -> u8 { + debug_assert!(c >= b'A' && c <= NULL); + 
if nuc { c } else { c - b'A' } +} + +#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] +#[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] +#[inline] +unsafe fn halfsimd_convert_char(v: HalfSimd, nuc: bool) -> HalfSimd { + if nuc { v } else { halfsimd_sub_i8(v, halfsimd_set1_i8(b'A' as i8)) } +} + +#[inline(always)] +fn clamp(x: i32) -> i16 { + cmp::min(cmp::max(x, i16::MIN as i32), i16::MAX as i32) as i16 +} + +#[inline(always)] +fn div_ceil(n: usize, d: usize) -> usize { + (n + d - 1) / d +} + +#[derive(Clone, PartialEq, Debug)] +pub struct PaddedBytes { + s: Vec +} + +impl PaddedBytes { + #[inline(always)] + pub fn from_bytes(b: &[u8]) -> Self { + let mut v = b.to_owned(); + v.insert(0, NULL); + v.resize(v.len() + L, NULL); + Self { s: v } + } + + #[inline(always)] + pub fn from_str(s: &str) -> Self { + Self::from_bytes(s.as_bytes()) + } + + #[inline(always)] + pub fn from_string(s: String) -> Self { + let mut v = s.into_bytes(); + v.insert(0, NULL); + v.resize(v.len() + L, NULL); + Self { s: v } + } + + #[inline(always)] + pub fn get(&self, i: usize) -> u8 { + unsafe { *self.s.get_unchecked(i) } + } + + #[inline(always)] + pub fn set(&mut self, i: usize, c: u8) { + unsafe { *self.s.get_unchecked_mut(i) = c; } + } + + #[inline(always)] + pub fn as_ptr(&self, i: usize) -> *const u8 { + unsafe { self.s.as_ptr().add(i) } + } + + #[inline(always)] + pub fn len(&self) -> usize { + self.s.len() - L - 1 + } +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct AlignResult { + pub score: i32, + pub query_idx: usize, + pub reference_idx: usize +} + +#[derive(Copy, Clone, PartialEq, Debug)] +enum Direction { + Right, + Down, + Diagonal +} + +#[derive(Clone)] +pub struct Trace { + trace: Vec, + shift_dir: Vec, + idx: usize +} + +impl Trace { + #[inline(always)] + pub fn new(query_len: usize, reference_len: usize) -> Self { + let len = query_len + reference_len; + Self { + trace: vec![0; div_ceil(len, 
16)], + shift_dir: vec![0; div_ceil(div_ceil(len, L), 16)], + idx: 0 + } + } + + #[inline(always)] + pub fn add(&mut self, t: u32) { + unsafe { *self.trace.get_unchecked_mut(self.idx) = t; } + self.idx += 1; + } + + #[inline(always)] + pub fn dir(&mut self, d: u32) { + let i = self.idx / L; + unsafe { + *self.shift_dir.get_unchecked_mut(i / 16) |= d << (i % 16); + } + } + + #[inline(always)] + pub fn clear(&mut self) { + self.trace.fill(0); + self.shift_dir.fill(0); + self.idx = 0; + } +} + +#[cfg(test)] +mod tests { + use crate::scores::*; + + use super::*; + + #[test] + fn test_no_x_drop() { + type TestParams = GapParams<-11, -1>; + + let r = PaddedBytes::from_bytes(b"AAAA"); + let q = PaddedBytes::from_bytes(b"AARA"); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 11); + + let r = PaddedBytes::from_bytes(b"AAAA"); + let q = PaddedBytes::from_bytes(b"AAAA"); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 16); + + let r = PaddedBytes::from_bytes(b"AAAA"); + let q = PaddedBytes::from_bytes(b"AARA"); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 11); + + let r = PaddedBytes::from_bytes(b"AAAA"); + let q = PaddedBytes::from_bytes(b"RRRR"); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, -4); + + let r = PaddedBytes::from_bytes(b"AAAA"); + let q = PaddedBytes::from_bytes(b"AAA"); + let a = Block::::align(&q, &r, &BLOSUM62, 0); + assert_eq!(a.res().score, 1); + + type TestParams2 = GapParams<-1, -1>; + + let r = PaddedBytes::from_bytes(b"AAAN"); + let q = PaddedBytes::from_bytes(b"ATAA"); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 1); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + let q = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 32); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + 
let q = PaddedBytes::from_bytes(b"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT"); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, -32); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + let q = PaddedBytes::from_bytes(b"TATATATATATATATATATATATATATATATA"); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 0); + + let r = PaddedBytes::from_bytes(b"TTAAAAAAATTTTTTTTTTTT"); + let q = PaddedBytes::from_bytes(b"TTTTTTTTAAAAAAATTTTTTTTT"); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, 9); + + let r = PaddedBytes::from_bytes(b"AAAA"); + let q = PaddedBytes::from_bytes(b"C"); + let a = Block::::align(&q, &r, &NW1, 0); + assert_eq!(a.res().score, -4); + let a = Block::::align(&r, &q, &NW1, 0); + assert_eq!(a.res().score, -4); + } + + #[test] + fn test_x_drop() { + type TestParams = GapParams<-11, -1>; + + let r = PaddedBytes::from_bytes(b"AAARRA"); + let q = PaddedBytes::from_bytes(b"AAAAAA"); + let a = Block::::align(&q, &r, &BLOSUM62, 1); + assert_eq!(a.res(), AlignResult { score: 14, query_idx: 6, reference_idx: 6 }); + + let r = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAARRRRRRRRRRRRRRRRAAAAAAAAAAAAA"); + let q = PaddedBytes::from_bytes(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + let a = Block::::align(&q, &r, &BLOSUM62, 1); + assert_eq!(a.res(), AlignResult { score: 60, query_idx: 15, reference_idx: 15 }); + } +} diff --git a/lib/block-aligner/src/old/scan_thin.rs b/lib/block-aligner/src/old/scan_thin.rs new file mode 100644 index 000000000..321a0f92d --- /dev/null +++ b/lib/block-aligner/src/old/scan_thin.rs @@ -0,0 +1,544 @@ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::avx2::*; + +#[cfg(target_arch = "wasm32")] +use crate::simd128::*; + +use crate::scores::*; + +use std::{alloc, cmp, ptr, i16}; +use std::marker::PhantomData; + +const NULL: u8 = b'A' + 26u8; // this null byte value works for both amino acids and nucleotides + +#[inline(always)] +fn 
convert_char(c: u8, nuc: bool) -> u8 { + debug_assert!(c >= b'A' && c <= NULL); + if nuc { c } else { c - b'A' } +} + +#[inline(always)] +fn clamp(x: i32) -> i16 { + cmp::min(cmp::max(x, i16::MIN as i32), i16::MAX as i32) as i16 +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct EndIndex { + pub query_idx: usize, + pub ref_idx: usize +} + +#[derive(Copy, Clone, PartialEq, Debug)] +enum Direction { + Right, + Down(usize) +} + +// Notes: +// +// BLOSUM62 matrix max = 11, min = -4; gap open = -11 (includes extension), gap extend = -1 +// +// R[i][j] = max(R[i - 1][j] + gap_extend, D[i - 1][j] + gap_open) +// C[i][j] = max(C[i][j - 1] + gap_extend, D[i][j - 1] + gap_open) +// D[i][j] = max(D[i - 1][j - 1] + matrix[query[i]][reference[j]], R[i][j], C[i][j]) +// +// indexing (we want to calculate D11): +// x0 x1 +// +-------- +// 0x | 00 01 +// 1x | 10 11 +// +// note that 'x' represents any bit +// +// Each band is made up of strided SIMD vectors of length 8 or 16 16-bit integers. + +#[allow(non_snake_case)] +pub struct ScanAligner<'a, P: ScoreParams, M: 'a + Matrix, const K: usize, const TRACE: bool, const X_DROP: bool> { + query_buf_layout: alloc::Layout, + query_buf_ptr: *mut HalfSimd, + delta_Dx0_layout: alloc::Layout, + delta_Dx0_ptr: *mut Simd, + delta_Cx0_layout: alloc::Layout, + delta_Cx0_ptr: *mut Simd, + abs_A00: i32, + + trace: Vec, + + ring_buf_idx: usize, + ref_idx: usize, + + best_max: i32, + best_argmax_i: usize, + best_argmax_j: usize, + + shift_dir: Direction, + + query: &'a [u8], + matrix: &'a M, + + _phantom: PhantomData

+} + +impl<'a, P: ScoreParams, M: 'a + Matrix, const K: usize, const TRACE: bool, const X_DROP: bool> ScanAligner<'a, P, M, { K }, { TRACE }, { X_DROP }> { + // round K up to multiple of L + // add one to K to make shifting down easier + const CEIL_K: usize = ((K + 1 + L - 1) / L) * L; + const STRIDE: usize = Self::CEIL_K / L; + + const EVEN_BITS: u32 = 0x55555555u32; + + #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + pub unsafe fn new(query: &'a [u8], matrix: &'a M) -> Self { + assert!(Self::CEIL_K <= P::I); + assert!(P::GAP_OPEN <= P::GAP_EXTEND); + assert!(P::I % L == 0); + + // These chunks of memory are contiguous ring buffers that represent the current band + let query_buf_layout = alloc::Layout::from_size_align_unchecked(Self::CEIL_K * HALFSIMD_MUL, L_BYTES); + let query_buf_ptr = alloc::alloc(query_buf_layout) as *mut u8; + + let delta_Dx0_layout = alloc::Layout::from_size_align_unchecked(Self::CEIL_K * 2, L_BYTES); + let delta_Dx0_ptr = alloc::alloc(delta_Dx0_layout) as *mut i16; + + let delta_Cx0_layout = alloc::Layout::from_size_align_unchecked(Self::CEIL_K * 2, L_BYTES); + let delta_Cx0_ptr = alloc::alloc(delta_Cx0_layout) as *mut i16; + + // Initialize DP columns (the first band) + // Not extremely optimized, since it only runs once + { + for i in 0..Self::CEIL_K { + let buf_idx = (i % Self::STRIDE) * L + i / Self::STRIDE; + debug_assert!(buf_idx < Self::CEIL_K); + + if i <= query.len() { + ptr::write(query_buf_ptr.add(halfsimd_get_idx(buf_idx)), convert_char(if i > 0 { + *query.get_unchecked(i - 1) } else { NULL }, M::NUC)); + + let val = if i > 0 { + (P::GAP_OPEN as i32) + ((i as i32) - 1) * (P::GAP_EXTEND as i32) + } else { + 0 + }; + + ptr::write(delta_Dx0_ptr.add(buf_idx), val as i16); + } else { + ptr::write(query_buf_ptr.add(halfsimd_get_idx(buf_idx)), convert_char(NULL, M::NUC)); + 
ptr::write(delta_Dx0_ptr.add(buf_idx), i16::MIN); + } + + ptr::write(delta_Cx0_ptr.add(buf_idx), i16::MIN); + } + } + + Self { + query_buf_layout, + query_buf_ptr: query_buf_ptr as *mut HalfSimd, + delta_Dx0_layout, + delta_Dx0_ptr: delta_Dx0_ptr as *mut Simd, + delta_Cx0_layout, + delta_Cx0_ptr: delta_Cx0_ptr as *mut Simd, + abs_A00: 0i32, + + trace: vec![], + + ring_buf_idx: 0, + ref_idx: 0, + + best_max: 0, // max of first column + best_argmax_i: 0, + best_argmax_j: 0, + + shift_dir: Direction::Right, + + query, + matrix, + + _phantom: PhantomData + } + } + + // TODO: deal with trace when shifting down + // TODO: count number of down/right shifts for profiling + + #[inline(always)] + fn shift_idx(&self) -> usize { + self.ring_buf_idx + } + + #[inline(always)] + fn query_idx(&self) -> usize { + self.ring_buf_idx + Self::CEIL_K - 1 + } + + /// Adaptive banded alignment. + /// + /// The x drop option indicates whether to terminate the alignment process early when + /// the max score in the current band drops below the max score encountered so far. If + /// x drop is not enabled, then the band will keep shifting until the end of the reference + /// string is reached. + /// + /// Limitations: + /// 1. Requires x86 AVX2 or WASM SIMD support. + /// 2. The reference and the query can only contain uppercase alphabetical characters. + /// 3. The actual size of the band is K + 1 rounded up to the next multiple of the + /// vector length of 16 (for x86 AVX2) or 8 (for WASM SIMD). 
+ #[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), target_feature(enable = "avx2"))] + #[cfg_attr(target_arch = "wasm32", target_feature(enable = "simd128"))] + #[allow(non_snake_case)] + pub unsafe fn align(&mut self, reference: &[u8], x_drop: i32) { + if X_DROP { + assert!(x_drop >= 0); + } + + // optional 32-bit traceback + // 0b00 = up and left, 0b10 or 0b11 = up, 0b01 = left + if TRACE { + self.trace.resize(self.trace.len() + (reference.len() + 1) * Self::CEIL_K / L, Self::EVEN_BITS << 1); + } + + let gap_open = simd_set1_i16(P::GAP_OPEN as i16); + let gap_extend = simd_set1_i16(P::GAP_EXTEND as i16); + let neg_inf = simd_set1_i16(i16::MIN); + + let stride_gap_scalar = (Self::STRIDE as i16) * (P::GAP_EXTEND as i16); + let stride_gap = simd_set1_i16(stride_gap_scalar); + let stride_gap1234 = simd_set4_i16(stride_gap_scalar * 4, + stride_gap_scalar * 3, + stride_gap_scalar * 2, + stride_gap_scalar * 1); + + // values that are "shared" between the code for shifting down and shifting right + let mut delta_D00 = simd_sl_i16!(simd_load({ + // get last stride vector and shift it + let idx = (self.ring_buf_idx + Self::STRIDE - 1) % Self::STRIDE; + self.delta_Dx0_ptr.add(idx) + }), neg_inf, 1); + let mut abs_R_band = i32::MIN; + let mut abs_D_band = i16::MIN as i32; + let mut j = 0usize; + + 'outer: while j < reference.len() { + match self.shift_dir { + Direction::Down(shift_iter) => { + // fixed number of shift iterations because newly calculated D values are + // decreasing due to gap penalties + for _i in 0..shift_iter { + // Don't go past the end of the query + if self.shift_idx() >= self.query.len() { + self.shift_dir = Direction::Right; + continue 'outer; + } + + let shift_vec_idx = self.ring_buf_idx % Self::STRIDE; + debug_assert!(shift_vec_idx < Self::CEIL_K / L); + + // Update ring buffers to slide current band down + // the benefit of using ring buffers is apparent here: shifting down + // only requires shifting one simd vector and incrementing an 
index + let shift_D_ptr = self.delta_Dx0_ptr.add(shift_vec_idx); + let shift_query_ptr = self.query_buf_ptr.add(shift_vec_idx); + let shift_C_ptr = self.delta_Cx0_ptr.add(shift_vec_idx); + + let c = if self.query_idx() < self.query.len() { + *self.query.get_unchecked(self.query_idx()) + } else { + NULL + }; + let query_insert = halfsimd_set1_i8(convert_char(c, M::NUC) as i8); + + // abs_R_band is only used for the first iteration + // it already has the gap extend cost included + abs_D_band = cmp::max(abs_D_band + P::GAP_OPEN as i32, abs_R_band); + abs_R_band = i32::MIN; + + let delta_Dx0_insert = simd_set1_i16(clamp(abs_D_band - self.abs_A00)); + + // Now shift in new values for each band + halfsimd_store(shift_query_ptr, halfsimd_sr_i8!(query_insert, halfsimd_load(shift_query_ptr), 1)); + delta_D00 = simd_load(shift_D_ptr); + simd_store(shift_D_ptr, simd_sr_i16!(delta_Dx0_insert, delta_D00, 1)); + simd_store(shift_C_ptr, simd_sr_i16!(neg_inf, simd_load(shift_C_ptr), 1)); + + self.ring_buf_idx += 1; + } + + self.shift_dir = Direction::Right; + }, + Direction::Right => { + // Load scores for the current reference character + let matrix_ptr = self.matrix.as_ptr(convert_char(*reference.get_unchecked(j), M::NUC) as usize); + let scores1 = halfsimd_load(matrix_ptr as *const HalfSimd); + let scores2 = if M::NUC { + halfsimd_set1_i8(0) // unused, should be optimized out + } else { + halfsimd_load((matrix_ptr as *const HalfSimd).add(1)) + }; + + // Vector for prefix scan calculations + let mut delta_R_max = neg_inf; + // add the first D value of the previous band to the absolute A value of the + // previous band to get the absolute A value of the current band + let abs_band = self.abs_A00.saturating_add({ + let ptr = self.delta_Dx0_ptr.add(self.ring_buf_idx % Self::STRIDE); + simd_extract_i16::<0>(simd_load(ptr)) as i32 + }); + // need to offset the values from the previous band + let abs_offset = simd_set1_i16(clamp(self.abs_A00 - abs_band)); + + delta_D00 = 
simd_adds_i16(delta_D00, abs_offset); + + // Begin initial pass + { + let mut extend_to_end = stride_gap; + + for i in 0..Self::STRIDE { + let idx = (self.ring_buf_idx + i) % Self::STRIDE; + debug_assert!(idx < Self::CEIL_K / L); + + // efficiently lookup scores for each query character + let scores = if M::NUC { + halfsimd_lookup1_i16(scores1, halfsimd_load(self.query_buf_ptr.add(idx))) + } else { + halfsimd_lookup2_i16(scores1, scores2, halfsimd_load(self.query_buf_ptr.add(idx))) + }; + + let mut delta_D11 = simd_adds_i16(delta_D00, scores); + + let delta_D10 = simd_adds_i16(simd_load(self.delta_Dx0_ptr.add(idx)), abs_offset); + let delta_C10 = simd_adds_i16(simd_load(self.delta_Cx0_ptr.add(idx)), abs_offset); + let delta_C11 = simd_max_i16( + simd_adds_i16(delta_C10, gap_extend), simd_adds_i16(delta_D10, gap_open)); + + delta_D11 = simd_max_i16(delta_D11, delta_C11); + + if TRACE { + let trace_idx = (Self::CEIL_K / L) * (j + 1) + i; + debug_assert!(trace_idx < self.trace.len()); + *self.trace.get_unchecked_mut(trace_idx) = + simd_movemask_i8(simd_cmpeq_i16(delta_C11, delta_D11)); + } + + extend_to_end = simd_subs_i16(extend_to_end, gap_extend); + delta_R_max = simd_max_i16(delta_R_max, simd_adds_i16(delta_D11, extend_to_end)); + + // Slide band right by directly overwriting the previous band + simd_store(self.delta_Dx0_ptr.add(idx), delta_D11); + simd_store(self.delta_Cx0_ptr.add(idx), delta_C11); + + delta_D00 = delta_D10; + } + } + // End initial pass + + // Begin prefix scan + { + let prev_delta_R_max_last = simd_extract_i16::<{ L - 1 }>(delta_R_max) as i32; + + delta_R_max = simd_sl_i16!(delta_R_max, neg_inf, 1); + delta_R_max = simd_prefix_scan_i16(delta_R_max, stride_gap, stride_gap1234, neg_inf); + + let curr_delta_R_max_last = simd_extract_i16::<{ L - 1 }>(simd_adds_i16(delta_R_max, stride_gap)) as i32; + // this is the absolute R value for the last cell of the band, plus + // the gap open cost + abs_R_band = abs_band.saturating_add( + 
cmp::max(prev_delta_R_max_last, curr_delta_R_max_last) + (P::GAP_OPEN as i32)); + } + // End prefix scan + + let mut delta_D_max = neg_inf; + let mut delta_D_argmax = simd_set1_i16(0); + + // Begin final pass + { + let mut delta_R01 = simd_adds_i16(simd_subs_i16(delta_R_max, gap_extend), gap_open); + let mut delta_D01 = neg_inf; + let mut curr_i = simd_set1_i16(0); + + for i in 0..Self::STRIDE { + let idx = (self.ring_buf_idx + i) % Self::STRIDE; + debug_assert!(idx < Self::CEIL_K / L); + + let delta_R11 = simd_max_i16( + simd_adds_i16(delta_R01, gap_extend), simd_adds_i16(delta_D01, gap_open)); + let mut delta_D11 = simd_load(self.delta_Dx0_ptr.add(idx)); + delta_D11 = simd_max_i16(delta_D11, delta_R11); + + if TRACE { + let trace_idx = (Self::CEIL_K / L) * (j + 1) + i; + debug_assert!(trace_idx < self.trace.len()); + let prev_trace = *self.trace.get_unchecked(trace_idx); + let curr_trace = simd_movemask_i8(simd_cmpeq_i16(delta_R11, delta_D11)); + *self.trace.get_unchecked_mut(trace_idx) = + (prev_trace & Self::EVEN_BITS) | ((curr_trace & Self::EVEN_BITS) << 1); + } + + // consistently update the max D value for each stride vector + delta_D_max = simd_max_i16(delta_D_max, delta_D11); + let mask = simd_cmpeq_i16(delta_D_max, delta_D11); + delta_D_argmax = simd_blend_i8(delta_D_argmax, curr_i, mask); + curr_i = simd_adds_i16(curr_i, simd_set1_i16(1)); + + simd_store(self.delta_Dx0_ptr.add(idx), delta_D11); + + delta_D01 = delta_D11; + delta_R01 = delta_R11; + } + + // this is the absolute D value for the last cell of the band + abs_D_band = abs_band.saturating_add(simd_extract_i16::<{ L - 1 }>(delta_D01) as i32); + + // updating delta_D00 is important if the band shifts right + delta_D00 = simd_sl_i16!(delta_D01, neg_inf, 1); + } + // End final pass + + let (max, lane_idx) = simd_hargmax_i16(delta_D_max); + let max = (max as i32).saturating_add(abs_band); + // "slow" because it allows an index only known at run time + let stride_idx = 
simd_slow_extract_i16(delta_D_argmax, lane_idx) as u16 as usize; + let argmax = stride_idx + lane_idx * Self::STRIDE; + + self.abs_A00 = abs_band; + + if X_DROP && max < self.best_max - x_drop { + break; + } + + // if not x drop, then keep track of values only for the current band + let cond = !X_DROP || max > self.best_max; + self.best_argmax_i = if cond { argmax + self.shift_idx() } else { self.best_argmax_i }; + self.best_argmax_j = if cond { j + self.ref_idx + 1 } else { self.best_argmax_j }; + self.best_max = if cond { max } else { self.best_max }; + + // high threshold for starting to shift down, to prevent switching back and + // forth between down and right all time + self.shift_dir = if argmax > Self::CEIL_K * 5 / 8 { + Direction::Down(argmax - Self::CEIL_K / 2) + } else { + Direction::Right + }; + + j += 1; + } + } + } + + self.ref_idx += reference.len(); + } + + pub fn score(&self) -> i32 { + self.best_max + } + + pub fn end_idx(&self) -> EndIndex { + EndIndex { + query_idx: self.best_argmax_i, + ref_idx: self.best_argmax_j + } + } + + pub fn raw_trace(&self) -> &[u32] { + assert!(TRACE); + &self.trace + } +} + +impl<'a, P: ScoreParams, M: 'a + Matrix, const K: usize, const TRACE: bool, const X_DROP: bool> Drop for ScanAligner<'a, P, M, { K }, { TRACE }, { X_DROP }> { + fn drop(&mut self) { + unsafe { + alloc::dealloc(self.query_buf_ptr as *mut u8, self.query_buf_layout); + alloc::dealloc(self.delta_Dx0_ptr as *mut u8, self.delta_Dx0_layout); + alloc::dealloc(self.delta_Cx0_ptr as *mut u8, self.delta_Cx0_layout); + } + } +} + +#[cfg(test)] +mod tests { + use crate::scores::*; + + use super::*; + + #[test] + fn test_scan_align() { + type TestParams = Params<-11, -1, 1024>; + + unsafe { + let r = b"AAAA"; + let q = b"AARA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 11); + + let r = b"AAAA"; + let q = b"AARA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 11); + + let 
r = b"AAAA"; + let q = b"AAAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 16); + + let r = b"AAAA"; + let q = b"AARA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 11); + + let r = b"AAAA"; + let q = b"RRRR"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), -4); + + let r = b"AAAA"; + let q = b"AAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 0); + assert_eq!(a.score(), 1); + + type TestParams2 = Params<-1, -1, 2048>; + + let r = b"AAAN"; + let q = b"ATAA"; + let mut a = ScanAligner::::new(q, &NW1); + a.align(r, 0); + assert_eq!(a.score(), 1); + + let r = b"AAAA"; + let q = b"C"; + let mut a = ScanAligner::::new(q, &NW1); + a.align(r, 0); + assert_eq!(a.score(), -4); + let mut a = ScanAligner::::new(r, &NW1); + a.align(q, 0); + assert_eq!(a.score(), -1); + } + } + + #[test] + fn test_x_drop() { + type TestParams = Params<-11, -1, 1024>; + + unsafe { + let r = b"AAARRA"; + let q = b"AAAAAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 1); + assert_eq!(a.score(), 12); + assert_eq!(a.end_idx(), EndIndex { query_idx: 3, ref_idx: 3 }); + + let r = b"AAARRA"; + let q = b"AAAAAA"; + let mut a = ScanAligner::::new(q, &BLOSUM62); + a.align(r, 1); + assert_eq!(a.score(), 12); + assert_eq!(a.end_idx(), EndIndex { query_idx: 3, ref_idx: 3 }); + } + } +} diff --git a/lib/block-aligner/src/scan_block.rs b/lib/block-aligner/src/scan_block.rs new file mode 100644 index 000000000..efb9ec39f --- /dev/null +++ b/lib/block-aligner/src/scan_block.rs @@ -0,0 +1,2478 @@ +//! Main block aligner algorithm and supporting data structures. 
+ +#[cfg(feature = "simd_sse2")] +use crate::sse2::*; + +#[cfg(feature = "simd_avx2")] +use crate::avx2::*; + +#[cfg(feature = "simd_wasm")] +use crate::simd128::*; + +#[cfg(feature = "simd_neon")] +use crate::neon::*; + +#[cfg(feature = "no_simd")] +use crate::fallback::*; + +use crate::scores::*; +use crate::cigar::*; + +use std::{cmp, ptr, i16, alloc}; +use std::ops::RangeInclusive; + +#[cfg(feature = "mca")] +use std::arch::asm; + +// Notes: +// +// BLOSUM62 matrix max = 11, min = -4; gap open = -11 (includes extension), gap extend = -1 +// +// Dynamic programming formula: +// R[i][j] = max(R[i - 1][j] + gap_extend, D[i - 1][j] + gap_open) +// C[i][j] = max(C[i][j - 1] + gap_extend, D[i][j - 1] + gap_open) +// D[i][j] = max(D[i - 1][j - 1] + matrix[query[i]][reference[j]], R[i][j], C[i][j]) +// +// indexing (we want to calculate D11): +// x0 x1 +// +-------- +// 0x | 00 01 +// 1x | 10 11 +// +// note that 'x' represents any bit +// +// The term "block" gets used in two contexts: +// 1. A square region of the DP matrix, which is helpful for conceptually visualizing +// the algorithm. +// 2. A rectangular region representing only cells in the DP matrix that are calculated +// due to shifting or growing. Since the step size is smaller than the block size, the +// square blocks overlap. Only the non-overlapping new cells (a rectangular block) are +// computed in each step. + +/// Keeps track of internal state and some parameters for block aligner. +/// +/// This does not describe the whole state. The allocated scratch spaces +/// and other local variables are also needed. +struct State<'a, M: Matrix> { + query: &'a PaddedBytes, + i: usize, + reference: &'a PaddedBytes, + j: usize, + min_size: usize, + max_size: usize, + matrix: &'a M, + gaps: Gaps, + x_drop: i32 +} + +/// Keeps track of internal state and some parameters for block aligner for +/// sequence to profile alignment. +/// +/// This does not describe the whole state. 
The allocated scratch spaces +/// and other local variables are also needed. +struct StateProfile<'a, P: Profile> { + query: &'a PaddedBytes, + i: usize, + reference: &'a P, + j: usize, + min_size: usize, + max_size: usize, + x_drop: i32 +} + +/// Keeps track of internal state and some parameters for block aligner for +/// 3di sequence alignment. +/// +/// This does not describe the whole state. The allocated scratch spaces +/// and other local variables are also needed. +struct State3di<'a, M: Matrix> { + query: PaddedBytes3di<'a>, + i: usize, + reference: PaddedBytes3di<'a>, + j: usize, + min_size: usize, + max_size: usize, + matrix: &'a M, + matrix_3di: &'a M, + gaps: Gaps, + x_drop: i32 +} + +struct StateAA<'a, M: Matrix> { + query: PaddedBytesAA<'a>, + i: usize, + reference: PaddedBytesAA<'a>, + j: usize, + min_size: usize, + max_size: usize, + matrix: &'a M, + gaps: Gaps, + x_drop: i32 +} + + +/// Data structure storing the settings for block aligner. +pub struct Block { + res: AlignResult, + allocated: Allocated +} + +macro_rules! 
align_core_gen { + ($fn_name:ident, $matrix_or_profile:tt, $state:tt, $place_block_right_fn:path, $place_block_down_fn:path) => { + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[allow(non_snake_case)] + unsafe fn $fn_name(&mut self, mut state: $state) { + // store the best alignment ending location for x drop alignment + let mut best_max = 0i32; + let mut best_argmax_i = 0usize; + let mut best_argmax_j = 0usize; + + let mut prev_dir = Direction::Grow; + let mut dir = Direction::Grow; + let mut prev_size = 0; + let mut block_size = state.min_size; + + // 32-bit score offsets + let mut off = 0i32; + let mut prev_off; + let mut off_max = 0i32; + + // how many steps since the latest best score was encountered + let mut y_drop_iter = 0; + + // how many steps where the X-drop threshold is met + let mut x_drop_iter = 0; + + let mut i_ckpt = state.i; + let mut j_ckpt = state.j; + let mut off_ckpt = 0i32; + + // corner value that affects the score when shifting down then right, or right then down + let mut D_corner = simd_set1_i16(MIN); + + loop { + #[cfg(feature = "debug")] + { + println!("i: {}", state.i); + println!("j: {}", state.j); + println!("{:?}", dir); + println!("block size: {}", block_size); + } + + prev_off = off; + + // grow_D_max is an auxiliary value used when growing because it requires two separate + // place_block steps + let mut grow_D_max = simd_set1_i16(MIN); + let mut grow_D_argmax_i = simd_set1_i16(0); + let mut grow_D_argmax_j = simd_set1_i16(0); + let (D_max, D_argmax_i, D_argmax_j, mut right_max, mut down_max) = match dir { + Direction::Right => { + off = off_max; + #[cfg(feature = "debug")] + println!("off: {}", off); + let off_add = simd_set1_i16(clamp(prev_off - off)); + + if TRACE { + 
self.allocated.trace.add_block(state.i, state.j + block_size - STEP, STEP, block_size, true); + } + + // offset previous columns with newly computed offset + Self::just_offset(block_size, self.allocated.D_col.as_mut_ptr(), self.allocated.C_col.as_mut_ptr(), off_add); + + // compute new elements in the block as a result of shifting by the step size + // this region should be block_size x step + let (D_max, D_argmax_i, D_argmax_j) = $place_block_right_fn( + &state, + state.query, + state.reference, + &mut self.allocated.trace, + state.i, + state.j + block_size - STEP, + STEP, + block_size, + self.allocated.D_col.as_mut_ptr(), + self.allocated.C_col.as_mut_ptr(), + self.allocated.temp_buf1.as_mut_ptr(), + self.allocated.temp_buf2.as_mut_ptr(), + if prev_dir == Direction::Down { simd_adds_i16(D_corner, off_add) } else { simd_set1_i16(MIN) }, + true + ); + + // sum of a couple elements on the right border + let right_max = Self::prefix_max(self.allocated.D_col.as_ptr()); + + // shift and offset bottom row + D_corner = Self::shift_and_offset( + block_size, + self.allocated.D_row.as_mut_ptr(), + self.allocated.R_row.as_mut_ptr(), + self.allocated.temp_buf1.as_mut_ptr(), + self.allocated.temp_buf2.as_mut_ptr(), + off_add + ); + // sum of a couple elements on the bottom border + let down_max = Self::prefix_max(self.allocated.D_row.as_ptr()); + + (D_max, D_argmax_i, D_argmax_j, right_max, down_max) + }, + Direction::Down => { + off = off_max; + #[cfg(feature = "debug")] + println!("off: {}", off); + let off_add = simd_set1_i16(clamp(prev_off - off)); + + if TRACE { + self.allocated.trace.add_block(state.i + block_size - STEP, state.j, block_size, STEP, false); + } + + // offset previous rows with newly computed offset + Self::just_offset(block_size, self.allocated.D_row.as_mut_ptr(), self.allocated.R_row.as_mut_ptr(), off_add); + + // compute new elements in the block as a result of shifting by the step size + // this region should be step x block_size + let (D_max, 
D_argmax_i, D_argmax_j) = $place_block_down_fn( + &state, + state.reference, + state.query, + &mut self.allocated.trace, + state.j, + state.i + block_size - STEP, + STEP, + block_size, + self.allocated.D_row.as_mut_ptr(), + self.allocated.R_row.as_mut_ptr(), + self.allocated.temp_buf1.as_mut_ptr(), + self.allocated.temp_buf2.as_mut_ptr(), + if prev_dir == Direction::Right { simd_adds_i16(D_corner, off_add) } else { simd_set1_i16(MIN) }, + false + ); + + // sum of a couple elements on the bottom border + let down_max = Self::prefix_max(self.allocated.D_row.as_ptr()); + + // shift and offset last column + D_corner = Self::shift_and_offset( + block_size, + self.allocated.D_col.as_mut_ptr(), + self.allocated.C_col.as_mut_ptr(), + self.allocated.temp_buf1.as_mut_ptr(), + self.allocated.temp_buf2.as_mut_ptr(), + off_add + ); + // sum of a couple elements on the right border + let right_max = Self::prefix_max(self.allocated.D_col.as_ptr()); + + (D_max, D_argmax_i, D_argmax_j, right_max, down_max) + }, + Direction::Grow => { + D_corner = simd_set1_i16(MIN); + let grow_step = block_size - prev_size; + + #[cfg(feature = "debug")] + println!("off: {}", off); + #[cfg(feature = "debug")] + println!("Grow down"); + + if TRACE { + self.allocated.trace.add_block(state.i + prev_size, state.j, prev_size, grow_step, false); + } + + // down + // this region should be prev_size x prev_size + let (D_max1, D_argmax_i1, D_argmax_j1) = $place_block_down_fn( + &state, + state.reference, + state.query, + &mut self.allocated.trace, + state.j, + state.i + prev_size, + grow_step, + prev_size, + self.allocated.D_row.as_mut_ptr(), + self.allocated.R_row.as_mut_ptr(), + self.allocated.D_col.as_mut_ptr().add(prev_size), + self.allocated.C_col.as_mut_ptr().add(prev_size), + simd_set1_i16(MIN), + false + ); + + #[cfg(feature = "debug")] + println!("Grow right"); + + if TRACE { + self.allocated.trace.add_block(state.i, state.j + prev_size, grow_step, block_size, true); + } + + // right + // this 
region should be block_size x prev_size + let (D_max2, D_argmax_i2, D_argmax_j2) = $place_block_right_fn( + &state, + state.query, + state.reference, + &mut self.allocated.trace, + state.i, + state.j + prev_size, + grow_step, + block_size, + self.allocated.D_col.as_mut_ptr(), + self.allocated.C_col.as_mut_ptr(), + self.allocated.D_row.as_mut_ptr().add(prev_size), + self.allocated.R_row.as_mut_ptr().add(prev_size), + simd_set1_i16(MIN), + true + ); + + let right_max = Self::prefix_max(self.allocated.D_col.as_ptr()); + let down_max = Self::prefix_max(self.allocated.D_row.as_ptr()); + grow_D_max = D_max1; + grow_D_argmax_i = D_argmax_i1; + grow_D_argmax_j = D_argmax_j1; + + // must update the checkpoint saved values just in case + // the block must grow again from this position + let mut i = 0; + while i < block_size { + self.allocated.D_col_ckpt.set_vec(&self.allocated.D_col, i); + self.allocated.C_col_ckpt.set_vec(&self.allocated.C_col, i); + self.allocated.D_row_ckpt.set_vec(&self.allocated.D_row, i); + self.allocated.R_row_ckpt.set_vec(&self.allocated.R_row, i); + i += L; + } + + if TRACE { + self.allocated.trace.save_ckpt(); + } + + (D_max2, D_argmax_i2, D_argmax_j2, right_max, down_max) + } + }; + + prev_dir = dir; + let D_max_max = simd_hmax_i16(D_max); + let grow_max = simd_hmax_i16(grow_D_max); + // max score of the entire block + // note that other than off_max and best_max, the other maxs are relative to the + // offsets off and ZERO + let max = cmp::max(D_max_max, grow_max); + off_max = off + (max as i32) - (ZERO as i32); + #[cfg(feature = "debug")] + println!("down max: {}, right max: {}", down_max, right_max); + + y_drop_iter += 1; + // if block grows but the best score does not improve, then the block must grow again + let mut grow_no_max = dir == Direction::Grow; + + if off_max > best_max { + if X_DROP { + let mut best_i: usize = 0; + let mut best_j: usize = 0; + + fn compare(i: usize, j: usize, min_i: usize, min_j: usize) -> bool { + if j != min_j { + 
return j > min_j + } + return i > min_i + + } + + let (grow, curr_max, curr_d_max, curr_d_argmax_i, curr_d_argmax_j) = if dir == Direction::Grow && D_max_max < grow_max { + (true, grow_max, grow_D_max, grow_D_argmax_i, grow_D_argmax_j) + } else { + (false, D_max_max, D_max, D_argmax_i, D_argmax_j) + }; + #[cfg(feature = "debug")] + { + print!("D_max: "); + simd_dbg_i16(curr_d_max); + println!("D_max_max: {}", curr_max); + print!("D_argmax_i: "); + simd_dbg_i16(curr_d_argmax_i); + print!("D_argmax_j: "); + simd_dbg_i16(curr_d_argmax_j); + } + + #[repr(align(32))] + struct A([i16; L]); + + let mut curr_d_max_buf = A([0i16; L]); + simd_store(curr_d_max_buf.0.as_mut_ptr() as *mut Simd, curr_d_max); + + let mut curr_d_argmax_i_buf = A([0i16; L]); + simd_store(curr_d_argmax_i_buf.0.as_mut_ptr() as *mut Simd, curr_d_argmax_i); + + let mut curr_d_argmax_j_buf = A([0i16; L]); + simd_store(curr_d_argmax_j_buf.0.as_mut_ptr() as *mut Simd, curr_d_argmax_j); + + + for lane_idx in 0..L { + let val = curr_d_max_buf.0[lane_idx]; + if val != curr_max { + continue; + } + + let idx_i = curr_d_argmax_i_buf.0[lane_idx] as usize; + let idx_j = curr_d_argmax_j_buf.0[lane_idx] as usize; + + let r = idx_i + lane_idx; + let c = (block_size - STEP) + idx_j; + + let (gi, gj) = if grow { + (state.i + prev_size + idx_j, state.j + idx_i + lane_idx) + } else { + match dir { + Direction::Right => { + (state.i + r, state.j + c) + }, + Direction::Down => { + (state.i + c, state.j + r) + }, + Direction::Grow => { + (state.i + idx_i + lane_idx, state.j + prev_size + idx_j) + } + } + }; + + if compare(gi, gj, best_i, best_j) { + best_i = gi; + best_j = gj; + } + } + (best_argmax_i, best_argmax_j) = (best_i, best_j); + #[cfg(feature = "debug")] + println!("best_argmax_i: {}, best_argmax_j: {}, down", best_argmax_i, best_argmax_j); + } + + if block_size < state.max_size { + // if able to grow in the future, then save the current location + // as a checkpoint + i_ckpt = state.i; + j_ckpt = state.j; + 
off_ckpt = off; + + let mut i = 0; + while i < block_size { + self.allocated.D_col_ckpt.set_vec(&self.allocated.D_col, i); + self.allocated.C_col_ckpt.set_vec(&self.allocated.C_col, i); + self.allocated.D_row_ckpt.set_vec(&self.allocated.D_row, i); + self.allocated.R_row_ckpt.set_vec(&self.allocated.R_row, i); + i += L; + } + + if TRACE { + self.allocated.trace.save_ckpt(); + } + + grow_no_max = false; + } + + best_max = off_max; + + y_drop_iter = 0; + } + + if X_DROP { + if off_max < best_max - state.x_drop { + if x_drop_iter < X_DROP_ITER - 1 { + x_drop_iter += 1; + } else { + // x drop termination + break; + } + } else { + x_drop_iter = 0; + } + } + + if state.i + block_size > state.query.len() && state.j + block_size > state.reference.len() { + // reached the end of the strings + break; + } + + // first check if the shift direction is "forced" to avoid going out of bounds + if state.j + block_size > state.reference.len() { + state.i += STEP; + dir = Direction::Down; + continue; + } + if state.i + block_size > state.query.len() { + state.j += STEP; + dir = Direction::Right; + continue; + } + + // check if it is possible to grow + let next_size = block_size * 2; + if next_size <= state.max_size { + // if approximately (block_size / step) iterations has passed since the last best + // max, then it is time to grow + if y_drop_iter > (block_size / STEP) - 1 || grow_no_max { + // y drop grow block + prev_size = block_size; + block_size = next_size; + dir = Direction::Grow; + + // return to checkpoint + state.i = i_ckpt; + state.j = j_ckpt; + off = off_ckpt; + + let mut i = 0; + while i < prev_size { + self.allocated.D_col.set_vec(&self.allocated.D_col_ckpt, i); + self.allocated.C_col.set_vec(&self.allocated.C_col_ckpt, i); + self.allocated.D_row.set_vec(&self.allocated.D_row_ckpt, i); + self.allocated.R_row.set_vec(&self.allocated.R_row_ckpt, i); + i += L; + } + + if TRACE { + self.allocated.trace.restore_ckpt(); + } + + y_drop_iter = 0; + continue; + } + } + + // 
check if it is possible to shrink + if SHRINK && block_size > state.min_size && y_drop_iter == 0 { + let shrink_max = cmp::max( + Self::suffix_max(self.allocated.D_row.as_ptr(), block_size), + Self::suffix_max(self.allocated.D_col.as_ptr(), block_size) + ); + if shrink_max >= max { + // just to make sure it is not right or down shift so D_corner is not used + prev_dir = Direction::Grow; + + block_size /= 2; + let mut i = 0; + while i < block_size { + self.allocated.D_col.copy_vec(i, i + block_size); + self.allocated.C_col.copy_vec(i, i + block_size); + self.allocated.D_row.copy_vec(i, i + block_size); + self.allocated.R_row.copy_vec(i, i + block_size); + i += L; + } + + state.i += block_size; + state.j += block_size; + + i_ckpt = state.i; + j_ckpt = state.j; + off_ckpt = off; + + let mut i = 0; + while i < block_size { + self.allocated.D_col_ckpt.set_vec(&self.allocated.D_col, i); + self.allocated.C_col_ckpt.set_vec(&self.allocated.C_col, i); + self.allocated.D_row_ckpt.set_vec(&self.allocated.D_row, i); + self.allocated.R_row_ckpt.set_vec(&self.allocated.R_row, i); + i += L; + } + + right_max = Self::prefix_max(self.allocated.D_col.as_ptr()); + down_max = Self::prefix_max(self.allocated.D_row.as_ptr()); + + if TRACE { + self.allocated.trace.save_ckpt(); + } + + y_drop_iter = 0; + } + } + + // move according to where the max is + if down_max > right_max { + state.i += STEP; + dir = Direction::Down; + } else { + state.j += STEP; + dir = Direction::Right; + } + } + + #[cfg(any(feature = "debug", feature = "debug_size"))] + { + println!("query size: {}, reference size: {}", state.query.len(), state.reference.len()); + println!("end block size: {}", block_size); + } + + self.res = if X_DROP { + AlignResult { + score: best_max, + query_idx: best_argmax_i, + reference_idx: best_argmax_j + } + } else { + debug_assert!(state.i <= state.query.len()); + let score = off + match dir { + Direction::Right | Direction::Grow => { + let idx = state.query.len() - state.i; + 
debug_assert!(idx < block_size); + (self.allocated.D_col.get(idx) as i32) - (ZERO as i32) + }, + Direction::Down => { + let idx = state.reference.len() - state.j; + debug_assert!(idx < block_size); + (self.allocated.D_row.get(idx) as i32) - (ZERO as i32) + } + }; + AlignResult { + score, + query_idx: state.query.len(), + reference_idx: state.reference.len() + } + }; + } + }; +} + +/// Place block right or down for sequence-profile alignment. +/// +/// Although conceptually blocks are squares, this function is actually used to compute any +/// rectangular region. For example, when shifting a block right by some step +/// size, only the rectangular region with width = step size needs to be computed, since +/// the new shifted block will partially overlap with the previous block. +/// +/// Assumes all inputs are already relative to the current offset. +/// +/// Inside this function, everything will be treated as shifting right, +/// conceptually. The same process can be trivially used for shifting +/// down by calling this function with different parameters. +/// +/// Right and down shifts must be handled separately since a sequence +/// is aligned to a profile. +macro_rules! place_block_profile_gen { + ($fn_name:ident, $query: ident, $query_type: ty, $reference: ident, $reference_type: ty, $q: ident, $r: ident, $right: expr) => { + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[allow(non_snake_case)] + unsafe fn $fn_name(_state: &StateProfile

, + $query: $query_type, + $reference: $reference_type, + trace: &mut Trace, + start_i: usize, + start_j: usize, + width: usize, + height: usize, + D_col: *mut i16, + C_col: *mut i16, + D_row: *mut i16, + R_row: *mut i16, + mut D_corner: Simd, + _right: bool) -> (Simd, Simd, Simd) { + let gap_extend = simd_set1_i16($r.get_gap_extend() as i16); + let (gap_extend_all, prefix_scan_consts) = get_prefix_scan_consts(gap_extend); + let mut D_max = simd_set1_i16(MIN); + let mut D_argmax_i = simd_set1_i16(0); + let mut D_argmax_j = simd_set1_i16(0); + + let mut idx = 0; + let mut gap_open_C = simd_set1_i16(MIN); + let mut gap_close_C = simd_set1_i16(MIN); + let mut gap_open_R = simd_set1_i16(MIN); + let mut gap_close_R = simd_set1_i16(MIN); + + if width == 0 || height == 0 { + return (D_max, D_argmax_i, D_argmax_j); + } + + // hottest loop in the whole program + for j in 0..width { + let mut R01 = simd_set1_i16(MIN); + let mut D11 = simd_set1_i16(MIN); + let mut R11 = simd_set1_i16(MIN); + let mut prev_trace_R = simd_set1_i16(0); + + if $right { + idx = start_j + j; + gap_open_C = $r.get_gap_open_right_C(idx); + gap_close_C = $r.get_gap_close_right_C(idx); + gap_open_R = $r.get_gap_open_right_R(idx); + } + + let mut i = 0; + while i < height { + let D10 = simd_load(D_col.add(i) as _); + let C10 = simd_load(C_col.add(i) as _); + let D00 = simd_sl_i16!(D10, D_corner, 1); + D_corner = D10; + + if !$right { + idx = start_i + i; + gap_open_C = $r.get_gap_open_down_R(idx); + gap_open_R = $r.get_gap_open_down_C(idx); + gap_close_R = $r.get_gap_close_down_C(idx); + } + + let scores = if $right { + $r.get_scores_pos(idx, halfsimd_loadu($q.as_ptr(start_i + i) as _), true) + } else { + $r.get_scores_aa(idx, $q.get(start_j + j), false) + }; + D11 = simd_adds_i16(D00, scores); + if start_i + i == 0 && start_j + j == 0 { + D11 = simd_insert_i16!(D11, ZERO, 0); + } + + // let C11_open = simd_adds_i16(D10, simd_adds_i16(gap_open_C, gap_extend)); + let C11_open = simd_adds_i16(D10, 
gap_open_C); + let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), C11_open); + let C11_end = if $right { simd_adds_i16(C11, gap_close_C) } else { C11 }; + D11 = simd_max_i16(D11, C11_end); + // at this point, C11 is fully calculated and D11 is partially calculated + + // let D11_open = simd_adds_i16(D11, gap_open_R); + let D11_open = simd_adds_i16(D11, simd_subs_i16(gap_open_R, gap_extend)); + R11 = simd_prefix_scan_i16(D11_open, gap_extend, prefix_scan_consts); + // do prefix scan before using R01 to break up dependency chain that depends on + // the last element of R01 from the previous loop iteration + R11 = simd_max_i16(R11, simd_adds_i16(simd_broadcasthi_i16(R01), gap_extend_all)); + // fully calculate D11 using R11 + let R11_end = if $right { R11 } else { simd_adds_i16(R11, gap_close_R) }; + D11 = simd_max_i16(D11, R11_end); + R01 = R11; + + #[cfg(feature = "debug")] + { + print!("s: "); + simd_dbg_i16(scores); + print!("D00: "); + simd_dbg_i16(simd_subs_i16(D00, simd_set1_i16(ZERO))); + print!("C11: "); + simd_dbg_i16(simd_subs_i16(C11, simd_set1_i16(ZERO))); + print!("R11: "); + simd_dbg_i16(simd_subs_i16(R11, simd_set1_i16(ZERO))); + print!("D11: "); + simd_dbg_i16(simd_subs_i16(D11, simd_set1_i16(ZERO))); + } + + if TRACE { + let trace_D_C = simd_cmpeq_i16(D11, C11_end); + let trace_D_R = simd_cmpeq_i16(D11, R11_end); + #[cfg(feature = "debug")] + { + print!("D_C: "); + simd_dbg_i16(trace_D_C); + print!("D_R: "); + simd_dbg_i16(trace_D_R); + } + // compress trace with movemask to save space + let mask = simd_set1_i16(0xFF00u16 as i16); + let trace_data = simd_movemask_i8(simd_blend_i8(trace_D_C, trace_D_R, mask)); + let temp_trace_R = simd_cmpeq_i16(R11, D11_open); + let trace_R = simd_sl_i16!(temp_trace_R, prev_trace_R, 1); + let trace_data2 = simd_movemask_i8(simd_blend_i8(simd_cmpeq_i16(C11, C11_open), trace_R, mask)); + prev_trace_R = temp_trace_R; + trace.add_trace(trace_data as TraceType, trace_data2 as TraceType); + } + + D_max = 
simd_max_i16(D_max, D11); + + if X_DROP { + // keep track of the best score and its location + let mask = simd_cmpeq_i16(D_max, D11); + D_argmax_i = simd_blend_i8(D_argmax_i, simd_set1_i16(i as i16), mask); + D_argmax_j = simd_blend_i8(D_argmax_j, simd_set1_i16(j as i16), mask); + } + + simd_store(D_col.add(i) as _, D11); + simd_store(C_col.add(i) as _, C11); + i += L; + } + + D_corner = simd_set1_i16(MIN); + + ptr::write(D_row.add(j), simd_extract_i16!(D11, L - 1)); + ptr::write(R_row.add(j), simd_extract_i16!(R11, L - 1)); + + if !X_DROP && start_i + height > $query.len() + && start_j + j >= $reference.len() { + if TRACE { + // make sure that the trace index is updated since the rest of the loop + // iterations are skipped + trace.add_trace_idx((width - 1 - j) * (height / L)); + } + break; + } + } + + (D_max, D_argmax_i, D_argmax_j) + } + }; +} + +// increasing step size gives a bit extra speed but results in lower accuracy +// current settings are fast, at the expense of some accuracy, and step size does not grow +const STEP: usize = 8; +const X_DROP_ITER: usize = 2; // make sure that the X-drop iteration is truly met instead of just one "bad" step +const SHRINK: bool = true; // whether to allow the block size to shrink by powers of 2 +const SHRINK_SUFFIX_LEN: usize = STEP / 4; +impl Block<{ TRACE }, { X_DROP }> { + /// Allocate a block aligner instance with an upper bound query length, + /// reference length, and max block size. + /// + /// A block aligner instance can be reused for multiple alignments as long + /// as the aligned sequence lengths and block sizes do not exceed the specified + /// upper bounds. 
+ pub fn new(query_len: usize, reference_len: usize, max_size: usize) -> Self { + assert!(max_size.is_power_of_two(), "Block size must be a power of two!"); + + Self { + res: AlignResult { score: 0, query_idx: 0, reference_idx: 0 }, + allocated: Allocated::new(query_len, reference_len, max_size, TRACE) + } + } + + /// Align two sequences with block aligner. + /// + /// If `TRACE` is true, then information for computing the traceback will be stored. + /// After alignment, the traceback CIGAR string can then be computed. + /// This will slow down alignment and use a lot more memory. + /// + /// If `X_DROP` is true, then the alignment process will be terminated early when + /// the max score in the current block drops by `x_drop` below the max score encountered + /// so far. If `X_DROP` is false, then global alignment is done. + /// + /// Since larger scores are better, gap and mismatches penalties must be negative. + /// + /// The minimum and maximum sizes of the block must be powers of 2 that are greater than the + /// number of 16-bit lanes in a SIMD vector. + /// + /// The block aligner algorithm will dynamically shift a block down or right and grow its size + /// to efficiently calculate the alignment between two strings. + /// This is fast, but it may be slightly less accurate than computing the entire the alignment + /// dynamic programming matrix. Growing the size of the block allows larger gaps and + /// other potentially difficult regions to be handled correctly. + /// The algorithm also allows shrinking the block size for greater efficiency when handling + /// regions in the sequences with no gaps. + /// 16-bit deltas and 32-bit offsets are used to ensure that accurate scores are + /// computed, even when the the strings are long. + /// + /// When aligning sequences `q` against `r`, this algorithm computes cells in the DP matrix + /// with `|q| + 1` rows and `|r| + 1` columns. + /// + /// X-drop alignment with `ByteMatrix` is not supported. 
+ pub fn align(&mut self, query: &PaddedBytes, reference: &PaddedBytes, matrix: &M, gaps: Gaps, size: RangeInclusive, x_drop: i32) { + // check invariants so bad stuff doesn't happen later + assert!(gaps.open < 0 && gaps.extend < 0, "Gap costs must be negative!"); + // there are edge cases with calculating traceback that doesn't work if + // gap open does not cost more than gap extend + assert!(gaps.open < gaps.extend, "Gap open must cost more than gap extend!"); + let min_size = if *size.start() < L { L } else { *size.start() }; + let max_size = if *size.end() < L { L } else { *size.end() }; + assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!"); + assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!"); + if X_DROP { + assert!(x_drop >= 0, "X-drop threshold amount must be nonnegative!"); + } + + unsafe { self.allocated.clear(query.len(), reference.len(), max_size, TRACE); } + + let s = State { + query, + i: 0, + reference, + j: 0, + min_size, + max_size, + matrix, + gaps, + x_drop + }; + unsafe { self.align_core(s); } + } + + /// Align a sequence to a profile with block aligner. + /// + /// If `TRACE` is true, then information for computing the traceback will be stored. + /// After alignment, the traceback CIGAR string can then be computed. + /// This will slow down alignment and use a lot more memory. + /// + /// If `X_DROP` is true, then the alignment process will be terminated early when + /// the max score in the current block drops by `x_drop` below the max score encountered + /// so far. If `X_DROP` is false, then global alignment is done. + /// + /// Since larger scores are better, gap and mismatches penalties must be negative. + /// + /// The minimum and maximum sizes of the block must be powers of 2 that are greater than the + /// number of 16-bit lanes in a SIMD vector. 
+ /// + /// The block aligner algorithm will dynamically shift a block down or right and grow its size + /// to efficiently calculate the alignment between two strings. + /// This is fast, but it may be slightly less accurate than computing the entire the alignment + /// dynamic programming matrix. Growing the size of the block allows larger gaps and + /// other potentially difficult regions to be handled correctly. + /// The algorithm also allows shrinking the block size for greater efficiency when handling + /// regions in the sequences with no gaps. + /// 16-bit deltas and 32-bit offsets are used to ensure that accurate scores are + /// computed, even when the the strings are long. + /// + /// When aligning sequence `q` against profile `p`, this algorithm computes cells in the DP matrix + /// with `|q| + 1` rows and `|p| + 1` columns. + pub fn align_profile(&mut self, query: &PaddedBytes, profile: &P, size: RangeInclusive, x_drop: i32) { + // check invariants so bad stuff doesn't happen later + assert!(profile.get_gap_extend() < 0, "Gap extend cost must be negative!"); + let min_size = if *size.start() < L { L } else { *size.start() }; + let max_size = if *size.end() < L { L } else { *size.end() }; + assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!"); + assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!"); + if X_DROP { + assert!(x_drop >= 0, "X-drop threshold amount must be nonnegative!"); + } + + unsafe { self.allocated.clear(query.len(), profile.len(), max_size, TRACE); } + + let s = StateProfile { + query, + i: 0, + reference: profile, + j: 0, + min_size, + max_size, + x_drop + }; + unsafe { self.align_profile_core(s); } + } + + /// Align two 3di sequences with block aligner. + /// + /// If `TRACE` is true, then information for computing the traceback will be stored. + /// After alignment, the traceback CIGAR string can then be computed. 
+ /// This will slow down alignment and use a lot more memory. + /// + /// If `X_DROP` is true, then the alignment process will be terminated early when + /// the max score in the current block drops by `x_drop` below the max score encountered + /// so far. If `X_DROP` is false, then global alignment is done. + /// + /// Since larger scores are better, gap and mismatch penalties must be negative. + /// + /// The minimum and maximum sizes of the block must be powers of 2 that are greater than the + /// number of 16-bit lanes in a SIMD vector. + /// + /// The block aligner algorithm will dynamically shift a block down or right and grow its size + /// to efficiently calculate the alignment between two strings. + /// This is fast, but it may be slightly less accurate than computing the entire alignment + /// dynamic programming matrix. Growing the size of the block allows larger gaps and + /// other potentially difficult regions to be handled correctly. + /// The algorithm also allows shrinking the block size for greater efficiency when handling + /// regions in the sequences with no gaps. + /// 16-bit deltas and 32-bit offsets are used to ensure that accurate scores are + /// computed, even when the strings are long. + /// + /// When aligning sequences `q` against `r`, this algorithm computes cells in the DP matrix + /// with `|q|` rows and `|r|` columns. + /// + /// X-drop alignment with `ByteMatrix` is not supported.
+ pub fn align_3di(&mut self, query: &PaddedBytes, query_3di: &PaddedBytes, query_bias: &PosBias, reference: &PaddedBytes, reference_3di: &PaddedBytes, reference_bias: &PosBias, matrix: &AAMatrix, matrix_3di: &AAMatrix, gaps: Gaps, size: RangeInclusive, x_drop: i32) { + // check invariants so bad stuff doesn't happen later + assert_eq!(query.len(), query_3di.len()); + assert_eq!(query.len(), query_bias.len()); + assert_eq!(reference.len(), reference_3di.len()); + assert_eq!(reference.len(), reference_bias.len()); + assert!(gaps.open < 0 && gaps.extend < 0, "Gap costs must be negative!"); + // there are edge cases with calculating traceback that doesn't work if + // gap open does not cost more than gap extend + assert!(gaps.open < gaps.extend, "Gap open must cost more than gap extend!"); + let min_size = if *size.start() < L { L } else { *size.start() }; + let max_size = if *size.end() < L { L } else { *size.end() }; + assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!"); + assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!"); + if X_DROP { + assert!(x_drop >= 0, "X-drop threshold amount must be nonnegative!"); + } + + unsafe { self.allocated.clear(query.len(), reference.len(), max_size, TRACE); } + + let s = State3di { + query: PaddedBytes3di { + bytes: query, + bytes_3di: query_3di, + pos_bias: query_bias + }, + i: 0, + reference: PaddedBytes3di { + bytes: reference, + bytes_3di: reference_3di, + pos_bias: reference_bias + }, + j: 0, + min_size, + max_size, + matrix, + matrix_3di, + gaps, + x_drop + }; + unsafe { self.align_3di_core(s); } + } + + pub fn align_aa(&mut self, query: &PaddedBytes, query_bias: &PosBias, reference: &PaddedBytes, reference_bias: &PosBias, matrix: &AAMatrix, gaps: Gaps, size: RangeInclusive, x_drop: i32) { + // check invariants so bad stuff doesn't happen later + assert_eq!(query.len(), query_bias.len()); + 
assert_eq!(reference.len(), reference_bias.len()); + assert!(gaps.open < 0 && gaps.extend < 0, "Gap costs must be negative!"); + // there are edge cases with calculating traceback that doesn't work if + // gap open does not cost more than gap extend + assert!(gaps.open < gaps.extend, "Gap open must cost more than gap extend!"); + let min_size = if *size.start() < L { L } else { *size.start() }; + let max_size = if *size.end() < L { L } else { *size.end() }; + assert!(min_size < (u16::MAX as usize) && max_size < (u16::MAX as usize), "Block sizes must be smaller than 2^16 - 1!"); + assert!(min_size.is_power_of_two() && max_size.is_power_of_two(), "Block sizes must be powers of two!"); + if X_DROP { + assert!(x_drop >= 0, "X-drop threshold amount must be nonnegative!"); + } + + unsafe { self.allocated.clear(query.len(), reference.len(), max_size, TRACE); } + + let s = StateAA { + query: PaddedBytesAA { + bytes: query, + pos_bias: query_bias + }, + i: 0, + reference: PaddedBytesAA { + bytes: reference, + pos_bias: reference_bias + }, + j: 0, + min_size, + max_size, + matrix, + gaps, + x_drop + }; + unsafe { self.align_aa_core(s); } + } + + align_core_gen!(align_core, Matrix, State, Self::place_block, Self::place_block); + align_core_gen!(align_profile_core, Profile, StateProfile, Self::place_block_profile_right, Self::place_block_profile_down); + align_core_gen!(align_3di_core, Matrix, State3di, Self::place_block_3di, Self::place_block_3di); + align_core_gen!(align_aa_core, Matrix, StateAA, Self::place_block_aa, Self::place_block_aa); + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[allow(non_snake_case)] + #[inline] + unsafe fn just_offset(block_size: usize, buf1: *mut i16, buf2: *mut i16, off_add: Simd) { + let mut i = 0; + while i 
< block_size { + let a = simd_adds_i16(simd_load(buf1.add(i) as _), off_add); + let b = simd_adds_i16(simd_load(buf2.add(i) as _), off_add); + simd_store(buf1.add(i) as _, a); + simd_store(buf2.add(i) as _, b); + i += L; + } + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[allow(non_snake_case)] + #[inline] + unsafe fn prefix_max(buf: *const i16) -> i16 { + simd_prefix_hmax_i16!(simd_load(buf as _), STEP) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[allow(non_snake_case)] + #[inline] + unsafe fn suffix_max(buf: *const i16, buf_len: usize) -> i16 { + simd_suffix_hmax_i16!(simd_load(buf.add(buf_len - L) as _), SHRINK_SUFFIX_LEN) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[allow(non_snake_case)] + #[inline] + unsafe fn shift_and_offset(block_size: usize, buf1: *mut i16, buf2: *mut i16, temp_buf1: *mut i16, temp_buf2: *mut i16, off_add: Simd) -> Simd { + let mut curr1 = simd_adds_i16(simd_load(buf1 as _), off_add); + let D_corner = simd_set1_i16(simd_extract_i16!(curr1, STEP - 1)); + let mut curr2 = simd_adds_i16(simd_load(buf2 as _), off_add); + + let mut i = 0; + while i < block_size - L { + let next1 = simd_adds_i16(simd_load(buf1.add(i + L) as _), off_add); + let next2 = simd_adds_i16(simd_load(buf2.add(i + L) as _), off_add); + 
simd_store(buf1.add(i) as _, simd_step(next1, curr1)); + simd_store(buf2.add(i) as _, simd_step(next2, curr2)); + curr1 = next1; + curr2 = next2; + i += L; + } + + let next1 = simd_load(temp_buf1 as _); + let next2 = simd_load(temp_buf2 as _); + simd_store(buf1.add(block_size - L) as _, simd_step(next1, curr1)); + simd_store(buf2.add(block_size - L) as _, simd_step(next2, curr2)); + D_corner + } + + /// Place block right or down for sequence-sequence alignment. + /// + /// Although conceptually blocks are squares, this function is actually used to compute any + /// rectangular region. For example, when shifting a block right by some step + /// size, only the rectangular region with width = step size needs to be computed, since + /// the new shifted block will partially overlap with the previous block. + /// + /// Assumes all inputs are already relative to the current offset. + /// + /// Inside this function, everything will be treated as shifting right, + /// conceptually. The same process can be trivially used for shifting + /// down by calling this function with different parameters. + /// + /// The same function can be reused for right and down shifts because + /// sequence to sequence alignment is symmetric. 
+ #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[allow(non_snake_case)] + unsafe fn place_block(state: &State, + query: &PaddedBytes, + reference: &PaddedBytes, + trace: &mut Trace, + start_i: usize, + start_j: usize, + width: usize, + height: usize, + D_col: *mut i16, + C_col: *mut i16, + D_row: *mut i16, + R_row: *mut i16, + mut D_corner: Simd, + right: bool) -> (Simd, Simd, Simd) { + let gap_open = simd_set1_i16(state.gaps.open as i16); + let gap_extend = simd_set1_i16(state.gaps.extend as i16); + let (gap_extend_all, prefix_scan_consts) = get_prefix_scan_consts(gap_extend); + let mut D_max = simd_set1_i16(MIN); + let mut D_argmax_i = simd_set1_i16(0); + let mut D_argmax_j = simd_set1_i16(0); + + if width == 0 || height == 0 { + return (D_max, D_argmax_i, D_argmax_j); + } + + // hottest loop in the whole program + for j in 0..width { + let mut R01 = simd_set1_i16(MIN); + let mut D11 = simd_set1_i16(MIN); + let mut R11 = simd_set1_i16(MIN); + let mut prev_trace_R = simd_set1_i16(0); + + let c = reference.get(start_j + j); + + let mut i = 0; + while i < height { + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))] + asm!("# LLVM-MCA-BEGIN place_block inner loop", options(nomem, nostack, preserves_flags)); + + let D10 = simd_load(D_col.add(i) as _); + let C10 = simd_load(C_col.add(i) as _); + let D00 = simd_sl_i16!(D10, D_corner, 1); + D_corner = D10; + + let scores = state.matrix.get_scores(c, halfsimd_loadu(query.as_ptr(start_i + i) as _), right); + D11 = simd_adds_i16(D00, scores); + if start_i + i == 0 && start_j + j == 0 { + D11 = simd_insert_i16!(D11, ZERO, 0); + } + + let C11_open = simd_adds_i16(D10, gap_open); + let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), C11_open); + D11 = 
simd_max_i16(D11, C11); + // at this point, C11 is fully calculated and D11 is partially calculated + + let D11_open = simd_adds_i16(D11, simd_subs_i16(gap_open, gap_extend)); + R11 = simd_prefix_scan_i16(D11_open, gap_extend, prefix_scan_consts); + // do prefix scan before using R01 to break up dependency chain that depends on + // the last element of R01 from the previous loop iteration + R11 = simd_max_i16(R11, simd_adds_i16(simd_broadcasthi_i16(R01), gap_extend_all)); + // fully calculate D11 using R11 + D11 = simd_max_i16(D11, R11); + R01 = R11; + + #[cfg(feature = "debug")] + { + print!("s: "); + simd_dbg_i16(scores); + print!("D00: "); + simd_dbg_i16(simd_subs_i16(D00, simd_set1_i16(ZERO))); + print!("C11: "); + simd_dbg_i16(simd_subs_i16(C11, simd_set1_i16(ZERO))); + print!("R11: "); + simd_dbg_i16(simd_subs_i16(R11, simd_set1_i16(ZERO))); + print!("D11: "); + simd_dbg_i16(simd_subs_i16(D11, simd_set1_i16(ZERO))); + } + + if TRACE { + let trace_D_C = simd_cmpeq_i16(D11, C11); + let trace_D_R = simd_cmpeq_i16(D11, R11); + #[cfg(feature = "debug")] + { + print!("D_C: "); + simd_dbg_i16(trace_D_C); + print!("D_R: "); + simd_dbg_i16(trace_D_R); + } + // compress trace with movemask to save space + let mask = simd_set1_i16(0xFF00u16 as i16); + let trace_data = simd_movemask_i8(simd_blend_i8(trace_D_C, trace_D_R, mask)); + let temp_trace_R = simd_cmpeq_i16(R11, D11_open); + let trace_R = simd_sl_i16!(temp_trace_R, prev_trace_R, 1); + let trace_data2 = simd_movemask_i8(simd_blend_i8(simd_cmpeq_i16(C11, C11_open), trace_R, mask)); + prev_trace_R = temp_trace_R; + trace.add_trace(trace_data as TraceType, trace_data2 as TraceType); + } + + D_max = simd_max_i16(D_max, D11); + + if X_DROP { + // keep track of the best score and its location + let mask = simd_cmpeq_i16(D_max, D11); + D_argmax_i = simd_blend_i8(D_argmax_i, simd_set1_i16(i as i16), mask); + D_argmax_j = simd_blend_i8(D_argmax_j, simd_set1_i16(j as i16), mask); + } + + simd_store(D_col.add(i) as _, D11); + 
simd_store(C_col.add(i) as _, C11); + i += L; + + #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))] + asm!("# LLVM-MCA-END", options(nomem, nostack, preserves_flags)); + } + + D_corner = simd_set1_i16(MIN); + + ptr::write(D_row.add(j), simd_extract_i16!(D11, L - 1)); + ptr::write(R_row.add(j), simd_extract_i16!(R11, L - 1)); + + if !X_DROP && start_i + height > query.len() + && start_j + j >= reference.len() { + if TRACE { + // make sure that the trace index is updated since the rest of the loop + // iterations are skipped + trace.add_trace_idx((width - 1 - j) * (height / L)); + } + break; + } + } + + (D_max, D_argmax_i, D_argmax_j) + } + + place_block_profile_gen!(place_block_profile_right, query, &PaddedBytes, reference, &P, query, reference, true); + place_block_profile_gen!(place_block_profile_down, reference, &P, query, &PaddedBytes, query, reference, false); + + /// Place block right or down for sequence-sequence 3di alignment. + /// + /// Although conceptually blocks are squares, this function is actually used to compute any + /// rectangular region. For example, when shifting a block right by some step + /// size, only the rectangular region with width = step size needs to be computed, since + /// the new shifted block will partially overlap with the previous block. + /// + /// Assumes all inputs are already relative to the current offset. + /// + /// Inside this function, everything will be treated as shifting right, + /// conceptually. The same process can be trivially used for shifting + /// down by calling this function with different parameters. + /// + /// The same function can be reused for right and down shifts because + /// sequence to sequence alignment is symmetric. 
+    /// Fill in one `width` x `height` rectangle of the DP matrix, scoring every cell with the
+    /// sum of the amino-acid matrix score, the 3Di matrix score and the position biases.
+    ///
+    /// Columns are visited one at a time (`j`); each column is processed in chunks of `L`
+    /// lanes (`i`). For every chunk `D_col`/`C_col` are loaded and then overwritten in place,
+    /// and after each column the last lane of the final `D11`/`R11` is written to
+    /// `D_row[j]`/`R_row[j]`. The incoming `D_corner` is only consumed by the first chunk of
+    /// the first column; it is reset to `MIN` after every column.
+    ///
+    /// Returns `(D_max, D_argmax_i, D_argmax_j)`: the lane-wise running maximum of `D11`, and
+    /// the `i`/`j` offsets (relative to the block) recorded for it. The argmax vectors are
+    /// only updated when `X_DROP` is set; otherwise they stay zero.
+    ///
+    /// NOTE(review): the caller presumably guarantees that `D_col`/`C_col` hold at least
+    /// `height` elements, `D_row`/`R_row` at least `width` elements, and that the padded
+    /// sequences cover `start_i + height` / `start_j + width` -- confirm at the call site.
+    #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
+    #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
+    #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
+    #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
+    #[allow(non_snake_case)]
+    unsafe fn place_block_3di(state: &State3di,
+                              query: PaddedBytes3di,
+                              reference: PaddedBytes3di,
+                              trace: &mut Trace,
+                              start_i: usize,
+                              start_j: usize,
+                              width: usize,
+                              height: usize,
+                              D_col: *mut i16,
+                              C_col: *mut i16,
+                              D_row: *mut i16,
+                              R_row: *mut i16,
+                              mut D_corner: Simd,
+                              right: bool) -> (Simd, Simd, Simd) {
+        let gap_open = simd_set1_i16(state.gaps.open as i16);
+        let gap_extend = simd_set1_i16(state.gaps.extend as i16);
+        let (gap_extend_all, prefix_scan_consts) = get_prefix_scan_consts(gap_extend);
+        let mut D_max = simd_set1_i16(MIN);
+        let mut D_argmax_i = simd_set1_i16(0);
+        let mut D_argmax_j = simd_set1_i16(0);
+
+        // nothing to compute for an empty block
+        if width == 0 || height == 0 {
+            return (D_max, D_argmax_i, D_argmax_j);
+        }
+
+        // hottest loop in the whole program
+        for j in 0..width {
+            let mut R01 = simd_set1_i16(MIN);
+            let mut D11 = simd_set1_i16(MIN);
+            let mut R11 = simd_set1_i16(MIN);
+            let mut prev_trace_R = simd_set1_i16(0);
+
+            // per-column reference data: amino-acid byte, 3Di byte and position bias
+            let c = reference.bytes.get(start_j + j);
+            let c_3di = reference.bytes_3di.get(start_j + j);
+            let reference_bias = simd_set1_i16(reference.pos_bias.get(start_j + j));
+
+            let mut i = 0;
+            while i < height {
+                #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))]
+                asm!("# LLVM-MCA-BEGIN place_block inner loop", options(nomem, nostack, preserves_flags));
+
+                let D10 = simd_load(D_col.add(i) as _);
+                let C10 = simd_load(C_col.add(i) as _);
+                // diagonal neighbours: D10 shifted by one lane with D_corner filling the gap
+                // (presumably; see simd_sl_i16!), then remember D10 as the next chunk's corner
+                let D00 = simd_sl_i16!(D10, D_corner, 1);
+                D_corner = D10;
+
+                let scores = state.matrix.get_scores(c, halfsimd_loadu(query.bytes.as_ptr(start_i + i) as _), right);
+                let scores_3di = state.matrix_3di.get_scores(c_3di, halfsimd_loadu(query.bytes_3di.as_ptr(start_i + i) as _), right);
+                let query_bias = query.pos_bias.get_biases(start_i + i);
+                let pos_bias = simd_adds_i16(reference_bias, query_bias);
+                D11 = simd_adds_i16(D00, simd_adds_i16(simd_adds_i16(scores, scores_3di), pos_bias));
+                // the very first cell of the whole DP matrix is pinned to ZERO
+                if start_i + i == 0 && start_j + j == 0 {
+                    D11 = simd_insert_i16!(D11, ZERO, 0);
+                }
+
+                let C11_open = simd_adds_i16(D10, gap_open);
+                let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), C11_open);
+                D11 = simd_max_i16(D11, C11);
+                // at this point, C11 is fully calculated and D11 is partially calculated
+
+                let D11_open = simd_adds_i16(D11, simd_subs_i16(gap_open, gap_extend));
+                R11 = simd_prefix_scan_i16(D11_open, gap_extend, prefix_scan_consts);
+                // do prefix scan before using R01 to break up dependency chain that depends on
+                // the last element of R01 from the previous loop iteration
+                R11 = simd_max_i16(R11, simd_adds_i16(simd_broadcasthi_i16(R01), gap_extend_all));
+                // fully calculate D11 using R11
+                D11 = simd_max_i16(D11, R11);
+                R01 = R11;
+
+                #[cfg(feature = "debug")]
+                {
+                    print!("s: ");
+                    simd_dbg_i16(scores);
+                    print!("3di: ");
+                    simd_dbg_i16(scores_3di);
+                    print!("D00: ");
+                    simd_dbg_i16(simd_subs_i16(D00, simd_set1_i16(ZERO)));
+                    print!("C11: ");
+                    simd_dbg_i16(simd_subs_i16(C11, simd_set1_i16(ZERO)));
+                    print!("R11: ");
+                    simd_dbg_i16(simd_subs_i16(R11, simd_set1_i16(ZERO)));
+                    print!("D11: ");
+                    simd_dbg_i16(simd_subs_i16(D11, simd_set1_i16(ZERO)));
+                }
+
+                if TRACE {
+                    let trace_D_C = simd_cmpeq_i16(D11, C11);
+                    let trace_D_R = simd_cmpeq_i16(D11, R11);
+                    #[cfg(feature = "debug")]
+                    {
+                        print!("D_C: ");
+                        simd_dbg_i16(trace_D_C);
+                        print!("D_R: ");
+                        simd_dbg_i16(trace_D_R);
+                    }
+                    // compress trace with movemask to save space
+                    let mask = simd_set1_i16(0xFF00u16 as i16);
+                    let trace_data = simd_movemask_i8(simd_blend_i8(trace_D_C, trace_D_R, mask));
+                    // second trace word: whether C11 / R11 came from a gap open
+                    let temp_trace_R = simd_cmpeq_i16(R11, D11_open);
+                    let trace_R = simd_sl_i16!(temp_trace_R, prev_trace_R, 1);
+                    let trace_data2 = simd_movemask_i8(simd_blend_i8(simd_cmpeq_i16(C11, C11_open), trace_R, mask));
+                    prev_trace_R = temp_trace_R;
+                    trace.add_trace(trace_data as TraceType, trace_data2 as TraceType);
+                }
+
+                D_max = simd_max_i16(D_max, D11);
+
+                if X_DROP {
+                    // keep track of the best score and its location
+                    let mask = simd_cmpeq_i16(D_max, D11);
+                    D_argmax_i = simd_blend_i8(D_argmax_i, simd_set1_i16(i as i16), mask);
+                    D_argmax_j = simd_blend_i8(D_argmax_j, simd_set1_i16(j as i16), mask);
+                }
+
+                simd_store(D_col.add(i) as _, D11);
+                simd_store(C_col.add(i) as _, C11);
+                i += L;
+
+                #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))]
+                asm!("# LLVM-MCA-END", options(nomem, nostack, preserves_flags));
+            }
+
+            // the caller-provided corner only applies to the first column
+            D_corner = simd_set1_i16(MIN);
+
+            // export the bottom lane of this column to the row borders
+            ptr::write(D_row.add(j), simd_extract_i16!(D11, L - 1));
+            ptr::write(R_row.add(j), simd_extract_i16!(R11, L - 1));
+
+            // without X-drop, stop once the block extends past the end of the query and this
+            // column is at or past the end of the reference
+            if !X_DROP && start_i + height > query.len()
+                && start_j + j >= reference.len() {
+                if TRACE {
+                    // make sure that the trace index is updated since the rest of the loop
+                    // iterations are skipped
+                    trace.add_trace_idx((width - 1 - j) * (height / L));
+                }
+                break;
+            }
+        }
+
+        (D_max, D_argmax_i, D_argmax_j)
+    }
+
+
+    /// Fill in one `width` x `height` rectangle of the DP matrix using only the amino-acid
+    /// matrix score plus the position biases.
+    ///
+    /// The loop structure, the in-place updates of `D_col`/`C_col`, the writes to
+    /// `D_row`/`R_row`, the trace recording and the early exit mirror `place_block_3di`;
+    /// the only scoring difference visible here is that no 3Di score is added.
+    ///
+    /// Returns `(D_max, D_argmax_i, D_argmax_j)`; the argmax vectors are only updated when
+    /// `X_DROP` is set.
+    #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
+    #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
+    #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
+    #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
+    #[allow(non_snake_case)]
+    unsafe fn place_block_aa(state: &StateAA,
+                             query: PaddedBytesAA,
+                             reference: PaddedBytesAA,
+                             trace: &mut Trace,
+                             start_i: usize,
+                             start_j: usize,
+                             width: usize,
+                             height: usize,
+                             D_col: *mut i16,
+                             C_col: *mut i16,
+                             D_row: *mut i16,
+                             R_row: *mut i16,
+                             mut D_corner: Simd,
+                             right: bool) -> (Simd, Simd, Simd) {
+        let gap_open = simd_set1_i16(state.gaps.open as i16);
+        let gap_extend = simd_set1_i16(state.gaps.extend as i16);
+        let (gap_extend_all, prefix_scan_consts) = get_prefix_scan_consts(gap_extend);
+        let mut D_max = simd_set1_i16(MIN);
+        let mut D_argmax_i = simd_set1_i16(0);
+        let mut D_argmax_j = simd_set1_i16(0);
+
+        // nothing to compute for an empty block
+        if width == 0 || height == 0 {
+            return (D_max, D_argmax_i, D_argmax_j);
+        }
+
+        // hottest loop in the whole program
+        for j in 0..width {
+            let mut R01 = simd_set1_i16(MIN);
+            let mut D11 = simd_set1_i16(MIN);
+            let mut R11 = simd_set1_i16(MIN);
+            let mut prev_trace_R = simd_set1_i16(0);
+
+            // per-column reference data: amino-acid byte and position bias
+            let c = reference.bytes.get(start_j + j);
+            let reference_bias = simd_set1_i16(reference.pos_bias.get(start_j + j));
+
+            let mut i = 0;
+            while i < height {
+                #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))]
+                asm!("# LLVM-MCA-BEGIN place_block inner loop", options(nomem, nostack, preserves_flags));
+
+                let D10 = simd_load(D_col.add(i) as _);
+                let C10 = simd_load(C_col.add(i) as _);
+                let D00 = simd_sl_i16!(D10, D_corner, 1);
+                D_corner = D10;
+
+                let scores = state.matrix.get_scores(c, halfsimd_loadu(query.bytes.as_ptr(start_i + i) as _), right);
+                let query_bias = query.pos_bias.get_biases(start_i + i);
+                let pos_bias = simd_adds_i16(reference_bias, query_bias);
+                D11 = simd_adds_i16(D00, simd_adds_i16(scores, pos_bias));
+                // the very first cell of the whole DP matrix is pinned to ZERO
+                if start_i + i == 0 && start_j + j == 0 {
+                    D11 = simd_insert_i16!(D11, ZERO, 0);
+                }
+
+                let C11_open = simd_adds_i16(D10, gap_open);
+                let C11 = simd_max_i16(simd_adds_i16(C10, gap_extend), C11_open);
+                D11 = simd_max_i16(D11, C11);
+                // at this point, C11 is fully calculated and D11 is partially calculated
+
+                let D11_open = simd_adds_i16(D11, simd_subs_i16(gap_open, gap_extend));
+                #[cfg(feature = "debug")]
+                {
+                    print!(" g-seqD11: ");
+                    // simd_dbg_i16(simd_subs_i16(D11_open, simd_set1_i16(ZERO)));
+                    simd_dbg_i16(D11);
+                    print!(" g-simd_subs_i16(gap_open, gap_extend): ");
+                    simd_dbg_i16(simd_subs_i16(gap_open, gap_extend));
+                    print!(" g-seqD11_open: ");
+                    // simd_dbg_i16(simd_subs_i16(D11_open, simd_set1_i16(ZERO)));
+                    simd_dbg_i16(D11_open);
+                    print!(" g-gapExtend: ");
+                    // simd_dbg_i16(simd_subs_i16(gap_extend, simd_set1_i16(ZERO)));
+                    simd_dbg_i16(gap_extend);
+                    print!(" g-prefix_scan_consts: ");
+                    // simd_dbg_i16(simd_subs_i16(prefix_scan_consts, simd_set1_i16(ZERO)));
+                    simd_dbg_i16(prefix_scan_consts);
+                }
+                R11 = simd_prefix_scan_i16(D11_open, gap_extend, prefix_scan_consts);
+                // do prefix scan before using R01 to break up dependency chain that depends on
+                // the last element of R01 from the previous loop iteration
+                #[cfg(feature = "debug")]
+                {
+                    print!(" g-R11tmp: ");
+                    simd_dbg_i16(R11);
+                }
+                R11 = simd_max_i16(R11, simd_adds_i16(simd_broadcasthi_i16(R01), gap_extend_all));
+                // fully calculate D11 using R11
+                #[cfg(feature = "debug")]
+                {
+                    print!(" g-R11tmp2: ");
+                    simd_dbg_i16(R11);
+                    print!("g-2D11: ");
+                    simd_dbg_i16(simd_subs_i16(D11, simd_set1_i16(ZERO)));
+                }
+                D11 = simd_max_i16(D11, R11);
+                R01 = R11;
+
+                #[cfg(feature = "debug")]
+                {
+                    print!("s: ");
+                    simd_dbg_i16(scores);
+                    print!("D00: ");
+                    simd_dbg_i16(simd_subs_i16(D00, simd_set1_i16(ZERO)));
+                    print!("C11: ");
+                    simd_dbg_i16(simd_subs_i16(C11, simd_set1_i16(ZERO)));
+                    print!("R11: ");
+                    simd_dbg_i16(simd_subs_i16(R11, simd_set1_i16(ZERO)));
+                    print!("D11: ");
+                    simd_dbg_i16(simd_subs_i16(D11, simd_set1_i16(ZERO)));
+                }
+
+                if TRACE {
+                    let trace_D_C = simd_cmpeq_i16(D11, C11);
+                    let trace_D_R = simd_cmpeq_i16(D11, R11);
+                    #[cfg(feature = "debug")]
+                    {
+                        print!("D_C: ");
+                        simd_dbg_i16(trace_D_C);
+                        print!("D_R: ");
+                        simd_dbg_i16(trace_D_R);
+                    }
+                    // compress trace with movemask to save space
+                    let mask = simd_set1_i16(0xFF00u16 as i16);
+                    let trace_data = simd_movemask_i8(simd_blend_i8(trace_D_C, trace_D_R, mask));
+                    // second trace word: whether C11 / R11 came from a gap open
+                    let temp_trace_R = simd_cmpeq_i16(R11, D11_open);
+                    let trace_R = simd_sl_i16!(temp_trace_R, prev_trace_R, 1);
+                    let trace_data2 = simd_movemask_i8(simd_blend_i8(simd_cmpeq_i16(C11, C11_open), trace_R, mask));
+                    prev_trace_R = temp_trace_R;
+                    trace.add_trace(trace_data as TraceType, trace_data2 as TraceType);
+                }
+
+                D_max = simd_max_i16(D_max, D11);
+
+                if X_DROP {
+                    // keep track of the best score and its location
+                    let mask = simd_cmpeq_i16(D_max, D11);
+                    D_argmax_i = simd_blend_i8(D_argmax_i, simd_set1_i16(i as i16), mask);
+                    D_argmax_j = simd_blend_i8(D_argmax_j, simd_set1_i16(j as i16), mask);
+                }
+
+                simd_store(D_col.add(i) as _, D11);
+                simd_store(C_col.add(i) as _, C11);
+                i += L;
+
+                #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "mca"))]
+                asm!("# LLVM-MCA-END", options(nomem, nostack, preserves_flags));
+            }
+
+            // the caller-provided corner only applies to the first column
+            D_corner = simd_set1_i16(MIN);
+
+            // export the bottom lane of this column to the row borders
+            ptr::write(D_row.add(j), simd_extract_i16!(D11, L - 1));
+            ptr::write(R_row.add(j), simd_extract_i16!(R11, L - 1));
+
+            // without X-drop, stop once the block extends past the end of the query and this
+            // column is at or past the end of the reference
+            if !X_DROP && start_i + height > query.len()
+                && start_j + j >= reference.len() {
+                if TRACE {
+                    // make sure that the trace index is updated since the rest of the loop
+                    // iterations are skipped
+                    trace.add_trace_idx((width - 1 - j) * (height / L));
+                }
+                break;
+            }
+        }
+
+        (D_max, D_argmax_i, D_argmax_j)
+    }
+
+    /// Get the resulting score and ending location of the alignment.
+    #[inline]
+    pub fn res(&self) -> AlignResult {
+        self.res
+    }
+
+    /// Get the trace of the alignment, assuming `TRACE` is true.
+    ///
+    /// Panics (via `assert!`) when `TRACE` is false.
+    #[inline]
+    pub fn trace(&self) -> &Trace {
+        assert!(TRACE);
+        &self.allocated.trace
+    }
+}
+
+/// Allocated scratch spaces for alignment.
+///
+/// Scratch spaces can be reused for aligning strings with shorter lengths
+/// and smaller block sizes.
+#[allow(non_snake_case)] +struct Allocated { + pub trace: Trace, + + // bottom and right borders of the current block + pub D_col: Aligned, + pub C_col: Aligned, + pub D_row: Aligned, + pub R_row: Aligned, + + // the state at the previous checkpoint (where latest best score was encountered) + pub D_col_ckpt: Aligned, + pub C_col_ckpt: Aligned, + pub D_row_ckpt: Aligned, + pub R_row_ckpt: Aligned, + + // reused buffers for storing values that must be shifted + // into the other border when the block moves in one direction + pub temp_buf1: Aligned, + pub temp_buf2: Aligned, + + query_len: usize, + reference_len: usize, + max_size: usize, + trace_flag: bool +} + +impl Allocated { + #[allow(non_snake_case)] + fn new(query_len: usize, reference_len: usize, max_size: usize, trace_flag: bool) -> Self { + unsafe { + let trace = if trace_flag { + Trace::new(query_len, reference_len, max_size) + } else { + Trace::new(0, 0, 0) + }; + let D_col = Aligned::new(max_size); + let C_col = Aligned::new(max_size); + let D_row = Aligned::new(max_size); + let R_row = Aligned::new(max_size); + let D_col_ckpt = Aligned::new(max_size); + let C_col_ckpt = Aligned::new(max_size); + let D_row_ckpt = Aligned::new(max_size); + let R_row_ckpt = Aligned::new(max_size); + let temp_buf1 = Aligned::new(L); + let temp_buf2 = Aligned::new(L); + + Self { + trace, + D_col, + C_col, + D_row, + R_row, + D_col_ckpt, + C_col_ckpt, + D_row_ckpt, + R_row_ckpt, + temp_buf1, + temp_buf2, + query_len, + reference_len, + max_size, + trace_flag + } + } + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + unsafe fn clear(&mut self, query_len: usize, reference_len: usize, max_size: usize, trace_flag: bool) { + // do not overwrite query_len, reference_len, etc. 
because they are upper bounds + assert!(query_len + reference_len <= self.query_len + self.reference_len); + assert!(max_size <= self.max_size); + assert_eq!(trace_flag, self.trace_flag); + + self.trace.clear(query_len, reference_len); + self.D_col.clear(max_size); + self.C_col.clear(max_size); + self.D_row.clear(max_size); + self.R_row.clear(max_size); + self.D_col_ckpt.clear(max_size); + self.C_col_ckpt.clear(max_size); + self.D_row_ckpt.clear(max_size); + self.R_row_ckpt.clear(max_size); + self.temp_buf1.clear(L); + self.temp_buf2.clear(L); + } +} + +/// Holds the trace generated by block aligner. +#[derive(Clone)] +pub struct Trace { + trace: Vec, + trace2: Vec, + right: Vec, + block_start: Vec, + block_size: Vec, + trace_idx: usize, + block_idx: usize, + ckpt_trace_idx: usize, + ckpt_block_idx: usize, + query_len: usize, + reference_len: usize +} + +impl Trace { + #[inline] + fn new(query_len: usize, reference_len: usize, max_size: usize) -> Self { + let len = query_len + reference_len; + let trace = vec![0 as TraceType; (max_size / L) * (len + max_size * 2)]; + let trace2 = vec![0 as TraceType; (max_size / L) * (len + max_size * 2)]; + let right = vec![0u64; div_ceil(len, 64)]; + let block_start = vec![0u32; len * 2]; + let block_size = vec![0u16; len * 2]; + + Self { + trace, + trace2, + right, + block_start, + block_size, + trace_idx: 0, + block_idx: 0, + ckpt_trace_idx: 0, + ckpt_block_idx: 0, + query_len, + reference_len + } + } + + #[inline] + fn clear(&mut self, query_len: usize, reference_len: usize) { + // no need to clear trace, block_start, and block_size + self.right.fill(0); + self.trace_idx = 0; + self.block_idx = 0; + self.ckpt_trace_idx = 0; + self.ckpt_block_idx = 0; + self.query_len = query_len; + self.reference_len = reference_len; + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = 
"simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn add_trace(&mut self, t: TraceType, t2: TraceType) { + debug_assert!(self.trace_idx < self.trace.len()); + store_trace(self.trace.as_mut_ptr().add(self.trace_idx), t); + store_trace(self.trace2.as_mut_ptr().add(self.trace_idx), t2); + self.trace_idx += 1; + } + + #[inline] + fn add_block(&mut self, i: usize, j: usize, width: usize, height: usize, right: bool) { + debug_assert!(self.block_idx * 2 < self.block_start.len()); + unsafe { + *self.block_start.as_mut_ptr().add(self.block_idx * 2) = i as u32; + *self.block_start.as_mut_ptr().add(self.block_idx * 2 + 1) = j as u32; + *self.block_size.as_mut_ptr().add(self.block_idx * 2) = height as u16; + *self.block_size.as_mut_ptr().add(self.block_idx * 2 + 1) = width as u16; + + let a = self.block_idx / 64; + let b = self.block_idx % 64; + let v = *self.right.as_ptr().add(a) & !(1 << b); // clear bit + *self.right.as_mut_ptr().add(a) = v | ((right as u64) << b); + + self.block_idx += 1; + } + } + + #[inline] + fn add_trace_idx(&mut self, add: usize) { + self.trace_idx += add; + } + + #[inline] + fn save_ckpt(&mut self) { + self.ckpt_trace_idx = self.trace_idx; + self.ckpt_block_idx = self.block_idx; + } + + /// The trace data structure is like a stack, so all trace values and blocks after the + /// checkpoint is essentially popped off the stack. + #[inline] + fn restore_ckpt(&mut self) { + self.trace_idx = self.ckpt_trace_idx; + self.block_idx = self.ckpt_block_idx; + } + + /// Create a CIGAR string that represents a single traceback path ending on the specified + /// location. + /// + /// When aligning `q` against `r`, this represents the edits to go from `r` to `q`. + /// Matches and mismatches are both represented with `M`. 
+    pub fn cigar(&self, i: usize, j: usize, cigar: &mut Cigar) {
+        self.cigar_core::<false>(i, j, None, None, cigar);
+    }
+
+    /// Create a CIGAR string that represents a single traceback path ending on the specified
+    /// location.
+    ///
+    /// When aligning `q` against `r`, this represents the edits to go from `r` to `q`.
+    /// Matches are represented using `=` and mismatches are represented using `X`.
+    pub fn cigar_eq(&self, query: &PaddedBytes, reference: &PaddedBytes, i: usize, j: usize, cigar: &mut Cigar) {
+        self.cigar_core::<true>(i, j, Some(query), Some(reference), cigar);
+    }
+
+    /// Shared traceback implementation behind `cigar` and `cigar_eq`.
+    ///
+    /// Walks backwards from `(i, j)` to `(0, 0)`, locating the recorded block that contains
+    /// the current cell and decoding the two packed trace words for that cell through a
+    /// lookup table. When `EQ` is set, `q` and `r` must be `Some` and every `M` operation is
+    /// refined into `=` or `X` by comparing the sequence bytes.
+    ///
+    /// Panics if `(i, j)` is outside the lengths recorded in this trace (or, with `EQ`,
+    /// outside the given sequences).
+    fn cigar_core<const EQ: bool>(&self, mut i: usize, mut j: usize, q: Option<&PaddedBytes>, r: Option<&PaddedBytes>, cigar: &mut Cigar) {
+        assert!(i <= self.query_len && j <= self.reference_len, "Traceback cigar end position must be in bounds!");
+        if EQ {
+            assert!(q.is_some() && r.is_some());
+            assert!(i <= q.unwrap().len() && j <= r.unwrap().len(), "Traceback cigar end position must be in sequence bounds!");
+        }
+
+        cigar.clear(i, j);
+
+        unsafe {
+            let mut block_idx = self.block_idx;
+            let mut trace_idx = self.trace_idx;
+            let mut block_i;
+            let mut block_j;
+            let mut block_width;
+            let mut block_height;
+            let mut right;
+
+            #[derive(Copy, Clone, PartialEq, Debug)]
+            enum Table {
+                D = 0b00,
+                C = 0b01,
+                R = 0b10
+            }
+
+            // use lookup table instead of hard to predict branches
+            static OP_LUT: [[(Operation, usize, usize, Table); 64]; 2] = {
+                let mut lut = [[(Operation::D, 0, 1, Table::D); 64]; 2];
+
+                // table: the current DP table, D, C, or R (tables are standardized to right = true)
+                // trace: 2 bits, first bit is whether the max equals C table entry, second bit is
+                // whether the max equals R table entry (vice versa for right = false)
+                // trace2: 2 bits, first bit is whether the max in the C table is the gap beginning, second
+                // bit is whether the max in the R table is the gap beginning (vice versa for right = false)
+                // right: whether the current block contains vectors laid out vertically
+
+                let mut right = 0;
+                while right < 2 {
+                    let mut trace = 0;
+                    while trace < 4 {
+                        let mut trace2 = 0;
+                        while trace2 < 4 {
+                            let mut table_idx = 0;
+                            while table_idx < 3 {
+                                let table = match table_idx {
+                                    0b00 => Table::D,
+                                    0b01 => Table::C,
+                                    _ => Table::R
+                                };
+
+                                let res = if right == 1 {
+                                    match (trace, trace2, table) {
+                                        (_, 0b00 | 0b10, Table::C) => (Operation::D, 0, 1, Table::C), // C table gap extend
+                                        (_, 0b01 | 0b11, Table::C) => (Operation::D, 0, 1, Table::D), // C table gap open
+                                        (_, 0b00 | 0b01, Table::R) => (Operation::I, 1, 0, Table::R), // R table gap extend
+                                        (_, 0b10 | 0b11, Table::R) => (Operation::I, 1, 0, Table::D), // R table gap open
+                                        (0b00, _, Table::D) => (Operation::M, 1, 1, Table::D), // D table match/mismatch
+                                        (0b01 | 0b11, 0b00 | 0b10, Table::D) => (Operation::D, 0, 1, Table::C), // D table C gap extend
+                                        (0b01 | 0b11, 0b01 | 0b11, Table::D) => (Operation::D, 0, 1, Table::D), // D table C gap open
+                                        (0b10, 0b00 | 0b01, Table::D) => (Operation::I, 1, 0, Table::R), // D table R gap extend
+                                        (0b10, 0b10 | 0b11, Table::D) => (Operation::I, 1, 0, Table::D), // D table R gap open
+                                        _ => (Operation::D, 0, 1, Table::D)
+                                    }
+                                } else {
+                                    match (trace, trace2, table) {
+                                        (_, 0b00 | 0b10, Table::R) => (Operation::I, 1, 0, Table::R), // R table gap extend
+                                        (_, 0b01 | 0b11, Table::R) => (Operation::I, 1, 0, Table::D), // R table gap open
+                                        (_, 0b00 | 0b01, Table::C) => (Operation::D, 0, 1, Table::C), // C table gap extend
+                                        (_, 0b10 | 0b11, Table::C) => (Operation::D, 0, 1, Table::D), // C table gap open
+                                        (0b00, _, Table::D) => (Operation::M, 1, 1, Table::D), // D table match/mismatch
+                                        (0b01 | 0b11, 0b00 | 0b10, Table::D) => (Operation::I, 1, 0, Table::R), // D table R gap extend
+                                        (0b01 | 0b11, 0b01 | 0b11, Table::D) => (Operation::I, 1, 0, Table::D), // D table R gap open
+                                        (0b10, 0b00 | 0b01, Table::D) => (Operation::D, 0, 1, Table::C), // D table C gap extend
+                                        (0b10, 0b10 | 0b11, Table::D) => (Operation::D, 0, 1, Table::D), // D table C gap open
+                                        _ => (Operation::I, 1, 0, Table::D)
+                                    }
+                                };
+
+                                // index layout: bits 5..4 = trace, bits 3..2 = trace2, bits 1..0 = table
+                                lut[right][(trace << 4) | (trace2 << 2) | (table as usize)] = res;
+                                table_idx += 1;
+                            }
+                            trace2 += 1;
+                        }
+                        trace += 1;
+                    }
+                    right += 1;
+                }
+
+                lut
+            };
+
+            let mut table = Table::D;
+
+            while i > 0 || j > 0 {
+                // find the current block that contains (i, j)
+                loop {
+                    // running out of blocks would wrap the index and read out of bounds
+                    debug_assert!(block_idx > 0, "Traceback position is not covered by any recorded block!");
+                    block_idx -= 1;
+                    block_i = *self.block_start.as_ptr().add(block_idx * 2) as usize;
+                    block_j = *self.block_start.as_ptr().add(block_idx * 2 + 1) as usize;
+                    block_height = *self.block_size.as_ptr().add(block_idx * 2) as usize;
+                    block_width = *self.block_size.as_ptr().add(block_idx * 2 + 1) as usize;
+                    // blocks are stored like a stack, so step the trace cursor back past this block
+                    trace_idx -= block_width * block_height / L;
+
+                    if i >= block_i && j >= block_j {
+                        right = ((*self.right.as_ptr().add(block_idx / 64) >> (block_idx % 64)) & 0b1) as usize;
+                        break;
+                    }
+                }
+
+                // compute traceback within the current block
+                let lut = &*OP_LUT.as_ptr().add(right);
+                if right > 0 {
+                    // vectors run along the query (i) axis: one column holds block_height / L words
+                    while i >= block_i && j >= block_j && (i > 0 || j > 0) {
+                        let curr_i = i - block_i;
+                        let curr_j = j - block_j;
+                        let idx = trace_idx + curr_i / L + curr_j * (block_height / L);
+                        let t = ((*self.trace.as_ptr().add(idx) >> ((curr_i % L) * 2)) & 0b11) as usize;
+                        let t2 = ((*self.trace2.as_ptr().add(idx) >> ((curr_i % L) * 2)) & 0b11) as usize;
+                        let lut_idx = (t << 4) | (t2 << 2) | (table as usize);
+                        let lut_entry = &*lut.as_ptr().add(lut_idx);
+
+                        let op = if EQ && lut_entry.0 == Operation::M {
+                            if q.unwrap_unchecked().get(i) == r.unwrap_unchecked().get(j) {
+                                Operation::Eq
+                            } else {
+                                Operation::X
+                            }
+                        } else {
+                            lut_entry.0
+                        };
+                        i -= lut_entry.1;
+                        j -= lut_entry.2;
+                        table = lut_entry.3;
+                        cigar.add(op);
+                    }
+                } else {
+                    // vectors run along the reference (j) axis: one row holds block_width / L words
+                    while i >= block_i && j >= block_j && (i > 0 || j > 0) {
+                        let curr_i = i - block_i;
+                        let curr_j = j - block_j;
+                        let idx = trace_idx + curr_j / L + curr_i * (block_width / L);
+                        let t = ((*self.trace.as_ptr().add(idx) >> ((curr_j % L) * 2)) & 0b11) as usize;
+                        let t2 = ((*self.trace2.as_ptr().add(idx) >> ((curr_j % L) * 2)) & 0b11) as usize;
+                        let lut_idx = (t << 4) | (t2 << 2) | (table as usize);
+                        let lut_entry = &*lut.as_ptr().add(lut_idx);
+
+                        let op = if EQ && lut_entry.0 == Operation::M {
+                            if q.unwrap_unchecked().get(i) == r.unwrap_unchecked().get(j) {
+                                Operation::Eq
+                            } else {
+                                Operation::X
+                            }
+                        } else {
+                            lut_entry.0
+                        };
+                        i -= lut_entry.1;
+                        j -= lut_entry.2;
+                        table = lut_entry.3;
+                        cigar.add(op);
+                    }
+                }
+            }
+        }
+    }
+
+    /// Return all of the rectangular regions that were calculated separately as
+    /// block aligner shifts and grows.
+    pub fn blocks(&self) -> Vec<Rectangle> {
+        // this is not a hot path, so use bounds-checked slices instead of raw pointer reads
+        let used = self.block_idx * 2;
+        self.block_start[..used]
+            .chunks_exact(2)
+            .zip(self.block_size[..used].chunks_exact(2))
+            .map(|(start, size)| Rectangle {
+                row: start[0] as usize,
+                col: start[1] as usize,
+                height: size[0] as usize,
+                width: size[1] as usize
+            })
+            .collect()
+    }
+}
+
+/// A rectangular region.
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub struct Rectangle {
+    pub row: usize,
+    pub col: usize,
+    pub width: usize,
+    pub height: usize
+}
+
+/// Saturate an `i32` into the `i16` range.
+#[inline]
+fn clamp(x: i32) -> i16 {
+    x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
+}
+
+/// Integer division of `n` by `d`, rounded up. Panics if `d` is zero.
+#[inline]
+fn div_ceil(n: usize, d: usize) -> usize {
+    // written without `n + d - 1` so that large `n` cannot overflow
+    n / d + (n % d != 0) as usize
+}
+
+/// Same alignment as SIMD vectors.
+struct Aligned {
+    layout: alloc::Layout,
+    ptr: *const i16
+}
+
+impl Aligned {
+    /// Allocate a zeroed buffer of `block_size` `i16` values aligned to `L_BYTES`.
+    ///
+    /// Aborts through `handle_alloc_error` if the allocation fails.
+    pub unsafe fn new(block_size: usize) -> Self {
+        // custom alignment
+        // Allocating a zero-sized layout is undefined behavior, so always request at least
+        // one alignment unit (which is non-zero).
+        let size = cmp::max(block_size * 2, L_BYTES);
+        let layout = alloc::Layout::from_size_align_unchecked(size, L_BYTES);
+        let ptr = alloc::alloc_zeroed(layout) as *const i16;
+        if ptr.is_null() {
+            // never hand out a null buffer: every accessor dereferences `ptr` unchecked
+            alloc::handle_alloc_error(layout);
+        }
+        Self { layout, ptr }
+    }
+
+    /// Fill the first `block_size` elements with `MIN`, one SIMD vector (`L` lanes) at a time.
+    ///
+    /// Whole vectors are stored, so the write extends to the next multiple of `L`.
+    #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
+    #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
+    #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
+    #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
+    pub unsafe fn clear(&mut self, block_size: usize) {
+        let mut i = 0;
+        while i < block_size {
+            simd_store(self.ptr.add(i) as _, simd_set1_i16(MIN));
+            i += L;
+        }
+    }
+
+    /// Copy the SIMD vector at element offset `idx` from `o` into the same offset of `self`.
+    #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
+    #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
+    #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
+    #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
+    #[inline]
+    pub unsafe fn set_vec(&mut self, o: &Aligned, idx: usize) {
+        simd_store(self.ptr.add(idx) as _, simd_load(o.as_ptr().add(idx) as _));
+    }
+
+    /// Copy the SIMD vector at element offset `idx` to element offset `new_idx` within `self`.
+    #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))]
+    #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))]
+    #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))]
+    #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))]
+    #[inline]
+    pub unsafe fn copy_vec(&mut self, new_idx: usize, idx: usize) {
+        simd_store(self.ptr.add(new_idx) as _, simd_load(self.ptr.add(idx) as _));
+    }
+
+    /// Read element `i` (no bounds check).
+    #[inline]
+    pub fn get(&self, i: usize) -> i16 {
+        unsafe { *self.ptr.add(i) }
+    }
+
+    /// Write element `i` (no bounds check).
+    #[allow(dead_code)]
+    #[inline]
+    pub fn set(&mut self, i: usize, v: i16) {
+        unsafe { ptr::write(self.ptr.add(i) as _, v); }
+    }
+
+    #[inline]
+    pub fn as_mut_ptr(&mut self) -> *mut i16 {
+        self.ptr as _
+    }
+
+    #[inline]
+    pub fn as_ptr(&self) -> *const i16 {
+        self.ptr
+    }
+}
+
+impl Drop for Aligned {
+    fn drop(&mut self) {
+        // must use the exact layout the buffer was allocated with
+        unsafe { alloc::dealloc(self.ptr as _, self.layout); }
+    }
+}
+
+/// Borrowed view of a sequence: amino-acid bytes, 3Di bytes and per-position biases.
+#[derive(Copy, Clone, Debug)]
+struct PaddedBytes3di<'a> {
+    pub bytes: &'a PaddedBytes,
+    pub bytes_3di: &'a PaddedBytes,
+    pub pos_bias: &'a PosBias
+}
+
+impl<'a> PaddedBytes3di<'a> {
+    /// Length of the amino-acid sequence (no padding).
+    pub fn len(&self) -> usize {
+        self.bytes.len()
+    }
+}
+
+/// Borrowed view of a sequence: amino-acid bytes and per-position biases.
+#[derive(Copy, Clone, Debug)]
+struct PaddedBytesAA<'a> {
+    pub bytes: &'a PaddedBytes,
+    pub pos_bias: &'a PosBias
+}
+
+impl<'a> PaddedBytesAA<'a> {
+    /// Length of the amino-acid sequence (no padding).
+    pub fn len(&self) -> usize {
+        self.bytes.len()
+    }
+}
+
+/// A padded string that helps avoid out of bounds access when using SIMD.
+///
+/// A single padding byte is inserted before the start of the string,
+/// and `block_size` bytes are inserted after the end of the string.
+#[derive(Clone, PartialEq, Debug)]
+pub struct PaddedBytes {
+    s: Vec<u8>,
+    len: usize
+}
+
+impl PaddedBytes {
+    /// Create an empty `PaddedBytes` instance that can hold byte strings
+    /// of a specific size.
+    pub fn new<M: Matrix>(len: usize, block_size: usize) -> Self {
+        Self {
+            s: vec![M::convert_char(M::NULL); 1 + len + block_size],
+            len
+        }
+    }
+
+    /// Modifies the bytes in place, filling in the rest of the memory with padding bytes.
+    ///
+    /// Every byte of `b` is passed through `M::convert_char`. Panics if the existing
+    /// storage cannot hold `1 + b.len() + block_size` bytes.
+    pub fn set_bytes<M: Matrix>(&mut self, b: &[u8], block_size: usize) {
+        let end = 1 + b.len();
+        assert!(end + block_size <= self.s.len(), "PaddedBytes storage is too small for the given bytes and block size!");
+
+        self.s[0] = M::convert_char(M::NULL);
+        // copy and convert in a single pass
+        for (dst, &src) in self.s[1..end].iter_mut().zip(b) {
+            *dst = M::convert_char(src);
+        }
+        self.s[end..end + block_size].fill(M::convert_char(M::NULL));
+        self.len = b.len();
+    }
+
+    /// Modifies the bytes in place, filling in the rest of the memory with padding bytes.
+ pub fn set_bytes_num(&mut self, b: &[u8], block_size: usize) { + self.s[0] = M::convert_char(M::NULL); + self.s[1..1 + b.len()].copy_from_slice(b); + // self.s[1..1 + b.len()].iter_mut().for_each(|c| *c = M::convert_char(*c)); + self.s[1 + b.len()..1 + b.len() + block_size].fill(M::convert_char(M::NULL)); + self.len = b.len(); + } + + /// Create from a byte slice. + /// + /// Make sure that `block_size` is greater than or equal to the upper bound + /// block size used in the `Block::align` function. + #[inline] + pub fn from_bytes(b: &[u8], block_size: usize) -> Self { + let mut v = b.to_owned(); + let len = v.len(); + v.insert(0, M::NULL); + v.resize(v.len() + block_size, M::NULL); + v.iter_mut().for_each(|c| *c = M::convert_char(*c)); + Self { s: v, len } + } + + /// Create from the bytes in a string slice. + /// + /// Make sure that `block_size` is greater than or equal to the upper bound + /// block size used in the `Block::align` function. + #[inline] + pub fn from_str(s: &str, block_size: usize) -> Self { + Self::from_bytes::(s.as_bytes(), block_size) + } + + /// Create from the bytes in a string. + /// + /// Make sure that `block_size` is greater than or equal to the upper bound + /// block size used in the `Block::align` function. + #[inline] + pub fn from_string(s: String, block_size: usize) -> Self { + let mut v = s.into_bytes(); + let len = v.len(); + v.insert(0, M::NULL); + v.resize(v.len() + block_size, M::NULL); + v.iter_mut().for_each(|c| *c = M::convert_char(*c)); + Self { s: v, len } + } + + /// Get the byte at a certain index (unchecked). + #[inline] + pub unsafe fn get(&self, i: usize) -> u8 { + *self.s.as_ptr().add(i) + } + + /// Set the byte at a certain index (unchecked). + #[inline] + pub unsafe fn set(&mut self, i: usize, c: u8) { + *self.s.as_mut_ptr().add(i) = c; + } + + /// Create a pointer to a specific index. 
+    ///
+    /// The index is not bounds checked; `i` is an offset into the padded storage.
+    #[inline]
+    pub unsafe fn as_ptr(&self, i: usize) -> *const u8 {
+        self.s.as_ptr().add(i)
+    }
+
+    /// Length of the original string (no padding).
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.len
+    }
+}
+
+/// Resulting score and alignment end position.
+#[repr(C)]
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub struct AlignResult {
+    pub score: i32,
+    pub query_idx: usize,
+    pub reference_idx: usize
+}
+
+// NOTE(review): presumably the direction in which the block is moved or enlarged next;
+// the code using this enum is outside this chunk -- confirm.
+#[derive(Copy, Clone, PartialEq, Debug)]
+enum Direction {
+    Right,
+    Down,
+    Grow
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::scores::*;
+
+    use super::*;
+
+    // Scores of alignments run with an X-drop threshold of 0 and a fixed block size of 16.
+    #[test]
+    fn test_no_x_drop() {
+        let test_gaps = Gaps { open: -11, extend: -1 };
+
+        let mut a = Block::::new(100, 100, 16);
+
+        let r = PaddedBytes::from_bytes::(b"AAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AARA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, 11);
+
+        let r = PaddedBytes::from_bytes::(b"AAAAAAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AARAAAA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, 12);
+
+        let r = PaddedBytes::from_bytes::(b"AAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAAA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, 16);
+
+        let r = PaddedBytes::from_bytes::(b"AAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AARA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, 11);
+
+        let r = PaddedBytes::from_bytes::(b"AAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"RRRR", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, -4);
+
+        let r = PaddedBytes::from_bytes::(b"AAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, 1);
+
+        let test_gaps2 = Gaps { open: -2, extend: -1 };
+
+        let r = PaddedBytes::from_bytes::(b"AAAN", 16);
+        let q = PaddedBytes::from_bytes::(b"ATAA", 16);
+        a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
+        assert_eq!(a.res().score, 0);
+
+        let r = PaddedBytes::from_bytes::(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
+        a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
+        assert_eq!(a.res().score, 32);
+
+        let r = PaddedBytes::from_bytes::(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT", 16);
+        a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
+        assert_eq!(a.res().score, -32);
+
+        let r = PaddedBytes::from_bytes::(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"TATATATATATATATATATATATATATATATA", 16);
+        a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
+        assert_eq!(a.res().score, 0);
+
+        let r = PaddedBytes::from_bytes::(b"TTAAAAAAATTTTTTTTTTTT", 16);
+        let q = PaddedBytes::from_bytes::(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
+        a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
+        assert_eq!(a.res().score, 7);
+
+        // swapping query and reference must give the same score
+        let r = PaddedBytes::from_bytes::(b"AAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"C", 16);
+        a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
+        assert_eq!(a.res().score, -5);
+        a.align(&r, &q, &NW1, test_gaps2, 16..=16, 0);
+        assert_eq!(a.res().score, -5);
+    }
+
+    // Alignments run with a non-zero X-drop threshold; checks score and end position.
+    #[test]
+    fn test_x_drop() {
+        let test_gaps = Gaps { open: -11, extend: -1 };
+
+        let mut a = Block::::new(100, 100, 16);
+
+        let r = PaddedBytes::from_bytes::(b"AAARRA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAAAAA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
+        assert_eq!(a.res(), AlignResult { score: 14, query_idx: 6, reference_idx: 6 });
+
+        let r = PaddedBytes::from_bytes::(b"AAAAAAAAAAAAAAARRRRRRRRRRRRRRRRAAAAAAAAAAAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 1);
+        assert_eq!(a.res(), AlignResult { score: 60, query_idx: 15, reference_idx: 15 });
+
+        let mut a = Block::::new(2048, 2048, 2048);
+        let long_str = std::iter::repeat(b'A').take(2048).collect::>();
+        let r = PaddedBytes::from_bytes::(&long_str, 2048);
+        let q = PaddedBytes::from_bytes::(&long_str, 2048);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 2048..=2048, 100);
+        assert_eq!(a.res(), AlignResult { score: 8192, query_idx: 2048, reference_idx: 2048 });
+    }
+
+    // Checks the CIGAR strings produced from the recorded trace.
+    #[test]
+    fn test_trace() {
+        let test_gaps = Gaps { open: -11, extend: -1 };
+
+        let mut cigar = Cigar::new(100, 100);
+
+        let mut a = Block::::new(100, 100, 16);
+
+        let r = PaddedBytes::from_bytes::(b"AAARRA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAAAAA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 14, query_idx: 6, reference_idx: 6 });
+        a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "3=2X1=");
+
+        let r = PaddedBytes::from_bytes::(b"AAAA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAA", 16);
+        a.align(&q, &r, &BLOSUM62, test_gaps, 16..=16, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 1, query_idx: 3, reference_idx: 4 });
+        a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "3M1D");
+
+        let test_gaps2 = Gaps { open: -2, extend: -1 };
+
+        let r = PaddedBytes::from_bytes::(b"TTAAAAAAATTTTTTTTTTTT", 16);
+        let q = PaddedBytes::from_bytes::(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
+        a.align(&q, &r, &NW1, test_gaps2, 16..=16, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 7, query_idx: 24, reference_idx: 21 });
+        a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "2M6I16M3D");
+
+        let mut a = Block::::new(100, 100, 32);
+        let q = PaddedBytes::from_bytes::(b"AAAAAAAAATTGCGCT", 32);
+        let r = PaddedBytes::from_bytes::(b"AAAAAAAAAGCGC", 32);
+
+        a.align(&q, &r, &NW1, test_gaps2, 32..=32, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 8, query_idx: 16, reference_idx: 13 });
+        a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "9=2I4=1I");
+
+        // same sequences with a different scoring scheme: the path is expected to be unchanged
+        let matrix = NucMatrix::new_simple(2, -1);
+        let test_gaps3 = Gaps { open: -5, extend: -2 };
+        a.align(&q, &r, &matrix, test_gaps3, 32..=32, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 14, query_idx: 16, reference_idx: 13 });
+        a.trace().cigar_eq(&q, &r, res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "9=2I4=1I");
+    }
+
+    // Alignments scored with the BYTES1 matrix on arbitrary (case-sensitive) bytes.
+    #[test]
+    fn test_bytes() {
+        let test_gaps = Gaps { open: -2, extend: -1 };
+
+        let mut a = Block::::new(100, 100, 16);
+
+        let r = PaddedBytes::from_bytes::(b"AAAaaA", 16);
+        let q = PaddedBytes::from_bytes::(b"AAAAAA", 16);
+        a.align(&q, &r, &BYTES1, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, 2);
+
+        let r = PaddedBytes::from_bytes::(b"abcdefg", 16);
+        let q = PaddedBytes::from_bytes::(b"abdefg", 16);
+        a.align(&q, &r, &BYTES1, test_gaps, 16..=16, 0);
+        assert_eq!(a.res().score, 4);
+    }
+
+    // Alignments of a sequence against an `AAProfile` built with `AAProfile::from_bytes`.
+    #[test]
+    fn test_profile() {
+        let mut a = Block::::new(100, 100, 16);
+        let r = AAProfile::from_bytes(b"AAAA", 16, 1, -1, -1, 0, -1, -1);
+        let q = PaddedBytes::from_bytes::(b"AAAA", 16);
+        a.align_profile(&q, &r, 16..=16, 0);
+        assert_eq!(a.res().score, 4);
+
+        let r = AAProfile::from_bytes(b"AATTAA", 16, 1, -1, -1, 0, -1, -1);
+        let q = PaddedBytes::from_bytes::(b"AAAA", 16);
+        a.align_profile(&q, &r, 16..=16, 0);
+        assert_eq!(a.res().score, 1);
+
+        let r = AAProfile::from_bytes(b"AATTAA", 16, 1, -1, -1, -1, -1, -1);
+        let q = PaddedBytes::from_bytes::(b"AAAA", 16);
+        a.align_profile(&q, &r, 16..=16, 0);
+        assert_eq!(a.res().score, 0);
+
+        let mut a = Block::::new(100, 100, 16);
+        let mut cigar = Cigar::new(100, 100);
+
+        let r = AAProfile::from_bytes(b"TTAAAAAAATTTTTTTTTTTT", 16, 1, -1, -1, 0, -1, -1);
+        let q = PaddedBytes::from_bytes::(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
+        a.align_profile(&q, &r, 16..=16, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 7, query_idx: 24, reference_idx: 21 });
+        a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "2M6I16M3D");
+
+        let r = AAProfile::from_bytes(b"TTAAAAAAATTTTTTTTTTTT", 16, 1, -1, -1, -1, -1, -1);
+        let q = PaddedBytes::from_bytes::(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
+        a.align_profile(&q, &r, 16..=16, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 6, query_idx: 24, reference_idx: 21 });
+        a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "2M6I16M3D");
+
+        // position-specific gap-close costs change where the trailing deletion is placed
+        let mut r = AAProfile::from_bytes(b"TTAAAAAAATTTTTTTTTTTT", 16, 1, -1, -2, -1, -1, -1);
+        r.set_gap_close_C(17, -1);
+        r.set_gap_close_C(19, 0);
+        let q = PaddedBytes::from_bytes::(b"TTTTTTTTAAAAAAATTTTTTTTT", 16);
+        a.align_profile(&q, &r, 16..=16, 0);
+        let res = a.res();
+        assert_eq!(res, AlignResult { score: 6, query_idx: 24, reference_idx: 21 });
+        a.trace().cigar(res.query_idx, res.reference_idx, &mut cigar);
+        assert_eq!(cigar.to_string(), "2M6I14M3D2M");
+    }
+}
diff --git a/lib/block-aligner/src/scores.rs b/lib/block-aligner/src/scores.rs
new file mode 100644
index 000000000..126d8649d
--- /dev/null
+++ b/lib/block-aligner/src/scores.rs
@@ -0,0 +1,789 @@
+//! Structs for representing match/mismatch scoring matrices.
+
+#[cfg(feature = "simd_sse2")]
+use crate::sse2::*;
+
+#[cfg(feature = "simd_avx2")]
+use crate::avx2::*;
+
+#[cfg(feature = "simd_wasm")]
+use crate::simd128::*;
+
+#[cfg(feature = "simd_neon")]
+use crate::neon::*;
+
+#[cfg(feature = "no_simd")]
+use crate::fallback::*;
+
+use std::i8;
+
+pub trait Matrix {
+    /// Byte to use as padding.
+    const NULL: u8;
+    /// Create a new matrix with default (usually nonsense) values.
+    ///
+    /// Use `new_simple` to create a sensible scoring matrix.
+    fn new() -> Self;
+    /// Set the score for a pair of bytes.
+ fn set(&mut self, a: u8, b: u8, score: i8); + + fn set_num(&mut self, a: u8, b: u8, score: i8); + /// Get the score for a pair of bytes. + fn get(&self, a: u8, b: u8) -> i8; + /// Get the pointer for a specific index. + fn as_ptr(&self, i: usize) -> *const i8; + /// Get the scores for a certain byte and a certain SIMD vector of bytes. + unsafe fn get_scores(&self, c: u8, v: HalfSimd, right: bool) -> Simd; + /// Convert a byte to a better storage format that makes retrieving scores + /// easier. + fn convert_char(c: u8) -> u8; +} + +/// Amino acid scoring matrix. +/// +/// Supports characters `A` to `Z`. Lowercase characters are uppercased. +#[repr(C, align(32))] +#[derive(Clone, PartialEq, Debug)] +pub struct AAMatrix { + scores: [i8; 27 * 32] +} + +impl AAMatrix { + /// Create a simple matrix with a certain match and mismatch score. + pub const fn new_simple(match_score: i8, mismatch_score: i8) -> Self { + let mut scores = [i8::MIN; 27 * 32]; + let mut i = b'A'; + while i <= b'Z' { + let mut j = b'A'; + while j <= b'Z' { + let idx = ((i - b'A') as usize) * 32 + ((j - b'A') as usize); + scores[idx] = if i == j { match_score } else { mismatch_score }; + j += 1; + } + i += 1; + } + Self { scores } + } + + /// Create an AAMatrix from a tab-separated table with no headers. + /// + /// Use `aa_order` to pass in the amino acids in order. 
+ pub fn from_tsv(tsv: &str, aa_order: &str) -> Self { + let tsv = tsv.trim(); + let aa_order = aa_order.split_ascii_whitespace().map(|s| s.as_bytes()[0]).collect::>(); + let mut res = Self::new(); + + for (line, &a) in tsv.split("\n").zip(&aa_order) { + for (score, &b) in line.split_ascii_whitespace().zip(&aa_order) { + let score = score.parse::().unwrap(); + res.set(a, b, score); + } + } + + res + } +} + +impl Matrix for AAMatrix { + const NULL: u8 = b'A' + 26u8; + + fn new() -> Self { + Self { scores: [i8::MIN; 27 * 32] } + } + + fn set(&mut self, a: u8, b: u8, score: i8) { + let a = a.to_ascii_uppercase(); + let b = b.to_ascii_uppercase(); + assert!(b'A' <= a && a <= b'Z' + 1); + assert!(b'A' <= b && b <= b'Z' + 1); + let idx = ((a - b'A') as usize) * 32 + ((b - b'A') as usize); + self.scores[idx] = score; + let idx = ((b - b'A') as usize) * 32 + ((a - b'A') as usize); + self.scores[idx] = score; + } + + fn set_num(&mut self, a: u8, b: u8, score: i8) { + let idx = (a as usize)* 32 + (b as usize); + self.scores[idx] = score; + let idx = (b as usize)* 32 + (a as usize); + self.scores[idx] = score; + } + + fn get(&self, a: u8, b: u8) -> i8 { + let a = a.to_ascii_uppercase(); + let b = b.to_ascii_uppercase(); + assert!(b'A' <= a && a <= b'Z' + 1); + assert!(b'A' <= b && b <= b'Z' + 1); + let idx = ((a - b'A') as usize) * 32 + ((b - b'A') as usize); + self.scores[idx] + } + + #[inline] + fn as_ptr(&self, i: usize) -> *const i8 { + debug_assert!(i < 27); + unsafe { self.scores.as_ptr().add(i * 32) } + } + + // TODO: get rid of lookup for around half of the shifts by constructing position specific scoring matrix? 
+ #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_scores(&self, c: u8, v: HalfSimd, _right: bool) -> Simd { + // efficiently lookup scores for each character in v + let matrix_ptr = self.as_ptr(c as usize); + let scores1 = lutsimd_load(matrix_ptr as *const LutSimd); + let scores2 = lutsimd_load((matrix_ptr as *const LutSimd).add(1)); + halfsimd_lookup2_i16(scores1, scores2, v) + } + + #[inline] + fn convert_char(c: u8) -> u8 { + let c = c.to_ascii_uppercase(); + assert!(c >= b'A' && c <= Self::NULL); + c - b'A' + } +} + +/// Nucleotide scoring matrix. +/// +/// Supports characters `A`, `C`, `G`, `N`, and `T`. Lowercase characters are uppercased. +/// +/// If a larger alphabet is needed (for example, with IUPAC characters), use `AAMatrix` instead. +#[repr(C, align(32))] +#[derive(Clone, PartialEq, Debug)] +pub struct NucMatrix { + scores: [i8; 8 * 16] +} + +impl NucMatrix { + /// Create a simple matrix with a certain match and mismatch score. 
+ pub const fn new_simple(match_score: i8, mismatch_score: i8) -> Self { + let mut scores = [i8::MIN; 8 * 16]; + let alpha = [b'A', b'T', b'C', b'G', b'N']; + let mut i = 0; + while i < alpha.len() { + let mut j = 0; + while j < alpha.len() { + let idx = ((alpha[i] & 0b111) as usize) * 16 + ((alpha[j] & 0b1111) as usize); + scores[idx] = if i == j { match_score } else { mismatch_score }; + j += 1; + } + i += 1; + } + Self { scores } + } +} + +impl Matrix for NucMatrix { + const NULL: u8 = b'Z'; + + fn new() -> Self { + Self { scores: [i8::MIN; 8 * 16] } + } + + fn set(&mut self, a: u8, b: u8, score: i8) { + let a = a.to_ascii_uppercase(); + let b = b.to_ascii_uppercase(); + assert!(b'A' <= a && a <= b'Z'); + assert!(b'A' <= b && b <= b'Z'); + let idx = ((a & 0b111) as usize) * 16 + ((b & 0b1111) as usize); + self.scores[idx] = score; + let idx = ((b & 0b111) as usize) * 16 + ((a & 0b1111) as usize); + self.scores[idx] = score; + } + + fn set_num(&mut self, _a: u8, _b: u8, _score: i8) { + unimplemented!(); + } + + fn get(&self, a: u8, b: u8) -> i8 { + let a = a.to_ascii_uppercase(); + let b = b.to_ascii_uppercase(); + assert!(b'A' <= a && a <= b'Z'); + assert!(b'A' <= b && b <= b'Z'); + let idx = ((a & 0b111) as usize) * 16 + ((b & 0b1111) as usize); + self.scores[idx] + } + + #[inline] + fn as_ptr(&self, i: usize) -> *const i8 { + unsafe { self.scores.as_ptr().add((i & 0b111) * 16) } + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_scores(&self, c: u8, v: HalfSimd, _right: bool) -> Simd { + // efficiently lookup scores for each character in v + let matrix_ptr = self.as_ptr(c as usize); + let scores = lutsimd_load(matrix_ptr as *const LutSimd); + halfsimd_lookup1_i16(scores, v) + } + + #[inline] + fn 
convert_char(c: u8) -> u8 { + let c = c.to_ascii_uppercase(); + assert!(c >= b'A' && c <= Self::NULL); + c + } +} + +/// Arbitrary bytes scoring matrix. +#[repr(C)] +#[derive(Clone, PartialEq, Debug)] +pub struct ByteMatrix { + match_score: i8, + mismatch_score: i8 +} + +impl ByteMatrix { + /// Create a simple matrix with a certain match and mismatch score. + pub const fn new_simple(match_score: i8, mismatch_score: i8) -> Self { + Self { match_score, mismatch_score } + } +} + +impl Matrix for ByteMatrix { + /// May lead to inaccurate results with x drop alignment, + /// if the block reaches the ends of the strings. + /// + /// Avoid using `ByteMatrix` with x drop alignment. + const NULL: u8 = b'\0'; + + fn new() -> Self { + Self { match_score: i8::MIN, mismatch_score: i8::MIN } + } + + fn set(&mut self, _a: u8, _b: u8, _score: i8) { + unimplemented!(); + } + + fn get(&self, a: u8, b: u8) -> i8 { + if a == b { self.match_score } else { self.mismatch_score } + } + + fn set_num(&mut self, _a: u8, _b: u8, _score: i8) { + unimplemented!(); + } + + #[inline] + fn as_ptr(&self, _i: usize) -> *const i8 { + unimplemented!() + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_scores(&self, c: u8, v: HalfSimd, _right: bool) -> Simd { + let match_scores = halfsimd_set1_i8(self.match_score); + let mismatch_scores = halfsimd_set1_i8(self.mismatch_score); + halfsimd_lookup_bytes_i16(match_scores, mismatch_scores, halfsimd_set1_i8(c as i8), v) + } + + #[inline] + fn convert_char(c: u8) -> u8 { + c + } +} + +/// Match = 1, mismatch = -1. 
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static NW1: NucMatrix = NucMatrix::new_simple(1, -1);
+
+// The amino acid matrices below all follow one pattern: the score table is
+// pulled in at compile time with `include!` from a file under `../matrices/`
+// named after the static, and the symbol is exported unmangled on every
+// target except `wasm32`.
+
+/// Amino acid score table included at compile time from `../matrices/BLOSUM45`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static BLOSUM45: AAMatrix = AAMatrix { scores: include!("../matrices/BLOSUM45") };
+
+/// Amino acid score table included at compile time from `../matrices/BLOSUM50`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static BLOSUM50: AAMatrix = AAMatrix { scores: include!("../matrices/BLOSUM50") };
+
+/// Amino acid score table included at compile time from `../matrices/BLOSUM62`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static BLOSUM62: AAMatrix = AAMatrix { scores: include!("../matrices/BLOSUM62") };
+
+/// Amino acid score table included at compile time from `../matrices/BLOSUM80`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static BLOSUM80: AAMatrix = AAMatrix { scores: include!("../matrices/BLOSUM80") };
+
+/// Amino acid score table included at compile time from `../matrices/BLOSUM90`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static BLOSUM90: AAMatrix = AAMatrix { scores: include!("../matrices/BLOSUM90") };
+
+/// Amino acid score table included at compile time from `../matrices/PAM100`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static PAM100: AAMatrix = AAMatrix { scores: include!("../matrices/PAM100") };
+
+/// Amino acid score table included at compile time from `../matrices/PAM120`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static PAM120: AAMatrix = AAMatrix { scores: include!("../matrices/PAM120") };
+
+/// Amino acid score table included at compile time from `../matrices/PAM160`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static PAM160: AAMatrix = AAMatrix { scores: include!("../matrices/PAM160") };
+
+/// Amino acid score table included at compile time from `../matrices/PAM200`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static PAM200: AAMatrix = AAMatrix { scores: include!("../matrices/PAM200") };
+
+/// Amino acid score table included at compile time from `../matrices/PAM250`.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)]
+pub static PAM250: AAMatrix = AAMatrix { scores: include!("../matrices/PAM250") };
+
+/// Match = 1, mismatch = -1.
+#[cfg_attr(not(target_arch = "wasm32"), no_mangle)] +pub static BYTES1: ByteMatrix = ByteMatrix::new_simple(1, -1); + +/*pub trait ScoreParams { + const GAP_OPEN: i8; + const GAP_EXTEND: i8; + const I: usize; +} + +pub struct Params; + +impl ScoreParams for Params<{ GAP_OPEN }, { GAP_EXTEND }, { I }> { + const GAP_OPEN: i8 = GAP_OPEN; + const GAP_EXTEND: i8 = GAP_EXTEND; + const I: usize = I; +} + +pub type GapParams = Params<{ GAP_OPEN }, { GAP_EXTEND }, 0>;*/ + +/// Open and extend gap costs. +/// +/// Open cost must include the extend cost. For example, with `Gaps { open: -11, extend: -1 }`, +/// a gap of length 1 costs -11, and a gap of length 2 costs -12. +#[derive(Copy, Clone, PartialEq, Debug)] +#[repr(C)] +pub struct Gaps { + pub open: i8, + pub extend: i8 +} + +#[allow(non_snake_case)] +pub trait Profile { + /// Byte to use as padding. + const NULL: u8; + + /// Create a new profile of a specific length, with default (large negative) values. + /// + /// Note that internally, the created profile is longer than a conventional position-specific scoring + /// matrix (and `str_len`) by 1, so the profile will have the same length as the number of + /// columns in the DP matrix. + /// The first column of scores in the profile should be large negative values (padding). + /// This allows gap open costs to be specified for the first column of the DP matrix. + fn new(str_len: usize, block_size: usize, gap_extend: i8) -> Self; + /// Create a new profile from a byte string. + fn from_bytes(b: &[u8], block_size: usize, match_score: i8, mismatch_score: i8, gap_open_C: i8, gap_close_C: i8, gap_open_R: i8, gap_extend: i8) -> Self; + + /// Get the length of the profile. + fn len(&self) -> usize; + /// Clear the profile so it can be reused for profile lengths less than or equal + /// to the length this struct was created with. + fn clear(&mut self, str_len: usize, block_size: usize); + + /// Set the score for a position and byte. 
+ /// + /// The profile should be first `clear`ed before it is reused with different lengths. + /// + /// The first column (`i = 0`) should be padded with large negative values. + /// Therefore, set values starting from `i = 1`. + fn set(&mut self, i: usize, b: u8, score: i8); + /// Set the scores for all positions in the position specific scoring matrix. + /// + /// The profile should be first `clear`ed before it is reused with different lengths. + /// + /// Use `order` to specify the order of bytes that is used in the `scores` matrix. + /// Scores (in `scores`) should be stored in row-major order, where each row is a different position + /// and each column is a different byte. + /// + /// Use `left_shift` and `right_shift` to scale all the scores. + fn set_all(&mut self, order: &[u8], scores: &[i8], left_shift: usize, right_shift: usize); + /// Set the scores for all positions in reverse in the position specific scoring matrix. + /// + /// The profile should be first `clear`ed before it is reused with different lengths. + /// + /// Use `order` to specify the order of bytes that is used in the `scores` matrix. + /// Scores (in `scores`) should be stored in row-major order, where each row is a different position + /// and each column is a different byte. + /// + /// Use `left_shift` and `right_shift` to scale all the scores. + fn set_all_rev(&mut self, order: &[u8], scores: &[i8], left_shift: usize, right_shift: usize); + + /// Set the gap open cost for a column. + /// + /// When aligning a sequence `q` to a profile `r`, this is the gap open cost at column `i` for a + /// column transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + /// This represents starting a gap in `q`. + fn set_gap_open_C(&mut self, i: usize, gap: i8); + /// Set the gap close cost for a column. 
+ /// + /// When aligning a sequence `q` to a profile `r`, this is the gap close cost at column `i` for + /// ending column transitions in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + /// This represents ending a gap in `q`. + fn set_gap_close_C(&mut self, i: usize, gap: i8); + /// Set the gap open cost for a row. + /// + /// When aligning a sequence `q` to a profile `r`, this is the gap open cost at column `i` for + /// a row transition in the DP matrix with `|q| + 1` rows and `|r| + 1` columns. + /// This represents starting a gap in `r`. + fn set_gap_open_R(&mut self, i: usize, gap: i8); + + /// Set the gap open cost for all column transitions. + fn set_all_gap_open_C(&mut self, gap: i8); + /// Set the gap close cost for all column transitions. + fn set_all_gap_close_C(&mut self, gap: i8); + /// Set the gap open cost for all row transitions. + fn set_all_gap_open_R(&mut self, gap: i8); + + /// Get the score for a position and byte. + fn get(&self, i: usize, b: u8) -> i8; + /// Get the gap extend cost. + fn get_gap_extend(&self) -> i8; + fn get_curr_len(&self) -> usize; + /// Get the pointer for a specific index. + fn as_ptr_pos(&self, i: usize) -> *const i8; + /// Get the pointer for a specific amino acid. + fn as_ptr_aa(&self, a: usize) -> *const i16; + + /// Get the scores for a certain SIMD vector of bytes at a specific position in the profile. + unsafe fn get_scores_pos(&self, i: usize, v: HalfSimd, right: bool) -> Simd; + /// Get the scores for a certain byte starting at a specific position in the profile. + unsafe fn get_scores_aa(&self, i: usize, c: u8, right: bool) -> Simd; + + /// Get the gap open cost for a column. + unsafe fn get_gap_open_right_C(&self, i: usize) -> Simd; + /// Get the gap close cost for a column. + unsafe fn get_gap_close_right_C(&self, i: usize) -> Simd; + /// Get the gap open cost for a row. + unsafe fn get_gap_open_right_R(&self, i: usize) -> Simd; + + /// Get the gap open cost for a column. 
+ unsafe fn get_gap_open_down_C(&self, i: usize) -> Simd; + /// Get the gap close cost for a column. + unsafe fn get_gap_close_down_C(&self, i: usize) -> Simd; + /// Get the gap open cost for a row. + unsafe fn get_gap_open_down_R(&self, i: usize) -> Simd; + + /// Convert a byte to a better storage format that makes retrieving scores + /// easier. + fn convert_char(c: u8) -> u8; +} + +/// Amino acid position specific scoring matrix. +/// +/// Supports characters `A` to `Z`. Lowercase characters are uppercased. +#[allow(non_snake_case)] +#[derive(Clone, PartialEq, Debug)] +pub struct AAProfile { + aa_pos: Vec, + pos_aa: Vec, + gap_extend: i8, + pos_gap_open_C: Vec, + pos_gap_close_C: Vec, + pos_gap_open_R: Vec, + // length used for underlying allocated vectors + max_len: usize, + // length used for the current padded profile + curr_len: usize, + // length of the profile without padding (same length as the consensus sequence of the position + // specific scoring matrix) + str_len: usize +} + +impl Profile for AAProfile { + const NULL: u8 = b'A' + 26u8; + + fn new(str_len: usize, block_size: usize, gap_extend: i8) -> Self { + let max_len = str_len + block_size + 1; + Self { + aa_pos: vec![i8::MIN as i16; 32 * max_len], + pos_aa: vec![i8::MIN; max_len * 32], + gap_extend, + pos_gap_open_C: vec![i8::MIN as i16; max_len], + pos_gap_close_C: vec![i8::MIN as i16; max_len], + pos_gap_open_R: vec![i8::MIN as i16; max_len], + max_len, + curr_len: max_len, + str_len + } + } + + #[allow(non_snake_case)] + fn from_bytes(b: &[u8], block_size: usize, match_score: i8, mismatch_score: i8, gap_open_C: i8, gap_close_C: i8, gap_open_R: i8, gap_extend: i8) -> Self { + let mut res = Self::new(b.len(), block_size, gap_extend); + + for i in 0..b.len() { + for c in b'A'..=b'Z' { + res.set(i + 1, c, if c == b[i] { match_score } else { mismatch_score }); + } + } + + for i in 0..b.len() + 1 { + res.set_gap_open_C(i, gap_open_C); + res.set_gap_close_C(i, gap_close_C); + res.set_gap_open_R(i, 
gap_open_R); + } + + res + } + + fn len(&self) -> usize { + self.str_len + } + + fn clear(&mut self, str_len: usize, block_size: usize) { + let curr_len = str_len + block_size + 1; + assert!(curr_len <= self.max_len); + self.aa_pos[..32 * curr_len].fill(i8::MIN as i16); + self.pos_aa[..curr_len * 32].fill(i8::MIN); + self.pos_gap_open_C[..curr_len].fill(i8::MIN as i16); + self.pos_gap_close_C[..curr_len].fill(i8::MIN as i16); + self.pos_gap_open_R[..curr_len].fill(i8::MIN as i16); + self.str_len = str_len; + self.curr_len = curr_len; + } + + fn set(&mut self, i: usize, b: u8, score: i8) { + let b = b.to_ascii_uppercase(); + assert!(b'A' <= b && b <= b'Z' + 1); + let idx = i * 32 + ((b - b'A') as usize); + self.pos_aa[idx] = score; + let idx = ((b - b'A') as usize) * self.curr_len + i; + self.aa_pos[idx] = score as i16; + } + + fn set_all(&mut self, order: &[u8], scores: &[i8], left_shift: usize, right_shift: usize) { + self.set_all_core::(order, scores, left_shift, right_shift); + } + + fn set_all_rev(&mut self, order: &[u8], scores: &[i8], left_shift: usize, right_shift: usize) { + self.set_all_core::(order, scores, left_shift, right_shift); + } + + fn set_gap_open_C(&mut self, i: usize, gap: i8) { + assert!(gap < 0, "Gap open cost must be negative!"); + self.pos_gap_open_C[i] = gap as i16; + } + + fn set_gap_close_C(&mut self, i: usize, gap: i8) { + self.pos_gap_close_C[i] = gap as i16; + } + + fn set_gap_open_R(&mut self, i: usize, gap: i8) { + assert!(gap < 0, "Gap open cost must be negative!"); + self.pos_gap_open_R[i] = gap as i16; + } + + fn set_all_gap_open_C(&mut self, gap: i8) { + assert!(gap < 0, "Gap open cost must be negative!"); + self.pos_gap_open_C[..self.curr_len].fill(gap as i16); + } + + fn set_all_gap_close_C(&mut self, gap: i8) { + self.pos_gap_close_C[..self.curr_len].fill(gap as i16); + } + + fn set_all_gap_open_R(&mut self, gap: i8) { + assert!(gap < 0, "Gap open cost must be negative!"); + self.pos_gap_open_R[..self.curr_len].fill(gap as 
i16); + } + + fn get(&self, i: usize, b: u8) -> i8 { + let b = b.to_ascii_uppercase(); + assert!(b'A' <= b && b <= b'Z' + 1); + let idx = i * 32 + ((b - b'A') as usize); + self.pos_aa[idx] + } + + fn get_gap_extend(&self) -> i8 { + self.gap_extend + } + + fn get_curr_len(&self) -> usize { + self.curr_len + } + + #[inline] + fn as_ptr_pos(&self, i: usize) -> *const i8 { + debug_assert!(i < self.curr_len); + unsafe { self.pos_aa.as_ptr().add(i * 32) } + } + + #[inline] + fn as_ptr_aa(&self, a: usize) -> *const i16 { + debug_assert!(a < 27); + unsafe { self.aa_pos.as_ptr().add(a * self.curr_len) } + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_scores_pos(&self, i: usize, v: HalfSimd, _right: bool) -> Simd { + // efficiently lookup scores for each character in v + let matrix_ptr = self.as_ptr_pos(i); + let scores1 = lutsimd_loadu(matrix_ptr as *const LutSimd); + let scores2 = lutsimd_loadu((matrix_ptr as *const LutSimd).add(1)); + halfsimd_lookup2_i16(scores1, scores2, v) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_scores_aa(&self, i: usize, c: u8, _right: bool) -> Simd { + let matrix_ptr = self.as_ptr_aa(c as usize); + simd_loadu(matrix_ptr.add(i) as *const Simd) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable 
= "neon"))] + #[inline] + unsafe fn get_gap_open_right_C(&self, i: usize) -> Simd { + simd_set1_i16(*self.pos_gap_open_C.as_ptr().add(i)) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_gap_close_right_C(&self, i: usize) -> Simd { + simd_set1_i16(*self.pos_gap_close_C.as_ptr().add(i)) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_gap_open_right_R(&self, i: usize) -> Simd { + simd_set1_i16(*self.pos_gap_open_R.as_ptr().add(i)) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_gap_open_down_C(&self, i: usize) -> Simd { + simd_loadu(self.pos_gap_open_C.as_ptr().add(i) as *const Simd) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_gap_close_down_C(&self, i: usize) -> Simd { + simd_loadu(self.pos_gap_close_C.as_ptr().add(i) as *const Simd) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = 
"simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + unsafe fn get_gap_open_down_R(&self, i: usize) -> Simd { + simd_loadu(self.pos_gap_open_R.as_ptr().add(i) as *const Simd) + } + + #[inline] + fn convert_char(c: u8) -> u8 { + let c = c.to_ascii_uppercase(); + assert!(c >= b'A' && c <= Self::NULL); + c - b'A' + } +} + +/// Positional score bias for scores. +#[derive(Clone, PartialEq, Debug)] +pub struct PosBias { + bias: Vec, + len: usize, +} + +impl PosBias { + /// Create a new positional score bias vector of some maximum length and initialized to zeros. + pub fn new(len: usize, block_size: usize) -> Self { + Self { + bias: vec![0i16; len + block_size + 1], + len + } + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn set_biases(&mut self, b: &[i16]) { + self.bias.fill(0i16); + self.bias[1..b.len() + 1].copy_from_slice(b); + self.len = b.len(); + } + + #[inline] + pub unsafe fn get(&self, i: usize) -> i16 { + *self.bias.as_ptr().add(i) + } + + #[cfg_attr(feature = "simd_sse2", target_feature(enable = "sse2"))] + #[cfg_attr(feature = "simd_avx2", target_feature(enable = "avx2"))] + #[cfg_attr(feature = "simd_wasm", target_feature(enable = "simd128"))] + #[cfg_attr(feature = "simd_neon", target_feature(enable = "neon"))] + #[inline] + pub unsafe fn get_biases(&self, i: usize) -> Simd { + simd_loadu(self.bias.as_ptr().add(i) as *const Simd) + } +} + +impl AAProfile { + fn set_all_core(&mut self, order: &[u8], scores: &[i8], left_shift: usize, right_shift: usize) { + #[repr(align(32))] + struct A([u8; 32]); + let mut o = A([Self::NULL - b'A'; 32]); + assert!(order.len() <= 32); + + for (i, &b) in order.iter().enumerate() { + let b = b.to_ascii_uppercase(); + assert!(b'A' <= b && b <= b'Z' + 1); + o.0[i] = b - b'A'; + } + assert_eq!(scores.len() / order.len(), self.str_len); + + let mut i = if REV { self.str_len } else { 1 }; + let mut score_idx = 0; + + while if REV { i >= 1 } 
else { i <= self.str_len } { + let mut j = 0; + + while j < order.len() { + unsafe { + let score = ((*scores.as_ptr().add(score_idx)) << left_shift) >> right_shift; + let b = *o.0.as_ptr().add(j) as usize; + *self.pos_aa.as_mut_ptr().add(i * 32 + b) = score; + *self.aa_pos.as_mut_ptr().add(b * self.curr_len + i) = score as i16; + } + + score_idx += 1; + j += 1; + } + + if REV { + i -= 1; + } else { + i += 1; + } + } + } + + pub fn pos_aa_mut_ptr(&mut self) -> *mut i8 { + self.pos_aa.as_mut_ptr() + } + + pub fn aa_pos_mut_ptr(&mut self) -> *mut i16 { + self.aa_pos.as_mut_ptr() + } +} diff --git a/lib/block-aligner/src/simd128.rs b/lib/block-aligner/src/simd128.rs new file mode 100644 index 000000000..139e33e99 --- /dev/null +++ b/lib/block-aligner/src/simd128.rs @@ -0,0 +1,564 @@ +use std::arch::wasm32::*; + +pub type Simd = v128; +// no v64 type, so HalfSimd is just v128 with upper half ignored +pub type HalfSimd = v128; +pub type LutSimd = v128; +pub type TraceType = i16; +/// Number of 16-bit lanes in a SIMD vector. 
+pub const L: usize = 8; +pub const L_BYTES: usize = L * 2; +pub const HALFSIMD_MUL: usize = 2; +// using min = 0 is faster, but restricts range of scores (and restricts the max block size) +pub const ZERO: i16 = 1 << 14; +pub const MIN: i16 = 0; + +// Note: SIMD vectors treated as little-endian + +// No non-temporal store in WASM +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn store_trace(ptr: *mut TraceType, trace: TraceType) { *ptr = trace; } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_adds_i16(a: Simd, b: Simd) -> Simd { i16x8_add_sat(a, b) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_subs_i16(a: Simd, b: Simd) -> Simd { i16x8_sub_sat(a, b) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_max_i16(a: Simd, b: Simd) -> Simd { i16x8_max(a, b) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_cmpeq_i16(a: Simd, b: Simd) -> Simd { i16x8_eq(a, b) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_cmpgt_i16(a: Simd, b: Simd) -> Simd { i16x8_gt(a, b) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_blend_i8(a: Simd, b: Simd, mask: Simd) -> Simd { v128_bitselect(b, a, mask) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_load(ptr: *const Simd) -> Simd { v128_load(ptr) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_loadu(ptr: *const Simd) -> Simd { v128_load(ptr) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_store(ptr: *mut Simd, a: Simd) { v128_store(ptr, a) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_set1_i16(v: i16) -> Simd { i16x8_splat(v) } + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_extract_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num < L); + use std::arch::wasm32::*; + i16x8_extract_lane::<{ $num }>($a) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
simd_insert_i16 { + ($a:expr, $v:expr, $num:expr) => { + { + debug_assert!($num < L); + use std::arch::wasm32::*; + i16x8_replace_lane::<{ $num }>($a, $v) + } + }; +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_movemask_i8(a: Simd) -> u16 { + i8x16_bitmask(a) as u16 + /*const MUL: i64 = { + let mut m = 0u64; + m |= 1u64 << (0 - 0); + m |= 1u64 << (8 - 1); + m |= 1u64 << (16 - 2); + m |= 1u64 << (24 - 3); + m |= 1u64 << (32 - 4); + m |= 1u64 << (40 - 5); + m |= 1u64 << (48 - 6); + m |= 1u64 << (56 - 7); + m as i64 + }; + let b = i64x2_mul(v128_and(a, i8x16_splat(0b10000000u8 as i8)), i64x2_splat(MUL)); + let res1 = i8x16_extract_lane::<{ L * 2 - 1 }>(b) as u32; + let res2 = i8x16_extract_lane::<{ L - 1 }>(b) as u32; + (res1 << 8) | res2*/ +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_sl_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + use std::arch::wasm32::*; + i16x8_shuffle::<{ 8 - $num }, { 9 - $num }, { 10 - $num }, { 11 - $num }, { 12 - $num }, { 13 - $num }, { 14 - $num }, { 15 - $num }>($b, $a) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_sr_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + use std::arch::wasm32::*; + i16x8_shuffle::<{ 0 + $num }, { 1 + $num }, { 2 + $num }, { 3 + $num }, { 4 + $num }, { 5 + $num }, { 6 + $num }, { 7 + $num }>($b, $a) + } + }; +} + +// hardcoded to STEP = 8 +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_step(a: Simd, b: Simd) -> Simd { + a +} + +macro_rules! 
simd_sllz_i16 { + ($a:expr, $num:expr) => { + { + simd_sl_i16!($a, simd_set1_i16(0), $num) + } + }; +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_broadcasthi_i16(v: Simd) -> Simd { + i16x8_shuffle::<7, 7, 7, 7, 7, 7, 7, 7>(v, v) +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_slow_extract_i16(v: Simd, i: usize) -> i16 { + debug_assert!(i < L); + + #[repr(align(16))] + struct A([i16; L]); + + let mut a = A([0i16; L]); + simd_store(a.0.as_mut_ptr() as *mut Simd, v); + *a.0.as_ptr().add(i) +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_hmax_i16(v: Simd) -> i16 { + let mut v2 = i16x8_max(v, simd_sr_i16!(v, v, 1)); + v2 = i16x8_max(v2, simd_sr_i16!(v2, v2, 2)); + v2 = i16x8_max(v2, simd_sr_i16!(v2, v2, 4)); + simd_extract_i16!(v2, 0) +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_prefix_hadd_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + use std::arch::wasm32::*; + let mut v = i16x8_sub_sat($a, i16x8_splat(ZERO)); + if $num > 4 { + v = i16x8_add_sat(v, simd_sr_i16!(v, v, 4)); + } + if $num > 2 { + v = i16x8_add_sat(v, simd_sr_i16!(v, v, 2)); + } + if $num > 1 { + v = i16x8_add_sat(v, simd_sr_i16!(v, v, 1)); + } + simd_extract_i16!(v, 0) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_prefix_hmax_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + use std::arch::wasm32::*; + let mut v = $a; + if $num > 4 { + v = i16x8_max(v, simd_sr_i16!(v, v, 4)); + } + if $num > 2 { + v = i16x8_max(v, simd_sr_i16!(v, v, 2)); + } + if $num > 1 { + v = i16x8_max(v, simd_sr_i16!(v, v, 1)); + } + simd_extract_i16!(v, 0) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
simd_suffix_hmax_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + use std::arch::wasm32::*; + let mut v = $a; + if $num > 4 { + v = i16x8_max(v, simd_sl_i16!(v, v, 4)); + } + if $num > 2 { + v = i16x8_max(v, simd_sl_i16!(v, v, 2)); + } + if $num > 1 { + v = i16x8_max(v, simd_sl_i16!(v, v, 1)); + } + simd_extract_i16!(v, 7) + } + }; +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn simd_hargmax_i16(v: Simd, max: i16) -> usize { + let v2 = i16x8_eq(v, i16x8_splat(max)); + (simd_movemask_i8(v2).trailing_zeros() as usize) / 2 +} + +#[target_feature(enable = "simd128")] +#[inline] +#[allow(non_snake_case)] +#[allow(dead_code)] +pub unsafe fn simd_naive_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _gap_cost_lane: PrefixScanConsts) -> Simd { + let mut curr = R_max; + + for _i in 0..(L - 1) { + let prev = curr; + curr = simd_sllz_i16!(curr, 1); + curr = i16x8_add_sat(curr, gap_cost); + curr = i16x8_max(curr, prev); + } + + curr +} + +pub type PrefixScanConsts = (); + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn get_prefix_scan_consts(gap: Simd) -> (Simd, PrefixScanConsts) { + let mut shift1 = simd_sllz_i16!(gap, 1); + shift1 = i16x8_add_sat(shift1, gap); + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = i16x8_add_sat(shift2, shift1); + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = i16x8_add_sat(shift4, shift2); + + (shift4, ()) +} + +#[target_feature(enable = "simd128")] +#[inline] +#[allow(non_snake_case)] +pub unsafe fn simd_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _gap_cost_lane: PrefixScanConsts) -> Simd { + let mut shift1 = simd_sllz_i16!(R_max, 1); + shift1 = i16x8_add_sat(shift1, gap_cost); + shift1 = i16x8_max(shift1, R_max); + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = i16x8_add_sat(shift2, i16x8_shl(gap_cost, 1)); + shift2 = i16x8_max(shift1, shift2); + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = i16x8_add_sat(shift4, i16x8_shl(gap_cost, 2)); + shift4 = 
i16x8_max(shift2, shift4); + + shift4 +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_lookup2_i16(lut1: LutSimd, lut2: LutSimd, v: HalfSimd) -> Simd { + // must use a mask to avoid zeroing lanes that are too large + let mask = i8x16_splat(0b1111); + let v_mask = v128_and(v, mask); + let a = i8x16_swizzle(lut1, v_mask); + let b = i8x16_swizzle(lut2, v_mask); + let lut_mask = i8x16_gt(v, mask); + let c = v128_bitselect(b, a, lut_mask); + i16x8_extend_low_i8x16(c) +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_lookup1_i16(lut: LutSimd, v: HalfSimd) -> Simd { + i16x8_extend_low_i8x16(i8x16_swizzle(lut, v128_and(v, i8x16_splat(0b1111)))) +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_lookup_bytes_i16(match_scores: HalfSimd, mismatch_scores: HalfSimd, a: HalfSimd, b: HalfSimd) -> Simd { + let mask = i8x16_eq(a, b); + let c = v128_bitselect(match_scores, mismatch_scores, mask); + i16x8_extend_low_i8x16(c) +} + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_load(ptr: *const HalfSimd) -> HalfSimd { v128_load(ptr) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_loadu(ptr: *const HalfSimd) -> HalfSimd { v128_load(ptr) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn lutsimd_load(ptr: *const LutSimd) -> LutSimd { v128_load(ptr) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn lutsimd_loadu(ptr: *const LutSimd) -> LutSimd { v128_load(ptr) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_store(ptr: *mut HalfSimd, a: HalfSimd) { v128_store(ptr, a) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_sub_i8(a: HalfSimd, b: HalfSimd) -> HalfSimd { i8x16_sub(a, b) } + +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_set1_i8(v: i8) -> HalfSimd { i8x16_splat(v) } + +// only the low 8 bytes are out of each v128 for 
halfsimd +#[target_feature(enable = "simd128")] +#[inline] +pub unsafe fn halfsimd_get_idx(i: usize) -> usize { i + i / L * L } + +#[macro_export] +#[doc(hidden)] +macro_rules! halfsimd_sr_i8 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + use std::arch::wasm32::*; + // special indexing to skip over the high 8 bytes that are unused + const fn get_idx(i: usize) -> usize { if i >= L { i + L } else { i } } + i8x16_shuffle::< + { get_idx(0 + $num) }, { get_idx(1 + $num) }, { get_idx(2 + $num) }, { get_idx(3 + $num) }, + { get_idx(4 + $num) }, { get_idx(5 + $num) }, { get_idx(6 + $num) }, { get_idx(7 + $num) }, + 8, 9, 10, 11, + 12, 13, 14, 15 + >($b, $a) + } + }; +} + +#[target_feature(enable = "simd128")] +#[allow(dead_code)] +pub unsafe fn simd_dbg_i16(v: Simd) { + #[repr(align(16))] + struct A([i16; L]); + + let mut a = A([0i16; L]); + simd_store(a.0.as_mut_ptr() as *mut Simd, v); + + for i in (0..a.0.len()).rev() { + print!("{:6} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "simd128")] +#[allow(dead_code)] +pub unsafe fn halfsimd_dbg_i8(v: HalfSimd) { + #[repr(align(16))] + struct A([i8; L * HALFSIMD_MUL]); + + let mut a = A([0i8; L * HALFSIMD_MUL]); + halfsimd_store(a.0.as_mut_ptr() as *mut HalfSimd, v); + + for i in (0..a.0.len()).rev() { + print!("{:3} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "simd128")] +#[allow(dead_code)] +pub unsafe fn simd_assert_vec_eq(a: Simd, b: [i16; L]) { + #[repr(align(16))] + struct A([i16; L]); + + let mut arr = A([0i16; L]); + simd_store(arr.0.as_mut_ptr() as *mut Simd, a); + assert_eq!(arr.0, b); +} + +#[target_feature(enable = "simd128")] +#[allow(dead_code)] +pub unsafe fn halfsimd_assert_vec_eq(a: HalfSimd, b: [i8; L]) { + #[repr(align(16))] + struct A([i8; L * HALFSIMD_MUL]); + + let mut arr = A([0i8; L * HALFSIMD_MUL]); + halfsimd_store(arr.0.as_mut_ptr() as *mut HalfSimd, a); + assert_eq!(&arr.0[..L], b); +} + +#[cfg(test)] +mod tests { + use super::*; + + 
#[test] + fn test_smoke() { + #[target_feature(enable = "simd128")] + unsafe fn inner() { + #[repr(align(16))] + struct A([i16; L]); + + let test = A([1, 2, 3, 4, 5, 6, 7, 8]); + let test_rev = A([8, 7, 6, 5, 4, 3, 2, 1]); + let test_mask = A([0, -1, 0, -1, 0, -1, 0, -1]); + let vec0 = simd_load(test.0.as_ptr() as *const Simd); + let vec0_rev = simd_load(test_rev.0.as_ptr() as *const Simd); + let vec0_mask = simd_load(test_mask.0.as_ptr() as *const Simd); + + let mut vec1 = simd_sl_i16!(vec0, vec0, 1); + simd_assert_vec_eq(vec1, [8, 1, 2, 3, 4, 5, 6, 7]); + + vec1 = simd_sr_i16!(vec0, vec0, 1); + simd_assert_vec_eq(vec1, [2, 3, 4, 5, 6, 7, 8, 1]); + + vec1 = simd_adds_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [2, 4, 6, 8, 10, 12, 14, 16]); + + vec1 = simd_subs_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, 0, 0, 0, 0]); + + vec1 = simd_max_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [8, 7, 6, 5, 5, 6, 7, 8]); + + vec1 = simd_cmpeq_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, 0, 0, 0, 0]); + + vec1 = simd_cmpeq_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [-1, -1, -1, -1, -1, -1, -1, -1]); + + vec1 = simd_cmpgt_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, -1, -1, -1, -1]); + + vec1 = simd_blend_i8(vec0, vec0_rev, vec0_mask); + simd_assert_vec_eq(vec1, [1, 7, 3, 5, 5, 3, 7, 1]); + + let mut val = simd_extract_i16!(vec0, 0); + assert_eq!(val, 1); + + val = simd_slow_extract_i16(vec0, 0); + assert_eq!(val, 1); + + vec1 = simd_insert_i16!(vec0, 0, 2); + simd_assert_vec_eq(vec1, [1, 2, 0, 4, 5, 6, 7, 8]); + + let val1 = simd_movemask_i8(vec0_mask); + assert_eq!(val1, 0b1100110011001100); + + vec1 = simd_sllz_i16!(vec0, 1); + simd_assert_vec_eq(vec1, [0, 1, 2, 3, 4, 5, 6, 7]); + + vec1 = simd_broadcasthi_i16(vec0); + simd_assert_vec_eq(vec1, [8, 8, 8, 8, 8, 8, 8, 8]); + + val = simd_hmax_i16(vec0); + assert_eq!(val, 8); + + let zeros = simd_set1_i16(ZERO); + val = simd_prefix_hadd_i16!(simd_adds_i16(vec0, zeros), 4); + 
assert_eq!(val, 10); + + val = simd_prefix_hmax_i16!(vec0, 4); + assert_eq!(val, 4); + + val = simd_suffix_hmax_i16!(vec0, 4); + assert_eq!(val, 8); + + let val2 = simd_hargmax_i16(vec0, 4); + assert_eq!(val2, 3); + } + unsafe { inner(); } + } + + #[test] + fn test_endianness() { + #[target_feature(enable = "simd128")] + unsafe fn inner() { + #[repr(align(16))] + struct A([i16; L]); + + let vec = A([1, 2, 3, 4, 5, 6, 7, 8]); + let vec = simd_load(vec.0.as_ptr() as *const Simd); + let res = simd_sl_i16!(vec, vec, 1); + simd_assert_vec_eq(res, [8, 1, 2, 3, 4, 5, 6, 7]); + + let vec = A([1, 2, 3, 4, 5, 6, 7, 8]); + let vec = simd_load(vec.0.as_ptr() as *const Simd); + let res = simd_sr_i16!(vec, vec, 1); + simd_assert_vec_eq(res, [2, 3, 4, 5, 6, 7, 8, 1]); + + #[repr(align(16))] + struct B([i8; L * HALFSIMD_MUL]); + + let vec = B([1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0]); + let vec = halfsimd_load(vec.0.as_ptr() as *const HalfSimd); + let res = halfsimd_sr_i8!(vec, vec, 1); + halfsimd_assert_vec_eq(res, [2, 3, 4, 5, 6, 7, 8, 1]); + + simd_assert_vec_eq(simd_adds_i16(simd_set1_i16(i16::MIN), simd_set1_i16(i16::MIN)), [i16::MIN; 8]); + simd_assert_vec_eq(simd_adds_i16(simd_set1_i16(i16::MAX), simd_set1_i16(i16::MIN)), [-1; 8]); + simd_assert_vec_eq(simd_subs_i16(simd_set1_i16(i16::MAX), simd_set1_i16(i16::MIN)), [i16::MAX; 8]); + } + unsafe { inner(); } + } + + #[test] + fn test_prefix_scan() { + #[target_feature(enable = "simd128")] + unsafe fn inner() { + #[repr(align(16))] + struct A([i16; L]); + + let vec = A([8, 9, 10, 15, 12, 13, 14, 11]); + let gap = simd_set1_i16(0); + let (_, consts) = get_prefix_scan_consts(gap); + let res = simd_prefix_scan_i16(simd_load(vec.0.as_ptr() as *const Simd), gap, consts); + simd_assert_vec_eq(res, [8, 9, 10, 15, 15, 15, 15, 15]); + + let vec = A([8, 9, 10, 15, 12, 13, 14, 11]); + let gap = simd_set1_i16(-1); + let (_, consts) = get_prefix_scan_consts(gap); + let res = simd_prefix_scan_i16(simd_load(vec.0.as_ptr() as *const 
Simd), gap, consts); + simd_assert_vec_eq(res, [8, 9, 10, 15, 14, 13, 14, 13]); + } + unsafe { inner(); } + } +} diff --git a/lib/block-aligner/src/sse2.rs b/lib/block-aligner/src/sse2.rs new file mode 100644 index 000000000..fda629b81 --- /dev/null +++ b/lib/block-aligner/src/sse2.rs @@ -0,0 +1,601 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +pub type Simd = __m128i; // use for storing DP scores +pub type HalfSimd = __m128i; // used for storing bytes (sequence or scoring matrix) +pub type LutSimd = __m128i; // used for storing a row in a scoring matrix (always 128 bits) +pub type TraceType = i16; +/// Number of 16-bit lanes in a SIMD vector. +pub const L: usize = 8; +pub const L_BYTES: usize = L * 2; +pub const HALFSIMD_MUL: usize = 2; +// using min = 0 is faster, but restricts range of scores (and restricts the max block size) +pub const ZERO: i16 = 1 << 14; +pub const MIN: i16 = 0; + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn store_trace(ptr: *mut TraceType, trace: TraceType) { *ptr = trace; } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_adds_i16(a: Simd, b: Simd) -> Simd { _mm_adds_epi16(a, b) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_subs_i16(a: Simd, b: Simd) -> Simd { _mm_subs_epi16(a, b) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_max_i16(a: Simd, b: Simd) -> Simd { _mm_max_epi16(a, b) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_cmpeq_i16(a: Simd, b: Simd) -> Simd { _mm_cmpeq_epi16(a, b) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_cmpgt_i16(a: Simd, b: Simd) -> Simd { _mm_cmpgt_epi16(a, b) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_blend_i8(a: Simd, b: Simd, mask: Simd) -> Simd { _mm_or_si128(_mm_andnot_si128(mask, a), _mm_and_si128(mask, b)) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn 
simd_load(ptr: *const Simd) -> Simd { _mm_load_si128(ptr) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_loadu(ptr: *const Simd) -> Simd { _mm_loadu_si128(ptr) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_store(ptr: *mut Simd, a: Simd) { _mm_store_si128(ptr, a) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_set1_i16(v: i16) -> Simd { _mm_set1_epi16(v) } + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_extract_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num < L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + _mm_extract_epi16($a, $num as i32) as i16 + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_insert_i16 { + ($a:expr, $v:expr, $num:expr) => { + { + debug_assert!($num < L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + _mm_insert_epi16($a, $v as i32, $num as i32) + } + }; +} + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_movemask_i8(a: Simd) -> u16 { _mm_movemask_epi8(a) as u16 } + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_sl_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + _mm_or_si128(_mm_slli_si128($a, (2 * $num) as i32), _mm_srli_si128($b, ((L - $num) * 2) as i32)) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
simd_sr_i16 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + _mm_or_si128(_mm_slli_si128($a, ((L - $num) * 2) as i32), _mm_srli_si128($b, (2 * $num) as i32)) + } + }; +} + +// hardcoded to STEP = 8 +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_step(a: Simd, b: Simd) -> Simd { + a +} + +// shift in zeros +macro_rules! simd_sllz_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num < L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + _mm_slli_si128($a, ($num * 2) as i32) + } + }; +} + +// broadcast last 16-bit element to the whole vector +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_broadcasthi_i16(v: Simd) -> Simd { + let v = _mm_shufflehi_epi16(v, 0b11111111); + _mm_shuffle_epi32(v, 0b11111111) +} + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_slow_extract_i16(v: Simd, i: usize) -> i16 { + debug_assert!(i < L); + + #[repr(align(16))] + struct A([i16; L]); + + let mut a = A([0i16; L]); + simd_store(a.0.as_mut_ptr() as *mut Simd, v); + *a.0.as_ptr().add(i) +} + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_hmax_i16(v: Simd) -> i16 { + let mut v2 = _mm_max_epi16(v, _mm_srli_si128(v, 2)); + v2 = _mm_max_epi16(v2, _mm_srli_si128(v2, 4)); + v2 = _mm_max_epi16(v2, _mm_srli_si128(v2, 8)); + simd_extract_i16!(v2, 0) +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
simd_prefix_hadd_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + let mut v = _mm_subs_epi16($a, _mm_set1_epi16(ZERO)); + if $num > 4 { + v = _mm_adds_epi16(v, _mm_srli_si128(v, 8)); + } + if $num > 2 { + v = _mm_adds_epi16(v, _mm_srli_si128(v, 4)); + } + if $num > 1 { + v = _mm_adds_epi16(v, _mm_srli_si128(v, 2)); + } + simd_extract_i16!(v, 0) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! simd_prefix_hmax_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + let mut v = $a; + if $num > 4 { + v = _mm_max_epi16(v, _mm_srli_si128(v, 8)); + } + if $num > 2 { + v = _mm_max_epi16(v, _mm_srli_si128(v, 4)); + } + if $num > 1 { + v = _mm_max_epi16(v, _mm_srli_si128(v, 2)); + } + simd_extract_i16!(v, 0) + } + }; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! 
simd_suffix_hmax_i16 { + ($a:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + let mut v = $a; + if $num > 4 { + v = _mm_max_epi16(v, _mm_slli_si128(v, 8)); + } + if $num > 2 { + v = _mm_max_epi16(v, _mm_slli_si128(v, 4)); + } + if $num > 1 { + v = _mm_max_epi16(v, _mm_slli_si128(v, 2)); + } + simd_extract_i16!(v, 7) + } + }; +} + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn simd_hargmax_i16(v: Simd, max: i16) -> usize { + let v2 = _mm_cmpeq_epi16(v, _mm_set1_epi16(max)); + (simd_movemask_i8(v2).trailing_zeros() as usize) / 2 +} + +#[target_feature(enable = "sse2")] +#[inline] +#[allow(non_snake_case)] +#[allow(dead_code)] +pub unsafe fn simd_naive_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _gap_cost_lane: PrefixScanConsts) -> Simd { + let mut curr = R_max; + + for _i in 0..(L - 1) { + let prev = curr; + curr = simd_sl_i16!(curr, _mm_setzero_si128(), 1); + curr = _mm_adds_epi16(curr, gap_cost); + curr = _mm_max_epi16(curr, prev); + } + + curr +} + +pub type PrefixScanConsts = (); + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn get_prefix_scan_consts(gap: Simd) -> (Simd, PrefixScanConsts) { + let mut shift1 = simd_sllz_i16!(gap, 1); + shift1 = _mm_adds_epi16(shift1, gap); + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = _mm_adds_epi16(shift2, shift1); + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = _mm_adds_epi16(shift4, shift2); + + (shift4, ()) +} + +#[target_feature(enable = "sse2")] +#[inline] +#[allow(non_snake_case)] +pub unsafe fn simd_prefix_scan_i16(R_max: Simd, gap_cost: Simd, _gap_cost_lane: PrefixScanConsts) -> Simd { + // Optimized prefix add and max for every eight elements + // Note: be very careful to avoid lane-crossing which has a large penalty. 
+ // Also, make sure to use as little registers as possible to avoid + // memory loads (latencies really matter since this is critical path). + // Keep the CPU busy with instructions! + // Note: relies on min score = 0 for speed! + let mut shift1 = simd_sllz_i16!(R_max, 1); + shift1 = _mm_adds_epi16(shift1, gap_cost); + shift1 = _mm_max_epi16(R_max, shift1); + let mut shift2 = simd_sllz_i16!(shift1, 2); + shift2 = _mm_adds_epi16(shift2, _mm_slli_epi16(gap_cost, 1)); + shift2 = _mm_max_epi16(shift1, shift2); + let mut shift4 = simd_sllz_i16!(shift2, 4); + shift4 = _mm_adds_epi16(shift4, _mm_slli_epi16(gap_cost, 2)); + shift4 = _mm_max_epi16(shift2, shift4); + + shift4 +} + +// lookup two 128-bit tables +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_lookup2_i16(lut1: LutSimd, lut2: LutSimd, v: HalfSimd) -> Simd { + #[repr(align(16))] + struct A([i8; L * HALFSIMD_MUL]); + #[repr(align(16))] + struct T([i8; L * HALFSIMD_MUL * 2]); + #[repr(align(16))] + struct A2([i16; L]); + + let mut idx = A([0i8; L * HALFSIMD_MUL]); + simd_store(idx.0.as_mut_ptr() as *mut HalfSimd, v); + let idx_ptr = idx.0.as_ptr(); + + let mut table = T([0i8; L * HALFSIMD_MUL * 2]); + simd_store(table.0.as_mut_ptr() as *mut LutSimd, lut1); + simd_store(table.0.as_mut_ptr().add(L * HALFSIMD_MUL) as *mut LutSimd, lut2); + let table_ptr = table.0.as_ptr(); + + let mut res = A2([0i16; L]); + + let mut i = 0; + while i < L { + *res.0.as_mut_ptr().add(i) = *table_ptr.add(*idx_ptr.add(i) as usize) as i16; + i += 1; + } + + simd_load(res.0.as_ptr() as *const Simd) +} + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_lookup1_i16(lut: LutSimd, v: HalfSimd) -> Simd { + #[repr(align(16))] + struct A([i8; L * HALFSIMD_MUL]); + #[repr(align(16))] + struct A2([i16; L]); + + let mut idx = A([0i8; L * HALFSIMD_MUL]); + simd_store(idx.0.as_mut_ptr() as *mut HalfSimd, v); + let idx_ptr = idx.0.as_ptr(); + + let mut table = A([0i8; L * HALFSIMD_MUL]); + 
simd_store(table.0.as_mut_ptr() as *mut LutSimd, lut); + let table_ptr = table.0.as_ptr(); + + let mut res = A2([0i16; L]); + + let mut i = 0; + while i < L { + *res.0.as_mut_ptr().add(i) = *table_ptr.add((*idx_ptr.add(i) as usize) & 0b1111) as i16; + i += 1; + } + + simd_load(res.0.as_ptr() as *const Simd) +} + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_lookup_bytes_i16(match_scores: HalfSimd, mismatch_scores: HalfSimd, a: HalfSimd, b: HalfSimd) -> Simd { + let mask = _mm_cmpeq_epi8(a, b); + let c = simd_blend_i8(mismatch_scores, match_scores, mask); + + #[repr(align(16))] + struct A([i8; L * HALFSIMD_MUL]); + #[repr(align(16))] + struct A2([i16; L]); + + let mut a = A([0i8; L * HALFSIMD_MUL]); + simd_store(a.0.as_mut_ptr() as *mut HalfSimd, c); + let a_ptr = a.0.as_ptr(); + + let mut res = A2([0i16; L]); + + let mut i = 0; + while i < L { + *res.0.as_mut_ptr().add(i) = *a_ptr.add(i) as i16; + i += 1; + } + + simd_load(res.0.as_ptr() as *const Simd) +} + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_load(ptr: *const HalfSimd) -> HalfSimd { _mm_load_si128(ptr) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_loadu(ptr: *const HalfSimd) -> HalfSimd { _mm_loadu_si128(ptr) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn lutsimd_load(ptr: *const LutSimd) -> LutSimd { _mm_load_si128(ptr) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn lutsimd_loadu(ptr: *const LutSimd) -> LutSimd { _mm_loadu_si128(ptr) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_store(ptr: *mut HalfSimd, a: HalfSimd) { _mm_store_si128(ptr, a) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_sub_i8(a: HalfSimd, b: HalfSimd) -> HalfSimd { _mm_sub_epi8(a, b) } + +#[target_feature(enable = "sse2")] +#[inline] +pub unsafe fn halfsimd_set1_i8(v: i8) -> HalfSimd { _mm_set1_epi8(v) } + +#[target_feature(enable = "sse2")] +#[inline] +pub 
unsafe fn halfsimd_get_idx(i: usize) -> usize { i + i / L * L } + +#[macro_export] +#[doc(hidden)] +macro_rules! halfsimd_sr_i8 { + ($a:expr, $b:expr, $num:expr) => { + { + debug_assert!($num <= L); + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + let mask = _mm_srli_si128(_mm_set1_epi32(-1i32), (L + $num) as i32); + _mm_or_si128(_mm_slli_si128($a, (L - $num) as i32), _mm_and_si128(_mm_srli_si128($b, $num as i32), mask)) + } + }; +} + +#[target_feature(enable = "sse2")] +#[allow(dead_code)] +pub unsafe fn simd_dbg_i16(v: Simd) { + #[repr(align(16))] + struct A([i16; L]); + + let mut a = A([0i16; L]); + simd_store(a.0.as_mut_ptr() as *mut Simd, v); + + for i in (0..a.0.len()).rev() { + print!("{:6} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "sse2")] +#[allow(dead_code)] +pub unsafe fn halfsimd_dbg_i8(v: HalfSimd) { + #[repr(align(16))] + struct A([i8; L * HALFSIMD_MUL]); + + let mut a = A([0i8; L * HALFSIMD_MUL]); + halfsimd_store(a.0.as_mut_ptr() as *mut HalfSimd, v); + + for i in (0..a.0.len()).rev() { + print!("{:3} ", a.0[i]); + } + println!(); +} + +#[target_feature(enable = "sse2")] +#[allow(dead_code)] +pub unsafe fn simd_assert_vec_eq(a: Simd, b: [i16; L]) { + #[repr(align(16))] + struct A([i16; L]); + + let mut arr = A([0i16; L]); + simd_store(arr.0.as_mut_ptr() as *mut Simd, a); + assert_eq!(arr.0, b); +} + +#[target_feature(enable = "sse2")] +#[allow(dead_code)] +pub unsafe fn halfsimd_assert_vec_eq(a: HalfSimd, b: [i8; L]) { + #[repr(align(16))] + struct A([i8; L * HALFSIMD_MUL]); + + let mut arr = A([0i8; L * HALFSIMD_MUL]); + halfsimd_store(arr.0.as_mut_ptr() as *mut HalfSimd, a); + assert_eq!(&arr.0[..L], b); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_smoke() { + #[target_feature(enable = "sse2")] + unsafe fn inner() { + #[repr(align(16))] + struct A([i16; L]); + + let test = A([1, 2, 3, 4, 5, 6, 7, 8]); + let test_rev = A([8, 7, 6, 5, 4, 
3, 2, 1]); + let test_mask = A([0, -1, 0, -1, 0, -1, 0, -1]); + let vec0 = simd_load(test.0.as_ptr() as *const Simd); + let vec0_rev = simd_load(test_rev.0.as_ptr() as *const Simd); + let vec0_mask = simd_load(test_mask.0.as_ptr() as *const Simd); + + let mut vec1 = simd_sl_i16!(vec0, vec0, 1); + simd_assert_vec_eq(vec1, [8, 1, 2, 3, 4, 5, 6, 7]); + + vec1 = simd_sr_i16!(vec0, vec0, 1); + simd_assert_vec_eq(vec1, [2, 3, 4, 5, 6, 7, 8, 1]); + + vec1 = simd_adds_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [2, 4, 6, 8, 10, 12, 14, 16]); + + vec1 = simd_subs_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, 0, 0, 0, 0]); + + vec1 = simd_max_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [8, 7, 6, 5, 5, 6, 7, 8]); + + vec1 = simd_cmpeq_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, 0, 0, 0, 0]); + + vec1 = simd_cmpeq_i16(vec0, vec0); + simd_assert_vec_eq(vec1, [-1, -1, -1, -1, -1, -1, -1, -1]); + + vec1 = simd_cmpgt_i16(vec0, vec0_rev); + simd_assert_vec_eq(vec1, [0, 0, 0, 0, -1, -1, -1, -1]); + + vec1 = simd_blend_i8(vec0, vec0_rev, vec0_mask); + simd_assert_vec_eq(vec1, [1, 7, 3, 5, 5, 3, 7, 1]); + + let mut val = simd_extract_i16!(vec0, 0); + assert_eq!(val, 1); + + val = simd_slow_extract_i16(vec0, 0); + assert_eq!(val, 1); + + vec1 = simd_insert_i16!(vec0, 0, 2); + simd_assert_vec_eq(vec1, [1, 2, 0, 4, 5, 6, 7, 8]); + + let val1 = simd_movemask_i8(vec0_mask); + assert_eq!(val1, 0b1100110011001100); + + vec1 = simd_sllz_i16!(vec0, 1); + simd_assert_vec_eq(vec1, [0, 1, 2, 3, 4, 5, 6, 7]); + + vec1 = simd_broadcasthi_i16(vec0); + simd_assert_vec_eq(vec1, [8, 8, 8, 8, 8, 8, 8, 8]); + + val = simd_hmax_i16(vec0); + assert_eq!(val, 8); + + let zeros = simd_set1_i16(ZERO); + val = simd_prefix_hadd_i16!(simd_adds_i16(vec0, zeros), 4); + assert_eq!(val, 10); + + val = simd_prefix_hmax_i16!(vec0, 4); + assert_eq!(val, 4); + + val = simd_suffix_hmax_i16!(vec0, 4); + assert_eq!(val, 8); + + let val2 = simd_hargmax_i16(vec0, 4); + assert_eq!(val2, 3); + } 
+ unsafe { inner(); } + } + + #[test] + fn test_prefix_scan() { + #[target_feature(enable = "sse2")] + unsafe fn inner() { + #[repr(align(16))] + struct A([i16; L]); + + let vec = A([8, 9, 10, 15, 12, 13, 14, 11]); + let gap = simd_set1_i16(0); + let (_, consts) = get_prefix_scan_consts(gap); + let res = simd_prefix_scan_i16(simd_load(vec.0.as_ptr() as *const Simd), gap, consts); + simd_assert_vec_eq(res, [8, 9, 10, 15, 15, 15, 15, 15]); + + let vec = A([8, 9, 10, 15, 12, 13, 14, 11]); + let gap = simd_set1_i16(-1); + let (_, consts) = get_prefix_scan_consts(gap); + let res = simd_prefix_scan_i16(simd_load(vec.0.as_ptr() as *const Simd), gap, consts); + simd_assert_vec_eq(res, [8, 9, 10, 15, 14, 13, 14, 13]); + } + unsafe { inner(); } + } +} diff --git a/lib/block-aligner/vis/block_aligner_accuracy_vis.ipynb b/lib/block-aligner/vis/block_aligner_accuracy_vis.ipynb new file mode 100644 index 000000000..359985b6b --- /dev/null +++ b/lib/block-aligner/vis/block_aligner_accuracy_vis.ipynb @@ -0,0 +1,5889 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Block Aligner Accuracy Analysis and Visualizations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook contains code for collecting, cleaning, and analyzing data produced by block aligner's experiments.\n", + "\n", + "To run this, you will need to install all the libraries imported below, along with [altair-saver](https://github.com/altair-viz/altair_saver) and [altair-data-server](https://github.com/altair-viz/altair_data_server), which has some extra dependencies for PDF saving.\n", + "\n", + "Run each cell one by one to reproduce the experiments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:11.050100Z", + "iopub.status.busy": "2023-02-27T11:47:11.049343Z", + "iopub.status.idle": "2023-02-27T11:47:11.914583Z", + "shell.execute_reply": "2023-02-27T11:47:11.915081Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DataTransformerRegistry.enable('data_server')" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import altair as alt\n", + "from altair_saver import save\n", + "from altair import datum\n", + "import pandas as pd\n", + "from io import StringIO\n", + "\n", + "alt.data_transformers.enable(\"data_server\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:11.918767Z", + "iopub.status.busy": "2023-02-27T11:47:11.918278Z", + "iopub.status.idle": "2023-02-27T11:47:11.920120Z", + "shell.execute_reply": "2023-02-27T11:47:11.920612Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "def csv_to_pandas(csv, d = \"\\\\s*,\\\\s*\", t = None):\n", + " s = StringIO(\"\\n\".join(csv))\n", + " data = pd.read_csv(s, sep = d, thousands = t, comment = \"#\", engine = \"python\")\n", + " return data\n", + "\n", + "def file_to_pandas(path):\n", + " return pd.read_csv(path, sep = \"\\\\s*,\\\\s*\", comment = \"#\", engine = \"python\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Block Aligner Image" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:11.923819Z", + "iopub.status.busy": "2023-02-27T11:47:11.923340Z", + "iopub.status.idle": "2023-02-27T11:47:20.960034Z", + "shell.execute_reply": "2023-02-27T11:47:20.960736Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": 
[ + "path: vis/block_img1.png, img size: 660 x 549\r\n", + "path: vis/block_img2.png, img size: 384 x 428\r\n" + ] + } + ], + "source": [ + "!cd .. && cargo run --example block_img --release --features simd_avx2 --quiet -- vis/block_img1.png vis/block_img2.png" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Data Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:20.965547Z", + "iopub.status.busy": "2023-02-27T11:47:20.964993Z", + "iopub.status.idle": "2023-02-27T11:51:13.096340Z", + "shell.execute_reply": "2023-02-27T11:51:13.095918Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['',\n", + " 'len, k, insert, iter, max size, wrong, wrong % error, wrong min, wrong max',\n", + " '',\n", + " '',\n", + " '100, 10, false, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 10, false, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 10, true, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 10, true, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 20, false, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 20, false, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 20, true, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 20, true, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 50, false, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 50, false, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 50, true, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '100, 50, true, 
100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 100, false, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 100, false, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 100, true, 100, 32, 99, 0.6398763123199956, 7, 1001',\n", + " '',\n", + " '',\n", + " '1000, 100, true, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 200, false, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 200, false, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 200, true, 100, 32, 97, 0.7973371403880799, 11, 824',\n", + " '',\n", + " '',\n", + " '1000, 200, true, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 500, false, 100, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 500, false, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '1000, 500, true, 100, 32, 96, 1.6462162143616295, 1, 511',\n", + " '',\n", + " '',\n", + " '1000, 500, true, 100, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 1000, false, 10, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 1000, false, 10, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 1000, true, 10, 32, 10, 0.7274038361407913, 2402, 9940',\n", + " '',\n", + " '',\n", + " '10000, 1000, true, 10, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 2000, false, 10, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 2000, false, 10, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 2000, true, 10, 32, 10, 0.8624608080671005, 2201, 7736',\n", + " '',\n", + " '',\n", + " '10000, 2000, true, 10, 2048, 5, 0.001297233522793133, 1, 14',\n", + " '',\n", + " '',\n", + " '10000, 5000, false, 
10, 32, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 5000, false, 10, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '10000, 5000, true, 10, 32, 10, 1.9540110615043058, 365, 4091',\n", + " '',\n", + " '',\n", + " '10000, 5000, true, 10, 2048, 0, NaN, 2147483647, -2147483648',\n", + " '',\n", + " '',\n", + " '# total: 2520, wrong: 327',\n", + " '# Done!']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo run --example accuracy --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:51:13.099412Z", + "iopub.status.busy": "2023-02-27T11:51:13.099020Z", + "iopub.status.idle": "2023-02-27T11:51:13.113831Z", + "shell.execute_reply": "2023-02-27T11:51:13.114111Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lenkinsertitermax sizewrongwrong % errorwrong minwrong max
010010False100320NaN2147483647-2147483648
110010False10020480NaN2147483647-2147483648
210010True100320NaN2147483647-2147483648
310010True10020480NaN2147483647-2147483648
410020False100320NaN2147483647-2147483648
510020False10020480NaN2147483647-2147483648
610020True100320NaN2147483647-2147483648
710020True10020480NaN2147483647-2147483648
810050False100320NaN2147483647-2147483648
910050False10020480NaN2147483647-2147483648
1010050True100320NaN2147483647-2147483648
1110050True10020480NaN2147483647-2147483648
121000100False100320NaN2147483647-2147483648
131000100False10020480NaN2147483647-2147483648
141000100True10032990.63987671001
151000100True10020480NaN2147483647-2147483648
161000200False100320NaN2147483647-2147483648
171000200False10020480NaN2147483647-2147483648
181000200True10032970.79733711824
191000200True10020480NaN2147483647-2147483648
201000500False100320NaN2147483647-2147483648
211000500False10020480NaN2147483647-2147483648
221000500True10032961.6462161511
231000500True10020480NaN2147483647-2147483648
24100001000False10320NaN2147483647-2147483648
25100001000False1020480NaN2147483647-2147483648
26100001000True1032100.72740424029940
27100001000True1020480NaN2147483647-2147483648
28100002000False10320NaN2147483647-2147483648
29100002000False1020480NaN2147483647-2147483648
30100002000True1032100.86246122017736
31100002000True10204850.001297114
32100005000False10320NaN2147483647-2147483648
33100005000False1020480NaN2147483647-2147483648
34100005000True1032101.9540113654091
35100005000True1020480NaN2147483647-2147483648
\n", + "
" + ], + "text/plain": [ + " len k insert iter max size wrong wrong % error wrong min \\\n", + "0 100 10 False 100 32 0 NaN 2147483647 \n", + "1 100 10 False 100 2048 0 NaN 2147483647 \n", + "2 100 10 True 100 32 0 NaN 2147483647 \n", + "3 100 10 True 100 2048 0 NaN 2147483647 \n", + "4 100 20 False 100 32 0 NaN 2147483647 \n", + "5 100 20 False 100 2048 0 NaN 2147483647 \n", + "6 100 20 True 100 32 0 NaN 2147483647 \n", + "7 100 20 True 100 2048 0 NaN 2147483647 \n", + "8 100 50 False 100 32 0 NaN 2147483647 \n", + "9 100 50 False 100 2048 0 NaN 2147483647 \n", + "10 100 50 True 100 32 0 NaN 2147483647 \n", + "11 100 50 True 100 2048 0 NaN 2147483647 \n", + "12 1000 100 False 100 32 0 NaN 2147483647 \n", + "13 1000 100 False 100 2048 0 NaN 2147483647 \n", + "14 1000 100 True 100 32 99 0.639876 7 \n", + "15 1000 100 True 100 2048 0 NaN 2147483647 \n", + "16 1000 200 False 100 32 0 NaN 2147483647 \n", + "17 1000 200 False 100 2048 0 NaN 2147483647 \n", + "18 1000 200 True 100 32 97 0.797337 11 \n", + "19 1000 200 True 100 2048 0 NaN 2147483647 \n", + "20 1000 500 False 100 32 0 NaN 2147483647 \n", + "21 1000 500 False 100 2048 0 NaN 2147483647 \n", + "22 1000 500 True 100 32 96 1.646216 1 \n", + "23 1000 500 True 100 2048 0 NaN 2147483647 \n", + "24 10000 1000 False 10 32 0 NaN 2147483647 \n", + "25 10000 1000 False 10 2048 0 NaN 2147483647 \n", + "26 10000 1000 True 10 32 10 0.727404 2402 \n", + "27 10000 1000 True 10 2048 0 NaN 2147483647 \n", + "28 10000 2000 False 10 32 0 NaN 2147483647 \n", + "29 10000 2000 False 10 2048 0 NaN 2147483647 \n", + "30 10000 2000 True 10 32 10 0.862461 2201 \n", + "31 10000 2000 True 10 2048 5 0.001297 1 \n", + "32 10000 5000 False 10 32 0 NaN 2147483647 \n", + "33 10000 5000 False 10 2048 0 NaN 2147483647 \n", + "34 10000 5000 True 10 32 10 1.954011 365 \n", + "35 10000 5000 True 10 2048 0 NaN 2147483647 \n", + "\n", + " wrong max \n", + "0 -2147483648 \n", + "1 -2147483648 \n", + "2 -2147483648 \n", + "3 -2147483648 \n", + "4 
-2147483648 \n", + "5 -2147483648 \n", + "6 -2147483648 \n", + "7 -2147483648 \n", + "8 -2147483648 \n", + "9 -2147483648 \n", + "10 -2147483648 \n", + "11 -2147483648 \n", + "12 -2147483648 \n", + "13 -2147483648 \n", + "14 1001 \n", + "15 -2147483648 \n", + "16 -2147483648 \n", + "17 -2147483648 \n", + "18 824 \n", + "19 -2147483648 \n", + "20 -2147483648 \n", + "21 -2147483648 \n", + "22 511 \n", + "23 -2147483648 \n", + "24 -2147483648 \n", + "25 -2147483648 \n", + "26 9940 \n", + "27 -2147483648 \n", + "28 -2147483648 \n", + "29 -2147483648 \n", + "30 7736 \n", + "31 14 \n", + "32 -2147483648 \n", + "33 -2147483648 \n", + "34 4091 \n", + "35 -2147483648 " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:51:13.116975Z", + "iopub.status.busy": "2023-02-27T11:51:13.116495Z", + "iopub.status.idle": "2023-02-27T11:51:13.125020Z", + "shell.execute_reply": "2023-02-27T11:51:13.124063Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "data[\"% wrong\"] = data[\"wrong\"] / data[\"iter\"]\n", + "data[\"k %\"] = data[\"k\"] / data[\"len\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Error Rate on Random DNA Sequences with 10% Insert" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:51:13.138761Z", + "iopub.status.busy": "2023-02-27T11:51:13.138187Z", + "iopub.status.idle": "2023-02-27T11:51:14.456344Z", + "shell.execute_reply": "2023-02-27T11:51:14.456756Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_point(opacity = 1, filled = True).encode(\n", + " x = alt.X(\"% wrong\", axis = alt.Axis(format = \"%\")),\n", + " y = alt.Y(\"k %:N\", axis = alt.Axis(format = \"~%\", grid = True)),\n", + " color = \"max size:N\",\n", + " shape = \"max size:N\",\n", + " row = alt.Row(\"len:N\", header = alt.Header(title = \"length\"))\n", + ").transform_filter(\n", + " datum.insert == True\n", + ").properties(\n", + " width = 100,\n", + " height = 50\n", + ")\n", + "save(c, \"random_dna_accuracy.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Uniclust 30 Data Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:51:14.460237Z", + "iopub.status.busy": "2023-02-27T11:51:14.459790Z", + "iopub.status.idle": "2023-02-27T11:54:08.954095Z", + "shell.execute_reply": "2023-02-27T11:54:08.954544Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['# seq identity is lower bound (inclusive)',\n", + " 'dataset, size, seq identity, count, wrong, wrong % error',\n", + " 'uc30_0.95, 32-32, 0, 0, 0, NaN',\n", + " 'uc30_0.95, 32-32, 0.1, 0, 0, NaN',\n", + " 'uc30_0.95, 32-32, 0.2, 14, 0, NaN',\n", + " 'uc30_0.95, 32-32, 0.3, 873, 45, 0.24234488792701261',\n", + " 'uc30_0.95, 32-32, 0.4, 1166, 70, 0.23740147684159452',\n", + " 'uc30_0.95, 32-32, 0.5, 951, 39, 0.22458752668094342',\n", + " 'uc30_0.95, 32-32, 0.6, 923, 30, 0.2423639248577639',\n", + " 'uc30_0.95, 32-32, 0.7, 789, 19, 0.23070399690337745',\n", + " 'uc30_0.95, 32-32, 0.8, 747, 9, 0.2314009291547539',\n", + " 'uc30_0.95, 32-32, 0.9, 1537, 18, 0.14197227628986994',\n", + " '',\n", + " '# total: 7000, wrong: 230, wrong % error: 0.22858669521170225, length avg: 329.554, length min: 22, 
length max: 8881, dp fraction: 0.3389349961353417',\n", + " '',\n", + " 'uc30_0.95, 32-256, 0, 0, 0, NaN',\n", + " 'uc30_0.95, 32-256, 0.1, 0, 0, NaN',\n", + " 'uc30_0.95, 32-256, 0.2, 14, 0, NaN',\n", + " 'uc30_0.95, 32-256, 0.3, 873, 10, 0.022419704677810615',\n", + " 'uc30_0.95, 32-256, 0.4, 1166, 11, 0.0640908027337777',\n", + " 'uc30_0.95, 32-256, 0.5, 951, 5, 0.06678752297229766',\n", + " 'uc30_0.95, 32-256, 0.6, 923, 9, 0.04391515678016153',\n", + " 'uc30_0.95, 32-256, 0.7, 789, 4, 0.023700475250152283',\n", + " 'uc30_0.95, 32-256, 0.8, 747, 1, 0.0006338028169014084',\n", + " 'uc30_0.95, 32-256, 0.9, 1537, 4, 0.05817779039335834',\n", + " '',\n", + " '# total: 7000, wrong: 44, wrong % error: 0.04514810836644425, length avg: 329.554, length min: 22, length max: 8881, dp fraction: 0.3654793322322297',\n", + " '',\n", + " 'uc30_0.95, 256-256, 0, 0, 0, NaN',\n", + " 'uc30_0.95, 256-256, 0.1, 0, 0, NaN',\n", + " 'uc30_0.95, 256-256, 0.2, 14, 0, NaN',\n", + " 'uc30_0.95, 256-256, 0.3, 873, 1, 0.029182542431589884',\n", + " 'uc30_0.95, 256-256, 0.4, 1166, 2, 0.015912013783823824',\n", + " 'uc30_0.95, 256-256, 0.5, 951, 0, NaN',\n", + " 'uc30_0.95, 256-256, 0.6, 923, 1, 0.14847403884264765',\n", + " 'uc30_0.95, 256-256, 0.7, 789, 0, NaN',\n", + " 'uc30_0.95, 256-256, 0.8, 747, 0, NaN',\n", + " 'uc30_0.95, 256-256, 0.9, 1537, 0, NaN',\n", + " '',\n", + " '# total: 7000, wrong: 4, wrong % error: 0.052370152210471296, length avg: 329.554, length min: 22, length max: 8881, dp fraction: 4.221035489298381',\n", + " '',\n", + " 'uc30, 32-32, 0, 0, 0, NaN',\n", + " 'uc30, 32-32, 0.1, 0, 0, NaN',\n", + " 'uc30, 32-32, 0.2, 192, 82, 0.9275327360430471',\n", + " 'uc30, 32-32, 0.3, 1170, 288, 0.5938895288958876',\n", + " 'uc30, 32-32, 0.4, 1182, 272, 0.5563610451866828',\n", + " 'uc30, 32-32, 0.5, 1160, 229, 0.5338615587300842',\n", + " 'uc30, 32-32, 0.6, 1044, 192, 0.5196576365562074',\n", + " 'uc30, 32-32, 0.7, 847, 103, 0.4959494251346888',\n", + " 'uc30, 32-32, 0.8, 694, 
86, 0.47644760063092073',\n", + " 'uc30, 32-32, 0.9, 711, 19, 0.25554500500876537',\n", + " '',\n", + " '# total: 7000, wrong: 1271, wrong % error: 0.564413277478051, length avg: 302.51342857142856, length min: 25, length max: 8293, dp fraction: 0.35536869224678297',\n", + " '',\n", + " 'uc30, 32-256, 0, 0, 0, NaN',\n", + " 'uc30, 32-256, 0.1, 0, 0, NaN',\n", + " 'uc30, 32-256, 0.2, 192, 12, 0.14227202149727955',\n", + " 'uc30, 32-256, 0.3, 1170, 49, 0.12495803143160221',\n", + " 'uc30, 32-256, 0.4, 1182, 49, 0.12113074192604727',\n", + " 'uc30, 32-256, 0.5, 1160, 35, 0.15550386498366273',\n", + " 'uc30, 32-256, 0.6, 1044, 32, 0.11973947097066927',\n", + " 'uc30, 32-256, 0.7, 847, 18, 0.06996993682039082',\n", + " 'uc30, 32-256, 0.8, 694, 21, 0.08632626264571577',\n", + " 'uc30, 32-256, 0.9, 711, 8, 0.045885200156420294',\n", + " '',\n", + " '# total: 7000, wrong: 224, wrong % error: 0.11821118070339384, length avg: 302.51342857142856, length min: 25, length max: 8293, dp fraction: 0.44600432314797367',\n", + " '',\n", + " 'uc30, 256-256, 0, 0, 0, NaN',\n", + " 'uc30, 256-256, 0.1, 0, 0, NaN',\n", + " 'uc30, 256-256, 0.2, 192, 0, NaN',\n", + " 'uc30, 256-256, 0.3, 1170, 7, 0.3835458905002951',\n", + " 'uc30, 256-256, 0.4, 1182, 4, 0.7080088654647194',\n", + " 'uc30, 256-256, 0.5, 1160, 7, 0.4602842002549759',\n", + " 'uc30, 256-256, 0.6, 1044, 5, 0.3023075188513019',\n", + " 'uc30, 256-256, 0.7, 847, 2, 0.23043440955768113',\n", + " 'uc30, 256-256, 0.8, 694, 1, 0.0544986745672852',\n", + " 'uc30, 256-256, 0.9, 711, 1, 0.0690854119425548',\n", + " '',\n", + " '# total: 7000, wrong: 27, wrong % error: 0.4012902443343513, length avg: 302.51342857142856, length min: 25, length max: 8293, dp fraction: 4.391380248306236',\n", + " '',\n", + " '# Done!']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. 
&& cargo run --example uc_accuracy --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:08.970472Z", + "iopub.status.busy": "2023-02-27T11:54:08.969482Z", + "iopub.status.idle": "2023-02-27T11:54:08.972815Z", + "shell.execute_reply": "2023-02-27T11:54:08.973259Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetsizeseq identitycountwrongwrong % error
0uc30_0.9532-320.000NaN
1uc30_0.9532-320.100NaN
2uc30_0.9532-320.2140NaN
3uc30_0.9532-320.3873450.242345
4uc30_0.9532-320.41166700.237401
5uc30_0.9532-320.5951390.224588
6uc30_0.9532-320.6923300.242364
7uc30_0.9532-320.7789190.230704
8uc30_0.9532-320.874790.231401
9uc30_0.9532-320.91537180.141972
10uc30_0.9532-2560.000NaN
11uc30_0.9532-2560.100NaN
12uc30_0.9532-2560.2140NaN
13uc30_0.9532-2560.3873100.022420
14uc30_0.9532-2560.41166110.064091
15uc30_0.9532-2560.595150.066788
16uc30_0.9532-2560.692390.043915
17uc30_0.9532-2560.778940.023700
18uc30_0.9532-2560.874710.000634
19uc30_0.9532-2560.9153740.058178
20uc30_0.95256-2560.000NaN
21uc30_0.95256-2560.100NaN
22uc30_0.95256-2560.2140NaN
23uc30_0.95256-2560.387310.029183
24uc30_0.95256-2560.4116620.015912
25uc30_0.95256-2560.59510NaN
26uc30_0.95256-2560.692310.148474
27uc30_0.95256-2560.77890NaN
28uc30_0.95256-2560.87470NaN
29uc30_0.95256-2560.915370NaN
30uc3032-320.000NaN
31uc3032-320.100NaN
32uc3032-320.2192820.927533
33uc3032-320.311702880.593890
34uc3032-320.411822720.556361
35uc3032-320.511602290.533862
36uc3032-320.610441920.519658
37uc3032-320.78471030.495949
38uc3032-320.8694860.476448
39uc3032-320.9711190.255545
40uc3032-2560.000NaN
41uc3032-2560.100NaN
42uc3032-2560.2192120.142272
43uc3032-2560.31170490.124958
44uc3032-2560.41182490.121131
45uc3032-2560.51160350.155504
46uc3032-2560.61044320.119739
47uc3032-2560.7847180.069970
48uc3032-2560.8694210.086326
49uc3032-2560.971180.045885
50uc30256-2560.000NaN
51uc30256-2560.100NaN
52uc30256-2560.21920NaN
53uc30256-2560.3117070.383546
54uc30256-2560.4118240.708009
55uc30256-2560.5116070.460284
56uc30256-2560.6104450.302308
57uc30256-2560.784720.230434
58uc30256-2560.869410.054499
59uc30256-2560.971110.069085
\n", + "
" + ], + "text/plain": [ + " dataset size seq identity count wrong wrong % error\n", + "0 uc30_0.95 32-32 0.0 0 0 NaN\n", + "1 uc30_0.95 32-32 0.1 0 0 NaN\n", + "2 uc30_0.95 32-32 0.2 14 0 NaN\n", + "3 uc30_0.95 32-32 0.3 873 45 0.242345\n", + "4 uc30_0.95 32-32 0.4 1166 70 0.237401\n", + "5 uc30_0.95 32-32 0.5 951 39 0.224588\n", + "6 uc30_0.95 32-32 0.6 923 30 0.242364\n", + "7 uc30_0.95 32-32 0.7 789 19 0.230704\n", + "8 uc30_0.95 32-32 0.8 747 9 0.231401\n", + "9 uc30_0.95 32-32 0.9 1537 18 0.141972\n", + "10 uc30_0.95 32-256 0.0 0 0 NaN\n", + "11 uc30_0.95 32-256 0.1 0 0 NaN\n", + "12 uc30_0.95 32-256 0.2 14 0 NaN\n", + "13 uc30_0.95 32-256 0.3 873 10 0.022420\n", + "14 uc30_0.95 32-256 0.4 1166 11 0.064091\n", + "15 uc30_0.95 32-256 0.5 951 5 0.066788\n", + "16 uc30_0.95 32-256 0.6 923 9 0.043915\n", + "17 uc30_0.95 32-256 0.7 789 4 0.023700\n", + "18 uc30_0.95 32-256 0.8 747 1 0.000634\n", + "19 uc30_0.95 32-256 0.9 1537 4 0.058178\n", + "20 uc30_0.95 256-256 0.0 0 0 NaN\n", + "21 uc30_0.95 256-256 0.1 0 0 NaN\n", + "22 uc30_0.95 256-256 0.2 14 0 NaN\n", + "23 uc30_0.95 256-256 0.3 873 1 0.029183\n", + "24 uc30_0.95 256-256 0.4 1166 2 0.015912\n", + "25 uc30_0.95 256-256 0.5 951 0 NaN\n", + "26 uc30_0.95 256-256 0.6 923 1 0.148474\n", + "27 uc30_0.95 256-256 0.7 789 0 NaN\n", + "28 uc30_0.95 256-256 0.8 747 0 NaN\n", + "29 uc30_0.95 256-256 0.9 1537 0 NaN\n", + "30 uc30 32-32 0.0 0 0 NaN\n", + "31 uc30 32-32 0.1 0 0 NaN\n", + "32 uc30 32-32 0.2 192 82 0.927533\n", + "33 uc30 32-32 0.3 1170 288 0.593890\n", + "34 uc30 32-32 0.4 1182 272 0.556361\n", + "35 uc30 32-32 0.5 1160 229 0.533862\n", + "36 uc30 32-32 0.6 1044 192 0.519658\n", + "37 uc30 32-32 0.7 847 103 0.495949\n", + "38 uc30 32-32 0.8 694 86 0.476448\n", + "39 uc30 32-32 0.9 711 19 0.255545\n", + "40 uc30 32-256 0.0 0 0 NaN\n", + "41 uc30 32-256 0.1 0 0 NaN\n", + "42 uc30 32-256 0.2 192 12 0.142272\n", + "43 uc30 32-256 0.3 1170 49 0.124958\n", + "44 uc30 32-256 0.4 1182 49 0.121131\n", + "45 uc30 
32-256 0.5 1160 35 0.155504\n", + "46 uc30 32-256 0.6 1044 32 0.119739\n", + "47 uc30 32-256 0.7 847 18 0.069970\n", + "48 uc30 32-256 0.8 694 21 0.086326\n", + "49 uc30 32-256 0.9 711 8 0.045885\n", + "50 uc30 256-256 0.0 0 0 NaN\n", + "51 uc30 256-256 0.1 0 0 NaN\n", + "52 uc30 256-256 0.2 192 0 NaN\n", + "53 uc30 256-256 0.3 1170 7 0.383546\n", + "54 uc30 256-256 0.4 1182 4 0.708009\n", + "55 uc30 256-256 0.5 1160 7 0.460284\n", + "56 uc30 256-256 0.6 1044 5 0.302308\n", + "57 uc30 256-256 0.7 847 2 0.230434\n", + "58 uc30 256-256 0.8 694 1 0.054499\n", + "59 uc30 256-256 0.9 711 1 0.069085" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:08.989655Z", + "iopub.status.busy": "2023-02-27T11:54:08.988673Z", + "iopub.status.idle": "2023-02-27T11:54:08.992042Z", + "shell.execute_reply": "2023-02-27T11:54:08.992446Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetsizeseq identitycountwrongwrong % errorerror rate% error
0uc30_0.9532-320-10%00NaNNaNNaN
1uc30_0.9532-3210-20%00NaNNaNNaN
2uc30_0.9532-3220-30%140NaN0.000000NaN
3uc30_0.9532-3230-40%873450.2423450.0515460.242345
4uc30_0.9532-3240-50%1166700.2374010.0600340.237401
5uc30_0.9532-3250-60%951390.2245880.0410090.224588
6uc30_0.9532-3260-70%923300.2423640.0325030.242364
7uc30_0.9532-3270-80%789190.2307040.0240810.230704
8uc30_0.9532-3280-90%74790.2314010.0120480.231401
9uc30_0.9532-3290-100%1537180.1419720.0117110.141972
10uc30_0.9532-2560-10%00NaNNaNNaN
11uc30_0.9532-25610-20%00NaNNaNNaN
12uc30_0.9532-25620-30%140NaN0.000000NaN
13uc30_0.9532-25630-40%873100.0224200.0114550.022420
14uc30_0.9532-25640-50%1166110.0640910.0094340.064091
15uc30_0.9532-25650-60%95150.0667880.0052580.066788
16uc30_0.9532-25660-70%92390.0439150.0097510.043915
17uc30_0.9532-25670-80%78940.0237000.0050700.023700
18uc30_0.9532-25680-90%74710.0006340.0013390.000634
19uc30_0.9532-25690-100%153740.0581780.0026020.058178
20uc30_0.95256-2560-10%00NaNNaNNaN
21uc30_0.95256-25610-20%00NaNNaNNaN
22uc30_0.95256-25620-30%140NaN0.000000NaN
23uc30_0.95256-25630-40%87310.0291830.0011450.029183
24uc30_0.95256-25640-50%116620.0159120.0017150.015912
25uc30_0.95256-25650-60%9510NaN0.000000NaN
26uc30_0.95256-25660-70%92310.1484740.0010830.148474
27uc30_0.95256-25670-80%7890NaN0.000000NaN
28uc30_0.95256-25680-90%7470NaN0.000000NaN
29uc30_0.95256-25690-100%15370NaN0.000000NaN
30uc3032-320-10%00NaNNaNNaN
31uc3032-3210-20%00NaNNaNNaN
32uc3032-3220-30%192820.9275330.4270830.927533
33uc3032-3230-40%11702880.5938900.2461540.593890
34uc3032-3240-50%11822720.5563610.2301180.556361
35uc3032-3250-60%11602290.5338620.1974140.533862
36uc3032-3260-70%10441920.5196580.1839080.519658
37uc3032-3270-80%8471030.4959490.1216060.495949
38uc3032-3280-90%694860.4764480.1239190.476448
39uc3032-3290-100%711190.2555450.0267230.255545
40uc3032-2560-10%00NaNNaNNaN
41uc3032-25610-20%00NaNNaNNaN
42uc3032-25620-30%192120.1422720.0625000.142272
43uc3032-25630-40%1170490.1249580.0418800.124958
44uc3032-25640-50%1182490.1211310.0414550.121131
45uc3032-25650-60%1160350.1555040.0301720.155504
46uc3032-25660-70%1044320.1197390.0306510.119739
47uc3032-25670-80%847180.0699700.0212510.069970
48uc3032-25680-90%694210.0863260.0302590.086326
49uc3032-25690-100%71180.0458850.0112520.045885
50uc30256-2560-10%00NaNNaNNaN
51uc30256-25610-20%00NaNNaNNaN
52uc30256-25620-30%1920NaN0.000000NaN
53uc30256-25630-40%117070.3835460.0059830.383546
54uc30256-25640-50%118240.7080090.0033840.708009
55uc30256-25650-60%116070.4602840.0060340.460284
56uc30256-25660-70%104450.3023080.0047890.302308
57uc30256-25670-80%84720.2304340.0023610.230434
58uc30256-25680-90%69410.0544990.0014410.054499
59uc30256-25690-100%71110.0690850.0014060.069085
\n", + "
" + ], + "text/plain": [ + " dataset size seq identity count wrong wrong % error error rate \\\n", + "0 uc30_0.95 32-32 0-10% 0 0 NaN NaN \n", + "1 uc30_0.95 32-32 10-20% 0 0 NaN NaN \n", + "2 uc30_0.95 32-32 20-30% 14 0 NaN 0.000000 \n", + "3 uc30_0.95 32-32 30-40% 873 45 0.242345 0.051546 \n", + "4 uc30_0.95 32-32 40-50% 1166 70 0.237401 0.060034 \n", + "5 uc30_0.95 32-32 50-60% 951 39 0.224588 0.041009 \n", + "6 uc30_0.95 32-32 60-70% 923 30 0.242364 0.032503 \n", + "7 uc30_0.95 32-32 70-80% 789 19 0.230704 0.024081 \n", + "8 uc30_0.95 32-32 80-90% 747 9 0.231401 0.012048 \n", + "9 uc30_0.95 32-32 90-100% 1537 18 0.141972 0.011711 \n", + "10 uc30_0.95 32-256 0-10% 0 0 NaN NaN \n", + "11 uc30_0.95 32-256 10-20% 0 0 NaN NaN \n", + "12 uc30_0.95 32-256 20-30% 14 0 NaN 0.000000 \n", + "13 uc30_0.95 32-256 30-40% 873 10 0.022420 0.011455 \n", + "14 uc30_0.95 32-256 40-50% 1166 11 0.064091 0.009434 \n", + "15 uc30_0.95 32-256 50-60% 951 5 0.066788 0.005258 \n", + "16 uc30_0.95 32-256 60-70% 923 9 0.043915 0.009751 \n", + "17 uc30_0.95 32-256 70-80% 789 4 0.023700 0.005070 \n", + "18 uc30_0.95 32-256 80-90% 747 1 0.000634 0.001339 \n", + "19 uc30_0.95 32-256 90-100% 1537 4 0.058178 0.002602 \n", + "20 uc30_0.95 256-256 0-10% 0 0 NaN NaN \n", + "21 uc30_0.95 256-256 10-20% 0 0 NaN NaN \n", + "22 uc30_0.95 256-256 20-30% 14 0 NaN 0.000000 \n", + "23 uc30_0.95 256-256 30-40% 873 1 0.029183 0.001145 \n", + "24 uc30_0.95 256-256 40-50% 1166 2 0.015912 0.001715 \n", + "25 uc30_0.95 256-256 50-60% 951 0 NaN 0.000000 \n", + "26 uc30_0.95 256-256 60-70% 923 1 0.148474 0.001083 \n", + "27 uc30_0.95 256-256 70-80% 789 0 NaN 0.000000 \n", + "28 uc30_0.95 256-256 80-90% 747 0 NaN 0.000000 \n", + "29 uc30_0.95 256-256 90-100% 1537 0 NaN 0.000000 \n", + "30 uc30 32-32 0-10% 0 0 NaN NaN \n", + "31 uc30 32-32 10-20% 0 0 NaN NaN \n", + "32 uc30 32-32 20-30% 192 82 0.927533 0.427083 \n", + "33 uc30 32-32 30-40% 1170 288 0.593890 0.246154 \n", + "34 uc30 32-32 40-50% 1182 272 0.556361 
0.230118 \n", + "35 uc30 32-32 50-60% 1160 229 0.533862 0.197414 \n", + "36 uc30 32-32 60-70% 1044 192 0.519658 0.183908 \n", + "37 uc30 32-32 70-80% 847 103 0.495949 0.121606 \n", + "38 uc30 32-32 80-90% 694 86 0.476448 0.123919 \n", + "39 uc30 32-32 90-100% 711 19 0.255545 0.026723 \n", + "40 uc30 32-256 0-10% 0 0 NaN NaN \n", + "41 uc30 32-256 10-20% 0 0 NaN NaN \n", + "42 uc30 32-256 20-30% 192 12 0.142272 0.062500 \n", + "43 uc30 32-256 30-40% 1170 49 0.124958 0.041880 \n", + "44 uc30 32-256 40-50% 1182 49 0.121131 0.041455 \n", + "45 uc30 32-256 50-60% 1160 35 0.155504 0.030172 \n", + "46 uc30 32-256 60-70% 1044 32 0.119739 0.030651 \n", + "47 uc30 32-256 70-80% 847 18 0.069970 0.021251 \n", + "48 uc30 32-256 80-90% 694 21 0.086326 0.030259 \n", + "49 uc30 32-256 90-100% 711 8 0.045885 0.011252 \n", + "50 uc30 256-256 0-10% 0 0 NaN NaN \n", + "51 uc30 256-256 10-20% 0 0 NaN NaN \n", + "52 uc30 256-256 20-30% 192 0 NaN 0.000000 \n", + "53 uc30 256-256 30-40% 1170 7 0.383546 0.005983 \n", + "54 uc30 256-256 40-50% 1182 4 0.708009 0.003384 \n", + "55 uc30 256-256 50-60% 1160 7 0.460284 0.006034 \n", + "56 uc30 256-256 60-70% 1044 5 0.302308 0.004789 \n", + "57 uc30 256-256 70-80% 847 2 0.230434 0.002361 \n", + "58 uc30 256-256 80-90% 694 1 0.054499 0.001441 \n", + "59 uc30 256-256 90-100% 711 1 0.069085 0.001406 \n", + "\n", + " % error \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 0.242345 \n", + "4 0.237401 \n", + "5 0.224588 \n", + "6 0.242364 \n", + "7 0.230704 \n", + "8 0.231401 \n", + "9 0.141972 \n", + "10 NaN \n", + "11 NaN \n", + "12 NaN \n", + "13 0.022420 \n", + "14 0.064091 \n", + "15 0.066788 \n", + "16 0.043915 \n", + "17 0.023700 \n", + "18 0.000634 \n", + "19 0.058178 \n", + "20 NaN \n", + "21 NaN \n", + "22 NaN \n", + "23 0.029183 \n", + "24 0.015912 \n", + "25 NaN \n", + "26 0.148474 \n", + "27 NaN \n", + "28 NaN \n", + "29 NaN \n", + "30 NaN \n", + "31 NaN \n", + "32 0.927533 \n", + "33 0.593890 \n", + "34 0.556361 \n", + "35 0.533862 
\n", + "36 0.519658 \n", + "37 0.495949 \n", + "38 0.476448 \n", + "39 0.255545 \n", + "40 NaN \n", + "41 NaN \n", + "42 0.142272 \n", + "43 0.124958 \n", + "44 0.121131 \n", + "45 0.155504 \n", + "46 0.119739 \n", + "47 0.069970 \n", + "48 0.086326 \n", + "49 0.045885 \n", + "50 NaN \n", + "51 NaN \n", + "52 NaN \n", + "53 0.383546 \n", + "54 0.708009 \n", + "55 0.460284 \n", + "56 0.302308 \n", + "57 0.230434 \n", + "58 0.054499 \n", + "59 0.069085 " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"error rate\"] = data[\"wrong\"] / data[\"count\"]\n", + "data[\"% error\"] = data[\"wrong % error\"]\n", + "data[\"seq identity\"] = data[\"seq identity\"].map({\n", + " 0.0: \"0-10%\",\n", + " 0.1: \"10-20%\",\n", + " 0.2: \"20-30%\",\n", + " 0.3: \"30-40%\",\n", + " 0.4: \"40-50%\",\n", + " 0.5: \"50-60%\",\n", + " 0.6: \"60-70%\",\n", + " 0.7: \"70-80%\",\n", + " 0.8: \"80-90%\",\n", + " 0.9: \"90-100%\"\n", + "})\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uniclust30 Error Rate" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:09.008690Z", + "iopub.status.busy": "2023-02-27T11:54:09.008290Z", + "iopub.status.idle": "2023-02-27T11:54:09.822090Z", + "shell.execute_reply": "2023-02-27T11:54:09.822642Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_bar().encode(\n", + " x = \"seq identity\",\n", + " y = alt.Y(\"error rate\", axis = alt.Axis(format = \"%\")),\n", + " column = alt.Column(\"size\", title = \"block size\", sort = [\"32-32\", \"32-256\", \"256-256\"]),\n", + " row = \"dataset\",\n", + " color = alt.Color(\"size\", legend = None, sort = [\"32-32\", \"32-256\", \"256-256\"])\n", + ").properties(\n", + " width = 100,\n", + " height = 100\n", + ")\n", + "save(c, \"uniclust30_accuracy.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uniclust30 % Error" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:09.839594Z", + "iopub.status.busy": "2023-02-27T11:54:09.839161Z", + "iopub.status.idle": "2023-02-27T11:54:10.612319Z", + "shell.execute_reply": "2023-02-27T11:54:10.612881Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_bar().encode(\n", + " x = \"seq identity\",\n", + " y = alt.Y(\"% error\", axis = alt.Axis(format = \"%\")),\n", + " column = alt.Column(\"size\", title = \"block size\", sort = [\"32-32\", \"32-256\", \"256-256\"]),\n", + " row = \"dataset\",\n", + " color = alt.Color(\"size\", legend = None, sort = [\"32-32\", \"32-256\", \"256-256\"])\n", + ").properties(\n", + " width = 100,\n", + " height = 100\n", + ")\n", + "save(c, \"uniclust30_percent_error.pdf\")\n", + "c" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:10.620578Z", + "iopub.status.busy": "2023-02-27T11:54:10.618326Z", + "iopub.status.idle": "2023-02-27T11:54:10.628161Z", + "shell.execute_reply": "2023-02-27T11:54:10.628712Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetsizecountwrongerror rate
0uc30256-2567000270.003857
1uc3032-25670002240.032000
2uc3032-32700012710.181571
3uc30_0.95256-256700040.000571
4uc30_0.9532-2567000440.006286
5uc30_0.9532-3270002300.032857
\n", + "
" + ], + "text/plain": [ + " dataset size count wrong error rate\n", + "0 uc30 256-256 7000 27 0.003857\n", + "1 uc30 32-256 7000 224 0.032000\n", + "2 uc30 32-32 7000 1271 0.181571\n", + "3 uc30_0.95 256-256 7000 4 0.000571\n", + "4 uc30_0.95 32-256 7000 44 0.006286\n", + "5 uc30_0.95 32-32 7000 230 0.032857" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agg_data = data.copy()\n", + "agg_data = agg_data.groupby([\"dataset\", \"size\"]).agg({\"count\": \"sum\", \"wrong\": \"sum\"}).reset_index()\n", + "agg_data[\"error rate\"] = agg_data[\"wrong\"] / agg_data[\"count\"]\n", + "agg_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Overall Uniclust30 Error Rate" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:10.645309Z", + "iopub.status.busy": "2023-02-27T11:54:10.644904Z", + "iopub.status.idle": "2023-02-27T11:54:11.427132Z", + "shell.execute_reply": "2023-02-27T11:54:11.427534Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.FacetChart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(agg_data).mark_bar().encode(\n", + " x = alt.X(\"size\", axis = None, sort = [\"32-32\", \"32-256\", \"256-256\"]),\n", + " y = alt.Y(\"error rate\", axis = alt.Axis(format = \"%\")),\n", + " color = alt.Color(\"size\", title = \"block size\", sort = [\"32-32\", \"32-256\", \"256-256\"])\n", + ")\n", + "t = c.mark_text(dy = -4, size = 8).encode(text = alt.Text(\"error rate\", format = \".1%\"), color = alt.value(\"black\"))\n", + "c = (c + t).properties(\n", + " width = 50,\n", + " height = 100\n", + ").facet(\n", + " column = alt.Column(\"dataset\", header = alt.Header(orient = \"bottom\")),\n", + ")\n", + "save(c, \"uniclust30_overall_accuracy.pdf\")\n", + "c" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:11.430676Z", + "iopub.status.busy": "2023-02-27T11:54:11.430292Z", + "iopub.status.idle": "2023-02-27T11:54:11.752562Z", + "shell.execute_reply": "2023-02-27T11:54:11.753061Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetsizequery lenreference lenseq idpred scoretrue score
0uc30_0.9532-32194418710.36209516562909
1uc30_0.9532-328048080.36374511841184
2uc30_0.9532-32424241220.42703267227639
3uc30_0.9532-322302320.400000405405
4uc30_0.9532-322642590.324528390390
........................
41995uc30256-2565425420.99631028622862
41996uc30256-2562773030.76237611031103
41997uc30256-25665650.907692307307
41998uc30256-25645560.732143195195
41999uc30256-2561261390.856115587587
\n", + "

42000 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " dataset size query len reference len seq id pred score \\\n", + "0 uc30_0.95 32-32 1944 1871 0.362095 1656 \n", + "1 uc30_0.95 32-32 804 808 0.363745 1184 \n", + "2 uc30_0.95 32-32 4242 4122 0.427032 6722 \n", + "3 uc30_0.95 32-32 230 232 0.400000 405 \n", + "4 uc30_0.95 32-32 264 259 0.324528 390 \n", + "... ... ... ... ... ... ... \n", + "41995 uc30 256-256 542 542 0.996310 2862 \n", + "41996 uc30 256-256 277 303 0.762376 1103 \n", + "41997 uc30 256-256 65 65 0.907692 307 \n", + "41998 uc30 256-256 45 56 0.732143 195 \n", + "41999 uc30 256-256 126 139 0.856115 587 \n", + "\n", + " true score \n", + "0 2909 \n", + "1 1184 \n", + "2 7639 \n", + "3 405 \n", + "4 390 \n", + "... ... \n", + "41995 2862 \n", + "41996 1103 \n", + "41997 307 \n", + "41998 195 \n", + "41999 587 \n", + "\n", + "[42000 rows x 7 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = file_to_pandas(\"../data/uc_accuracy.csv\")\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uniclust30 Our Score vs True Score (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:11.763198Z", + "iopub.status.busy": "2023-02-27T11:54:11.762734Z", + "iopub.status.idle": "2023-02-27T11:54:12.790422Z", + "shell.execute_reply": "2023-02-27T11:54:12.790869Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_circle().encode(\n", + " x = alt.X(\"true score\", bin = alt.Bin(maxbins = 50)),\n", + " y = alt.Y(\"pred score\", bin = alt.Bin(maxbins = 50)),\n", + " row = \"dataset\",\n", + " column = alt.Column(\"size\", title = \"block size\", header = alt.Header(orient = \"bottom\"), sort = [\"32-32\", \"32-256\", \"256-256\"]),\n", + " color = alt.Color(\"count():Q\", title = \"count\", scale = alt.Scale(type = \"log\", scheme = \"viridis\"))\n", + ").properties(\n", + " width = 200,\n", + " height = 200\n", + ").configure_axis(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_header(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_legend(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ")\n", + "save(c, \"uniclust30_scores.pdf\")\n", + "c" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:12.794839Z", + "iopub.status.busy": "2023-02-27T11:54:12.794418Z", + "iopub.status.idle": "2023-02-27T11:54:12.809235Z", + "shell.execute_reply": "2023-02-27T11:54:12.809640Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetsizequery lenreference lenseq idpred scoretrue scoreseq length% error
0uc30_0.9532-32194418710.3620951656290919440.430732
1uc30_0.9532-328048080.363745118411848080.000000
2uc30_0.9532-32424241220.4270326722763942420.120042
3uc30_0.9532-322302320.4000004054052320.000000
4uc30_0.9532-322642590.3245283903902640.000000
..............................
41995uc30256-2565425420.996310286228625420.000000
41996uc30256-2562773030.762376110311033030.000000
41997uc30256-25665650.907692307307650.000000
41998uc30256-25645560.732143195195560.000000
41999uc30256-2561261390.8561155875871390.000000
\n", + "

42000 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " dataset size query len reference len seq id pred score \\\n", + "0 uc30_0.95 32-32 1944 1871 0.362095 1656 \n", + "1 uc30_0.95 32-32 804 808 0.363745 1184 \n", + "2 uc30_0.95 32-32 4242 4122 0.427032 6722 \n", + "3 uc30_0.95 32-32 230 232 0.400000 405 \n", + "4 uc30_0.95 32-32 264 259 0.324528 390 \n", + "... ... ... ... ... ... ... \n", + "41995 uc30 256-256 542 542 0.996310 2862 \n", + "41996 uc30 256-256 277 303 0.762376 1103 \n", + "41997 uc30 256-256 65 65 0.907692 307 \n", + "41998 uc30 256-256 45 56 0.732143 195 \n", + "41999 uc30 256-256 126 139 0.856115 587 \n", + "\n", + " true score seq length % error \n", + "0 2909 1944 0.430732 \n", + "1 1184 808 0.000000 \n", + "2 7639 4242 0.120042 \n", + "3 405 232 0.000000 \n", + "4 390 264 0.000000 \n", + "... ... ... ... \n", + "41995 2862 542 0.000000 \n", + "41996 1103 303 0.000000 \n", + "41997 307 65 0.000000 \n", + "41998 195 56 0.000000 \n", + "41999 587 139 0.000000 \n", + "\n", + "[42000 rows x 9 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"seq length\"] = data[[\"query len\", \"reference len\"]].max(axis = 1)\n", + "data[\"% error\"] = 1.0 - data[\"pred score\"] / data[\"true score\"]\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uniclust30 Sequence Length vs % Error (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:12.816939Z", + "iopub.status.busy": "2023-02-27T11:54:12.816504Z", + "iopub.status.idle": "2023-02-27T11:54:13.847531Z", + "shell.execute_reply": "2023-02-27T11:54:13.847993Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_circle().encode(\n", + " x = alt.X(\"seq length\", bin = alt.Bin(maxbins = 50)),\n", + " y = alt.Y(\"% error\", bin = alt.Bin(maxbins = 50), axis = alt.Axis(format = \"%\")),\n", + " column = alt.Column(\"size\", title = \"block size\", header = alt.Header(orient = \"bottom\"), sort = [\"32-32\", \"32-256\", \"256-256\"]),\n", + " color = alt.Color(\"count():Q\", title = \"count\", scale = alt.Scale(type = \"log\", scheme = \"viridis\"))\n", + ").transform_filter(\n", + " (datum.dataset == \"uc30_0.95\") & (datum.size != \"256-256\")\n", + ").properties(\n", + " width = 200,\n", + " height = 200\n", + ").configure_axis(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_header(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_legend(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ")\n", + "save(c, \"uniclust30_length_accuracy.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uniclust30 Sequence Identity vs % Error (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:13.856152Z", + "iopub.status.busy": "2023-02-27T11:54:13.855642Z", + "iopub.status.idle": "2023-02-27T11:54:14.884615Z", + "shell.execute_reply": "2023-02-27T11:54:14.885025Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_circle().encode(\n", + " x = alt.X(\"seq id\", bin = alt.Bin(maxbins = 50), axis = alt.Axis(format = \"%\")),\n", + " y = alt.Y(\"% error\", bin = alt.Bin(maxbins = 50), axis = alt.Axis(format = \"%\")),\n", + " column = alt.Column(\"size\", title = \"block size\", header = alt.Header(orient = \"bottom\"), sort = [\"32-32\", \"32-256\", \"256-256\"]),\n", + " color = alt.Color(\"count():Q\", title = \"count\", scale = alt.Scale(type = \"log\", scheme = \"viridis\"))\n", + ").transform_filter(\n", + " (datum.dataset == \"uc30_0.95\") & (datum.size != \"256-256\")\n", + ").properties(\n", + " width = 200,\n", + " height = 200\n", + ").configure_axis(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_header(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_legend(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ")\n", + "save(c, \"uniclust30_seq_id_accuracy.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DNA Reads Global Alignment" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:54:14.888370Z", + "iopub.status.busy": "2023-02-27T11:54:14.887957Z", + "iopub.status.idle": "2023-02-27T12:19:50.164258Z", + "shell.execute_reply": "2023-02-27T12:19:50.164759Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['',\n", + " 'dataset, total, wrong, wrong % error, min size wrong, wfa wrong',\n", + " '',\n", + " 'illumina, 100000, 0, NaN, 0, 0',\n", + " '# illumina seq id avg: 0.997052517030022',\n", + " '',\n", + " 'nanopore 1kbp, 12477, 21, 0.060149406709727474, 1810, 21',\n", + " '# nanopore 1kbp seq id avg: 0.8926079817605514',\n", + " '',\n", + " 'nanopore <10kbp, 
5000, 109, 0.035888133253518265, 1595, 645',\n", + " '# nanopore <10kbp seq id avg: 0.8752513435635519',\n", + " '',\n", + " 'nanopore <50kbp, 10000, 278, 0.020799640807527112, 2599, 2545',\n", + " '# nanopore <50kbp seq id avg: 0.8795798677370729',\n", + " '# Done!']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo run --example nanopore_accuracy --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:19:50.168669Z", + "iopub.status.busy": "2023-02-27T12:19:50.168197Z", + "iopub.status.idle": "2023-02-27T12:19:50.176876Z", + "shell.execute_reply": "2023-02-27T12:19:50.177370Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasettotalwrongwrong % errormin size wrongwfa wrong
0illumina1000000NaN00
1nanopore 1kbp12477210.060149181021
2nanopore <10kbp50001090.0358881595645
3nanopore <50kbp100002780.02080025992545
\n", + "
" + ], + "text/plain": [ + " dataset total wrong wrong % error min size wrong wfa wrong\n", + "0 illumina 100000 0 NaN 0 0\n", + "1 nanopore 1kbp 12477 21 0.060149 1810 21\n", + "2 nanopore <10kbp 5000 109 0.035888 1595 645\n", + "3 nanopore <50kbp 10000 278 0.020800 2599 2545" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:19:50.182739Z", + "iopub.status.busy": "2023-02-27T12:19:50.182241Z", + "iopub.status.idle": "2023-02-27T12:19:50.187837Z", + "shell.execute_reply": "2023-02-27T12:19:50.188846Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " dataset reads errors error rate % error\n", + "0 illumina 100000 0 0.0% 0.0%\n", + "1 nanopore 1kbp 12477 21 0.2% 6.0%\n", + "2 nanopore <10kbp 5000 109 2.2% 3.6%\n", + "3 nanopore <50kbp 10000 278 2.8% 2.1%\n" + ] + } + ], + "source": [ + "data[\"error rate\"] = data[\"wrong\"] / data[\"total\"]\n", + "data = data.rename(columns = {\"total\": \"reads\", \"wrong\": \"errors\", \"wrong % error\": \"% error\"})\n", + "data = data[[\"dataset\", \"reads\", \"errors\", \"error rate\", \"% error\"]]\n", + "data = data.fillna(0)\n", + "data[\"error rate\"] = data[\"error rate\"].map(\"{:.1%}\".format)\n", + "data[\"% error\"] = data[\"% error\"].map(\"{:.1%}\".format)\n", + "print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:19:50.191761Z", + "iopub.status.busy": "2023-02-27T12:19:50.191287Z", + "iopub.status.idle": "2023-02-27T12:19:51.058341Z", + "shell.execute_reply": "2023-02-27T12:19:51.058898Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetquery lenreference lenpred scoretrue score
0illumina101101190190
1illumina101101196196
2illumina101101196196
3illumina101101196196
4illumina101101196196
..................
127472nanopore <50kbp1235129119041904
127473nanopore <50kbp15970162442499424994
127474nanopore <50kbp3784384362906290
127475nanopore <50kbp88197514281428
127476nanopore <50kbp3206297834163416
\n", + "

127477 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " dataset query len reference len pred score true score\n", + "0 illumina 101 101 190 190\n", + "1 illumina 101 101 196 196\n", + "2 illumina 101 101 196 196\n", + "3 illumina 101 101 196 196\n", + "4 illumina 101 101 196 196\n", + "... ... ... ... ... ...\n", + "127472 nanopore <50kbp 1235 1291 1904 1904\n", + "127473 nanopore <50kbp 15970 16244 24994 24994\n", + "127474 nanopore <50kbp 3784 3843 6290 6290\n", + "127475 nanopore <50kbp 881 975 1428 1428\n", + "127476 nanopore <50kbp 3206 2978 3416 3416\n", + "\n", + "[127477 rows x 5 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = file_to_pandas(\"../data/nanopore_accuracy.csv\")\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nanopore <10kbp Global Alignment Our Score vs True Score (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:19:51.067125Z", + "iopub.status.busy": "2023-02-27T12:19:51.066631Z", + "iopub.status.idle": "2023-02-27T12:19:52.302680Z", + "shell.execute_reply": "2023-02-27T12:19:52.303088Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_circle().encode(\n", + " x = alt.X(\"true score\", bin = alt.Bin(maxbins = 50)),\n", + " y = alt.Y(\"pred score\", bin = alt.Bin(maxbins = 50)),\n", + " color = alt.Color(\"count():Q\", title = \"count\", scale = alt.Scale(type = \"log\", scheme = \"viridis\"))\n", + ").transform_filter(\n", + " datum.dataset == \"nanopore <10kbp\"\n", + ").properties(\n", + " width = 200,\n", + " height = 200\n", + ").configure_axis(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_header(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_legend(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ")\n", + "save(c, \"nanopore_10kbp_scores.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Nanopore <50kbp Global Alignment Our Score vs True Score (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:19:52.309926Z", + "iopub.status.busy": "2023-02-27T12:19:52.309515Z", + "iopub.status.idle": "2023-02-27T12:19:53.486732Z", + "shell.execute_reply": "2023-02-27T12:19:53.487176Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_circle().encode(\n", + " x = alt.X(\"true score\", bin = alt.Bin(maxbins = 50)),\n", + " y = alt.Y(\"pred score\", bin = alt.Bin(maxbins = 50)),\n", + " color = alt.Color(\"count():Q\", title = \"count\", scale = alt.Scale(type = \"log\", scheme = \"viridis\"))\n", + ").transform_filter(\n", + " datum.dataset == \"nanopore <50kbp\"\n", + ").properties(\n", + " width = 200,\n", + " height = 200\n", + ").configure_axis(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_header(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_legend(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ")\n", + "save(c, \"nanopore_50kbp_scores.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nanopore Data Compare Setup\n", + "\n", + "To run the comparisons below, you need to clone the following repos, place them in the same directory where this repo (block aligner) is located, and follow their setup instructions:\n", + "* [diff-bench-paper](https://github.com/Daniel-Liu-c0deb0t/diff-bench-paper)\n", + "* [adaptivebandbench](https://github.com/Daniel-Liu-c0deb0t/adaptivebandbench)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nanopore Data Compare" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:19:53.490832Z", + "iopub.status.busy": "2023-02-27T12:19:53.490359Z", + "iopub.status.idle": "2023-02-27T12:20:26.228821Z", + "shell.execute_reply": "2023-02-27T12:20:26.229299Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['scores_l1000.tsv',\n", + " 'scores_l10000.tsv',\n", + " 'scores_l25000.tsv',\n", + " 'scores_default.tsv']" + ] + }, + 
"execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd ../../diff-bench-paper/supplementary_data/benchmark_codes && ./custom_scores.sh 2>&1 | grep '\\.tsv'\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:20:26.233270Z", + "iopub.status.busy": "2023-02-27T12:20:26.232730Z", + "iopub.status.idle": "2023-02-27T12:20:26.235018Z", + "shell.execute_reply": "2023-02-27T12:20:26.235509Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['1000', '10000', '25000', 'default']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lengths = []\n", + "for f in output:\n", + " l = f[len(\"scores_\"):f.index(\".\")]\n", + " lengths.append(l[1:] if l[0] == \"l\" else l)\n", + "lengths" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:20:26.239060Z", + "iopub.status.busy": "2023-02-27T12:20:26.238591Z", + "iopub.status.idle": "2023-02-27T12:21:00.433788Z", + "shell.execute_reply": "2023-02-27T12:21:00.434247Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 21, 0.08564959576734898, 1652, 0.014478023753787836',\n", + " '',\n", + " '64, 1734, 6, 0.014165306396410624, 1669, 0.017846773911076242',\n", + " '# Done!'],\n", + " ['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 123, 0.11277759543387586, 1546, 0.0018081439985594904',\n", + " '',\n", + " '64, 1734, 56, 0.01591056402384434, 1633, 0.01032615586212946',\n", + " '# Done!'],\n", + " ['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 203, 0.1292233687300956, 
892, 0.0016197787311747142',\n", + " '',\n", + " '64, 1734, 75, 0.013415647980847437, 1051, 0.02590382915635615',\n", + " '# Done!'],\n", + " ['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 229, 0.1193901258355045, 37, 0.024771049421762822',\n", + " '',\n", + " '64, 1734, 86, 0.013091483866668505, 176, 0.1557904770879331',\n", + " '# Done!']]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path_prefix = \"../diff-bench-paper/\"\n", + "outputs = []\n", + "for f in output:\n", + " o = !cd .. && cargo run --example compare --release --features simd_avx2 --quiet -- {path_prefix + f} 50\n", + " outputs.append(o)\n", + "outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:21:00.438114Z", + "iopub.status.busy": "2023-02-27T12:21:00.437719Z", + "iopub.status.idle": "2023-02-27T12:21:00.454295Z", + "shell.execute_reply": "2023-02-27T12:21:00.454742Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lengthmax sizetotalother betterother % betterus betterus % betterband width
01000321734210.08565016520.01447832
1100064173460.01416516690.01784732
2100003217341230.11277815460.00180832
310000641734560.01591116330.01032632
4250003217342030.1292238920.00162032
525000641734750.01341610510.02590432
6default3217342290.119390370.02477132
7default641734860.0130911760.15579032
\n", + "
" + ], + "text/plain": [ + " length max size total other better other % better us better \\\n", + "0 1000 32 1734 21 0.085650 1652 \n", + "1 1000 64 1734 6 0.014165 1669 \n", + "2 10000 32 1734 123 0.112778 1546 \n", + "3 10000 64 1734 56 0.015911 1633 \n", + "4 25000 32 1734 203 0.129223 892 \n", + "5 25000 64 1734 75 0.013416 1051 \n", + "6 default 32 1734 229 0.119390 37 \n", + "7 default 64 1734 86 0.013091 176 \n", + "\n", + " us % better band width \n", + "0 0.014478 32 \n", + "1 0.017847 32 \n", + "2 0.001808 32 \n", + "3 0.010326 32 \n", + "4 0.001620 32 \n", + "5 0.025904 32 \n", + "6 0.024771 32 \n", + "7 0.155790 32 " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = []\n", + "for o in outputs:\n", + " d = csv_to_pandas(o)\n", + " data.append(d)\n", + "data = pd.concat(data, keys = lengths)\n", + "data = data.reset_index()\n", + "data = data.drop(columns = [\"level_1\"])\n", + "data = data.rename(columns = {\"level_0\": \"length\"})\n", + "data[\"band width\"] = 32\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:21:00.457892Z", + "iopub.status.busy": "2023-02-27T12:21:00.457479Z", + "iopub.status.idle": "2023-02-27T12:23:13.981031Z", + "shell.execute_reply": "2023-02-27T12:23:13.981527Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['scores_l1000_b256.tsv',\n", + " 'scores_l10000_b256.tsv',\n", + " 'scores_l10000_b2048.tsv',\n", + " 'scores_b256.tsv',\n", + " 'scores_b2048.tsv']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd ../../adaptivebandbench && ./custom_scores.sh 2>&1 | grep '\\.tsv'\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:13.985933Z", + "iopub.status.busy": 
"2023-02-27T12:23:13.985444Z", + "iopub.status.idle": "2023-02-27T12:23:13.987563Z", + "shell.execute_reply": "2023-02-27T12:23:13.988103Z" + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['1000', '10000', '10000', 'default', 'default']\n", + "['256', '256', '2048', '256', '2048']\n" + ] + } + ], + "source": [ + "lengths = []\n", + "band_widths = []\n", + "for f in output:\n", + " l = f[len(\"scores_\"):f.index(\".\")]\n", + " if l[0] == \"l\":\n", + " lengths.append(l[1:l.index(\"_\")])\n", + " l = l[l.index(\"_\") + 1:]\n", + " else:\n", + " lengths.append(\"default\")\n", + " if l[0] == \"b\":\n", + " band_widths.append(l[1:])\n", + "print(lengths)\n", + "print(band_widths)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:13.991600Z", + "iopub.status.busy": "2023-02-27T12:23:13.991122Z", + "iopub.status.idle": "2023-02-27T12:23:56.420915Z", + "shell.execute_reply": "2023-02-27T12:23:56.421382Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 56, 0.13765090849996714, 0, NaN',\n", + " '',\n", + " '64, 1734, 28, 0.024168297720724062, 0, NaN',\n", + " '# Done!'],\n", + " ['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 134, 0.13674604294825887, 1589, 0.61734014542284',\n", + " '',\n", + " '64, 1734, 120, 0.04701559488367986, 1606, 0.6220346985685162',\n", + " '# Done!'],\n", + " ['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 203, 0.16989582863274405, 0, NaN',\n", + " '',\n", + " '64, 1734, 145, 0.06028818832879965, 0, NaN',\n", + " '# Done!'],\n", + " ['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 135, 
0.13540622232038024, 1594, 0.8403591347916828',\n", + " '',\n", + " '64, 1734, 120, 0.05296844615029002, 1612, 0.848036363148492',\n", + " '# Done!'],\n", + " ['max size, total, other better, other % better, us better, us % better',\n", + " '',\n", + " '32, 1734, 269, 0.21050607201659027, 379, 0.1479370409142696',\n", + " '',\n", + " '64, 1734, 168, 0.07369966954145103, 419, 0.15517942649488767',\n", + " '# Done!']]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path_prefix = \"../adaptivebandbench/\"\n", + "outputs = []\n", + "for f in output:\n", + " o = !cd .. && cargo run --example compare --release --features simd_avx2 --quiet -- {path_prefix + f} 100000\n", + " outputs.append(o)\n", + "outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:56.425636Z", + "iopub.status.busy": "2023-02-27T12:23:56.425072Z", + "iopub.status.idle": "2023-02-27T12:23:56.448535Z", + "shell.execute_reply": "2023-02-27T12:23:56.448160Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lengthband widthmax sizetotalother betterother % betterus betterus % better
01000256321734560.1376510NaN
11000256641734280.0241680NaN
2100002563217341340.13674615890.617340
3100002566417341200.04701616060.622035
41000020483217342030.1698960NaN
51000020486417341450.0602880NaN
6default2563217341350.13540615940.840359
7default2566417341200.05296816120.848036
8default20483217342690.2105063790.147937
9default20486417341680.0737004190.155179
\n", + "
" + ], + "text/plain": [ + " length band width max size total other better other % better \\\n", + "0 1000 256 32 1734 56 0.137651 \n", + "1 1000 256 64 1734 28 0.024168 \n", + "2 10000 256 32 1734 134 0.136746 \n", + "3 10000 256 64 1734 120 0.047016 \n", + "4 10000 2048 32 1734 203 0.169896 \n", + "5 10000 2048 64 1734 145 0.060288 \n", + "6 default 256 32 1734 135 0.135406 \n", + "7 default 256 64 1734 120 0.052968 \n", + "8 default 2048 32 1734 269 0.210506 \n", + "9 default 2048 64 1734 168 0.073700 \n", + "\n", + " us better us % better \n", + "0 0 NaN \n", + "1 0 NaN \n", + "2 1589 0.617340 \n", + "3 1606 0.622035 \n", + "4 0 NaN \n", + "5 0 NaN \n", + "6 1594 0.840359 \n", + "7 1612 0.848036 \n", + "8 379 0.147937 \n", + "9 419 0.155179 " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data2 = []\n", + "for o in outputs:\n", + " d = csv_to_pandas(o)\n", + " data2.append(d)\n", + "index = list(zip(lengths, band_widths))\n", + "data2 = pd.concat(data2, keys = index)\n", + "data2 = data2.reset_index()\n", + "data2 = data2.drop(columns = [\"level_2\"])\n", + "data2 = data2.rename(columns = {\"level_0\": \"length\", \"level_1\": \"band width\"})\n", + "data2" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:56.453442Z", + "iopub.status.busy": "2023-02-27T12:23:56.453000Z", + "iopub.status.idle": "2023-02-27T12:23:56.454610Z", + "shell.execute_reply": "2023-02-27T12:23:56.455091Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "data[\"other better %\"] = data[\"other better\"] / data[\"total\"]\n", + "data[\"us better %\"] = data[\"us better\"] / data[\"total\"]\n", + "\n", + "data2[\"other better %\"] = data2[\"other better\"] / data2[\"total\"]\n", + "data2[\"us better %\"] = data2[\"us better\"] / data2[\"total\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + 
"execution": { + "iopub.execute_input": "2023-02-27T12:23:56.459061Z", + "iopub.status.busy": "2023-02-27T12:23:56.458593Z", + "iopub.status.idle": "2023-02-27T12:23:56.460325Z", + "shell.execute_reply": "2023-02-27T12:23:56.460816Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "data[\"equal %\"] = 1.0 - data[\"other better %\"] - data[\"us better %\"]\n", + "data2[\"equal %\"] = 1.0 - data2[\"other better %\"] - data2[\"us better %\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:56.472132Z", + "iopub.status.busy": "2023-02-27T12:23:56.471699Z", + "iopub.status.idle": "2023-02-27T12:23:56.473447Z", + "shell.execute_reply": "2023-02-27T12:23:56.473890Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "cleaned = data.copy()\n", + "cleaned = cleaned.melt(id_vars = [\"length\", \"band width\", \"max size\"], value_vars = [\"us better %\", \"other better %\", \"equal %\"])\n", + "cleaned[\"variable\"] = cleaned[\"variable\"].map({\"us better %\": \"ours better %\", \"other better %\": \"adaptive banding better %\", \"equal %\": \"equal %\"})\n", + "\n", + "cleaned2 = data2.copy()\n", + "cleaned2 = cleaned2.melt(id_vars = [\"length\", \"band width\", \"max size\"], value_vars = [\"us better %\", \"other better %\", \"equal %\"])\n", + "cleaned2[\"variable\"] = cleaned2[\"variable\"].map({\"us better %\": \"ours better %\", \"other better %\": \"static banding better %\", \"equal %\": \"equal %\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:56.476845Z", + "iopub.status.busy": "2023-02-27T12:23:56.476407Z", + "iopub.status.idle": "2023-02-27T12:23:56.482844Z", + "shell.execute_reply": "2023-02-27T12:23:56.483334Z" + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "order = {\"ours better %\": 0, \"equal %\": 1, \"adaptive banding better %\": 
2}\n", + "cleaned[\"order\"] = cleaned.apply(lambda r: order[r[\"variable\"]], axis = 1)\n", + "\n", + "order = {\"ours better %\": 0, \"equal %\": 1, \"static banding better %\": 2}\n", + "cleaned2[\"order\"] = cleaned2.apply(lambda r: order[r[\"variable\"]], axis = 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comparison with Adaptive Banding" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:56.501924Z", + "iopub.status.busy": "2023-02-27T12:23:56.501532Z", + "iopub.status.idle": "2023-02-27T12:23:57.300380Z", + "shell.execute_reply": "2023-02-27T12:23:57.300890Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(cleaned).mark_bar().encode(\n", + " x = \"length\",\n", + " y = alt.Y(\"sum(value)\", axis = alt.Axis(title = \"\", format = \"%\")),\n", + " color = alt.Color(\"variable\", title = \"\", sort = alt.EncodingSortField(field = \"order\")),\n", + " row = \"max size:N\",\n", + " order = \"order\"\n", + ").properties(\n", + " width = 100,\n", + " height = 100\n", + ")\n", + "save(c, \"compare_adaptive_banding.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Comparison with Static Banding" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:57.318689Z", + "iopub.status.busy": "2023-02-27T12:23:57.315958Z", + "iopub.status.idle": "2023-02-27T12:23:58.098490Z", + "shell.execute_reply": "2023-02-27T12:23:58.098939Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(cleaned2).mark_bar().encode(\n", + " x = \"length\",\n", + " y = alt.Y(\"sum(value)\", axis = alt.Axis(title = \"\", format = \"%\")),\n", + " color = alt.Color(\"variable\", title = \"\", sort = alt.EncodingSortField(field = \"order\")),\n", + " row = alt.Row(\"max size:N\", title = \"max block size\"),\n", + " column = alt.Column(\"band width:N\", title = \"static band width\", sort = [\"256\", \"2048\"]),\n", + " order = \"order\"\n", + ").properties(\n", + " width = 100,\n", + " height = 100\n", + ")\n", + "save(c, \"compare_diagonal.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sequence-to-Profile Alignment Accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:23:58.101943Z", + "iopub.status.busy": "2023-02-27T12:23:58.101536Z", + "iopub.status.idle": "2023-02-27T12:24:08.316396Z", + "shell.execute_reply": "2023-02-27T12:24:08.316843Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['size, correct',\n", + " '32-32, 9656',\n", + " '32-64, 10724',\n", + " '32-128, 11083',\n", + " '128-128, 11107',\n", + " '2048-2048, 11160',\n", + " '# compared to 2048-2048',\n", + " '# Done!']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. 
&& cargo run --example pssm_accuracy --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:24:08.319661Z", + "iopub.status.busy": "2023-02-27T12:24:08.319241Z", + "iopub.status.idle": "2023-02-27T12:24:08.325836Z", + "shell.execute_reply": "2023-02-27T12:24:08.326268Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sizecorrect
032-329656
132-6410724
232-12811083
3128-12811107
42048-204811160
\n", + "
" + ], + "text/plain": [ + " size correct\n", + "0 32-32 9656\n", + "1 32-64 10724\n", + "2 32-128 11083\n", + "3 128-128 11107\n", + "4 2048-2048 11160" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:24:08.332263Z", + "iopub.status.busy": "2023-02-27T12:24:08.331634Z", + "iopub.status.idle": "2023-02-27T12:24:08.333867Z", + "shell.execute_reply": "2023-02-27T12:24:08.334303Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sizecorrecterror rate
032-3296560.134767
132-64107240.039068
232-128110830.006900
3128-128111070.004749
42048-2048111600.000000
\n", + "
" + ], + "text/plain": [ + " size correct error rate\n", + "0 32-32 9656 0.134767\n", + "1 32-64 10724 0.039068\n", + "2 32-128 11083 0.006900\n", + "3 128-128 11107 0.004749\n", + "4 2048-2048 11160 0.000000" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"error rate\"] = 1.0 - data[\"correct\"] / data.iloc[-1][\"correct\"]\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:24:08.339565Z", + "iopub.status.busy": "2023-02-27T12:24:08.338950Z", + "iopub.status.idle": "2023-02-27T12:24:08.341051Z", + "shell.execute_reply": "2023-02-27T12:24:08.341462Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " block size error rate\n", + "0 32-32 13.5%\n", + "1 32-64 3.9%\n", + "2 32-128 0.7%\n", + "3 128-128 0.5%\n" + ] + } + ], + "source": [ + "table = data[[\"size\", \"error rate\"]]\n", + "table = table.rename(columns = {\"size\": \"block size\"})\n", + "table[\"error rate\"] = table[\"error rate\"].map(\"{:.1%}\".format)\n", + "table = table.iloc[:-1]\n", + "print(table)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "SCOP Sequence-to-Profile Alignment Accuracy (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:24:08.358663Z", + "iopub.status.busy": "2023-02-27T12:24:08.358250Z", + "iopub.status.idle": "2023-02-27T12:24:09.111432Z", + "shell.execute_reply": "2023-02-27T12:24:09.111871Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_bar().encode(\n", + " x = alt.X(\"size\", title = \"block size\", sort = [\"32-32\", \"32-64\", \"32-128\", \"128-128\"]),\n", + " y = alt.Y(\"error rate\", axis = alt.Axis(format = \"%\")),\n", + " color = alt.Color(\"size\", sort = [\"32-32\", \"32-64\", \"32-128\", \"128-128\"], legend = None)\n", + ").transform_filter(\n", + " datum.size != \"2048-2048\"\n", + ").properties(\n", + " width = 60,\n", + " height = 100\n", + ")\n", + "t = c.mark_text(dy = -4, size = 8).encode(text = alt.Text(\"error rate\", format = \".1%\"), color = alt.value(\"black\"))\n", + "c = c + t\n", + "save(c, \"pssm_accuracy.pdf\")\n", + "c" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:24:09.114859Z", + "iopub.status.busy": "2023-02-27T12:24:09.114460Z", + "iopub.status.idle": "2023-02-27T12:24:09.462121Z", + "shell.execute_reply": "2023-02-27T12:24:09.461747Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sizeseq lenprofile lenpred scoretrue score
032-32116116439439
132-64116116439439
232-128116116439439
3128-128116116439439
42048-2048116116439439
..................
5579532-325656331331
5579632-645656331331
5579732-1285656331331
55798128-1285656331331
557992048-20485656331331
\n", + "

55800 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " size seq len profile len pred score true score\n", + "0 32-32 116 116 439 439\n", + "1 32-64 116 116 439 439\n", + "2 32-128 116 116 439 439\n", + "3 128-128 116 116 439 439\n", + "4 2048-2048 116 116 439 439\n", + "... ... ... ... ... ...\n", + "55795 32-32 56 56 331 331\n", + "55796 32-64 56 56 331 331\n", + "55797 32-128 56 56 331 331\n", + "55798 128-128 56 56 331 331\n", + "55799 2048-2048 56 56 331 331\n", + "\n", + "[55800 rows x 5 columns]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = file_to_pandas(\"../data/pssm_accuracy.csv\")\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "SCOP Sequence-to-Profile Alignment Our Score vs True Score (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T12:24:09.470619Z", + "iopub.status.busy": "2023-02-27T12:24:09.470072Z", + "iopub.status.idle": "2023-02-27T12:24:10.444888Z", + "shell.execute_reply": "2023-02-27T12:24:10.445327Z" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_circle().encode(\n", + " x = alt.X(\"true score\", bin = alt.Bin(maxbins = 50)),\n", + " y = alt.Y(\"pred score\", bin = alt.Bin(maxbins = 50)),\n", + " column = alt.Column(\"size\", title = \"block size\", header = alt.Header(orient = \"bottom\"), sort = [\"32-32\", \"32-64\", \"32-128\"]),\n", + " color = alt.Color(\"count():Q\", title = \"count\", scale = alt.Scale(type = \"log\", scheme = \"viridis\"))\n", + ").transform_filter(\n", + " (datum.size != \"2048-2048\") & (datum.size != \"128-128\")\n", + ").properties(\n", + " width = 200,\n", + " height = 200\n", + ").configure_axis(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_header(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ").configure_legend(\n", + " titleFontSize = 12,\n", + " labelFontSize = 12\n", + ")\n", + "save(c, \"pssm_scores.pdf\")\n", + "c" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/lib/block-aligner/vis/block_aligner_bench_vis.ipynb b/lib/block-aligner/vis/block_aligner_bench_vis.ipynb new file mode 100644 index 000000000..f8d93c1b0 --- /dev/null +++ b/lib/block-aligner/vis/block_aligner_bench_vis.ipynb @@ -0,0 +1,3144 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Block Aligner Benchmark Analysis and Visualizations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook contains code for collecting, 
cleaning, and analyzing data produced by block aligner's experiments.\n", + "\n", + "To run this, you will need to install all the libraries imported below, along with [altair-saver](https://github.com/altair-viz/altair_saver) and [altair-data-server](https://github.com/altair-viz/altair_data_server), which has some extra dependencies for PDF saving.\n", + "\n", + "Run each cell one by one to reproduce the experiments. This may take a while. For accurate benchmarking, it is recommended to run the entire notebook in the command line with `nbconvert`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:14:39.004205Z", + "iopub.status.busy": "2023-02-27T11:14:39.003434Z", + "iopub.status.idle": "2023-02-27T11:14:40.467921Z", + "shell.execute_reply": "2023-02-27T11:14:40.468330Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DataTransformerRegistry.enable('data_server')" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import altair as alt\n", + "from altair_saver import save\n", + "from altair import datum\n", + "import pandas as pd\n", + "from io import StringIO\n", + "\n", + "alt.data_transformers.enable(\"data_server\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:14:40.471391Z", + "iopub.status.busy": "2023-02-27T11:14:40.470954Z", + "iopub.status.idle": "2023-02-27T11:14:40.472356Z", + "shell.execute_reply": "2023-02-27T11:14:40.472852Z" + } + }, + "outputs": [], + "source": [ + "def csv_to_pandas(csv, d = \"\\\\s*,\\\\s*\", t = None):\n", + " s = StringIO(\"\\n\".join(csv))\n", + " data = pd.read_csv(s, sep = d, thousands = t, comment = \"#\", engine = \"python\")\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prefix Scan Benchmark" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:14:40.475746Z", + "iopub.status.busy": "2023-02-27T11:14:40.475308Z", + "iopub.status.idle": "2023-02-27T11:15:01.464856Z", + "shell.execute_reply": "2023-02-27T11:15:01.465354Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['algorithm\\ttime', 'bench_naive_prefix_scan\\t26', 'bench_opt_prefix_scan\\t18']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo bench --features simd_avx2 --quiet -- prefix_scan | grep 'bench:' | awk '{print $2\"\\t\"$5}'\n", + "output.insert(0, \"algorithm\\ttime\")\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:01.468890Z", + "iopub.status.busy": "2023-02-27T11:15:01.468374Z", + "iopub.status.idle": "2023-02-27T11:15:01.480729Z", + "shell.execute_reply": "2023-02-27T11:15:01.481235Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmtime
0bench_naive_prefix_scan26
1bench_opt_prefix_scan18
\n", + "
" + ], + "text/plain": [ + " algorithm time\n", + "0 bench_naive_prefix_scan 26\n", + "1 bench_opt_prefix_scan 18" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output, d = \"\\t\", t = \",\")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:01.486965Z", + "iopub.status.busy": "2023-02-27T11:15:01.486359Z", + "iopub.status.idle": "2023-02-27T11:15:01.488535Z", + "shell.execute_reply": "2023-02-27T11:15:01.489034Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmtime
0naive26
1ours18
\n", + "
" + ], + "text/plain": [ + " algorithm time\n", + "0 naive 26\n", + "1 ours 18" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"algorithm\"] = data[\"algorithm\"].map({\n", + " \"bench_naive_prefix_scan\": \"naive\",\n", + " \"bench_opt_prefix_scan\": \"ours\"\n", + "})\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prefix Scan Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:01.497427Z", + "iopub.status.busy": "2023-02-27T11:15:01.496945Z", + "iopub.status.idle": "2023-02-27T11:15:03.645465Z", + "shell.execute_reply": "2023-02-27T11:15:03.645874Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_bar().encode(\n", + " x = alt.X(\"time\", axis = alt.Axis(title = \"time (ns)\")),\n", + " y = \"algorithm\",\n", + " color = alt.Color(\"algorithm\", legend = None)\n", + ").properties(\n", + " width = 150\n", + ")\n", + "save(c, \"prefix_scan_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random Data Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:03.649500Z", + "iopub.status.busy": "2023-02-27T11:15:03.649097Z", + "iopub.status.idle": "2023-02-27T11:15:53.919996Z", + "shell.execute_reply": "2023-02-27T11:15:53.920443Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['bench_parasailors_aa_1000_10000\\t52,435,895',\n", + " 'bench_parasailors_aa_100_1000\\t608,499',\n", + " 'bench_parasailors_aa_10_100\\t20,155',\n", + " 'bench_rustbio_aa_100_1000\\t15,363,409',\n", + " 'bench_rustbio_aa_10_100\\t170,271',\n", + " 'bench_scan_aa_1000_10000\\t231,190',\n", + " 'bench_scan_aa_1000_10000_insert\\t4,913,574',\n", + " 'bench_scan_aa_1000_10000_small\\t220,464',\n", + " 'bench_scan_aa_1000_10000_trace\\t1,861,712',\n", + " 'bench_scan_aa_100_1000\\t26,275',\n", + " 'bench_scan_aa_100_1000_insert\\t50,462',\n", + " 'bench_scan_aa_100_1000_small\\t23,579',\n", + " 'bench_scan_aa_100_1000_trace\\t514,997',\n", + " 'bench_scan_aa_10_100\\t3,939',\n", + " 'bench_scan_aa_10_100_insert\\t4,071',\n", + " 'bench_scan_aa_10_100_small\\t3,486',\n", + " 'bench_scan_aa_10_100_trace\\t368,967',\n", + " 'bench_scan_nuc_1000_10000\\t223,067',\n", + " 'bench_scan_nuc_100_1000\\t24,061',\n", + " 'bench_triple_accel_1000_10000\\t8,404,724',\n", + " 'bench_triple_accel_100_1000\\t24,589']" + ] + }, + "execution_count": 1, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo bench --features simd_avx2 --quiet -- bench_ | grep 'bench:' | grep -v 'prefix_scan' | awk '{print $2\"\\t\"$5}'\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:53.925902Z", + "iopub.status.busy": "2023-02-27T11:15:53.925149Z", + "iopub.status.idle": "2023-02-27T11:15:53.927406Z", + "shell.execute_reply": "2023-02-27T11:15:53.927851Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['algorithm\\talphabet\\tk\\tlength\\tproperty\\ttime',\n", + " 'parasailors\\tprotein\\t1000\\t10000\\tdefault\\t52,435,895',\n", + " 'parasailors\\tprotein\\t100\\t1000\\tdefault\\t608,499',\n", + " 'parasailors\\tprotein\\t10\\t100\\tdefault\\t20,155',\n", + " 'rust bio\\tprotein\\t100\\t1000\\tdefault\\t15,363,409',\n", + " 'rust bio\\tprotein\\t10\\t100\\tdefault\\t170,271',\n", + " 'ours\\tprotein\\t1000\\t10000\\tdefault\\t231,190',\n", + " 'ours\\tprotein\\t1000\\t10000\\tinsert\\t4,913,574',\n", + " 'ours\\tprotein\\t1000\\t10000\\tsmall\\t220,464',\n", + " 'ours\\tprotein\\t1000\\t10000\\ttrace\\t1,861,712',\n", + " 'ours\\tprotein\\t100\\t1000\\tdefault\\t26,275',\n", + " 'ours\\tprotein\\t100\\t1000\\tinsert\\t50,462',\n", + " 'ours\\tprotein\\t100\\t1000\\tsmall\\t23,579',\n", + " 'ours\\tprotein\\t100\\t1000\\ttrace\\t514,997',\n", + " 'ours\\tprotein\\t10\\t100\\tdefault\\t3,939',\n", + " 'ours\\tprotein\\t10\\t100\\tinsert\\t4,071',\n", + " 'ours\\tprotein\\t10\\t100\\tsmall\\t3,486',\n", + " 'ours\\tprotein\\t10\\t100\\ttrace\\t368,967',\n", + " 'ours\\tnucleotide\\t1000\\t10000\\tdefault\\t223,067',\n", + " 'ours\\tnucleotide\\t100\\t1000\\tdefault\\t24,061',\n", + " 'triple accel\\tnucleotide\\t1000\\t10000\\tdefault\\t8,404,724',\n", + " 'triple accel\\tnucleotide\\t100\\t1000\\tdefault\\t24,589']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "cleaned = [\"algorithm\\talphabet\\tk\\tlength\\tproperty\\ttime\"]\n", + "names = [\"parasailors_aa\", \"rustbio_aa\", \"scan_aa\", \"scan_nuc\", \"triple_accel\"]\n", + "new_names = [\"parasailors\\tprotein\", \"rust bio\\tprotein\", \"ours\\tprotein\", \"ours\\tnucleotide\", \"triple accel\\tnucleotide\"]\n", + "\n", + "for o in output:\n", + " o = o[len(\"bench_\"):]\n", + " for n, nn in zip(names, new_names):\n", + " if o.startswith(n):\n", + " suffix = o[len(n):].replace(\"_\", \"\\t\")\n", + " o = nn + suffix\n", + " break\n", + " if len(o.split(\"\\t\")) < len(cleaned[0].split(\"\\t\")):\n", + " insert_idx = o.rindex(\"\\t\")\n", + " o = o[:insert_idx] + \"\\tdefault\" + o[insert_idx:]\n", + " cleaned.append(o)\n", + "\n", + "cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:53.930417Z", + "iopub.status.busy": "2023-02-27T11:15:53.930017Z", + "iopub.status.idle": "2023-02-27T11:15:53.940656Z", + "shell.execute_reply": "2023-02-27T11:15:53.940932Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmalphabetklengthpropertytime
0parasailorsprotein100010000default52435895
1parasailorsprotein1001000default608499
2parasailorsprotein10100default20155
3rust bioprotein1001000default15363409
4rust bioprotein10100default170271
5oursprotein100010000default231190
6oursprotein100010000insert4913574
7oursprotein100010000small220464
8oursprotein100010000trace1861712
9oursprotein1001000default26275
10oursprotein1001000insert50462
11oursprotein1001000small23579
12oursprotein1001000trace514997
13oursprotein10100default3939
14oursprotein10100insert4071
15oursprotein10100small3486
16oursprotein10100trace368967
17oursnucleotide100010000default223067
18oursnucleotide1001000default24061
19triple accelnucleotide100010000default8404724
20triple accelnucleotide1001000default24589
\n", + "
" + ], + "text/plain": [ + " algorithm alphabet k length property time\n", + "0 parasailors protein 1000 10000 default 52435895\n", + "1 parasailors protein 100 1000 default 608499\n", + "2 parasailors protein 10 100 default 20155\n", + "3 rust bio protein 100 1000 default 15363409\n", + "4 rust bio protein 10 100 default 170271\n", + "5 ours protein 1000 10000 default 231190\n", + "6 ours protein 1000 10000 insert 4913574\n", + "7 ours protein 1000 10000 small 220464\n", + "8 ours protein 1000 10000 trace 1861712\n", + "9 ours protein 100 1000 default 26275\n", + "10 ours protein 100 1000 insert 50462\n", + "11 ours protein 100 1000 small 23579\n", + "12 ours protein 100 1000 trace 514997\n", + "13 ours protein 10 100 default 3939\n", + "14 ours protein 10 100 insert 4071\n", + "15 ours protein 10 100 small 3486\n", + "16 ours protein 10 100 trace 368967\n", + "17 ours nucleotide 1000 10000 default 223067\n", + "18 ours nucleotide 100 1000 default 24061\n", + "19 triple accel nucleotide 1000 10000 default 8404724\n", + "20 triple accel nucleotide 100 1000 default 24589" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(cleaned, d = \"\\t\", t = \",\")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:53.944797Z", + "iopub.status.busy": "2023-02-27T11:15:53.944375Z", + "iopub.status.idle": "2023-02-27T11:15:53.946227Z", + "shell.execute_reply": "2023-02-27T11:15:53.946673Z" + } + }, + "outputs": [], + "source": [ + "data[\"algorithm property\"] = data[\"algorithm\"] + \" \" + data[\"property\"]\n", + "data[\"time\"] /= 1000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Random Protein Sequences Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": 
"2023-02-27T11:15:53.965124Z", + "iopub.status.busy": "2023-02-27T11:15:53.964727Z", + "iopub.status.idle": "2023-02-27T11:15:54.728585Z", + "shell.execute_reply": "2023-02-27T11:15:54.728997Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_point(opacity = 1, filled = True).encode(\n", + " x = alt.X(\"time\", axis = alt.Axis(title = \"time (us)\"), scale = alt.Scale(type = \"log\", domain = [1, 50000])),\n", + " y = alt.Y(\"algorithm property\", axis = alt.Axis(title = \"algorithm\", grid = True), sort = alt.EncodingSortField(field = \"time\")),\n", + " color = \"length:N\",\n", + " shape = \"length:N\"\n", + ").transform_filter(\n", + " datum.alphabet == \"protein\"\n", + ").properties(\n", + " width = 200,\n", + " height = 150\n", + ")\n", + "save(c, \"random_protein_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Random DNA Sequences Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:54.743436Z", + "iopub.status.busy": "2023-02-27T11:15:54.743036Z", + "iopub.status.idle": "2023-02-27T11:15:55.497291Z", + "shell.execute_reply": "2023-02-27T11:15:55.497737Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_point(opacity = 1, filled = True).encode(\n", + " x = alt.X(\"time\", axis = alt.Axis(title = \"time (us)\"), scale = alt.Scale(type = \"log\", domain = [1, 50000])),\n", + " y = alt.Y(\"algorithm property\", axis = alt.Axis(title = \"algorithm\", grid = True), sort = alt.EncodingSortField(field = \"time\")),\n", + " color = alt.Color(\"length:N\", scale = alt.Scale(domain = [100, 1000, 10000])),\n", + " shape = alt.Color(\"length:N\", scale = alt.Scale(domain = [100, 1000, 10000]))\n", + ").transform_filter(\n", + " datum.alphabet == \"nucleotide\"\n", + ").properties(\n", + " width = 200,\n", + " height = 50\n", + ")\n", + "save(c, \"random_dna_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Uniclust 30 Data Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:15:55.500767Z", + "iopub.status.busy": "2023-02-27T11:15:55.500333Z", + "iopub.status.idle": "2023-02-27T11:16:06.938386Z", + "shell.execute_reply": "2023-02-27T11:16:06.938902Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['# time (s)',\n", + " 'algorithm, dataset, size, time',\n", + " 'ours (no trace), uc30, 32-32, 0.056515289',\n", + " 'ours (no trace), uc30 0.95, 32-32, 0.060549953',\n", + " 'ours (no trace), uc30, 32-256, 0.089667067',\n", + " 'ours (no trace), uc30 0.95, 32-256, 0.07712594',\n", + " 'ours (no trace), uc30, 256-256, 0.199199121',\n", + " 'ours (no trace), uc30 0.95, 256-256, 0.22163839',\n", + " 'ours (trace), uc30, 32-256, 0.168939332',\n", + " 'ours (trace), uc30 0.95, 32-256, 0.150488372',\n", + " 'parasail, uc30, full, 0.885421801',\n", + " 'parasail, uc30 0.95, full, 1.028783795']" + ] + }, + "execution_count": 1, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo run --example uc_bench --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:16:06.942041Z", + "iopub.status.busy": "2023-02-27T11:16:06.941574Z", + "iopub.status.idle": "2023-02-27T11:16:06.950831Z", + "shell.execute_reply": "2023-02-27T11:16:06.951174Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmdatasetsizetime
0ours (no trace)uc3032-320.056515
1ours (no trace)uc30 0.9532-320.060550
2ours (no trace)uc3032-2560.089667
3ours (no trace)uc30 0.9532-2560.077126
4ours (no trace)uc30256-2560.199199
5ours (no trace)uc30 0.95256-2560.221638
6ours (trace)uc3032-2560.168939
7ours (trace)uc30 0.9532-2560.150488
8parasailuc30full0.885422
9parasailuc30 0.95full1.028784
\n", + "
" + ], + "text/plain": [ + " algorithm dataset size time\n", + "0 ours (no trace) uc30 32-32 0.056515\n", + "1 ours (no trace) uc30 0.95 32-32 0.060550\n", + "2 ours (no trace) uc30 32-256 0.089667\n", + "3 ours (no trace) uc30 0.95 32-256 0.077126\n", + "4 ours (no trace) uc30 256-256 0.199199\n", + "5 ours (no trace) uc30 0.95 256-256 0.221638\n", + "6 ours (trace) uc30 32-256 0.168939\n", + "7 ours (trace) uc30 0.95 32-256 0.150488\n", + "8 parasail uc30 full 0.885422\n", + "9 parasail uc30 0.95 full 1.028784" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uniclust30 Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:16:06.974028Z", + "iopub.status.busy": "2023-02-27T11:16:06.973627Z", + "iopub.status.idle": "2023-02-27T11:16:07.745338Z", + "shell.execute_reply": "2023-02-27T11:16:07.745757Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.FacetChart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_bar().encode(\n", + " x = alt.X(\"algorithm\", axis = None),\n", + " y = alt.Y(\"time\", axis = alt.Axis(title = \"time (s)\"), scale = alt.Scale(domain = [0.0, 1.0])),\n", + " color = \"algorithm\"\n", + ").transform_filter(\n", + " (datum.size == \"32-256\") | (datum.algorithm == \"parasail\")\n", + ")\n", + "t = c.mark_text(dy = -4, size = 8).encode(text = alt.Text(\"time\", format = \".2f\"), color = alt.value(\"black\"))\n", + "c = (c + t).properties(\n", + " width = 50,\n", + " height = 100\n", + ").facet(\n", + " column = alt.Column(\"dataset\", header = alt.Header(orient = \"bottom\")),\n", + ").configure_range(\n", + " category = {\"scheme\": \"dark2\"}\n", + ")\n", + "save(c, \"uniclust30_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uniclust30 Block Size Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:16:07.764443Z", + "iopub.status.busy": "2023-02-27T11:16:07.764039Z", + "iopub.status.idle": "2023-02-27T11:16:08.538727Z", + "shell.execute_reply": "2023-02-27T11:16:08.539147Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.FacetChart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_bar().encode(\n", + " x = alt.X(\"size\", axis = None, sort = [\"32-32\", \"32-256\", \"256-256\"]),\n", + " y = alt.Y(\"time\", axis = alt.Axis(title = \"time (s)\"), scale = alt.Scale(domain = [0.0, 1.0])),\n", + " color = alt.Color(\"size\", title = \"block size\", sort = [\"32-32\", \"32-256\", \"256-256\"])\n", + ").transform_filter(\n", + " datum.algorithm == \"ours (no trace)\"\n", + ")\n", + "t = c.mark_text(dy = -4, size = 8).encode(text = alt.Text(\"time\", format = \".2f\"), color = alt.value(\"black\"))\n", + "c = (c + t).properties(\n", + " width = 50,\n", + " height = 100\n", + ").facet(\n", + " column = alt.Column(\"dataset\", header = alt.Header(orient = \"bottom\")),\n", + ")\n", + "save(c, \"uniclust30_size_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DNA Global Alignment Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:16:08.542462Z", + "iopub.status.busy": "2023-02-27T11:16:08.542030Z", + "iopub.status.idle": "2023-02-27T11:42:45.389836Z", + "shell.execute_reply": "2023-02-27T11:42:45.390335Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['# time (s)',\n", + " 'dataset, algorithm, time',\n", + " 'illumina, ours (1%-1%), 0.19954773599994785',\n", + " 'illumina, ours (1%-10%), 0.21103766700000928',\n", + " 'illumina, edlib, 0.3688567379999959',\n", + " 'illumina, ksw_extz2_sse (1%), 0.5450279550000086',\n", + " 'illumina, ksw_extz2_sse (10%), 0.5442524660000194',\n", + " 'illumina, wfa2, 0.07222540399997783',\n", + " 'illumina, wfa2 adaptive, 0.07643134800001018',\n", + " 'illumina, parasail, 1.905899847999988',\n", + " 'nanopore 1kbp, ours (1%-1%), 
0.20423522500000035',\n", + " 'nanopore 1kbp, ours (1%-10%), 0.2545604999999995',\n", + " 'nanopore 1kbp, edlib, 0.361402042999999',\n", + " 'nanopore 1kbp, ksw_extz2_sse (1%), 0.5809077849999988',\n", + " 'nanopore 1kbp, ksw_extz2_sse (10%), 1.2340126780000036',\n", + " 'nanopore 1kbp, wfa2, 1.182402372000001',\n", + " 'nanopore 1kbp, wfa2 adaptive, 0.9811043799999979',\n", + " 'nanopore 1kbp, parasail, 3.922164622000011',\n", + " 'nanopore <10kbp, ours (1%-1%), 0.7581100459999974',\n", + " 'nanopore <10kbp, ours (1%-10%), 1.082943893999997',\n", + " 'nanopore <10kbp, edlib, 1.2777344540000006',\n", + " 'nanopore <10kbp, ksw_extz2_sse (1%), 1.6990866190000016',\n", + " 'nanopore <10kbp, ksw_extz2_sse (10%), 10.586113880999992',\n", + " 'nanopore <10kbp, wfa2, 18.751342348000023',\n", + " 'nanopore <10kbp, wfa2 adaptive, 6.086742439000021',\n", + " 'nanopore <10kbp, parasail, 33.93674228799994',\n", + " 'nanopore <50kbp, ours (1%-1%), 16.517385590999943',\n", + " 'nanopore <50kbp, ours (1%-10%), 28.693855664999983',\n", + " 'nanopore <50kbp, edlib, 21.42700936800003',\n", + " 'nanopore <50kbp, ksw_extz2_sse (1%), 32.73222982999999',\n", + " 'nanopore <50kbp, ksw_extz2_sse (10%), 298.4867550620014',\n", + " 'nanopore <50kbp, wfa2, 739.9792715350003',\n", + " 'nanopore <50kbp, wfa2 adaptive, 166.71527254399973']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo run --example nanopore_bench_global --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:42:45.393704Z", + "iopub.status.busy": "2023-02-27T11:42:45.393201Z", + "iopub.status.idle": "2023-02-27T11:42:45.403465Z", + "shell.execute_reply": "2023-02-27T11:42:45.403961Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datasetalgorithmtime
0illuminaours (1%-1%)0.199548
1illuminaours (1%-10%)0.211038
2illuminaedlib0.368857
3illuminaksw_extz2_sse (1%)0.545028
4illuminaksw_extz2_sse (10%)0.544252
5illuminawfa20.072225
6illuminawfa2 adaptive0.076431
7illuminaparasail1.905900
8nanopore 1kbpours (1%-1%)0.204235
9nanopore 1kbpours (1%-10%)0.254560
10nanopore 1kbpedlib0.361402
11nanopore 1kbpksw_extz2_sse (1%)0.580908
12nanopore 1kbpksw_extz2_sse (10%)1.234013
13nanopore 1kbpwfa21.182402
14nanopore 1kbpwfa2 adaptive0.981104
15nanopore 1kbpparasail3.922165
16nanopore <10kbpours (1%-1%)0.758110
17nanopore <10kbpours (1%-10%)1.082944
18nanopore <10kbpedlib1.277734
19nanopore <10kbpksw_extz2_sse (1%)1.699087
20nanopore <10kbpksw_extz2_sse (10%)10.586114
21nanopore <10kbpwfa218.751342
22nanopore <10kbpwfa2 adaptive6.086742
23nanopore <10kbpparasail33.936742
24nanopore <50kbpours (1%-1%)16.517386
25nanopore <50kbpours (1%-10%)28.693856
26nanopore <50kbpedlib21.427009
27nanopore <50kbpksw_extz2_sse (1%)32.732230
28nanopore <50kbpksw_extz2_sse (10%)298.486755
29nanopore <50kbpwfa2739.979272
30nanopore <50kbpwfa2 adaptive166.715273
\n", + "
" + ], + "text/plain": [ + " dataset algorithm time\n", + "0 illumina ours (1%-1%) 0.199548\n", + "1 illumina ours (1%-10%) 0.211038\n", + "2 illumina edlib 0.368857\n", + "3 illumina ksw_extz2_sse (1%) 0.545028\n", + "4 illumina ksw_extz2_sse (10%) 0.544252\n", + "5 illumina wfa2 0.072225\n", + "6 illumina wfa2 adaptive 0.076431\n", + "7 illumina parasail 1.905900\n", + "8 nanopore 1kbp ours (1%-1%) 0.204235\n", + "9 nanopore 1kbp ours (1%-10%) 0.254560\n", + "10 nanopore 1kbp edlib 0.361402\n", + "11 nanopore 1kbp ksw_extz2_sse (1%) 0.580908\n", + "12 nanopore 1kbp ksw_extz2_sse (10%) 1.234013\n", + "13 nanopore 1kbp wfa2 1.182402\n", + "14 nanopore 1kbp wfa2 adaptive 0.981104\n", + "15 nanopore 1kbp parasail 3.922165\n", + "16 nanopore <10kbp ours (1%-1%) 0.758110\n", + "17 nanopore <10kbp ours (1%-10%) 1.082944\n", + "18 nanopore <10kbp edlib 1.277734\n", + "19 nanopore <10kbp ksw_extz2_sse (1%) 1.699087\n", + "20 nanopore <10kbp ksw_extz2_sse (10%) 10.586114\n", + "21 nanopore <10kbp wfa2 18.751342\n", + "22 nanopore <10kbp wfa2 adaptive 6.086742\n", + "23 nanopore <10kbp parasail 33.936742\n", + "24 nanopore <50kbp ours (1%-1%) 16.517386\n", + "25 nanopore <50kbp ours (1%-10%) 28.693856\n", + "26 nanopore <50kbp edlib 21.427009\n", + "27 nanopore <50kbp ksw_extz2_sse (1%) 32.732230\n", + "28 nanopore <50kbp ksw_extz2_sse (10%) 298.486755\n", + "29 nanopore <50kbp wfa2 739.979272\n", + "30 nanopore <50kbp wfa2 adaptive 166.715273" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "DNA Global Alignment Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:42:45.424787Z", + "iopub.status.busy": "2023-02-27T11:42:45.424315Z", + "iopub.status.idle": "2023-02-27T11:42:46.215270Z", + 
"shell.execute_reply": "2023-02-27T11:42:46.215707Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.FacetChart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "algos = [\"ours (1%-1%)\", \"ours (1%-10%)\", \"edlib\", \"ksw_extz2_sse (1%)\", \"ksw_extz2_sse (10%)\", \"wfa2\", \"wfa2 adaptive\", \"parasail\"]\n", + "c = alt.Chart(data).mark_bar().encode(\n", + " x = alt.X(\"algorithm\", sort = algos, title = None),\n", + " y = alt.Y(\"time\", axis = alt.Axis(title = \"time (s)\")),\n", + " color = alt.Color(\"algorithm\", legend = None)\n", + ")\n", + "t = c.mark_text(dy = -4, size = 8).encode(text = alt.Text(\"time\", format = \".2f\"), color = alt.value(\"black\"))\n", + "c = (c + t).properties(\n", + " width = 140,\n", + " height = 140\n", + ").facet(\n", + " facet = alt.Facet(\"dataset\", title = None, header = alt.Header(orient = \"top\")),\n", + " columns = 2\n", + ").resolve_scale(\n", + " y = \"independent\"\n", + ").configure_axisY(\n", + " labelPadding = 18,\n", + " labelAlign = \"left\"\n", + ")\n", + "save(c, \"dna_global_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nanopore Data Benchmark Setup\n", + "\n", + "To run the benchmarks below, you need to clone the following repos, place them in the same directory where this repo (block aligner) is located, and follow their setup instructions:\n", + "* [diff-bench-paper](https://github.com/Daniel-Liu-c0deb0t/diff-bench-paper)\n", + "* [adaptivebandbench](https://github.com/Daniel-Liu-c0deb0t/adaptivebandbench)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nanopore Data Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:42:46.218784Z", + "iopub.status.busy": "2023-02-27T11:42:46.218396Z", + "iopub.status.idle": "2023-02-27T11:43:12.923815Z", + "shell.execute_reply": "2023-02-27T11:43:12.924373Z" + } + }, + "outputs": [ + { 
+ "data": { + "text/plain": [ + "['# time (s)',\n", + " 'algorithm, dataset, time',\n", + " 'ours (no trace 32-32), nanopore 25kbp, 1.033478184',\n", + " 'ours (no trace 32-32), random, 2.550499957',\n", + " 'ours (trace 32-32), nanopore 25kbp, 1.412760824',\n", + " 'ours (trace 32-32), random, 3.400294701',\n", + " 'ours (trace 32-64), nanopore 25kbp, 1.6694597629999999',\n", + " 'ours (trace 32-64), random, 3.5342896550000003']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo run --example nanopore_bench --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:43:12.927699Z", + "iopub.status.busy": "2023-02-27T11:43:12.927222Z", + "iopub.status.idle": "2023-02-27T11:43:12.934820Z", + "shell.execute_reply": "2023-02-27T11:43:12.935315Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmdatasettime
0ours (no trace 32-32)nanopore 25kbp1.033478
1ours (no trace 32-32)random2.550500
2ours (trace 32-32)nanopore 25kbp1.412761
3ours (trace 32-32)random3.400295
4ours (trace 32-64)nanopore 25kbp1.669460
5ours (trace 32-64)random3.534290
\n", + "
" + ], + "text/plain": [ + " algorithm dataset time\n", + "0 ours (no trace 32-32) nanopore 25kbp 1.033478\n", + "1 ours (no trace 32-32) random 2.550500\n", + "2 ours (trace 32-32) nanopore 25kbp 1.412761\n", + "3 ours (trace 32-32) random 3.400295\n", + "4 ours (trace 32-64) nanopore 25kbp 1.669460\n", + "5 ours (trace 32-64) random 3.534290" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:43:12.939354Z", + "iopub.status.busy": "2023-02-27T11:43:12.938867Z", + "iopub.status.idle": "2023-02-27T11:46:20.791973Z", + "shell.execute_reply": "2023-02-27T11:46:20.792429Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['algorithm\\tfill time\\ttrace time\\tconvert time\\ttotal time\\tscore\\tfail',\n", + " 'editdist\\t476567000\\t170629000\\t67690000\\t714886000\\t6880489\\t0',\n", + " 'non-diff\\t681717000\\t270098000\\t61707000\\t1013522000\\t27124786\\t52',\n", + " 'diff-raw\\t634119000\\t213761000\\t64883000\\t912763000\\t27291141\\t32',\n", + " 'libgaba\\t452611000\\t157111000\\t32888000\\t642610000\\t27121546\\t53',\n", + " 'edlib\\t28046347000\\t19390997000\\t106224000\\t47543568000\\t37\\t0',\n", + " 'seqan\\t90098655000\\t0\\t0\\t90098655000\\t0\\t0']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output2 = !cd ../../diff-bench-paper/supplementary_data/benchmark_codes && ./custom_bench.sh\n", + "\n", + "for i, o in enumerate(output2):\n", + " if o.startswith(\"cells(\"):\n", + " break\n", + "output2 = output2[i + 1:]\n", + "\n", + "output2.insert(0, \"algorithm\\tfill time\\ttrace time\\tconvert time\\ttotal time\\tscore\\tfail\")\n", + "output2" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "execution": { + 
"iopub.execute_input": "2023-02-27T11:46:20.795582Z", + "iopub.status.busy": "2023-02-27T11:46:20.795183Z", + "iopub.status.idle": "2023-02-27T11:46:20.803300Z", + "shell.execute_reply": "2023-02-27T11:46:20.803710Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmfill timetrace timeconvert timetotal timescorefail
0editdist4765670001706290006769000071488600068804890
1non-diff6817170002700980006170700010135220002712478652
2diff-raw634119000213761000648830009127630002729114132
3libgaba452611000157111000328880006426100002712154653
4edlib280463470001939099700010622400047543568000370
5seqan90098655000009009865500000
\n", + "
" + ], + "text/plain": [ + " algorithm fill time trace time convert time total time score \\\n", + "0 editdist 476567000 170629000 67690000 714886000 6880489 \n", + "1 non-diff 681717000 270098000 61707000 1013522000 27124786 \n", + "2 diff-raw 634119000 213761000 64883000 912763000 27291141 \n", + "3 libgaba 452611000 157111000 32888000 642610000 27121546 \n", + "4 edlib 28046347000 19390997000 106224000 47543568000 37 \n", + "5 seqan 90098655000 0 0 90098655000 0 \n", + "\n", + " fail \n", + "0 0 \n", + "1 52 \n", + "2 32 \n", + "3 53 \n", + "4 0 \n", + "5 0 " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data2 = csv_to_pandas(output2, d = \"\\t\")\n", + "data2" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:20.810251Z", + "iopub.status.busy": "2023-02-27T11:46:20.809714Z", + "iopub.status.idle": "2023-02-27T11:46:20.811935Z", + "shell.execute_reply": "2023-02-27T11:46:20.812354Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmtime
0editdist0.476567
1non-diff0.681717
2diff-raw0.634119
3libgaba0.452611
4edlib28.046347
5seqan90.098655
\n", + "
" + ], + "text/plain": [ + " algorithm time\n", + "0 editdist 0.476567\n", + "1 non-diff 0.681717\n", + "2 diff-raw 0.634119\n", + "3 libgaba 0.452611\n", + "4 edlib 28.046347\n", + "5 seqan 90.098655" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned2 = data2.drop(columns = [\"trace time\", \"convert time\", \"total time\", \"score\", \"fail\"])\n", + "cleaned2 = cleaned2.rename(columns = {\"fill time\": \"time\"})\n", + "cleaned2[\"time\"] /= 1e9\n", + "cleaned2" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:20.819196Z", + "iopub.status.busy": "2023-02-27T11:46:20.818521Z", + "iopub.status.idle": "2023-02-27T11:46:20.820917Z", + "shell.execute_reply": "2023-02-27T11:46:20.821388Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmtime
0ours (no trace 32-32)1.033478
1ours (trace 32-32)1.412761
2ours (trace 32-64)1.669460
3editdist0.476567
4non-diff0.681717
5diff-raw0.634119
6libgaba0.452611
7edlib28.046347
8seqan90.098655
\n", + "
" + ], + "text/plain": [ + " algorithm time\n", + "0 ours (no trace 32-32) 1.033478\n", + "1 ours (trace 32-32) 1.412761\n", + "2 ours (trace 32-64) 1.669460\n", + "3 editdist 0.476567\n", + "4 non-diff 0.681717\n", + "5 diff-raw 0.634119\n", + "6 libgaba 0.452611\n", + "7 edlib 28.046347\n", + "8 seqan 90.098655" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned = data.drop(index = [1, 3, 5])\n", + "cleaned = cleaned.drop(columns = [\"dataset\"])\n", + "cleaned = cleaned.append(cleaned2, ignore_index = True)\n", + "cleaned" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "25kbp Nanopore Reads Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:20.841149Z", + "iopub.status.busy": "2023-02-27T11:46:20.840536Z", + "iopub.status.idle": "2023-02-27T11:46:21.590379Z", + "shell.execute_reply": "2023-02-27T11:46:21.590783Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chart1 = alt.Chart(cleaned).mark_point(opacity = 1, filled = True).encode(\n", + " x = alt.X(\"time\", axis = alt.Axis(title = \"time (s)\", grid = True), scale = alt.Scale(type = \"log\")),\n", + " y = alt.Y(\"algorithm\", axis = alt.Axis(grid = True), sort = alt.EncodingSortField(field = \"time\"))\n", + ").transform_filter((datum.algorithm != \"ours (trace 32-32)\") & (datum.algorithm != \"ours (no trace 32-32)\") & (datum.algorithm != \"ours (trace 32-64)\"))\n", + "\n", + "chart2 = alt.Chart(cleaned).mark_point(color = \"red\", filled = True).encode(\n", + " x = alt.X(\"time\", axis = alt.Axis(title = \"time (s)\", grid = True), scale = alt.Scale(type = \"log\")),\n", + " y = alt.Y(\"algorithm\", axis = alt.Axis(grid = True), sort = alt.EncodingSortField(field = \"time\"))\n", + ").transform_filter((datum.algorithm == \"ours (trace 32-32)\") | (datum.algorithm == \"ours (no trace 32-32)\") | (datum.algorithm == \"ours (trace 32-64)\"))\n", + "\n", + "c = (chart1 + chart2).properties(\n", + " width = 150,\n", + " height = 150\n", + ")\n", + "save(c, \"nanopore_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sequence-to-Profile Alignment Benchmark" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:21.593980Z", + "iopub.status.busy": "2023-02-27T11:46:21.593590Z", + "iopub.status.idle": "2023-02-27T11:46:33.256268Z", + "shell.execute_reply": "2023-02-27T11:46:33.256824Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['size, time',\n", + " '32-32, 0.150358618',\n", + " '32-64, 0.174074908',\n", + " '32-128, 0.200178733',\n", + " '128-128, 0.202823522',\n", + " 'parasail, 0.579455554',\n", + " '# Done!']" + ] + }, + "execution_count": 1, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !cd .. && cargo run --example pssm_bench --release --features simd_avx2 --quiet\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:33.260059Z", + "iopub.status.busy": "2023-02-27T11:46:33.259583Z", + "iopub.status.idle": "2023-02-27T11:46:33.266791Z", + "shell.execute_reply": "2023-02-27T11:46:33.267338Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sizetime
032-320.150359
132-640.174075
232-1280.200179
3128-1280.202824
4parasail0.579456
\n", + "
" + ], + "text/plain": [ + " size time\n", + "0 32-32 0.150359\n", + "1 32-64 0.174075\n", + "2 32-128 0.200179\n", + "3 128-128 0.202824\n", + "4 parasail 0.579456" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(output)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:33.272519Z", + "iopub.status.busy": "2023-02-27T11:46:33.271983Z", + "iopub.status.idle": "2023-02-27T11:46:33.274557Z", + "shell.execute_reply": "2023-02-27T11:46:33.274054Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " block size time\n", + "0 32-32 0.15\n", + "1 32-64 0.17\n", + "2 32-128 0.2\n", + "3 128-128 0.2\n", + "4 parasail 0.58\n" + ] + } + ], + "source": [ + "table = data.copy()\n", + "table = table.rename(columns = {\"size\": \"block size\"})\n", + "table[\"time\"] = table[\"time\"].map(\"{:.2}\".format)\n", + "print(table)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "SCOP Sequence-to-Profile Alignment Benchmark (AVX2)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:33.294570Z", + "iopub.status.busy": "2023-02-27T11:46:33.294126Z", + "iopub.status.idle": "2023-02-27T11:46:34.048649Z", + "shell.execute_reply": "2023-02-27T11:46:34.049062Z" + }, + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_bar().encode(\n", + " x = alt.X(\"size\", title = \"block size\", sort = [\"32-32\", \"32-64\", \"32-128\", \"128-128\", \"parasail\"]),\n", + " y = alt.Y(\"time\", axis = alt.Axis(title = \"time (s)\")),\n", + " color = alt.Color(\"size\", sort = [\"32-32\", \"32-64\", \"32-128\", \"128-128\", \"parasail\"], legend = None)\n", + ").transform_filter(\n", + " datum.size != \"2048-2048\"\n", + ").properties(\n", + " width = 75,\n", + " height = 100\n", + ")\n", + "t = c.mark_text(dy = -4, size = 8).encode(text = alt.Text(\"time\", format = \".2f\"), color = alt.value(\"black\"))\n", + "c = c + t\n", + "save(c, \"pssm_size_bench.pdf\")\n", + "c" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## WASM SIMD\n", + "\n", + "[Wasmtime](https://wasmtime.dev/) is needed to run the webassembly code." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:46:34.052396Z", + "iopub.status.busy": "2023-02-27T11:46:34.052013Z", + "iopub.status.idle": "2023-02-27T11:47:07.493267Z", + "shell.execute_reply": "2023-02-27T11:47:07.493680Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['bench_rustbio_aa_100_1000\\t24,067,946',\n", + " 'bench_rustbio_aa_10_100\\t254,862',\n", + " 'bench_scan_aa_1000_10000\\t829,802',\n", + " 'bench_scan_aa_1000_10000_insert\\t10,785,694',\n", + " 'bench_scan_aa_1000_10000_small\\t578,308',\n", + " 'bench_scan_aa_1000_10000_trace\\t1,995,010',\n", + " 'bench_scan_aa_100_1000\\t74,559',\n", + " 'bench_scan_aa_100_1000_insert\\t156,641',\n", + " 'bench_scan_aa_100_1000_small\\t57,182',\n", + " 'bench_scan_aa_100_1000_trace\\t220,585',\n", + " 'bench_scan_aa_10_100\\t6,824',\n", + " 'bench_scan_aa_10_100_insert\\t7,029',\n", + " 'bench_scan_aa_10_100_small\\t5,345',\n", + " 'bench_scan_aa_10_100_trace\\t84,180',\n", + " 'bench_scan_nuc_1000_10000\\t556,340',\n", + " 'bench_scan_nuc_100_1000\\t54,636']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = !CARGO_TARGET_WASM32_WASI_RUNNER=\"wasmtime --wasm-features simd --\" cargo bench --target=wasm32-wasi --features simd_wasm --quiet -- --nocapture | grep 'bench:' | awk '{print $2\"\\t\"$5}'\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:07.499150Z", + "iopub.status.busy": "2023-02-27T11:47:07.498303Z", + "iopub.status.idle": "2023-02-27T11:47:07.501332Z", + "shell.execute_reply": "2023-02-27T11:47:07.501755Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['algorithm\\talphabet\\tk\\tlength\\tproperty\\ttime',\n", + " 'rust bio\\tprotein\\t100\\t1000\\tdefault\\t24,067,946',\n", + " 'rust 
bio\\tprotein\\t10\\t100\\tdefault\\t254,862',\n", + " 'ours\\tprotein\\t1000\\t10000\\tdefault\\t829,802',\n", + " 'ours\\tprotein\\t1000\\t10000\\tinsert\\t10,785,694',\n", + " 'ours\\tprotein\\t1000\\t10000\\tsmall\\t578,308',\n", + " 'ours\\tprotein\\t1000\\t10000\\ttrace\\t1,995,010',\n", + " 'ours\\tprotein\\t100\\t1000\\tdefault\\t74,559',\n", + " 'ours\\tprotein\\t100\\t1000\\tinsert\\t156,641',\n", + " 'ours\\tprotein\\t100\\t1000\\tsmall\\t57,182',\n", + " 'ours\\tprotein\\t100\\t1000\\ttrace\\t220,585',\n", + " 'ours\\tprotein\\t10\\t100\\tdefault\\t6,824',\n", + " 'ours\\tprotein\\t10\\t100\\tinsert\\t7,029',\n", + " 'ours\\tprotein\\t10\\t100\\tsmall\\t5,345',\n", + " 'ours\\tprotein\\t10\\t100\\ttrace\\t84,180',\n", + " 'ours\\tnucleotide\\t1000\\t10000\\tdefault\\t556,340',\n", + " 'ours\\tnucleotide\\t100\\t1000\\tdefault\\t54,636']" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cleaned = [\"algorithm\\talphabet\\tk\\tlength\\tproperty\\ttime\"]\n", + "names = [\"rustbio_aa\", \"scan_aa\", \"scan_nuc\"]\n", + "new_names = [\"rust bio\\tprotein\", \"ours\\tprotein\", \"ours\\tnucleotide\"]\n", + "\n", + "for o in output:\n", + " o = o[len(\"bench_\"):]\n", + " for n, nn in zip(names, new_names):\n", + " if o.startswith(n):\n", + " suffix = o[len(n):].replace(\"_\", \"\\t\")\n", + " o = nn + suffix\n", + " break\n", + " if len(o.split(\"\\t\")) < len(cleaned[0].split(\"\\t\")):\n", + " insert_idx = o.rindex(\"\\t\")\n", + " o = o[:insert_idx] + \"\\tdefault\" + o[insert_idx:]\n", + " cleaned.append(o)\n", + "\n", + "cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:07.504482Z", + "iopub.status.busy": "2023-02-27T11:47:07.504081Z", + "iopub.status.idle": "2023-02-27T11:47:07.513915Z", + "shell.execute_reply": "2023-02-27T11:47:07.514320Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ 
+ "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
algorithmalphabetklengthpropertytime
0rust bioprotein1001000default24067946
1rust bioprotein10100default254862
2oursprotein100010000default829802
3oursprotein100010000insert10785694
4oursprotein100010000small578308
5oursprotein100010000trace1995010
6oursprotein1001000default74559
7oursprotein1001000insert156641
8oursprotein1001000small57182
9oursprotein1001000trace220585
10oursprotein10100default6824
11oursprotein10100insert7029
12oursprotein10100small5345
13oursprotein10100trace84180
14oursnucleotide100010000default556340
15oursnucleotide1001000default54636
\n", + "
" + ], + "text/plain": [ + " algorithm alphabet k length property time\n", + "0 rust bio protein 100 1000 default 24067946\n", + "1 rust bio protein 10 100 default 254862\n", + "2 ours protein 1000 10000 default 829802\n", + "3 ours protein 1000 10000 insert 10785694\n", + "4 ours protein 1000 10000 small 578308\n", + "5 ours protein 1000 10000 trace 1995010\n", + "6 ours protein 100 1000 default 74559\n", + "7 ours protein 100 1000 insert 156641\n", + "8 ours protein 100 1000 small 57182\n", + "9 ours protein 100 1000 trace 220585\n", + "10 ours protein 10 100 default 6824\n", + "11 ours protein 10 100 insert 7029\n", + "12 ours protein 10 100 small 5345\n", + "13 ours protein 10 100 trace 84180\n", + "14 ours nucleotide 1000 10000 default 556340\n", + "15 ours nucleotide 100 1000 default 54636" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = csv_to_pandas(cleaned, d = \"\\t\", t = \",\")\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:07.518392Z", + "iopub.status.busy": "2023-02-27T11:47:07.517903Z", + "iopub.status.idle": "2023-02-27T11:47:07.519916Z", + "shell.execute_reply": "2023-02-27T11:47:07.520318Z" + } + }, + "outputs": [], + "source": [ + "data[\"algorithm property\"] = data[\"algorithm\"] + \" \" + data[\"property\"]\n", + "data[\"time\"] /= 1000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Random Protein Sequences Benchmark (WASM SIMD)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "execution": { + "iopub.execute_input": "2023-02-27T11:47:07.538555Z", + "iopub.status.busy": "2023-02-27T11:47:07.538143Z", + "iopub.status.idle": "2023-02-27T11:47:08.292006Z", + "shell.execute_reply": "2023-02-27T11:47:08.292504Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c = alt.Chart(data).mark_point(opacity = 1, filled = True).encode(\n", + " x = alt.X(\"time\", axis = alt.Axis(title = \"time (us)\"), scale = alt.Scale(type = \"log\")),\n", + " y = alt.Y(\"algorithm property\", axis = alt.Axis(title = \"algorithm\", grid = True), sort = alt.EncodingSortField(field = \"time\")),\n", + " color = \"length:N\",\n", + " shape = \"length:N\"\n", + ").transform_filter(\n", + " datum.alphabet == \"protein\"\n", + ").properties(\n", + " width = 200,\n", + " height = 150\n", + ")\n", + "save(c, \"random_protein_bench_wasm.pdf\")\n", + "c" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/lib/block-aligner/vis/block_img1.png b/lib/block-aligner/vis/block_img1.png new file mode 100644 index 000000000..f4882c36c Binary files /dev/null and b/lib/block-aligner/vis/block_img1.png differ diff --git a/lib/block-aligner/vis/block_img2.png b/lib/block-aligner/vis/block_img2.png new file mode 100644 index 000000000..419c792d8 Binary files /dev/null and b/lib/block-aligner/vis/block_img2.png differ diff --git a/lib/block-aligner/vis/compare_adaptive_banding.pdf b/lib/block-aligner/vis/compare_adaptive_banding.pdf new file mode 100644 index 000000000..c29949757 Binary files /dev/null and b/lib/block-aligner/vis/compare_adaptive_banding.pdf differ diff --git a/lib/block-aligner/vis/compare_diagonal.pdf b/lib/block-aligner/vis/compare_diagonal.pdf new file mode 100644 index 000000000..e983f14a2 Binary files 
/dev/null and b/lib/block-aligner/vis/compare_diagonal.pdf differ diff --git a/lib/block-aligner/vis/dna_global_bench.pdf b/lib/block-aligner/vis/dna_global_bench.pdf new file mode 100644 index 000000000..4b57ace18 Binary files /dev/null and b/lib/block-aligner/vis/dna_global_bench.pdf differ diff --git a/lib/block-aligner/vis/nanopore_10kbp_scores.pdf b/lib/block-aligner/vis/nanopore_10kbp_scores.pdf new file mode 100644 index 000000000..bbda30a30 Binary files /dev/null and b/lib/block-aligner/vis/nanopore_10kbp_scores.pdf differ diff --git a/lib/block-aligner/vis/nanopore_50kbp_scores.pdf b/lib/block-aligner/vis/nanopore_50kbp_scores.pdf new file mode 100644 index 000000000..1420bc927 Binary files /dev/null and b/lib/block-aligner/vis/nanopore_50kbp_scores.pdf differ diff --git a/lib/block-aligner/vis/nanopore_bench.pdf b/lib/block-aligner/vis/nanopore_bench.pdf new file mode 100644 index 000000000..695c8a034 Binary files /dev/null and b/lib/block-aligner/vis/nanopore_bench.pdf differ diff --git a/lib/block-aligner/vis/prefix_scan_bench.pdf b/lib/block-aligner/vis/prefix_scan_bench.pdf new file mode 100644 index 000000000..e7c7d7549 Binary files /dev/null and b/lib/block-aligner/vis/prefix_scan_bench.pdf differ diff --git a/lib/block-aligner/vis/pssm_accuracy.pdf b/lib/block-aligner/vis/pssm_accuracy.pdf new file mode 100644 index 000000000..1bd694355 Binary files /dev/null and b/lib/block-aligner/vis/pssm_accuracy.pdf differ diff --git a/lib/block-aligner/vis/pssm_scores.pdf b/lib/block-aligner/vis/pssm_scores.pdf new file mode 100644 index 000000000..276d1808f Binary files /dev/null and b/lib/block-aligner/vis/pssm_scores.pdf differ diff --git a/lib/block-aligner/vis/pssm_size_bench.pdf b/lib/block-aligner/vis/pssm_size_bench.pdf new file mode 100644 index 000000000..af6fc7f49 Binary files /dev/null and b/lib/block-aligner/vis/pssm_size_bench.pdf differ diff --git a/lib/block-aligner/vis/random_dna_accuracy.pdf b/lib/block-aligner/vis/random_dna_accuracy.pdf 
new file mode 100644 index 000000000..3bde27c68 Binary files /dev/null and b/lib/block-aligner/vis/random_dna_accuracy.pdf differ diff --git a/lib/block-aligner/vis/random_dna_bench.pdf b/lib/block-aligner/vis/random_dna_bench.pdf new file mode 100644 index 000000000..3f02db145 Binary files /dev/null and b/lib/block-aligner/vis/random_dna_bench.pdf differ diff --git a/lib/block-aligner/vis/random_protein_bench.pdf b/lib/block-aligner/vis/random_protein_bench.pdf new file mode 100644 index 000000000..6ffa7596b Binary files /dev/null and b/lib/block-aligner/vis/random_protein_bench.pdf differ diff --git a/lib/block-aligner/vis/random_protein_bench_wasm.pdf b/lib/block-aligner/vis/random_protein_bench_wasm.pdf new file mode 100644 index 000000000..439716522 Binary files /dev/null and b/lib/block-aligner/vis/random_protein_bench_wasm.pdf differ diff --git a/lib/block-aligner/vis/run_vis.sh b/lib/block-aligner/vis/run_vis.sh new file mode 100755 index 000000000..b36176d78 --- /dev/null +++ b/lib/block-aligner/vis/run_vis.sh @@ -0,0 +1,5 @@ +set -e +jupyter nbconvert --to notebook --inplace --execute block_aligner_bench_vis.ipynb --allow-errors +jupyter trust block_aligner_bench_vis.ipynb +jupyter nbconvert --to notebook --inplace --execute block_aligner_accuracy_vis.ipynb --allow-errors +jupyter trust block_aligner_accuracy_vis.ipynb diff --git a/lib/block-aligner/vis/uniclust30_accuracy.pdf b/lib/block-aligner/vis/uniclust30_accuracy.pdf new file mode 100644 index 000000000..508f410cc Binary files /dev/null and b/lib/block-aligner/vis/uniclust30_accuracy.pdf differ diff --git a/lib/block-aligner/vis/uniclust30_bench.pdf b/lib/block-aligner/vis/uniclust30_bench.pdf new file mode 100644 index 000000000..bd75f23b0 Binary files /dev/null and b/lib/block-aligner/vis/uniclust30_bench.pdf differ diff --git a/lib/block-aligner/vis/uniclust30_length_accuracy.pdf b/lib/block-aligner/vis/uniclust30_length_accuracy.pdf new file mode 100644 index 000000000..a1e4fa6dd Binary files 
/dev/null and b/lib/block-aligner/vis/uniclust30_length_accuracy.pdf differ diff --git a/lib/block-aligner/vis/uniclust30_overall_accuracy.pdf b/lib/block-aligner/vis/uniclust30_overall_accuracy.pdf new file mode 100644 index 000000000..f663d8e39 Binary files /dev/null and b/lib/block-aligner/vis/uniclust30_overall_accuracy.pdf differ diff --git a/lib/block-aligner/vis/uniclust30_percent_error.pdf b/lib/block-aligner/vis/uniclust30_percent_error.pdf new file mode 100644 index 000000000..a64adb662 Binary files /dev/null and b/lib/block-aligner/vis/uniclust30_percent_error.pdf differ diff --git a/lib/block-aligner/vis/uniclust30_scores.pdf b/lib/block-aligner/vis/uniclust30_scores.pdf new file mode 100644 index 000000000..051b1c47a Binary files /dev/null and b/lib/block-aligner/vis/uniclust30_scores.pdf differ diff --git a/lib/block-aligner/vis/uniclust30_seq_id_accuracy.pdf b/lib/block-aligner/vis/uniclust30_seq_id_accuracy.pdf new file mode 100644 index 000000000..384649699 Binary files /dev/null and b/lib/block-aligner/vis/uniclust30_seq_id_accuracy.pdf differ diff --git a/lib/block-aligner/vis/uniclust30_size_bench.pdf b/lib/block-aligner/vis/uniclust30_size_bench.pdf new file mode 100644 index 000000000..3a5b78abf Binary files /dev/null and b/lib/block-aligner/vis/uniclust30_size_bench.pdf differ diff --git a/lib/corrosion/.github/FUNDING.yml b/lib/corrosion/.github/FUNDING.yml new file mode 100644 index 000000000..a36eb8cfc --- /dev/null +++ b/lib/corrosion/.github/FUNDING.yml @@ -0,0 +1 @@ +github: ["jschwe"] diff --git a/lib/corrosion/.github/ISSUE_TEMPLATE/bug_report.yml b/lib/corrosion/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 000000000..22bda5e7d --- /dev/null +++ b/lib/corrosion/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,81 @@ +name: Bug Report +description: File a bug report +title: "[Bug]: " +labels: ["bug", "triage"] +assignees: + - jschwe +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to 
fill out this bug report! + - type: textarea + attributes: + label: Current Behavior + description: A concise description of what you're experiencing. + validations: + required: false + - type: textarea + attributes: + label: Expected Behavior + description: A concise description of what you expected to happen. + validations: + required: false + - type: textarea + attributes: + label: Steps To Reproduce + description: Steps to reproduce the behavior. + placeholder: | + 1. In this environment... + 2. With this config... + 3. Run '...' + 4. See error... + validations: + required: false + - type: textarea + attributes: + label: Environment + description: | + examples: + - **OS**: Ubuntu 22.04 + - **CMake**: 3.22.0 + - **CMake Generator**: Ninja 1.11 + value: | + - OS: + - CMake: + - CMake Generator: + render: markdown + validations: + required: false + - type: textarea + attributes: + label: CMake configure log with Debug log-level + description: | + Output when configuring with `cmake -S -B --log-level=DEBUG `: +
CMake configure log +

+ + ``` + + ``` + +

+
+ validations: + required: false + - type: textarea + attributes: + label: CMake Build step log + description: | + Output when building with `cmake --build --verbose`: +
CMake build log +

+ + ``` + + ``` + +

+
+ validations: + required: false diff --git a/lib/corrosion/.github/actions/setup_test/action.yaml b/lib/corrosion/.github/actions/setup_test/action.yaml new file mode 100644 index 000000000..bb36278cc --- /dev/null +++ b/lib/corrosion/.github/actions/setup_test/action.yaml @@ -0,0 +1,184 @@ +name: Setup Corrosion Tests +description: "Internal helper action to setup the Environment for Corrosions tests" +inputs: + target_arch: + required: true + description: CMake target architecture + abi: + required: false + description: msvc, gnu or darwin + default: default + cmake: + required: true + description: Cmake version + rust: + required: true + description: Rust version + generator: + required: true + description: CMake Generator (e.g Ninja) + build_dir: + required: true + description: Path of the CMake build directory + configure_params: + required: false + description: Additional parameters to pass to CMake configure step + install_path: + required: false + description: CMake install prefix + default: "" + compiler: + required: false + description: Compiler to use. Valid options are clang, gcc, cl, default, or an empty string. + default: "default" + +runs: + using: composite + steps: + - name: Cache Cargo registry + id: cache-registry + uses: actions/cache@v4 + with: + path: ~/.cargo/registry + key: ${{ runner.os }}-cargo-registry + - name: Determine Rust OS + id: determine_rust_os + shell: bash + run: | + if [ "${{ runner.os }}" == "Windows" ]; then + echo "os=pc-windows" >> $GITHUB_OUTPUT + echo "host_abi=msvc" >> $GITHUB_OUTPUT + elif [ "${{ runner.os }}" == "Linux" ]; then + echo "os=unknown-linux" >> $GITHUB_OUTPUT + echo "host_abi=gnu" >> $GITHUB_OUTPUT + elif [ "${{ runner.os }}" == "macOS" ]; then + echo "os=apple" >> $GITHUB_OUTPUT + echo "host_abi=darwin" >> $GITHUB_OUTPUT + fi + - name: Determine Rust ABI + id: determine_abi + shell: bash + run: | + if [[ ! 
( -z "${{ inputs.abi }}" || "${{ inputs.abi }}" == "default" ) ]]; then + echo "abi=${{ inputs.abi }}" >> $GITHUB_OUTPUT + elif [ "${{ runner.os }}" == "Linux" ]; then + echo "abi=gnu" >> $GITHUB_OUTPUT + elif [ "${{ runner.os }}" == "macOS" ]; then + echo "abi=darwin" >> $GITHUB_OUTPUT + else + echo "abi=msvc" >> $GITHUB_OUTPUT + fi + - name: Determine if Cross-compiling + id: determine_cross_compile + shell: bash + run: | + # For now it is safe to assume that all github runners are x86_64 + if [[ "${{ inputs.target_arch }}" != "x86_64" ]]; then + echo "Cross-Compiling to ${{ inputs.target_arch }}" + if [[ "${{ runner.os }}" == "macOS" ]]; then + echo "system_name=-DCMAKE_SYSTEM_NAME=Darwin" >> $GITHUB_OUTPUT + else + # Either `Linux` or `Windows` + echo "system_name=-DCMAKE_SYSTEM_NAME=${{ runner.os }}" >> $GITHUB_OUTPUT + fi + fi + - name: Pick Compiler + id: pick_compiler + shell: bash + run: > + ./.github/scripts/determine_compiler.sh + "${{ inputs.compiler }}" + "${{ runner.os }}" + "${{ steps.determine_abi.outputs.abi }}" + "${{steps.determine_cross_compile.outputs.system_name}}" + "${{inputs.target_arch}}" + - name: Pick Generator + id: pick_generator + shell: bash + run: | + if [ "${{ inputs.generator }}" == "ninja" ]; then + echo "generator=-GNinja" >> $GITHUB_OUTPUT + elif [ "${{ inputs.generator }}" == "ninja-multiconfig" ];then + echo "generator=-GNinja Multi-Config" >> $GITHUB_OUTPUT + fi + - name: Arch Flags + id: arch_flags + shell: bash + run: | # Cross-compiling is currently only supported on Windows+MSVC with the default generator + if [ "${{ runner.os }}" == "Windows" ]; then + if [ "${{inputs.generator}}" == "default" ]; then + if [ "${{ inputs.target_arch }}" == "x86_64" ]; then + echo "msvc=amd64" >> $GITHUB_OUTPUT + echo "cmake=-Ax64" >> $GITHUB_OUTPUT + elif [ "${{ inputs.target_arch }}" == "i686" ]; then + echo "msvc=amd64_x86" >> $GITHUB_OUTPUT + echo "cmake=-AWin32" >> $GITHUB_OUTPUT + elif [ "${{ inputs.target_arch }}" == "aarch64" ]; 
then + echo "msvc=amd64_arm64" >> $GITHUB_OUTPUT + echo "cmake=-AARM64" >> $GITHUB_OUTPUT + fi + elif [ "${{inputs.generator}}" == "ninja" ]; then + # We don't do cross-compiling builds with Ninja + # Todo: Why not (cross-compile)? + echo "msvc=amd64" >> $GITHUB_OUTPUT + fi + elif [ "${{ runner.os }}" == "Linux" ]; then + echo "cmake=-DRust_CARGO_TARGET=${{inputs.target_arch}}-${{steps.determine_rust_os.outputs.os}}-${{steps.determine_abi.outputs.abi}}" >> $GITHUB_OUTPUT + fi + - name: Determine Install Prefix + id: install_prefix + shell: bash + run: | + if [ ! -z "${{ inputs.install_path }}" ]; then + echo "install_path=-DCMAKE_INSTALL_PREFIX=${{ inputs.install_path }}" >> $GITHUB_OUTPUT + fi + - name: Setup MSVC Development Environment + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: ${{ steps.arch_flags.outputs.msvc }} + if: ${{ 'msvc' == steps.determine_abi.outputs.abi }} + - name: Install CMake + uses: lukka/get-cmake@519de0c7b4812477d74976b2523a9417f552d126 + with: + cmakeVersion: "${{ inputs.cmake }}" + ninjaVersion: "~1.10.0" + - name: Install Rust + id: install_rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{inputs.rust}} + targets: ${{inputs.target_arch}}-${{steps.determine_rust_os.outputs.os}}-${{steps.determine_abi.outputs.abi}} + - name: Install Cross Compiler + shell: bash + run: | + if [[ "${{ inputs.target_arch }}" != 'x86_64' ]]; then + echo "::group::apt-install" + sudo apt-get update + sudo apt-get install -y g++-$(echo "${{inputs.target_arch}}" | tr _ -)-linux-gnu + echo "::endgroup::" + fi + if: ${{ 'Linux' == runner.os }} + - name: Determine Configure Shell + id: configure_shell + shell: bash + run: | + if [ "${{ runner.os }}" == "Windows" ]; then + echo "shell=pwsh" >> $GITHUB_OUTPUT + else + echo "shell=bash" >> $GITHUB_OUTPUT + fi + - name: Configure + shell: ${{steps.configure_shell.outputs.shell}} + run: > + cmake + "-S." 
+ "-B${{inputs.build_dir}}" + "-DCORROSION_VERBOSE_OUTPUT=ON" + "${{steps.arch_flags.outputs.cmake}}" + "${{steps.pick_compiler.outputs.c_compiler}}" + "${{steps.pick_compiler.outputs.cxx_compiler}}" + "${{steps.determine_cross_compile.outputs.system_name}}" + "${{steps.pick_generator.outputs.generator}}" + ${{steps.install_prefix.outputs.install_path}} + "-DRust_TOOLCHAIN=${{steps.install_rust.outputs.name}}" + ${{ inputs.configure_params }} diff --git a/lib/corrosion/.github/scripts/determine_compiler.sh b/lib/corrosion/.github/scripts/determine_compiler.sh new file mode 100755 index 000000000..302d25c64 --- /dev/null +++ b/lib/corrosion/.github/scripts/determine_compiler.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +compiler_kind="$1" +runner_os="$2" +target_abi="$3" +target_system_name="$4" +target_arch="$5" + +set -e + +if [[ -z "$GITHUB_OUTPUT" ]]; then + echo "Error: This script should only be run in github actions environment" + exit 1 +fi +if [[ -z "${runner_os}" || -z "${target_abi}" || -z "${target_arch}" ]]; then + echo "Error: Not all required parameters where set" + exit 1 +fi +if [[ -z "${compiler_kind}" || "${compiler_kind}" == "default" ]]; then + echo "compiler option was not set. Determining default compiler." 
+ if [[ "${runner_os}" == "Windows" ]]; then + if [[ "${target_abi}" == "msvc" ]]; then + compiler_kind=msvc + elif [[ "${target_abi}" == "gnu" ]]; then + compiler_kind=gcc + else + echo "Unknown abi for Windows: ${target_abi}" + exit 1 + fi + elif [[ "${runner_os}" == "macOS" ]]; then + compiler_kind="clang" + elif [[ "${runner_os}" == "Linux" ]]; then + compiler_kind="gcc" + else + echo "Unknown Runner OS: ${runner_os}" + exit 1 + fi +fi +echo "Compiler Family: '${compiler_kind}'" + +if [[ "${compiler_kind}" == "clang" ]]; then + c_compiler="clang" + cxx_compiler="clang++" +elif [[ "${compiler_kind}" == "msvc" ]]; then + c_compiler="cl" + cxx_compiler="cl" +elif [[ "${compiler_kind}" == "gcc" ]]; then + if [[ -z "${target_system_name}" ]]; then + c_compiler="gcc" + cxx_compiler="g++" + else + c_compiler="${target_arch}-linux-gnu-gcc" + cxx_compiler="${target_arch}-linux-gnu-g++" + fi +fi +echo "Chose C compiler: '${c_compiler}'" +echo "Chose C++ compiler: '${cxx_compiler}'" +echo "c_compiler=-DCMAKE_C_COMPILER=${c_compiler}" >> $GITHUB_OUTPUT +echo "cxx_compiler=-DCMAKE_CXX_COMPILER=${cxx_compiler}" >> $GITHUB_OUTPUT diff --git a/lib/corrosion/.github/workflows/gh-pages.yaml b/lib/corrosion/.github/workflows/gh-pages.yaml new file mode 100644 index 000000000..e8a4d3170 --- /dev/null +++ b/lib/corrosion/.github/workflows/gh-pages.yaml @@ -0,0 +1,62 @@ +name: Deploy GH pages +on: + push: + branches: + - master + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow one concurrent deployment +concurrency: + group: "pages" + cancel-in-progress: true + +jobs: + deploy: + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v3 + - name: Install 
mdbook + env: + MDBOOK_VERSION: 'v0.4.27' + run: | + mkdir mdbook + curl -sSL https://github.com/rust-lang/mdBook/releases/download/${MDBOOK_VERSION}/mdbook-${MDBOOK_VERSION}-x86_64-unknown-linux-gnu.tar.gz | tar -xz --directory=./mdbook + echo `pwd`/mdbook >> $GITHUB_PATH + - name: Build mdbook + run: | + cd doc + mdbook build + # Override mdbooks default highlight.js with a custom version containing CMake support. + - uses: actions/checkout@v4 + with: + repository: 'highlightjs/highlight.js' + # mdbook currently (as of v0.4.27) does not support v11 yet. + ref: '10.7.3' + path: highlightjs + - name: Build custom highlight.js + run: | + npm install + node tools/build.js :common cmake yaml + working-directory: highlightjs + - name: Override highlightjs + run: | + cp highlightjs/build/highlight.min.js doc/book/highlight.js + - name: Upload artifact + uses: actions/upload-pages-artifact@v1 + with: + path: 'doc/book' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v1 diff --git a/lib/corrosion/.github/workflows/test.yaml b/lib/corrosion/.github/workflows/test.yaml new file mode 100644 index 000000000..e93987ffe --- /dev/null +++ b/lib/corrosion/.github/workflows/test.yaml @@ -0,0 +1,356 @@ +name: Tests +on: + push: + branches: + - master + pull_request: + branches: + - 'master' + - 'stable/**' +jobs: + + test_legacy_linux: + name: Test Corrosion (CMake 3.15) + uses: ./.github/workflows/test_legacy.yaml + with : + os: ubuntu-20.04 + rust: 1.46.0 + test_legacy_mac: + name: Test Corrosion (CMake 3.15) + uses: ./.github/workflows/test_legacy.yaml + with: + os: macos-12 + rust: 1.54.0 + test_legacy_windows: + name: Test Corrosion (CMake 3.15) + uses: ./.github/workflows/test_legacy.yaml + with: + os: windows-2019 + rust: 1.46.0 + + test_legacy_stable: + name: Legacy CMake + stable Rust + uses: ./.github/workflows/test_legacy.yaml + strategy: + fail-fast: false + matrix: + os: + - windows-2019 # windows-latest is currently not having a 
supported MSVC compiler + - ubuntu-20.04 + - macos-12 + with: + os: ${{ matrix.os }} + rust: stable + + test_legacy_nightly: + name: Legacy CMake + nightly Rust + uses: ./.github/workflows/test_legacy.yaml + with: + os: ubuntu-20.04 + rust: nightly + + test_legacy_new_lockfile_msrv: + name: Test MSRV of the new lockfile + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - name: Install Rust + id: install_rust + uses: dtolnay/rust-toolchain@1.56 + - name: Test Generator build with MSRV + run: cargo build + working-directory: generator + + test: + name: Test Corrosion + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.rust == 'nightly' }} + strategy: + fail-fast: false + matrix: + os: + - windows-2019 # windows-latest is currently not having a supported MSVC compiler + - ubuntu-latest + - macos-12 + arch: + - x86_64 + - i686 + - aarch64 + - powerpc64le + abi: + - gnu + - darwin + - msvc + cmake: + - 3.19.0 + rust: + # Our MSRV is already tested with the legacy generator, so just test the current stable rust here. 
+ - stable + generator: + - default # This is just whatever the platform default is + - ninja + compiler: [default] + include: + - rust: nightly + cmake: 3.19.0 + generator: ninja + arch: x86_64 + abi: msvc + os: windows-2019 + - rust: nightly + cmake: 3.19.0 + generator: ninja + arch: x86_64 + abi: gnu + os: ubuntu-latest + - rust: nightly + cmake: 3.19.0 + generator: ninja + arch: x86_64 + abi: darwin + os: macos-12 + - rust: 1.54 + cmake: 3.19.0 + generator: ninja + arch: x86_64 + abi: msvc + os: windows-2019 + compiler: clang + - os: ubuntu-latest + arch: x86_64 + abi: gnu + cmake: 3.20.0 + rust: 1.54 + generator: ninja-multiconfig + + exclude: + + # We have a separate test Matrix for the Visual Studio Generator + - os: windows-2019 + generator: default # Default generator is Visual Studio + + # ARCH + - os: windows-2019 + arch: i686 + abi: gnu + - os: windows-2019 + arch: aarch64 + abi: gnu + - os: windows-2019 + arch: i686 + generator: ninja + - os: windows-2019 + arch: aarch64 + generator: ninja + - os: windows-2019 + arch: powerpc64le + - os: macos-12 + arch: i686 + - os: macos-12 + arch: aarch64 + - os: macos-12 + arch: powerpc64le + + # ABI + - os: ubuntu-latest + abi: msvc + - os: ubuntu-latest + abi: darwin + - os: windows-2019 + abi: darwin + - os: macos-12 + abi: msvc + - os: macos-12 + abi: gnu + + steps: + - uses: actions/checkout@v4 + - name: Setup Environment and Configure CMake + uses: "./.github/actions/setup_test" + with: + target_arch: ${{matrix.arch}} + abi: ${{matrix.abi}} + cmake: ${{matrix.cmake}} + rust: ${{matrix.rust}} + generator: ${{matrix.generator}} + build_dir: build + compiler: ${{matrix.compiler}} + - name: Run Tests + id: run_tests + working-directory: build + run: ctest --output-on-failure --build-config Debug -j 3 + + test_msvc: + name: Test MSVC Generator + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - windows-2019 + - windows-2022 + arch: + - x86_64 + - i686 + - aarch64 + include: + - rust: 
1.54.0 + # Override rust version for x86_64 + - arch: x86_64 + rust: 1.46.0 + - os: windows-2019 + cmake: 3.20.6 # Multi-config Generators require at least CMake 3.20 + - os: windows-2022 + cmake: 3.21.5 # VS on windows-2022 requires at least CMake 3.21 + + steps: + - uses: actions/checkout@v4 + # The initial configure for MSVC is quite slow, so we cache the build directory + # (including the build directories of the tests) since reconfiguring is + # significantly faster. + - name: Cache MSVC build directory + id: cache-msvc-builddir + uses: actions/cache@v4 + with: + path: build + key: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.rust }}-msvc-build + - name: Setup Environment and Configure CMake + uses: "./.github/actions/setup_test" + with: + target_arch: ${{matrix.arch}} + abi: msvc + cmake: ${{matrix.cmake}} + rust: ${{matrix.rust}} + generator: default + build_dir: build + configure_params: "-DCORROSION_TESTS_KEEP_BUILDDIRS=ON" + - name: Run Tests + working-directory: build + run: ctest --output-on-failure --build-config Debug -j 3 + + test_cxxbridge: + name: Test cxxbridge integration + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - windows-2019 + - ubuntu-latest + - macos-12 + include: + - abi: default + # - os: windows-2019 + # abi: gnu + steps: + - uses: actions/checkout@v4 + - uses: actions/cache@v4 + id: cache_cxxbridge + with: + path: "~/.cargo/bin/cxxbridge*" + key: ${{ runner.os }}-cxxbridge_1_0_86 + - name: Install cxxbridge + if: steps.cache_cxxbridge.outputs.cache-hit != 'true' + run: cargo install cxxbridge-cmd@1.0.86 + - name: Install lld + run: sudo apt update && sudo apt install -y lld + if: ${{ 'Linux' == runner.os }} + - name: Setup Environment and Configure CMake + uses: "./.github/actions/setup_test" + with: + target_arch: x86_64 + cmake: 3.15.7 + rust: stable minus 2 releases + abi: ${{ matrix.abi }} + generator: ninja + build_dir: build + configure_params: -DCORROSION_TESTS_CXXBRIDGE=ON + - name: Run Tests 
+ working-directory: build + run: ctest --output-on-failure --build-config Debug -j 3 -R "^cxxbridge" + install: + name: Test Corrosion as a Library + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: + - windows-2019 + - ubuntu-latest + - macos-12 + include: + - rust: 1.46.0 + - os: macos-12 + rust: 1.54.0 # On MacOS-12 linking fails before Rust 1.54 + steps: + - uses: actions/checkout@v4 + - name: Setup MSVC Development Environment + uses: ilammy/msvc-dev-cmd@v1 + if: runner.os == 'Windows' + - name: Install CMake + uses: lukka/get-cmake@519de0c7b4812477d74976b2523a9417f552d126 + with: + cmakeVersion: "~3.18.0" + ninjaVersion: "~1.10.0" + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{matrix.rust}} + - name: CMake Version + run: cmake --version + - name: Rust Version + run: rustc --version + - name: Test Corrosion as subdirectory + run: > + cmake + -S. + -Bbuild + -GNinja + -DCORROSION_VERBOSE_OUTPUT=ON + -DCORROSION_TESTS_INSTALL_CORROSION=OFF + && + cd build + && + ctest --output-on-failure -C Debug -j 3 + - name: Test Corrosion as installed module + run: > + cmake -E remove_directory build + && + cmake + -S. + -Bbuild + -GNinja + -DCORROSION_VERBOSE_OUTPUT=ON + -DCMAKE_BUILD_TYPE=Release + -DCORROSION_TESTS_INSTALL_CORROSION=ON + && + cd build + && + ctest --output-on-failure -C Release -j 3 + # We need some "accumulation" job here because bors fails (timeouts) to + # listen on matrix builds. 
+ # Hence, we have some kind of dummy here that bors can listen on + ci-success: + name: bors-ci-status + if: ${{ always() }} + needs: + - test_legacy_linux + - test_legacy_mac + - test_legacy_windows + - test_legacy_stable + - test_legacy_new_lockfile_msrv + - test + - test_msvc + - test_cxxbridge + - install + runs-on: ubuntu-latest + # Step copied from: https://github.com/cross-rs/cross/blob/80c9f9109a719ffb0f694060ddc6e371d5b3a540/.github/workflows/ci.yml#L361 + steps: + - name: Result + run: | + jq -C <<< "${needs}" + # Check if all needs were successful or skipped. + "$(jq -r 'all(.result as $result | (["success", "skipped"] | contains([$result])))' <<< "${needs}")" + env: + needs: ${{ toJson(needs) }} + diff --git a/lib/corrosion/.github/workflows/test_legacy.yaml b/lib/corrosion/.github/workflows/test_legacy.yaml new file mode 100644 index 000000000..20c7bf582 --- /dev/null +++ b/lib/corrosion/.github/workflows/test_legacy.yaml @@ -0,0 +1,80 @@ +name: Test Corrosion using prebuilt legacy generator + +on: + workflow_call: + inputs: + os: + required: true + type: string + rust: + required: false + type: string + default: 1.46.0 + target_arch: + required: false + type: string + default: x86_64 + generator: + required: false + type: string + default : ninja + +jobs: + test_legacy: + name: Test (${{inputs.os}}) + runs-on: ${{ inputs.os }} + strategy: + fail-fast: false + steps: + - uses: actions/checkout@v4 + - name: Cache Legacy Generator + id: cache_generator + uses: actions/cache@v4 + with: + path: ${{github.workspace}}/corrosion-prebuilt-generator + key: ${{ runner.os }}-${{ inputs.rust }}-generator-${{ hashFiles('generator/src/**', 'generator/Cargo.toml', 'generator/Cargo.lock') }} + - name: Setup Environment and Configure CMake + uses: "./.github/actions/setup_test" + with: + target_arch: x86_64 + cmake: 3.15.7 + rust: ${{inputs.rust}} + generator: ninja + build_dir: build + install_path: ${{github.workspace}}/corrosion-prebuilt-generator + 
configure_params: "-DCMAKE_BUILD_TYPE=Release" + if: steps.cache_generator.outputs.cache-hit != 'true' + - name: Build corrosion + run: cmake --build build --config Release + if: steps.cache_generator.outputs.cache-hit != 'true' + - name: Install corrosion + run: cmake --install build --config Release + if: steps.cache_generator.outputs.cache-hit != 'true' + - name: Determine Corrosion Generator path + id: cor_gen + shell: bash + run: | + export base_generator_bin="${{github.workspace}}/corrosion-prebuilt-generator/libexec/corrosion-generator" + if [ "${{ runner.os }}" == "Windows" ]; then + echo "generator_bin=${base_generator_bin}.exe" >> $GITHUB_OUTPUT + else + echo "generator_bin=${base_generator_bin}" >> $GITHUB_OUTPUT + chmod +x "${base_generator_bin}" + fi + - name: Setup Environment and Configure CMake + uses: "./.github/actions/setup_test" + with: + target_arch: ${{inputs.target_arch}} + cmake: 3.15.7 + rust: ${{inputs.rust}} + generator: ${{inputs.generator}} + build_dir: build + configure_params: "-DCORROSION_GENERATOR_EXECUTABLE=${{steps.cor_gen.outputs.generator_bin}}" + - name: Run Tests + id: run_tests + working-directory: build + run: ctest --build-config Debug -j 3 + - name: Rerun failed tests verbose + working-directory: build + run: ctest --rerun-failed --verbose --build-config Debug + if: ${{ failure() && steps.run_tests.conclusion == 'failure' }} \ No newline at end of file diff --git a/lib/corrosion/.gitignore b/lib/corrosion/.gitignore new file mode 100644 index 000000000..c7125b0a7 --- /dev/null +++ b/lib/corrosion/.gitignore @@ -0,0 +1,9 @@ + +**/target/ +**/*.rs.bk +build*/ +install*/ +.vscode +.idea +cmake-build-* +test/test_header.cmake diff --git a/lib/corrosion/CMakeLists.txt b/lib/corrosion/CMakeLists.txt new file mode 100644 index 000000000..8b3d7d3dd --- /dev/null +++ b/lib/corrosion/CMakeLists.txt @@ -0,0 +1,135 @@ +cmake_minimum_required(VERSION 3.15) +project(Corrosion + # Official releases will be major.minor.patch. 
When the `tweak` field is + # set it indicates that we are on a commit that is not an officially + # tagged release. Users don't need to care about this, it is mainly to + # clearly see in configure logs which version was used, without needing to + # rely on `git`, since Corrosion may be installed or otherwise packaged. + VERSION 0.5.0 + LANGUAGES NONE + HOMEPAGE_URL "https://corrosion-rs.github.io/corrosion/" +) + +# Default behavior: +# - If the project is being used as a subdirectory, then don't build tests and +# don't enable any languages. +# - If this is a top level project, then build tests and enable the C compiler +if (NOT CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) + set(_CORROSION_TOP_LEVEL OFF) +else() + set(_CORROSION_TOP_LEVEL ON) +endif() + +# ==== Corrosion Configuration ==== + +option( + CORROSION_DEV_MODE + "Enables some additional features if you're developing Corrosion" + ${_CORROSION_TOP_LEVEL} +) + +option( + CORROSION_BUILD_TESTS + "Build Corrosion test project" + ${_CORROSION_TOP_LEVEL} +) + +set( + CORROSION_GENERATOR_EXECUTABLE CACHE STRING + "Use prebuilt, non-bootstrapped corrosion-generator") +mark_as_advanced(CORROSION_GENERATOR_EXECUTABLE) + +if (CORROSION_GENERATOR_EXECUTABLE) + add_executable(Corrosion::Generator IMPORTED GLOBAL) + set_property( + TARGET Corrosion::Generator + PROPERTY IMPORTED_LOCATION ${CORROSION_GENERATOR_EXECUTABLE}) + set(CORROSION_INSTALL_EXECUTABLE_DEFAULT OFF) +elseif(CORROSION_NATIVE_TOOLING OR CMAKE_VERSION VERSION_LESS 3.19.0) + set(CORROSION_INSTALL_EXECUTABLE_DEFAULT "ON") +else() + set(CORROSION_INSTALL_EXECUTABLE_DEFAULT OFF) +endif() + +option( + CORROSION_INSTALL_EXECUTABLE + "Controls whether corrosion-generator is installed with the package" + ${CORROSION_INSTALL_EXECUTABLE_DEFAULT} +) +mark_as_advanced(CORROSION_INSTALL_EXECUTABLE) + +if (_CORROSION_TOP_LEVEL) + # We need to enable a language for Corrosion's tests to work. 
+ # For projects using corrosion this is not needed + enable_language(C) +endif() + +# This little bit self-hosts the Corrosion toolchain to build the generator +# tool. +# +# It is strongly encouraged to install Corrosion separately and use +# `find_package(Corrosion REQUIRED)` instead if that works with your workflow. +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +include(Corrosion) + +# Testing +if (CORROSION_BUILD_TESTS) + include(CTest) + add_subdirectory(test) +endif() + +# If Corrosion is a subdirectory, do not enable its install code +if (NOT _CORROSION_TOP_LEVEL) + return() +endif() + +# Installation + +include(GNUInstallDirs) + +if(CORROSION_INSTALL_EXECUTABLE) + get_property( + _CORROSION_GENERATOR_EXE + TARGET Corrosion::Generator PROPERTY IMPORTED_LOCATION + ) + install(PROGRAMS "${_CORROSION_GENERATOR_EXE}" DESTINATION "${CMAKE_INSTALL_FULL_LIBEXECDIR}") +else() + message(DEBUG "Not installing corrosion-generator since " + "`CORROSION_INSTALL_EXECUTABLE` is set to ${CORROSION_INSTALL_EXECUTABLE}" + ) +endif() + +# Generate the Config file +include(CMakePackageConfigHelpers) + +configure_package_config_file( + cmake/CorrosionConfig.cmake.in CorrosionConfig.cmake + INSTALL_DESTINATION + "${CMAKE_INSTALL_FULL_LIBDIR}/cmake/Corrosion" +) + +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/CorrosionConfigVersion.cmake" + VERSION ${PROJECT_VERSION} + COMPATIBILITY + SameMinorVersion # TODO: Should be SameMajorVersion when 1.0 is released + ARCH_INDEPENDENT +) + +install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/CorrosionConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/CorrosionConfigVersion.cmake" + DESTINATION + "${CMAKE_INSTALL_FULL_LIBDIR}/cmake/Corrosion" +) + +# These CMake scripts are needed both for the install and as a subdirectory +install( + FILES + cmake/Corrosion.cmake + cmake/CorrosionGenerator.cmake + cmake/FindRust.cmake + DESTINATION + "${CMAKE_INSTALL_FULL_DATADIR}/cmake" +) diff --git a/lib/corrosion/LICENSE 
b/lib/corrosion/LICENSE new file mode 100644 index 000000000..5e30d776d --- /dev/null +++ b/lib/corrosion/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Andrew Gaspar + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lib/corrosion/README.md b/lib/corrosion/README.md new file mode 100644 index 000000000..4462d9586 --- /dev/null +++ b/lib/corrosion/README.md @@ -0,0 +1,40 @@ +# Corrosion +[![Build Status](https://github.com/corrosion-rs/corrosion/actions/workflows/test.yaml/badge.svg)](https://github.com/corrosion-rs/corrosion/actions?query=branch%3Amaster) +[![Documentation](https://img.shields.io/badge/docs-latest-blue.svg)](https://corrosion-rs.github.io/corrosion/) +![License](https://img.shields.io/badge/license-MIT-blue) + +Corrosion, formerly known as cmake-cargo, is a tool for integrating Rust into an existing CMake +project. 
Corrosion can automatically import executables, static libraries, and dynamic libraries +from a workspace or package manifest (`Cargo.toml` file). + +## Features +- Automatic Import of Executable, Static, and Shared Libraries from Rust Crate +- Easy Installation of Rust Executables +- Trivially Link Rust Executables to C/C++ Libraries in Tree +- Multi-Config Generator Support +- Simple Cross-Compilation + +## Sample Usage with FetchContent + +Using the CMake `FetchContent` module allows you to easily integrate corrosion into your build. +Other methods including installing corrosion or adding it as a subdirectory are covered in the +[setup chapter](https://corrosion-rs.github.io/corrosion/setup_corrosion.html) of the +corrosion [documentation](https://corrosion-rs.github.io/corrosion/). + +```cmake +include(FetchContent) + +FetchContent_Declare( + Corrosion + GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git + GIT_TAG v0.5 # Optionally specify a commit hash, version tag or branch here +) +FetchContent_MakeAvailable(Corrosion) + +# Import targets defined in a package or workspace manifest `Cargo.toml` file +corrosion_import_crate(MANIFEST_PATH rust-lib/Cargo.toml) + +add_executable(your_cpp_bin main.cpp) +target_link_libraries(your_cpp_bin PUBLIC rust-lib) +``` + diff --git a/lib/corrosion/RELEASES.md b/lib/corrosion/RELEASES.md new file mode 100644 index 000000000..b9444fccf --- /dev/null +++ b/lib/corrosion/RELEASES.md @@ -0,0 +1,465 @@ +# v0.5.1 (2024-12-29) + +### Fixes + +- Update FindRust to support `rustup` v1.28.0. Support for older rustup versions is retained, + so updating corrosion quickly is recommended to all rustup users. + +# v0.5.0 (2024-05-11) + +### Breaking Changes + +- Dashes (`-`) in names of imported CMake **library** targets are now replaced with underscores (`_`). + See [issue #501] for details. Users on older Corrosion versions will experience the same + change when using Rust 1.79 or newer. 
`bin` targets are not affected by this change. + +[issue #501]: https://github.com/corrosion-rs/corrosion/issues/501 + +# v0.4.10 (2024-05-11) + +### New features + +- `corrosion_experimental_cbindgen()` can now be called multiple times on the same Rust target, + as long as the output header name differs. This may be useful to generate separate C and C++ + bindings. [#507] +- If `corrosion_link_libraries()` is called on a Rust static library target, then + `target_link_libraries()` is called to propagate the dependencies to C/C++ consumers. + Previously a warning was emitted in this case and the arguments ignored. [#506] + +### Fixes + +- Combine `-framework` flags on macos to avoid linker deduplication errors [#455] +- `corrosion_experimental_cbindgen()` will now correctly use the package name, instead of assuming that + the package and crate name are identical. ([11e27c]) +- Set the `AR_<target-triple>` variable for `cc-rs` (except for msvc targets) [#456] +- Fix hostbuild when cross-compiling to windows [#477] +- Consider VxWorks executable suffix [#504] +- `corrosion_experimental_cbindgen()` now forwards the Rust target-triple (e.g. `aarch64-unknown-linux-gnu`) + to cbindgen via the `TARGET` environment variable. The `hostbuild` property is considered. [#507] +- Fix linking errors with Rust >= 1.79 and `-msvc` targets. [#511] + + +[#455]: https://github.com/corrosion-rs/corrosion/pull/455 +[#456]: https://github.com/corrosion-rs/corrosion/pull/456 +[#477]: https://github.com/corrosion-rs/corrosion/pull/477 +[#504]: https://github.com/corrosion-rs/corrosion/pull/504 +[#506]: https://github.com/corrosion-rs/corrosion/pull/506 +[#507]: https://github.com/corrosion-rs/corrosion/pull/507 +[#511]: https://github.com/corrosion-rs/corrosion/pull/511 +[11e27c]: https://github.com/corrosion-rs/corrosion/pull/514/commits/11e27cde2cf32c7ed539c96eb03c2f10035de538 + +# v0.4.9 (2024-05-01) + +### New Features + +- Automatically detect Rust target for OpenHarmony ([#510]). 
+ +### Fixes + +- Make find_package portable ([#509]). + +[#510]: https://github.com/corrosion-rs/corrosion/pull/510 +[#509]: https://github.com/corrosion-rs/corrosion/pull/509 + +# v0.4.8 (2024-04-03) + +### Fixes + +- Fix an internal error when passing both the `PROFILE` and `CRATES` options to + `corrosion_import_crate()` ([#496]). + +[#496]: https://github.com/corrosion-rs/corrosion/pull/496 + +# v0.4.7 (2024-01-19) + +### Fixes + +- The C/C++ compiler passed from corrosion to `cc-rs` can now be overridden by users setting + `CC_<target>` (e.g. `CC_x86_64-unknown-linux-gnu=/path/to/my-compiler`) environment variables ([#475]). + +[#475]: https://github.com/corrosion-rs/corrosion/pull/475 + +# v0.4.6 (2024-01-17) + +### Fixes + +- Fix hostbuild executables when cross-compiling from non-windows to windows targets. + (Only with CMake >= 3.19). + +# v0.4.5 (2023-11-30) + +### Fixes + +- Fix hostbuild executables when cross-compiling on windows to non-windows targets + (Only with CMake >= 3.19). + +# v0.4.4 (2023-10-06) + +### Fixes + +- Add `chimera` ([#445]) and `unikraft` ([#446]) to the list of known vendors + +[#445]: https://github.com/corrosion-rs/corrosion/pull/445 +[#446]: https://github.com/corrosion-rs/corrosion/pull/446 + +# v0.4.3 (2023-09-09) + +### Fixes + +- Fix the PROFILE option with CMake < 3.19 [#427] +- Relax vendor parsing for espressif targets (removes warnings) +- Fix an issue detecting required link libraries with Rust >= 1.71 + when the cmake build directory is located in a Cargo workspace. + +# 0.4.2 (2023-07-16) + +### Fixes + +- Fix an issue when cross-compiling with clang +- Fix detecting required libraries with cargo 1.71 + +### New features + +- Users can now set `Rust_RESOLVE_RUSTUP_TOOLCHAINS` to `OFF`, which will result in Corrosion + not attempting to resolve rustc/cargo. + +# 0.4.1 (2023-06-03) + +This is a bugfix release.
+ +### Fixes + +- Fixes a regression on multi-config Generators + +# 0.4.0 LTS (2023-06-01) + +No changes compared to v0.4.0-beta2. + +## Announcements + +The `v0.4.x` LTS series will be the last release to support older CMake and Rust versions. +If necessary, fixes will be backported to the v0.4 branch. New features will not be +actively backported after the next major release, but community contributions are possible. +The `v0.4.x` series is currently planned to be maintained until the end of 2024. + +The following major release will increase the minimum required CMake version to 3.22. The +minimum supported Rust version will also be increased to make use of newly added flags, but +the exact version is not fixed yet. + + +## Changes compared to v0.3.5: + +### Breaking Changes + +- The Visual Studio Generators now require at least CMake 3.20. + This was previously announced in the 0.3.0 release notes and is the same + requirement as for the other Multi-Config Generators. +- The previously deprecated function `corrosion_set_linker_language()` + will now raise an error when called and may be removed without further + notice in future stable releases. Use `corrosion_set_linker()` instead. +- Improved the FindRust target triple detection, which may cause different behavior in some cases. + The detection does not require an enabled language anymore and will always fall back + to the default host target triple. A warning is issued if target triple detection failed. + +### Potentially Breaking Changes + +- Corrosion now sets the `IMPORTED_NO_SONAME` property for shared rust libraries, since by + default they won't have an `soname` field. + If you add a rustflag like `-Clink-arg=-Wl,-soname,libmycrate.so` in your project, + you should set this property to false on the shared rust library. +- Corrosion now uses a mechanism to determine which native libraries need to be linked with + Rust `staticlib` targets into C/C++ targets. 
The previous mechanism contained a hardcoded list. + The new mechanism asks `rustc` which libraries are needed at minimum for a given + target triple (with `std` support). This should not be a breaking change, but if you + do encounter a new linking issue when upgrading with `staticlib` targets, please open an + issue. + +### New features + +- `corrosion_import_crate()` has two new options `LOCKED` and `FROZEN` which pass the + `--locked` and `--frozen` flags to all invocations of cargo. +- `FindRust` now provides cache variables containing information on the default host + target triple: + - `Rust_CARGO_HOST_ARCH` + - `Rust_CARGO_HOST_VENDOR` + - `Rust_CARGO_HOST_OS` + - `Rust_CARGO_HOST_ENV` + +### Other changes + +- When installing Corrosion with CMake >= 3.19, the legacy Generator tool is + no longer built and installed by default. +- Corrosion now issues a warning when setting the linker or setting linker + options for a Rust static library. +- Corrosion no longer enables the `C` language when CMake is in cross-compiling mode and + no languages were previously enabled. This is not considered a breaking change. +- `corrosion_import_crate()` now warns about unexpected arguments. + +### Fixes + +- Fix building when the `dev` profile is explicitly set by the user. + +## Experimental features (may be changed or removed without a major version bump) + +- Experimental cxxbridge and cbindgen integration. +- Add a helper function to parse the package version from a Cargo.toml file +- Expose rustup toolchains discovered by `FindRust` in the following cache variables + which contain a list. + - `Rust_RUSTUP_TOOLCHAINS`: List of toolchain names + - `Rust_RUSTUP_TOOLCHAINS_VERSION`: List of the `rustc` versions of the toolchains + - `Rust_RUSTUP_TOOLCHAINS_RUSTC_PATH`: List of the path to `rustc` + - `Rust_RUSTUP_TOOLCHAINS_CARGO_PATH`: List of the path to `cargo`. Entries may be `NOTFOUND` if cargo + is not available for that toolchain.
+- Add target properties `INTERFACE_CORROSION_RUSTC` and `INTERFACE_CORROSION_CARGO`, which may + be set to paths to `rustc` and `cargo` respectively to override the toolchain for a specific + target. + +# 0.3.5 (2023-03-19) + +- Fix building the Legacy Generator on Rust toolchains < 1.56 ([#365]) + +[#365]: https://github.com/corrosion-rs/corrosion/pull/365 + +# 0.3.4 (2023-03-02) + +## Fixes + +- Fix hostbuild (when CMake/Cargo is configured for cross-compiling) if clang is used ([#338]). + +## Other + +- Pass `--no-deps` to cargo metadata ([#334]). +- Bump the legacy generator dependencies + +[#334]: https://github.com/corrosion-rs/corrosion/pull/334 +[#338]: https://github.com/corrosion-rs/corrosion/pull/338 + + +# 0.3.3 (2023-02-17) + +## New features (Only available on CMake >= 3.19) + +- Add new `IMPORTED_CRATES` flag to `corrosion_import_crate()` to retrieve the list of imported crates in the current + scope ([#312](https://github.com/corrosion-rs/corrosion/pull/312)). + +## Fixes + +- Fix imported location target property when the rust target name contains dashes + and a custom OUTPUT_DIRECTORY was specified by the user ([#322](https://github.com/corrosion-rs/corrosion/pull/322)). +- Fix building for custom rust target-triples ([#316](https://github.com/corrosion-rs/corrosion/pull/316)) + +# 0.3.2 (2023-01-11) + +## New features (Only available on CMake >= 3.19) + +- Add new `CRATE_TYPES` flag to `corrosion_import_crate()` to restrict which + crate types should be imported ([#269](https://github.com/corrosion-rs/corrosion/pull/269)). +- Add `NO_LINKER_OVERRIDE` flag to let Rust choose the default linker for the target + instead of what Corrosion thinks is the appropriate linker driver ([#272](https://github.com/corrosion-rs/corrosion/pull/272)). + +## Fixes + +- Fix clean target when cross-compiling ([#291](https://github.com/corrosion-rs/corrosion/pull/291)). 
+- Don't set the linker for Rust static libraries ([#275](https://github.com/corrosion-rs/corrosion/pull/275)). +- Minor fixes in FindRust [#297](https://github.com/corrosion-rs/corrosion/pull/297): + - fix a logic error in the version detection + - fix a logic error in `QUIET` mode when rustup is not found. + +# 0.3.1 (2022-12-13) + +### Fixes + +- Fix a regression in detecting the MSVC abi ([#256]) +- Fix an issue on macOS 13 which affected rust crates compiling C++ code in build scripts ([#254]). +- Fix corrosion not respecting `CMAKE_<XYZ>_OUTPUT_DIRECTORY` values ([#268]). +- Don't override Rust's linker choice for the msvc abi (previously this was only skipped for msvc generators) ([#271]) + +[#254]: https://github.com/corrosion-rs/corrosion/pull/254 +[#256]: https://github.com/corrosion-rs/corrosion/pull/256 +[#268]: https://github.com/corrosion-rs/corrosion/pull/268 +[#271]: https://github.com/corrosion-rs/corrosion/pull/271 + +# 0.3.0 (2022-10-31) + +## Breaking + +- The minimum supported rust version (MSRV) was increased to 1.46, due to a cargo issue that recently + surfaced on CI when using crates.io. On MacOS 12 and Windows-2022 at least Rust 1.54 is required. +- MacOS 10 and 11 are no longer officially supported and are untested in CI. +- The minimum required CMake version is now 3.15. +- Adding a `PRE_BUILD` custom command on a `cargo-build_<target_name>` CMake target will no + longer work as expected. To support executing user defined commands before cargo build is + invoked users should use the newly added targets `cargo-prebuild` (before all cargo build invocations) + or `cargo-prebuild_<target_name>` as a dependency target. + Example: `add_dependencies(cargo-prebuild code_generator_target)` + +### Breaking: Removed previously deprecated functionality +- Removed `add_crate()` function. Use `corrosion_import_crate()` instead. +- Removed `cargo_link_libraries()` function. Use `corrosion_link_libraries()` instead. +- Removed experimental CMake option `CORROSION_EXPERIMENTAL_PARSER`.
+ The corresponding stable option is `CORROSION_NATIVE_TOOLING` albeit with inverted semantics. +- Previously Corrosion would set the `HOST_CC` and `HOST_CXX` environment variables when invoking + cargo build, if the environment variables `CC` and `CXX` were set outside of CMake. + However this did not work as expected in all cases and sometimes the `HOST_CC` variable would be set + to a cross-compiler for unknown reasons. For this reason `HOST_CC` and `HOST_CXX` are not set by + corrosion anymore, but users can still set them manually if required via `corrosion_set_env_vars()`. +- The `CARGO_RUST_FLAGS` family of cache variables was removed. Corrosion does not internally use them + anymore. + +## Potentially breaking + +- The working directory when invoking `cargo build` was changed to the directory of the Manifest + file. This now allows cargo to pick up `.cargo/config.toml` files located in the source tree. + ([205](https://github.com/corrosion-rs/corrosion/pull/205)) +- Corrosion internally invokes `cargo build`. When passing arguments to `cargo build`, Corrosion + now uses the CMake `VERBATIM` option. In rare cases this may require you to change how you quote + parameters passed to corrosion (e.g. via `corrosion_add_target_rustflags()`). + For example setting a `cfg` option previously required double escaping the rustflag like this + `"--cfg=something=\\\"value\\\""`, but now it can be passed to corrosion without any escapes: + `--cfg=something="value"`. +- Corrosion now respects the CMake `OUTPUT_DIRECTORY` target properties. More details in the "New features" section. + +## New features + +- Support setting rustflags for only the main target and none of its dependencies ([215](https://github.com/corrosion-rs/corrosion/pull/215)). + A new function `corrosion_add_target_local_rustflags(target_name rustc_flag [more_flags ...])` + is added for this purpose.
+ This is useful in cases where you only need rustflags on the main-crate, but need to set different + flags for different targets. Without "local" Rustflags this would require rebuilds of the + dependencies when switching targets. +- Support explicitly selecting a linker ([208](https://github.com/corrosion-rs/corrosion/pull/208)). + The linker can be selected via `corrosion_set_linker(target_name linker)`. + Please note that this only has an effect for targets, where the final linker invocation is done + by cargo, i.e. targets where foreign code is linked into rust code and not the other way around. +- Corrosion now respects the CMake `OUTPUT_DIRECTORY` target properties and copies build artifacts to the expected + locations ([217](https://github.com/corrosion-rs/corrosion/pull/217)), if the properties are set. + This feature requires at least CMake 3.19 and is enabled by default if supported. Please note that the `OUTPUT_NAME` + target properties are currently not supported. + Specifically, the following target properties are now respected: + - [ARCHIVE_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/ARCHIVE_OUTPUT_DIRECTORY.html) + - [LIBRARY_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/LIBRARY_OUTPUT_DIRECTORY.html) + - [RUNTIME_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/RUNTIME_OUTPUT_DIRECTORY.html) + - [PDB_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/PDB_OUTPUT_DIRECTORY.html) +- Corrosion now supports packages with potentially multiple binaries (bins) and a library (lib) at the + same time. The only requirement is that the names of all `bin`s and `lib`s in the whole project must be unique. + Users can set the names in the `Cargo.toml` by adding `name = ` in the `[[bin]]` and `[lib]` tables. +- FindRust now has improved support for the `VERSION` option of `find_package` and will now attempt to find a matching + toolchain version. 
Previously it was only checked if the default toolchain matched the required version. +- For rustup managed toolchains a CMake error is issued with a helpful message if the required target for + the selected toolchain is not installed. + +## Fixes + +- Fix a CMake developer warning when a Multi-Config Generator and Rust executable targets are used + ([#213](https://github.com/corrosion-rs/corrosion/pull/213)). +- FindRust now respects the `QUIET` option to `find_package()` in most cases. + +## Deprecation notice + +- Support for the MSVC Generators with CMake toolchains before 3.20 is deprecated and will be removed in the next + release (v0.4). All other Multi-config Generators already require CMake 3.20. + +## Internal Changes + +- The CMake Generator written in Rust and `CorrosionGenerator.cmake` which are responsible for parsing + `cargo metadata` output to create corresponding CMake targets for all Rust targets now share most code. + This greatly simplified the CMake generator written in Rust and makes it much easier to maintain and add + new features regardless of how `cargo metadata` is parsed. + +# 0.2.2 (2022-09-01) + +## Fixes + +- Do not use C++17 in the tests (makes tests work with older C++ compilers) ([184](https://github.com/corrosion-rs/corrosion/pull/184)) +- Fix finding cargo on NixOS ([192](https://github.com/corrosion-rs/corrosion/pull/192)) +- Fix issue with Rustflags test when using a Build type other than Debug and Release ([203](https://github.com/corrosion-rs/corrosion/pull/203)). + +# 0.2.1 (2022-05-07) + +## Fixes + +- Fix missing variables provided by corrosion, when corrosion is used as a subdirectory ([181](https://github.com/corrosion-rs/corrosion/pull/181)): + Public [Variables](https://github.com/corrosion-rs/corrosion#information-provided-by-corrosion) set + by Corrosion were not visible when using Corrosion as a subdirectory, due to the wrong scope of + the variables.
This was fixed by promoting the respective variables to Cache variables. + +# 0.2.0 (2022-05-05) + +## Breaking changes + +- Removed the integrator build script ([#156](https://github.com/corrosion-rs/corrosion/pull/156)). + The build script provided by corrosion (for rust code that links in foreign code) is no longer necessary, + so users can just remove the dependency. + +## Deprecations + +- Direct usage of the following target properties has been deprecated. The names of the custom properties are + no longer considered part of the public API and may change in the future. Instead, please use the functions + provided by corrosion. Internally different property names are used depending on the CMake version. + - `CORROSION_FEATURES`, `CORROSION_ALL_FEATURES`, `CORROSION_NO_DEFAULT_FEATURES`. Instead please use + `corrosion_set_features()`. See the updated Readme for details. + - `CORROSION_ENVIRONMENT_VARIABLES`. Please use `corrosion_set_env_vars()` instead. + - `CORROSION_USE_HOST_BUILD`. Please use `corrosion_set_hostbuild()` instead. +- The Minimum CMake version will likely be increased for the next major release. At the very least we want to drop + support for CMake 3.12, but requiring CMake 3.16 or even 3.18 is also on the table. If you are using a CMake version + that would be no longer supported by corrosion, please comment on issue + [#168](https://github.com/corrosion-rs/corrosion/issues/168), so that we can gauge the number of affected users. + +## New features + +- Add `NO_STD` option to `corrosion_import_crate` ([#154](https://github.com/corrosion-rs/corrosion/pull/154)). +- Remove the requirement of building the Rust based generator crate for CMake >= 3.19. This makes using corrosion as + a subdirectory as fast as the installed version (since everything is done in CMake). 
+ ([#131](https://github.com/corrosion-rs/corrosion/pull/131), [#161](https://github.com/corrosion-rs/corrosion/pull/161)) + If you do choose to install Corrosion, then by default the old Generator is still compiled and installed, so you can + fall back to using it in case you use multiple cmake versions on the same machine for different projects. + +## Fixes + +- Fix Corrosion on MacOS 11 and 12 ([#167](https://github.com/corrosion-rs/corrosion/pull/167) and + [#164](https://github.com/corrosion-rs/corrosion/pull/164)). +- Improve robustness of parsing the LLVM version (exported in `Rust_LLVM_VERSION`). It now also works for + Rust versions, where the LLVM version is reported as `MAJOR.MINOR`. ([#148](https://github.com/corrosion-rs/corrosion/pull/148)) +- Fix a bug which occurred when Corrosion was added multiple times via `add_subdirectory()` + ([#143](https://github.com/corrosion-rs/corrosion/pull/143)). +- Set `CC_` and `CXX_` environment variables for the invocation of + `cargo build` to the compilers selected by CMake (if any) + ([#138](https://github.com/corrosion-rs/corrosion/pull/138) and [#161](https://github.com/corrosion-rs/corrosion/pull/161)). + This should ensure that C dependencies built in cargo buildscripts via [cc-rs](https://github.com/alexcrichton/cc-rs) + use the same compiler as CMake built dependencies. Users can override the compiler by specifying the higher + priority environment variable variants with dashes instead of underscores (See cc-rs documentation for details). +- Fix Ninja-Multiconfig Generator support for CMake versions >= 3.20. Previous CMake versions are missing a feature, + which prevents us from supporting the Ninja-Multiconfig generator. ([#137](https://github.com/corrosion-rs/corrosion/pull/137)) + + +# 0.1.0 (2022-02-01) + +This is the first release of corrosion after it was moved to the new corrosion-rs organization. 
+Since there are no previous releases, this is not a complete changelog but only lists changes since +September 2021. + +## New features +- [Add --profile support for rust >= 1.57](https://github.com/corrosion-rs/corrosion/pull/130): + Allows users to specify a custom cargo profile with + `corrosion_import_crate(... PROFILE <profile_name>)`. +- [Add support for specifying per-target Rustflags](https://github.com/corrosion-rs/corrosion/pull/127): + Rustflags can be added via `corrosion_add_target_rustflags(<target_name> <rustflag> [rustflags1...])` +- [Add `Rust_IS_NIGHTLY` and `Rust_LLVM_VERSION` variables](https://github.com/corrosion-rs/corrosion/pull/123): + This may be useful if you want to conditionally enable features when using a nightly toolchain + or a specific LLVM Version. +- [Let `FindRust` fail gracefully if rustc is not found](https://github.com/corrosion-rs/corrosion/pull/111): + This allows using `FindRust` in a more general setting (without corrosion). +- [Add support for cargo feature selection](https://github.com/corrosion-rs/corrosion/pull/108): + See the [README](https://github.com/corrosion-rs/corrosion#cargo-feature-selection) for details on + how to select features. + + +## Fixes +- [Fix the cargo-clean target](https://github.com/corrosion-rs/corrosion/pull/129) +- [Fix #84: CorrosionConfig.cmake looks in wrong place for Corrosion::Generator when CMAKE_INSTALL_LIBEXEC is an absolute path](https://github.com/corrosion-rs/corrosion/pull/122/commits/6f29af3ac53917ca2e0638378371e715a18a532d) +- [Fix #116: (Option CORROSION_INSTALL_EXECUTABLE not working)](https://github.com/corrosion-rs/corrosion/commit/97d44018fac1b1a2a7c095288c628f5bbd9b3184) +- [Fix building on Windows with rust >= 1.57](https://github.com/corrosion-rs/corrosion/pull/120) + +## Known issues: +- Corrosion is currently not working on macos-11 and newer. See issue [#104](https://github.com/corrosion-rs/corrosion/issues/104). + Contributions are welcome.
diff --git a/lib/corrosion/cmake/Corrosion.cmake b/lib/corrosion/cmake/Corrosion.cmake new file mode 100644 index 000000000..1eeaff9d2 --- /dev/null +++ b/lib/corrosion/cmake/Corrosion.cmake @@ -0,0 +1,1910 @@ +cmake_minimum_required(VERSION 3.15) + +list(APPEND CMAKE_MESSAGE_CONTEXT "Corrosion") + +message(DEBUG "Using Corrosion ${Corrosion_VERSION} with CMake ${CMAKE_VERSION} " + "and the `${CMAKE_GENERATOR}` Generator" +) + +get_cmake_property(COR_IS_MULTI_CONFIG GENERATOR_IS_MULTI_CONFIG) +set(COR_IS_MULTI_CONFIG "${COR_IS_MULTI_CONFIG}" CACHE BOOL "Do not change this" FORCE) +mark_as_advanced(FORCE COR_IS_MULTI_CONFIG) + +if (COR_IS_MULTI_CONFIG AND CMAKE_VERSION VERSION_LESS 3.20.0) + message(FATAL_ERROR "Corrosion requires at least CMake 3.20 with Multi-Config Generators such as " + "\"Ninja Multi-Config\" or Visual Studio. " + "Please use a different generator or update to cmake >= 3.20.\n" + "Note: You are using CMake ${CMAKE_VERSION} (Path: `${CMAKE_COMMAND}`) with " + " the `${CMAKE_GENERATOR}` Generator." + ) +elseif(NOT COR_IS_MULTI_CONFIG AND DEFINED CMAKE_CONFIGURATION_TYPES) + message(WARNING "The Generator is ${CMAKE_GENERATOR}, which is not a multi-config " + "Generator, but CMAKE_CONFIGURATION_TYPES is set. Please don't set " + "CMAKE_CONFIGURATION_TYPES unless you are using a multi-config Generator." + ) +endif() + +option(CORROSION_VERBOSE_OUTPUT "Enables verbose output from Corrosion and Cargo" OFF) + +set(CORROSION_NATIVE_TOOLING_DESCRIPTION + "Use native tooling - Required on CMake < 3.19 and available as a fallback option for recent versions" + ) + +set(CORROSION_RESPECT_OUTPUT_DIRECTORY_DESCRIPTION + "Respect the CMake target properties specifying the output directory of a target, such as + `RUNTIME_OUTPUT_DIRECTORY`. This requires CMake >= 3.19, otherwise this option is forced off."
+) + +option( + CORROSION_NATIVE_TOOLING + "${CORROSION_NATIVE_TOOLING_DESCRIPTION}" + OFF +) + +option(CORROSION_RESPECT_OUTPUT_DIRECTORY + "${CORROSION_RESPECT_OUTPUT_DIRECTORY_DESCRIPTION}" + ON +) + +option( + CORROSION_NO_WARN_PARSE_TARGET_TRIPLE_FAILED + "Surpresses a warning if the parsing the target triple failed." + OFF +) + +# The native tooling is required on CMake < 3.19 so we override whatever the user may have set. +if (CMAKE_VERSION VERSION_LESS 3.19.0) + set(CORROSION_NATIVE_TOOLING ON CACHE INTERNAL "${CORROSION_NATIVE_TOOLING_DESCRIPTION}" FORCE) + set(CORROSION_RESPECT_OUTPUT_DIRECTORY OFF CACHE INTERNAL + "${CORROSION_RESPECT_OUTPUT_DIRECTORY_DESCRIPTION}" FORCE + ) +endif() + +find_package(Rust REQUIRED) + +if(Rust_TOOLCHAIN_IS_RUSTUP_MANAGED) + execute_process(COMMAND rustup target list --toolchain "${Rust_TOOLCHAIN}" + OUTPUT_VARIABLE AVAILABLE_TARGETS_RAW + ) + string(REPLACE "\n" ";" AVAILABLE_TARGETS_RAW "${AVAILABLE_TARGETS_RAW}") + string(REPLACE " (installed)" "" "AVAILABLE_TARGETS" "${AVAILABLE_TARGETS_RAW}") + set(INSTALLED_TARGETS_RAW "${AVAILABLE_TARGETS_RAW}") + list(FILTER INSTALLED_TARGETS_RAW INCLUDE REGEX " \\(installed\\)") + string(REPLACE " (installed)" "" "INSTALLED_TARGETS" "${INSTALLED_TARGETS_RAW}") + list(TRANSFORM INSTALLED_TARGETS STRIP) + if("${Rust_CARGO_TARGET}" IN_LIST AVAILABLE_TARGETS) + message(DEBUG "Cargo target ${Rust_CARGO_TARGET} is an official target-triple") + message(DEBUG "Installed targets: ${INSTALLED_TARGETS}") + if(NOT ("${Rust_CARGO_TARGET}" IN_LIST INSTALLED_TARGETS)) + message(FATAL_ERROR "Target ${Rust_CARGO_TARGET} is not installed for toolchain ${Rust_TOOLCHAIN}.\n" + "Help: Run `rustup target add --toolchain ${Rust_TOOLCHAIN} ${Rust_CARGO_TARGET}` to install " + "the missing target."
+ ) + endif() + endif() + +endif() + +if(CMAKE_GENERATOR MATCHES "Visual Studio" + AND (NOT CMAKE_VS_PLATFORM_NAME STREQUAL CMAKE_VS_PLATFORM_NAME_DEFAULT) + AND Rust_VERSION VERSION_LESS "1.54") + message(FATAL_ERROR "Due to a cargo issue, cross-compiling with a Visual Studio generator and rust versions" + " before 1.54 is not supported. Rust build scripts would be linked with the cross-compiler linker, which" + " causes the build to fail. Please upgrade your Rust version to 1.54 or newer.") +endif() + +if (NOT TARGET Corrosion::Generator) + message(STATUS "Using Corrosion as a subdirectory") +endif() + +get_property( + RUSTC_EXECUTABLE + TARGET Rust::Rustc PROPERTY IMPORTED_LOCATION +) + +get_property( + CARGO_EXECUTABLE + TARGET Rust::Cargo PROPERTY IMPORTED_LOCATION +) + + +# Sets out_var to the value of CORROSION_RESPECT_OUTPUT_DIRECTORY, i.e. true if the byproduct copying +# and imported location handling is done in a deferred manner to respect target properties set later. +function(_corrosion_determine_deferred_byproduct_copying_and_import_location_handling out_var) + set(${out_var} ${CORROSION_RESPECT_OUTPUT_DIRECTORY} PARENT_SCOPE) +endfunction() + +# Sets out_var_suffix to the executable file suffix for target_name: `.exe` for host builds on a +# Windows host and for non-host builds when the Rust target OS is windows, otherwise `.vxe` when the +# Rust target OS is vxworks, and an empty string in all other cases. +function(_corrosion_bin_target_suffix target_name out_var_suffix) + get_target_property(hostbuild "${target_name}" ${_CORR_PROP_HOST_BUILD}) + if((hostbuild AND CMAKE_HOST_WIN32) + OR ((NOT hostbuild) AND (Rust_CARGO_TARGET_OS STREQUAL "windows"))) + set(_suffix ".exe") + elseif(Rust_CARGO_TARGET_OS STREQUAL "vxworks") + set(_suffix ".vxe") + else() + set(_suffix "") + endif() + set(${out_var_suffix} "${_suffix}" PARENT_SCOPE) +endfunction() + +# Do not call this function directly! +# +# This function should be called deferred to evaluate target properties late in the configure stage. +# IMPORTED_LOCATION does not support Generator expressions, so we must evaluate the output +# directory target property value at configure time.
This function must be deferred to the end of +# the configure stage, so we can be sure that the output directory is not modified afterwards. +# +# Parameters: +# - target_name: Name of the target on which the property is set. +# - base_property: Property to set; `<base_property>_<CONFIG>` is also set for every entry of +# CMAKE_CONFIGURATION_TYPES. +# - output_directory_property: Target property naming the output directory (prefixed with +# `INTERFACE_` when CORROSION_NATIVE_TOOLING is on). +# - filename: File name of the artifact inside the output directory. +function(_corrosion_set_imported_location_deferred target_name base_property output_directory_property filename) + # The output directory property is expected to be set on the exposed target (without postfix), + # but we need to set the imported location on the actual library target with postfix. + if("${target_name}" MATCHES "^(.+)-(static|shared)$") + set(output_dir_prop_target_name "${CMAKE_MATCH_1}") + else() + set(output_dir_prop_target_name "${target_name}") + endif() + if(CORROSION_NATIVE_TOOLING) + set(output_directory_property "INTERFACE_${output_directory_property}") + endif() + + # Append the executable suffix (see _corrosion_bin_target_suffix) for executable by-products, but + # never for `.pdb` files. `\\.` is needed so that the regex sees an escaped, literal dot. + get_target_property(target_type ${target_name} TYPE) + if(target_type STREQUAL "EXECUTABLE" AND (NOT "${filename}" MATCHES "\\.pdb$")) + _corrosion_bin_target_suffix(${target_name} "suffix") + if(suffix) + set(filename "${filename}${suffix}") + endif() + endif() + + get_target_property(output_directory "${output_dir_prop_target_name}" "${output_directory_property}") + message(DEBUG "Output directory property (target ${output_dir_prop_target_name}): ${output_directory_property} dir: ${output_directory}") + + foreach(config_type ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER "${config_type}" config_type_upper) + get_target_property(output_dir_curr_config ${output_dir_prop_target_name} + "${output_directory_property}_${config_type_upper}" + ) + if(output_dir_curr_config) + set(curr_out_dir "${output_dir_curr_config}") + elseif(output_directory) + set(curr_out_dir "${output_directory}") + else() + set(curr_out_dir "${CMAKE_CURRENT_BINARY_DIR}") + endif() + message(DEBUG "Setting ${base_property}_${config_type_upper} for target ${target_name}" + " to `${curr_out_dir}/${filename}`.") + # For Multiconfig we want to
specify the correct location for each configuration + set_property( + TARGET ${target_name} + PROPERTY "${base_property}_${config_type_upper}" + "${curr_out_dir}/${filename}" + ) + set(base_output_directory "${curr_out_dir}") + endforeach() + + if(NOT COR_IS_MULTI_CONFIG) + if(output_directory) + set(base_output_directory "${output_directory}") + else() + set(base_output_directory "${CMAKE_CURRENT_BINARY_DIR}") + endif() + endif() + + message(DEBUG "Setting ${base_property} for target ${target_name}" + " to `${base_output_directory}/${filename}`.") + + # IMPORTED_LOCATION must be set regardless of possible overrides. In the multiconfig case, + # the last configuration "wins" (IMPORTED_LOCATION is not documented to have Genex support). + set_property( + TARGET ${target_name} + PROPERTY "${base_property}" "${base_output_directory}/${filename}" + ) +endfunction() + +# Helper function to call _corrosion_set_imported_location_deferred while eagerly +# evaluating arguments. +# Refer to https://cmake.org/cmake/help/latest/command/cmake_language.html#deferred-call-examples +function(_corrosion_call_set_imported_location_deferred target_name base_property output_directory_property filename) + cmake_language(EVAL CODE " + cmake_language(DEFER + CALL + _corrosion_set_imported_location_deferred + [[${target_name}]] + [[${base_property}]] + [[${output_directory_property}]] + [[${filename}]] + ) + ") +endfunction() + +# Set the imported location of a Rust target. +# +# Rust targets are built via custom targets / custom commands. The actual artifacts are exposed +# to CMake as imported libraries / executables that depend on the cargo_build command. For CMake +# to find the built artifact we need to set the IMPORTED location to the actual location on disk. +# Corrosion tries to copy the artifacts built by cargo to standard locations. The IMPORTED_LOCATION +# is set to point to the copy, and not the original from the cargo build directory.
#
# Parameters:
# - target_name: Name of the Rust target
# - base_property: Name of the base property - i.e. `IMPORTED_LOCATION` or `IMPORTED_IMPLIB`.
# - output_directory_property: Target property name that determines the standard location for the
#    artifact.
# - filename: File name of the artifact.
function(_corrosion_set_imported_location target_name base_property output_directory_property filename)
    # The helper is expected to set `defer` in this scope; it selects between the deferred and the
    # immediate code path below.
    _corrosion_determine_deferred_byproduct_copying_and_import_location_handling("defer")
    if(defer)
        _corrosion_call_set_imported_location_deferred("${target_name}" "${base_property}" "${output_directory_property}" "${filename}")
    else()
        # We can't actually call the function in a deferred way, but we can still respect the output directory
        # variables that were set **before** importing the crate.
        _corrosion_set_imported_location_deferred("${target_name}" "${base_property}" "${output_directory_property}" "${filename}")
    endif()
endfunction()

# Add a POST_BUILD step to `_cargo-build_<target_name>` which copies the artifacts built by cargo
# into the directory selected by the target's output directory property.
#
# Parameters:
# - target_name: Name of the Rust target whose properties are queried.
# - output_dir_prop_name: Name of the output directory property. The per-configuration variants
#   `<output_dir_prop_name>_<CONFIG>` take precedence over the base property.
# - cargo_build_dir: Directory the artifacts are copied from.
# - file_names: List of artifact file names. Exactly one name is allowed for executable targets.
function(_corrosion_copy_byproduct_deferred target_name output_dir_prop_name cargo_build_dir file_names)
    if(ARGN)
        message(FATAL_ERROR "Unexpected additional arguments")
    endif()
    get_target_property(output_dir ${target_name} "${output_dir_prop_name}")

    # A Genex expanding to the output directory depending on the configuration.
    set(multiconfig_out_dir_genex "")

    foreach(config_type ${CMAKE_CONFIGURATION_TYPES})
        string(TOUPPER "${config_type}" config_type_upper)
        get_target_property(output_dir_curr_config ${target_name} "${output_dir_prop_name}_${config_type_upper}")

        if(output_dir_curr_config)
            set(curr_out_dir "${output_dir_curr_config}")
        elseif(output_dir)
            # Fallback to `output_dir` if specified
            # Note: Multi-configuration generators append a per-configuration subdirectory to the
            # specified directory unless a generator expression is used (from CMake documentation).
            set(curr_out_dir "${output_dir}")
        else()
            # Fallback to the default directory. We do not append the configuration directory here
            # and instead let CMake do this, since otherwise the resolving of dynamic library
            # imported paths may fail.
            set(curr_out_dir "${CMAKE_CURRENT_BINARY_DIR}")
        endif()
        # One conditional fragment per configuration; only the fragment of the active configuration
        # expands to a non-empty string.
        set(multiconfig_out_dir_genex "${multiconfig_out_dir_genex}$<$<CONFIG:${config_type}>:${curr_out_dir}>")
    endforeach()

    if(COR_IS_MULTI_CONFIG)
        set(output_dir "${multiconfig_out_dir_genex}")
    else()
        if(NOT output_dir)
            # Fallback to default directory.
            set(output_dir "${CMAKE_CURRENT_BINARY_DIR}")
        endif()
    endif()

    # Append .exe suffix for executable by-products if the target is windows or if it's a host
    # build and the host is Windows.
    get_target_property(target_type "${target_name}" TYPE)
    if (target_type STREQUAL "EXECUTABLE")
        list(LENGTH file_names list_len)
        if(NOT list_len EQUAL "1")
            message(FATAL_ERROR
                    "Internal error: Exactly one filename should be passed for executable types.")
        endif()
        _corrosion_bin_target_suffix(${target_name} "suffix")
        # The backslash has to be doubled: inside a quoted argument `\.` is an identity escape that
        # CMake reduces to `.`, which would make the regex match any character before `pdb`.
        if(suffix AND (NOT "${file_names}" MATCHES "\\.pdb$"))
            # For executable targets we know / checked that only one file will be passed.
            string(APPEND file_names "${suffix}")
        endif()
    endif()

    list(TRANSFORM file_names PREPEND "${cargo_build_dir}/" OUTPUT_VARIABLE src_file_names)
    list(TRANSFORM file_names PREPEND "${output_dir}/" OUTPUT_VARIABLE dst_file_names)
    message(DEBUG "Adding command to copy byproducts `${file_names}` to ${dst_file_names}")
    add_custom_command(TARGET _cargo-build_${target_name}
        POST_BUILD
        # output_dir may contain a Generator expression.
        COMMAND ${CMAKE_COMMAND} -E make_directory "${output_dir}"
        COMMAND
            ${CMAKE_COMMAND} -E copy_if_different
                # tested to work with both multiple files and paths with spaces
                ${src_file_names}
                "${output_dir}"
        BYPRODUCTS ${dst_file_names}
        COMMENT "Copying byproducts `${file_names}` to ${output_dir}"
        VERBATIM
        COMMAND_EXPAND_LISTS
    )
endfunction()

# Helper function to call _corrosion_copy_byproduct_deferred via `cmake_language(DEFER)` while
# eagerly evaluating the arguments: the variable references are expanded while the code string is
# built, and the values are passed on as bracket arguments.
function(_corrosion_call_copy_byproduct_deferred target_name output_dir_prop_name cargo_build_dir file_names)
    cmake_language(EVAL CODE "
        cmake_language(DEFER
            CALL
            _corrosion_copy_byproduct_deferred
            [[${target_name}]]
            [[${output_dir_prop_name}]]
            [[${cargo_build_dir}]]
            [[${file_names}]]
        )
    ")
endfunction()

# Copy the artifacts generated by cargo to the appropriate destination.
#
# Parameters:
# - target_name: The name of the Rust target
# - output_dir_prop_name: The property name controlling the destination (e.g.
#   `RUNTIME_OUTPUT_DIRECTORY`)
# - cargo_build_dir: the directory cargo build places its output artifacts in.
# - filenames: the file names of any output artifacts as a list.
function(_corrosion_copy_byproducts target_name output_dir_prop_name cargo_build_dir filenames)
    _corrosion_determine_deferred_byproduct_copying_and_import_location_handling("defer")
    if(defer)
        _corrosion_call_copy_byproduct_deferred("${target_name}" "${output_dir_prop_name}" "${cargo_build_dir}" "${filenames}")
    else()
        _corrosion_copy_byproduct_deferred("${target_name}" "${output_dir_prop_name}" "${cargo_build_dir}" "${filenames}")
    endif()
endfunction()


# Add targets for the static and/or shared libraries of the rust target.
# The generated byproduct names are returned via the `OUT_*` arguments
# (`OUT_ARCHIVE_OUTPUT_BYPRODUCTS`, `OUT_SHARED_LIB_BYPRODUCTS`, `OUT_PDB_BYPRODUCT`).
function(_corrosion_add_library_target)
    # Keyword arguments (all one-value keywords are required):
    # - WORKSPACE_MANIFEST_PATH: stored in a local variable, not used further in this function.
    # - TARGET_NAME: Name of the Rust target; `-` is replaced by `_` to derive the file names.
    # - OUT_ARCHIVE_OUTPUT_BYPRODUCTS / OUT_SHARED_LIB_BYPRODUCTS / OUT_PDB_BYPRODUCT: Names of the
    #   variables in the caller's scope which receive the respective byproduct file names.
    # - LIB_KINDS: List containing `staticlib` and/or `cdylib`.
    set(OPTIONS "")
    set(ONE_VALUE_KEYWORDS
        WORKSPACE_MANIFEST_PATH
        TARGET_NAME
        OUT_ARCHIVE_OUTPUT_BYPRODUCTS
        OUT_SHARED_LIB_BYPRODUCTS
        OUT_PDB_BYPRODUCT
    )
    set(MULTI_VALUE_KEYWORDS LIB_KINDS)
    cmake_parse_arguments(PARSE_ARGV 0 CALT "${OPTIONS}" "${ONE_VALUE_KEYWORDS}" "${MULTI_VALUE_KEYWORDS}")

    if(DEFINED CALT_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Internal error - unexpected arguments: ${CALT_UNPARSED_ARGUMENTS}")
    elseif(DEFINED CALT_KEYWORDS_MISSING_VALUES)
        message(FATAL_ERROR "Internal error - the following keywords had no associated value(s): "
                "${CALT_KEYWORDS_MISSING_VALUES}")
    endif()
    list(TRANSFORM ONE_VALUE_KEYWORDS PREPEND CALT_ OUTPUT_VARIABLE required_arguments)
    foreach(required_argument ${required_arguments} )
        if(NOT DEFINED "${required_argument}")
            message(FATAL_ERROR "Internal error: Missing required argument ${required_argument}."
                    " Complete argument list: ${ARGN}"
            )
        endif()
    endforeach()

    # Initialise explicitly: a function scope starts with a copy of the caller's variables, so a
    # stale `has_staticlib` / `has_cdylib` from the calling scope must not leak in.
    set(has_staticlib FALSE)
    set(has_cdylib FALSE)
    if("staticlib" IN_LIST CALT_LIB_KINDS)
        set(has_staticlib TRUE)
    endif()
    if("cdylib" IN_LIST CALT_LIB_KINDS)
        set(has_cdylib TRUE)
    endif()

    if(NOT (has_staticlib OR has_cdylib))
        message(FATAL_ERROR "Unknown library type(s): ${CALT_LIB_KINDS}")
    endif()
    set(workspace_manifest_path "${CALT_WORKSPACE_MANIFEST_PATH}")
    set(target_name "${CALT_TARGET_NAME}")

    set(is_windows "")
    set(is_windows_gnu "")
    set(is_windows_msvc "")
    set(is_macos "")
    if(Rust_CARGO_TARGET_OS STREQUAL "windows")
        set(is_windows TRUE)
        if(Rust_CARGO_TARGET_ENV STREQUAL "msvc")
            set(is_windows_msvc TRUE)
        elseif(Rust_CARGO_TARGET_ENV STREQUAL "gnu")
            set(is_windows_gnu TRUE)
        endif()
    elseif(Rust_CARGO_TARGET_OS STREQUAL "darwin")
        set(is_macos TRUE)
    endif()

    # target file names
    string(REPLACE "-" "_" lib_name "${target_name}")

    if(is_windows_msvc)
        set(static_lib_name "${lib_name}.lib")
    else()
        set(static_lib_name "lib${lib_name}.a")
    endif()

    if(is_windows)
        set(dynamic_lib_name "${lib_name}.dll")
    elseif(is_macos)
        set(dynamic_lib_name "lib${lib_name}.dylib")
    else()
        set(dynamic_lib_name "lib${lib_name}.so")
    endif()

    if(is_windows_msvc)
        set(implib_name "${lib_name}.dll.lib")
    elseif(is_windows_gnu)
        set(implib_name "lib${lib_name}.dll.a")
    elseif(is_windows)
        message(FATAL_ERROR "Unknown windows environment - Can't determine implib name")
    endif()


    set(pdb_name "${lib_name}.pdb")

    set(archive_output_byproducts "")
    if(has_staticlib)
        list(APPEND archive_output_byproducts ${static_lib_name})
    endif()

    if(has_cdylib)
        set("${CALT_OUT_SHARED_LIB_BYPRODUCTS}" "${dynamic_lib_name}" PARENT_SCOPE)
        if(is_windows)
            list(APPEND archive_output_byproducts ${implib_name})
        endif()
        if(is_windows_msvc)
            set("${CALT_OUT_PDB_BYPRODUCT}" "${pdb_name}" PARENT_SCOPE)
        endif()
    endif()
    set("${CALT_OUT_ARCHIVE_OUTPUT_BYPRODUCTS}" "${archive_output_byproducts}" PARENT_SCOPE)

    if(has_staticlib)
        add_library(${target_name}-static STATIC IMPORTED GLOBAL)
        add_dependencies(${target_name}-static cargo-build_${target_name})

        _corrosion_set_imported_location("${target_name}-static" "IMPORTED_LOCATION"
                "ARCHIVE_OUTPUT_DIRECTORY"
                "${static_lib_name}")

        # Todo: NO_STD target property?
        if(NOT COR_NO_STD)
            set_property(
                TARGET ${target_name}-static
                PROPERTY INTERFACE_LINK_LIBRARIES ${Rust_CARGO_TARGET_LINK_NATIVE_LIBS}
            )
            set_property(
                TARGET ${target_name}-static
                PROPERTY INTERFACE_LINK_OPTIONS ${Rust_CARGO_TARGET_LINK_OPTIONS}
            )
            if(is_macos)
                set_property(TARGET ${target_name}-static
                    PROPERTY INTERFACE_LINK_DIRECTORIES "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib"
                )
            endif()
        endif()
    endif()

    if(has_cdylib)
        add_library(${target_name}-shared SHARED IMPORTED GLOBAL)
        add_dependencies(${target_name}-shared cargo-build_${target_name})

        # Todo: (Not new issue): What about IMPORTED_SONAME and IMPORTED_NO_SYSTEM?
        _corrosion_set_imported_location("${target_name}-shared" "IMPORTED_LOCATION"
                "LIBRARY_OUTPUT_DIRECTORY"
                "${dynamic_lib_name}"
        )
        # In the future we would probably prefer to let Rust set the soname for packages >= 1.0.
        # This is tracked in issue #333.
        set_target_properties(${target_name}-shared PROPERTIES IMPORTED_NO_SONAME TRUE)

        if(is_windows)
            _corrosion_set_imported_location("${target_name}-shared" "IMPORTED_IMPLIB"
                    "ARCHIVE_OUTPUT_DIRECTORY"
                    "${implib_name}"
            )
        endif()

        if(is_macos)
            set_property(TARGET ${target_name}-shared
                PROPERTY INTERFACE_LINK_DIRECTORIES "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib"
            )
        endif()
    endif()

    # `${target_name}` itself forwards to exactly one of the imported libraries. If both kinds are
    # built, BUILD_SHARED_LIBS decides which one.
    if(has_cdylib AND has_staticlib)
        if(BUILD_SHARED_LIBS)
            target_link_libraries(${target_name} INTERFACE ${target_name}-shared)
        else()
            target_link_libraries(${target_name} INTERFACE ${target_name}-static)
        endif()
    elseif(has_cdylib)
        target_link_libraries(${target_name} INTERFACE ${target_name}-shared)
    else()
        target_link_libraries(${target_name} INTERFACE ${target_name}-static)
    endif()
endfunction()

# Set up the already existing target `bin_name` for a Rust executable.
#
# Parameters:
# - workspace_manifest_path: Not used in this function.
# - bin_name: Name of the binary target. Must not be empty.
# - out_bin_byproduct: Name of the variable in the caller's scope receiving the binary file name.
# - out_pdb_byproduct: Name of the variable in the caller's scope receiving the `.pdb` file name.
#   It is only set when the target environment is `msvc`.
function(_corrosion_add_bin_target workspace_manifest_path bin_name out_bin_byproduct out_pdb_byproduct)
    if(NOT bin_name)
        # NOTE(review): `target_name` is not a parameter of this function; it only expands to
        # something if the calling scope happens to define it.
        message(FATAL_ERROR "No bin_name in _corrosion_add_bin_target for target ${target_name}")
    endif()

    string(REPLACE "-" "_" bin_name_underscore "${bin_name}")

    set(pdb_name "${bin_name_underscore}.pdb")

    if(Rust_CARGO_TARGET_ENV STREQUAL "msvc")
        set(${out_pdb_byproduct} "${pdb_name}" PARENT_SCOPE)
    endif()

    set(bin_filename "${bin_name}")
    # NOTE(review): the `defer` result is not read anywhere in this function.
    _corrosion_determine_deferred_byproduct_copying_and_import_location_handling("defer")
    set(${out_bin_byproduct} "${bin_filename}" PARENT_SCOPE)
    add_dependencies(${bin_name} cargo-build_${bin_name})

    if(Rust_CARGO_TARGET_OS STREQUAL "darwin")
        set_property(TARGET ${bin_name}
            PROPERTY INTERFACE_LINK_DIRECTORIES "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib"
        )
    endif()

    _corrosion_set_imported_location("${bin_name}" "IMPORTED_LOCATION"
            "RUNTIME_OUTPUT_DIRECTORY"
            "${bin_filename}"
    )

endfunction()


if (NOT CORROSION_NATIVE_TOOLING)
    include(CorrosionGenerator)
endif()

# Note: `cmake_language(GET_MESSAGE_LOG_LEVEL <output_variable>)` requires CMake 3.25,
# so we offer our own option to control verbosity of downstream commands (e.g. cargo build)
#
# Both flags live in the cache, so each branch also removes the opposite flag. Otherwise a value
# cached by an earlier configure run would survive toggling CORROSION_VERBOSE_OUTPUT.
if (CORROSION_VERBOSE_OUTPUT)
    set(_CORROSION_VERBOSE_OUTPUT_FLAG --verbose CACHE INTERNAL "")
    unset(_CORROSION_QUIET_OUTPUT_FLAG CACHE)
else()
    # We want to silence some less important commands by default.
    set(_CORROSION_QUIET_OUTPUT_FLAG --quiet CACHE INTERNAL "")
    unset(_CORROSION_VERBOSE_OUTPUT_FLAG CACHE)
endif()

if(CORROSION_NATIVE_TOOLING)
    if (NOT TARGET Corrosion::Generator )
        add_subdirectory(generator)
    endif()
    get_property(
        _CORROSION_GENERATOR_EXE
        TARGET Corrosion::Generator PROPERTY IMPORTED_LOCATION
    )
    set(
        _CORROSION_GENERATOR
        ${CMAKE_COMMAND} -E env
            CARGO_BUILD_RUSTC=${RUSTC_EXECUTABLE}
            ${_CORROSION_GENERATOR_EXE}
                --cargo ${CARGO_EXECUTABLE}
                ${_CORROSION_VERBOSE_OUTPUT_FLAG}
        CACHE INTERNAL "corrosion-generator runner"
    )
endif()

set(_CORROSION_CARGO_VERSION ${Rust_CARGO_VERSION} CACHE INTERNAL "cargo version used by corrosion")
set(_CORROSION_RUST_CARGO_TARGET ${Rust_CARGO_TARGET} CACHE INTERNAL "target triple used by corrosion")
set(_CORROSION_RUST_CARGO_HOST_TARGET ${Rust_CARGO_HOST_TARGET} CACHE INTERNAL "host triple used by corrosion")
set(_CORROSION_RUSTC "${RUSTC_EXECUTABLE}" CACHE INTERNAL "Path to rustc used by corrosion")
set(_CORROSION_CARGO "${CARGO_EXECUTABLE}" CACHE INTERNAL "Path to cargo used by corrosion")

string(REPLACE "-" "_" _CORROSION_RUST_CARGO_TARGET_UNDERSCORE "${Rust_CARGO_TARGET}")
set(_CORROSION_RUST_CARGO_TARGET_UNDERSCORE "${_CORROSION_RUST_CARGO_TARGET_UNDERSCORE}" CACHE INTERNAL "lowercase target triple with underscores")
string(TOUPPER "${_CORROSION_RUST_CARGO_TARGET_UNDERSCORE}" _CORROSION_TARGET_TRIPLE_UPPER)
set(_CORROSION_RUST_CARGO_TARGET_UPPER
    "${_CORROSION_TARGET_TRIPLE_UPPER}"
    CACHE INTERNAL
    "target triple in uppercase with underscore"
)

# We previously specified some Custom properties as part of our public API, however the chosen names prevented us from
# supporting CMake versions before 3.19. In order to both support older CMake versions and not break existing code
# immediately, we are using a different property name depending on the CMake version. However, users should avoid using
# any of the properties directly, as they are no longer part of the public API and are to be considered deprecated.
# Instead use the corrosion_set_... functions as documented in the Readme.
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.19.0)
    set(_CORR_PROP_FEATURES CORROSION_FEATURES CACHE INTERNAL "")
    set(_CORR_PROP_ALL_FEATURES CORROSION_ALL_FEATURES CACHE INTERNAL "")
    set(_CORR_PROP_NO_DEFAULT_FEATURES CORROSION_NO_DEFAULT_FEATURES CACHE INTERNAL "")
    set(_CORR_PROP_ENV_VARS CORROSION_ENVIRONMENT_VARIABLES CACHE INTERNAL "")
    set(_CORR_PROP_HOST_BUILD CORROSION_USE_HOST_BUILD CACHE INTERNAL "")
else()
    set(_CORR_PROP_FEATURES INTERFACE_CORROSION_FEATURES CACHE INTERNAL "")
    set(_CORR_PROP_ALL_FEATURES INTERFACE_CORROSION_ALL_FEATURES CACHE INTERNAL "")
    # NOTE(review): unlike its siblings this name has no `CORROSION_` infix. Left untouched because
    # code that still sets the deprecated property directly may depend on the exact name.
    set(_CORR_PROP_NO_DEFAULT_FEATURES INTERFACE_NO_DEFAULT_FEATURES CACHE INTERNAL "")
    set(_CORR_PROP_ENV_VARS INTERFACE_CORROSION_ENVIRONMENT_VARIABLES CACHE INTERNAL "")
    set(_CORR_PROP_HOST_BUILD INTERFACE_CORROSION_USE_HOST_BUILD CACHE INTERNAL "")
endif()

# Add custom command to build one target in a package (crate)
#
# A target may be either a specific bin (built with `--bin=<target>`) or the library of the
# package (built with `--lib` when the target kinds contain `staticlib` or `cdylib`).
#
# Parameters:
# - out_cargo_build_out_dir: Name of the variable in the caller's scope which receives the
#   directory cargo places the artifacts in.
# - Keyword arguments: PACKAGE, TARGET, MANIFEST_PATH, WORKSPACE_MANIFEST_PATH, BYPRODUCTS,
#   TARGET_KINDS and the option NO_LINKER_OVERRIDE.
#
# Creates the targets `_cargo-build_<target>`, `cargo-build_<target>`, `cargo-prebuild_<target>`
# and `cargo-clean_<target>`, plus the aggregate targets `cargo-prebuild` and `cargo-clean` if
# they do not exist yet.
#
# NOTE(review): several generator expressions in this function look truncated in this copy of the
# file (e.g. `"$>"`, `"$"`, `$<$:...>`). They are reproduced unchanged here and have to be verified
# against the upstream Corrosion sources.
function(_add_cargo_build out_cargo_build_out_dir)
    set(options NO_LINKER_OVERRIDE)
    set(one_value_args PACKAGE TARGET MANIFEST_PATH WORKSPACE_MANIFEST_PATH)
    set(multi_value_args BYPRODUCTS TARGET_KINDS)
    cmake_parse_arguments(
        ACB
        "${options}"
        "${one_value_args}"
        "${multi_value_args}"
        ${ARGN}
    )

    if(DEFINED ACB_UNPARSED_ARGUMENTS)
        message(FATAL_ERROR "Internal error - unexpected arguments: "
            ${ACB_UNPARSED_ARGUMENTS})
    elseif(DEFINED ACB_KEYWORDS_MISSING_VALUES)
        message(FATAL_ERROR "Internal error - missing values for the following arguments: "
            ${ACB_KEYWORDS_MISSING_VALUES})
    endif()

    set(package_name "${ACB_PACKAGE}")
    set(target_name "${ACB_TARGET}")
    set(path_to_toml "${ACB_MANIFEST_PATH}")
    set(target_kinds "${ACB_TARGET_KINDS}")
    set(workspace_manifest_path "${ACB_WORKSPACE_MANIFEST_PATH}")


    if(NOT target_kinds)
        message(FATAL_ERROR "TARGET_KINDS not specified")
    elseif("staticlib" IN_LIST target_kinds OR "cdylib" IN_LIST target_kinds)
        set(cargo_rustc_filter "--lib")
    elseif("bin" IN_LIST target_kinds)
        set(cargo_rustc_filter "--bin=${target_name}")
    else()
        # Report the list that was actually inspected (`target_kind` was never defined).
        message(FATAL_ERROR "TARGET_KINDS contained unknown kind `${target_kinds}`")
    endif()

    if (NOT IS_ABSOLUTE "${path_to_toml}")
        set(path_to_toml "${CMAKE_SOURCE_DIR}/${path_to_toml}")
    endif()
    get_filename_component(workspace_toml_dir ${path_to_toml} DIRECTORY )

    if (CMAKE_VS_PLATFORM_NAME)
        set (build_dir "${CMAKE_VS_PLATFORM_NAME}/$")
    elseif(COR_IS_MULTI_CONFIG)
        set (build_dir "$")
    else()
        set (build_dir .)
    endif()

    # If a CMake sysroot is specified, forward it to the linker rustc invokes, too. CMAKE_SYSROOT is documented
    # to be passed via --sysroot, so we assume that when it's set, the linker supports this option in that style.
    if(CMAKE_CROSSCOMPILING AND CMAKE_SYSROOT)
        set(corrosion_link_args "--sysroot=${CMAKE_SYSROOT}")
    endif()

    if(COR_ALL_FEATURES)
        set(all_features_arg --all-features)
    endif()
    if(COR_NO_DEFAULT_FEATURES)
        set(no_default_features_arg --no-default-features)
    endif()

    set(global_rustflags_target_property "$>")
    set(local_rustflags_target_property "$>")

    # todo: this probably should be TARGET_GENEX_EVAL
    set(features_target_property "$>")
    set(features_genex "$<$:--features=$>>")

    # target property overrides corrosion_import_crate argument
    set(all_features_target_property "$>")
    set(all_features_arg "$<$:--all-features>")

    set(no_default_features_target_property "$>")
    set(no_default_features_arg "$<$:--no-default-features>")

    set(build_env_variable_genex "$>")
    set(hostbuild_override "$>")
    set(if_not_host_build_condition "$")

    set(corrosion_link_args "$<${if_not_host_build_condition}:${corrosion_link_args}>")
    # We always set `--target`, so that cargo always places artifacts into a directory with the
    # target triple.
    set(cargo_target_option "--target=$")

    # The target may be a filepath to custom target json file. For host targets we assume that they are built-in targets.
    _corrosion_strip_target_triple(${_CORROSION_RUST_CARGO_TARGET} stripped_target_triple)
    set(target_artifact_dir "$")

    set(flags_genex "$>")

    set(explicit_linker_property "$")
    set(explicit_linker_defined "$")

    set(cargo_profile_target_property "$>")

    # Option to override the rustc/cargo binary to something other than the global default
    set(rustc_override "$")
    set(cargo_override "$")
    set(rustc_bin "$,${rustc_override},${_CORROSION_RUSTC}>")
    set(cargo_bin "$,${cargo_override},${_CORROSION_CARGO}>")


    # Rust will add `-lSystem` as a flag for the linker on macOS. Adding the -L flag via RUSTFLAGS only fixes the
    # problem partially - buildscripts still break, since they won't receive the RUSTFLAGS. This seems to only be a
    # problem if we specify the linker ourselves (which we do, since this is necessary for e.g. linking C++ code).
    # We can however set `LIBRARY_PATH`, which is propagated to the build-script-build properly.
    if(NOT CMAKE_CROSSCOMPILING AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
        # not needed anymore on macos 13 (and causes issues)
        if(${CMAKE_SYSTEM_VERSION} VERSION_LESS 22)
            set(cargo_library_path "LIBRARY_PATH=/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib")
        endif()
    elseif(CMAKE_CROSSCOMPILING AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin")
        if(${CMAKE_HOST_SYSTEM_VERSION} VERSION_LESS 22)
            set(cargo_library_path "$<${hostbuild_override}:LIBRARY_PATH=/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib>")
        endif()
    endif()

    set(cargo_profile_set "$")
    # In the default case just specify --release or nothing to stay compatible with
    # older rust versions.
    set(default_profile_option "$<$,$>>:--release>")
    # evaluates to either `--profile=<custom_profile>`, `--release` or nothing (for debug).
    set(cargo_profile "$")

    # If the profile name is `dev` change the dir name to `debug`.
    set(is_dev_profile "$")
    set(profile_dir_override "$<${is_dev_profile}:debug>")
    set(profile_dir_is_overridden "$")
    set(custom_profile_build_type_dir "$")

    set(default_build_type_dir "$,$>,debug,release>")
    set(build_type_dir "$")

    set(cargo_target_dir "${CMAKE_BINARY_DIR}/${build_dir}/cargo/build")
    set(cargo_build_dir "${cargo_target_dir}/${target_artifact_dir}/${build_type_dir}")
    set("${out_cargo_build_out_dir}" "${cargo_build_dir}" PARENT_SCOPE)

    set(corrosion_cc_rs_flags)

    if(CMAKE_C_COMPILER)
        # This variable is read by cc-rs (often used in build scripts) to determine the c-compiler.
        # It can still be overridden if the user sets the non underscore variant via the environment variables
        # on the target.
        list(APPEND corrosion_cc_rs_flags "CC_${_CORROSION_RUST_CARGO_TARGET_UNDERSCORE}=${CMAKE_C_COMPILER}")
    endif()
    if(CMAKE_CXX_COMPILER)
        list(APPEND corrosion_cc_rs_flags "CXX_${_CORROSION_RUST_CARGO_TARGET_UNDERSCORE}=${CMAKE_CXX_COMPILER}")
    endif()
    # cc-rs doesn't seem to support `llvm-ar` (commandline syntax), so we might as well just use
    # the default AR.
    if(CMAKE_AR AND NOT (Rust_CARGO_TARGET_ENV STREQUAL "msvc"))
        list(APPEND corrosion_cc_rs_flags "AR_${_CORROSION_RUST_CARGO_TARGET_UNDERSCORE}=${CMAKE_AR}")
    endif()

    # Since we instruct cc-rs to use the compiler found by CMake, it is likely one that requires also
    # specifying the target sysroot to use. CMake's generator makes sure to pass --sysroot with
    # CMAKE_OSX_SYSROOT. Fortunately the compilers Apple ships also respect the SDKROOT environment
    # variable, which we can set for use when cc-rs invokes the compiler.
    if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_SYSROOT)
        list(APPEND corrosion_cc_rs_flags "SDKROOT=${CMAKE_OSX_SYSROOT}")
    endif()

    corrosion_add_target_local_rustflags("${target_name}" "$<$:-Clink-args=${corrosion_link_args}>")

    # todo: this should probably also be guarded by if_not_host_build_condition.
    if(COR_NO_STD)
        corrosion_add_target_local_rustflags("${target_name}" "-Cdefault-linker-libraries=no")
    else()
        corrosion_add_target_local_rustflags("${target_name}" "-Cdefault-linker-libraries=yes")
    endif()

    set(global_joined_rustflags "$")
    set(global_rustflags_genex "$<$:RUSTFLAGS=${global_joined_rustflags}>")
    set(local_rustflags_delimiter "$<$:-->")
    set(local_rustflags_genex "$<$:${local_rustflags_target_property}>")

    set(deps_link_languages_prop "$")
    set(deps_link_languages "$")
    set(target_uses_cxx "$")
    unset(default_linker)
    # With the MSVC ABI rustc only supports directly invoking the linker - Invoking cl as the linker driver is not supported.
    if(NOT (Rust_CARGO_TARGET_ENV STREQUAL "msvc" OR COR_NO_LINKER_OVERRIDE))
        set(default_linker "$,${CMAKE_CXX_COMPILER},${CMAKE_C_COMPILER}>")
    endif()
    # Used to set a linker for a specific target-triple.
    set(cargo_target_linker_var "CARGO_TARGET_${_CORROSION_RUST_CARGO_TARGET_UPPER}_LINKER")
    set(linker "$")
    set(cargo_target_linker $<$:${cargo_target_linker_var}=${linker}>)

    if(Rust_CROSSCOMPILING AND (CMAKE_C_COMPILER_TARGET OR CMAKE_CXX_COMPILER_TARGET))
        set(linker_target_triple "$,${CMAKE_CXX_COMPILER_TARGET},${CMAKE_C_COMPILER_TARGET}>")
        set(rustflag_linker_arg "-Clink-args=--target=${linker_target_triple}")
        set(rustflag_linker_arg "$<${if_not_host_build_condition}:${rustflag_linker_arg}>")
        # Skip adding the linker argument, if the linker is explicitly set, since the
        # explicit_linker_property will not be set when this function runs.
        # Passing this rustflag is necessary for clang.
        corrosion_add_target_local_rustflags("${target_name}" "$<$:${rustflag_linker_arg}>")
    endif()

    # Print the parsed BYPRODUCTS argument (a plain `byproducts` variable is never set here).
    message(DEBUG "TARGET ${target_name} produces byproducts ${ACB_BYPRODUCTS}")

    add_custom_target(
        _cargo-build_${target_name}
        # Build crate
        COMMAND
            ${CMAKE_COMMAND} -E env
                "${build_env_variable_genex}"
                "${global_rustflags_genex}"
                "${cargo_target_linker}"
                "${corrosion_cc_rs_flags}"
                "${cargo_library_path}"
                "CORROSION_BUILD_DIR=${CMAKE_CURRENT_BINARY_DIR}"
                "CARGO_BUILD_RUSTC=${rustc_bin}"
            "${cargo_bin}"
                rustc
                ${cargo_rustc_filter}
                ${cargo_target_option}
                ${_CORROSION_VERBOSE_OUTPUT_FLAG}
                ${all_features_arg}
                ${no_default_features_arg}
                ${features_genex}
                --package ${package_name}
                --manifest-path "${path_to_toml}"
                --target-dir "${cargo_target_dir}"
                ${cargo_profile}
                ${flags_genex}
                # Any arguments to cargo must be placed before this line
                ${local_rustflags_delimiter}
                ${local_rustflags_genex}

        # Note: Adding `build_byproducts` (the byproducts in the cargo target directory) here
        # causes CMake to fail during the Generate stage, because the target `target_name` was not
        # found. I don't know why this happens, so we just don't specify byproducts here and
        # only specify the actual byproducts in the `POST_BUILD` custom command that copies the
        # byproducts to the final destination.
        # BYPRODUCTS ${build_byproducts}
        # The build is conducted in the directory of the Manifest, so that configuration files such as
        # `.cargo/config.toml` or `toolchain.toml` are applied as expected.
        WORKING_DIRECTORY "${workspace_toml_dir}"
        USES_TERMINAL
        COMMAND_EXPAND_LISTS
        VERBATIM
    )

    # User exposed custom target, that depends on the internal target.
    # Corrosion post build steps are added on the internal target, which
    # ensures that they run before any user defined post build steps on this
    # target.
    add_custom_target(
        cargo-build_${target_name}
        ALL
    )
    add_dependencies(cargo-build_${target_name} _cargo-build_${target_name})

    # Add custom target before actual build that user defined custom commands (e.g. code generators) can
    # use as a hook to do something before the build. This mainly exists to not expose the `_cargo-build` targets.
    add_custom_target(cargo-prebuild_${target_name})
    add_dependencies(_cargo-build_${target_name} cargo-prebuild_${target_name})
    if(NOT TARGET cargo-prebuild)
        add_custom_target(cargo-prebuild)
    endif()
    add_dependencies(cargo-prebuild cargo-prebuild_${target_name})

    add_custom_target(
        cargo-clean_${target_name}
        COMMAND
            "${cargo_bin}" clean ${cargo_target_option}
            -p ${package_name} --manifest-path ${path_to_toml}
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${build_dir}
        USES_TERMINAL
    )

    if (NOT TARGET cargo-clean)
        add_custom_target(cargo-clean)
    endif()
    add_dependencies(cargo-clean cargo-clean_${target_name})
endfunction()

#[=======================================================================[.md:
ANCHOR: corrosion-import-crate
```cmake
corrosion_import_crate(
        MANIFEST_PATH <path/to/cargo.toml>
        [ALL_FEATURES]
        [NO_DEFAULT_FEATURES]
        [NO_STD]
        [NO_LINKER_OVERRIDE]
        [LOCKED]
        [FROZEN]
        [PROFILE <cargo-profile>]
        [IMPORTED_CRATES <variable-name>]
        [CRATE_TYPES <crate_type1> ... <crate_typeN>]
        [CRATES <crate1> ... <crateN>]
        [FEATURES <feature1> ... <featureN>]
        [FLAGS <flag1> ... <flagN>]
)
```
* **MANIFEST_PATH**: Path to a [Cargo.toml Manifest] file.
* **ALL_FEATURES**: Equivalent to [--all-features] passed to cargo build
* **NO_DEFAULT_FEATURES**: Equivalent to [--no-default-features] passed to cargo build
* **NO_STD**: Disable linking of standard libraries (required for no_std crates).
* **NO_LINKER_OVERRIDE**: Will let Rust/Cargo determine which linker to use instead of corrosion (when linking is invoked by Rust)
* **LOCKED**: Pass [`--locked`] to cargo build and cargo metadata.
* **FROZEN**: Pass [`--frozen`] to cargo build and cargo metadata.
* **PROFILE**: Specify cargo build profile (`dev`/`release` or a [custom profile]; `bench` and `test` are not supported)
* **IMPORTED_CRATES**: Save the list of imported crates into the variable with the provided name in the current scope.
* **CRATE_TYPES**: Only import the specified crate types. Valid values: `staticlib`, `cdylib`, `bin`.
+* **CRATES**: Only import the specified crates from a workspace. Values: Crate names. +* **FEATURES**: Enable the specified features. Equivalent to [--features] passed to `cargo build`. +* **FLAGS**: Arbitrary flags to `cargo build`. + +[custom profile]: https://doc.rust-lang.org/cargo/reference/profiles.html#custom-profiles +[--all-features]: https://doc.rust-lang.org/cargo/reference/features.html#command-line-feature-options +[--no-default-features]: https://doc.rust-lang.org/cargo/reference/features.html#command-line-feature-options +[--features]: https://doc.rust-lang.org/cargo/reference/features.html#command-line-feature-options +[`--locked`]: https://doc.rust-lang.org/cargo/commands/cargo.html#manifest-options +[`--frozen`]: https://doc.rust-lang.org/cargo/commands/cargo.html#manifest-options +[Cargo.toml Manifest]: https://doc.rust-lang.org/cargo/appendix/glossary.html#manifest + +ANCHOR_END: corrosion-import-crate +#]=======================================================================] +function(corrosion_import_crate) + set(OPTIONS ALL_FEATURES NO_DEFAULT_FEATURES NO_STD NO_LINKER_OVERRIDE LOCKED FROZEN) + set(ONE_VALUE_KEYWORDS MANIFEST_PATH PROFILE IMPORTED_CRATES) + set(MULTI_VALUE_KEYWORDS CRATE_TYPES CRATES FEATURES FLAGS) + cmake_parse_arguments(COR "${OPTIONS}" "${ONE_VALUE_KEYWORDS}" "${MULTI_VALUE_KEYWORDS}" ${ARGN}) + list(APPEND CMAKE_MESSAGE_CONTEXT "corrosion_import_crate") + + if(DEFINED COR_UNPARSED_ARGUMENTS) + message(AUTHOR_WARNING "Unexpected arguments: " ${COR_UNPARSED_ARGUMENTS} + "\nCorrosion will ignore these unexpected arguments." 
+ ) + endif() + if(DEFINED COR_KEYWORDS_MISSING_VALUES) + message(DEBUG "Note: the following keywords passed to corrosion_import_crate had no associated value(s): " + ${COR_KEYWORDS_MISSING_VALUES} + ) + endif() + if (NOT DEFINED COR_MANIFEST_PATH) + message(FATAL_ERROR "MANIFEST_PATH is a required keyword to corrosion_add_crate") + endif() + _corrosion_option_passthrough_helper(NO_LINKER_OVERRIDE COR no_linker_override) + _corrosion_option_passthrough_helper(LOCKED COR locked) + _corrosion_option_passthrough_helper(FROZEN COR frozen) + _corrosion_arg_passthrough_helper(CRATES COR crate_allowlist) + _corrosion_arg_passthrough_helper(CRATE_TYPES COR crate_types) + + if(COR_PROFILE) + if(Rust_VERSION VERSION_LESS 1.57.0) + message(FATAL_ERROR "Selecting custom profiles via `PROFILE` requires at least rust 1.57.0, but you " + "have ${Rust_VERSION}." + ) + # The profile name could be part of a Generator expression, so this won't catch all occurences. + # Since it is hard to add an error message for genex, we don't do that here. + elseif("${COR_PROFILE}" STREQUAL "test" OR "${COR_PROFILE}" STREQUAL "bench") + message(FATAL_ERROR "Corrosion does not support building Rust crates with the cargo profiles" + " `test` or `bench`. These profiles add a hash to the output artifact name that we" + " cannot predict. Please consider using a custom cargo profile which inherits from the" + " built-in profile instead." 
+ ) + endif() + endif() + + if (NOT IS_ABSOLUTE "${COR_MANIFEST_PATH}") + set(COR_MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${COR_MANIFEST_PATH}) + endif() + + set(additional_cargo_flags ${COR_FLAGS}) + + if(COR_LOCKED AND NOT "--locked" IN_LIST additional_cargo_flags) + list(APPEND additional_cargo_flags "--locked") + endif() + if(COR_FROZEN AND NOT "--frozen" IN_LIST additional_cargo_flags) + list(APPEND additional_cargo_flags "--frozen") + endif() + + set(imported_crates "") + if (CORROSION_NATIVE_TOOLING) + get_filename_component(manifest_directory "${COR_MANIFEST_PATH}" DIRECTORY) + get_filename_component(toml_dir_name ${manifest_directory} NAME) + + set( + generated_cmake + "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/corrosion/${toml_dir_name}.dir/cargo-build.cmake" + ) + + if (CMAKE_VS_PLATFORM_NAME) + set (_CORROSION_CONFIGURATION_ROOT --configuration-root ${CMAKE_VS_PLATFORM_NAME}) + endif() + + set(crates_args) + foreach(crate ${COR_CRATES}) + list(APPEND crates_args --crates ${crate}) + endforeach() + if(DEFINED COR_CRATE_TYPES) + set(crate_types "--crate-type=${COR_CRATE_TYPES}") + endif() + + list(APPEND passthrough_to_acb_args ${no_linker_override}) + if(passthrough_to_acb_args) + # 31 == 0x1f + string(ASCII 31 unit_seperator) + list(JOIN passthrough_to_acb_args "${unit_seperator}" joined_args) + set(passthrough_to_acb "--passthrough-acb=${joined_args}") + endif() + + execute_process( + COMMAND + ${_CORROSION_GENERATOR} + --manifest-path ${COR_MANIFEST_PATH} + gen-cmake + ${_CORROSION_CONFIGURATION_ROOT} + ${crates_args} + ${crate_types} + --imported-crates=imported_crates + ${passthrough_to_acb} + -o ${generated_cmake} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + RESULT_VARIABLE ret) + + if (NOT ret EQUAL "0") + message(FATAL_ERROR "corrosion-generator failed") + endif() + + include(${generated_cmake}) + else() + _generator_add_cargo_targets( + MANIFEST_PATH + "${COR_MANIFEST_PATH}" + IMPORTED_CRATES + imported_crates + ${crate_allowlist} + 
${crate_types} + ${no_linker_override} + ) + endif() + + # Not target props yet: + # NO_STD + # NO_LINKER_OVERRIDE # We could simply zero INTERFACE_CORROSION_LINKER if this is set. + # LOCKED / FROZEN get merged into FLAGS after cargo metadata. + + # Initialize the target properties with the arguments to corrosion_import_crate. + set_target_properties( + ${imported_crates} + PROPERTIES + "${_CORR_PROP_ALL_FEATURES}" "${COR_ALL_FEATURES}" + "${_CORR_PROP_NO_DEFAULT_FEATURES}" "${COR_NO_DEFAULT_FEATURES}" + "${_CORR_PROP_FEATURES}" "${COR_FEATURES}" + INTERFACE_CORROSION_CARGO_PROFILE "${COR_PROFILE}" + INTERFACE_CORROSION_CARGO_FLAGS "${additional_cargo_flags}" + ) + + # _CORR_PROP_ENV_VARS + if(DEFINED COR_IMPORTED_CRATES) + set(${COR_IMPORTED_CRATES} ${imported_crates} PARENT_SCOPE) + endif() +endfunction() + +function(corrosion_set_linker_language target_name language) + message(FATAL_ERROR "corrosion_set_linker_language was deprecated and removed." + "Please use corrosion_set_linker and set a specific linker.") +endfunction() + +function(corrosion_set_linker target_name linker) + if(NOT linker) + message(FATAL_ERROR "The linker passed to `corrosion_set_linker` may not be empty") + elseif(NOT TARGET "${target_name}") + message(FATAL_ERROR "The target `${target_name}` does not exist.") + endif() + if(MSVC) + message(WARNING "Explicitly setting the linker with the MSVC toolchain is currently not supported and ignored") + endif() + + if(TARGET "${target_name}-static" AND NOT TARGET "${target_name}-shared") + message(WARNING "The target ${target_name} builds a static library." + "The linker is never invoked for a static library so specifying a linker has no effect." + ) + endif() + + set_property( + TARGET ${target_name} + PROPERTY INTERFACE_CORROSION_LINKER "${linker}" + ) +endfunction() + +function(corrosion_set_hostbuild target_name) + # Configure the target to be compiled for the Host target and ignore any cross-compile configuration. 
+ set_property( + TARGET ${target_name} + PROPERTY ${_CORR_PROP_HOST_BUILD} 1 + ) +endfunction() + +# Add flags for rustc (RUSTFLAGS) which affect the target and all of its Rust dependencies +# +# Additional rustflags may be passed as optional parameters after rustflag. +# Please note, that if you import multiple targets from a package or workspace, but set different +# Rustflags via this function, the Rust dependencies will have to be rebuilt when changing targets. +# Consider `corrosion_add_target_local_rustflags()` as an alternative to avoid this. +function(corrosion_add_target_rustflags target_name rustflag) + # Additional rustflags may be passed as optional parameters after rustflag. + set_property( + TARGET ${target_name} + APPEND + PROPERTY INTERFACE_CORROSION_RUSTFLAGS ${rustflag} ${ARGN} + ) +endfunction() + +# Add flags for rustc (RUSTFLAGS) which only affect the target, but none of its (Rust) dependencies +# +# Additional rustflags may be passed as optional parameters after rustc_flag. +function(corrosion_add_target_local_rustflags target_name rustc_flag) + # Set Rustflags via `cargo rustc` which only affect the current crate, but not dependencies. + set_property( + TARGET ${target_name} + APPEND + PROPERTY INTERFACE_CORROSION_LOCAL_RUSTFLAGS ${rustc_flag} ${ARGN} + ) +endfunction() + +# Append `env_var` (and any further arguments) to the property named by `_CORR_PROP_ENV_VARS` +# on `target_name`. Existing entries are kept, since the property is set with APPEND. +function(corrosion_set_env_vars target_name env_var) + # Additional environment variables may be passed as optional parameters after env_var. + set_property( + TARGET ${target_name} + APPEND + PROPERTY ${_CORR_PROP_ENV_VARS} ${env_var} ${ARGN} + ) +endfunction() + +# Append every argument after `target_name` to the `INTERFACE_CORROSION_CARGO_FLAGS` property +# of `target_name`. +function(corrosion_set_cargo_flags target_name) + # corrosion_set_cargo_flags( [ ... ]) + + set_property( + TARGET ${target_name} + APPEND + PROPERTY INTERFACE_CORROSION_CARGO_FLAGS ${ARGN} + ) +endfunction() + +function(corrosion_set_features target_name) + # corrosion_set_features( [ALL_FEATURES=Bool] [NO_DEFAULT_FEATURES] [FEATURES ... 
]) + set(options NO_DEFAULT_FEATURES) + set(one_value_args ALL_FEATURES) + set(multi_value_args FEATURES) + cmake_parse_arguments( + PARSE_ARGV 1 + SET + "${options}" + "${one_value_args}" + "${multi_value_args}" + ) + + if(DEFINED SET_ALL_FEATURES) + set_property( + TARGET ${target_name} + PROPERTY ${_CORR_PROP_ALL_FEATURES} ${SET_ALL_FEATURES} + ) + endif() + if(SET_NO_DEFAULT_FEATURES) + set_property( + TARGET ${target_name} + PROPERTY ${_CORR_PROP_NO_DEFAULT_FEATURES} 1 + ) + endif() + if(SET_FEATURES) + set_property( + TARGET ${target_name} + APPEND + PROPERTY ${_CORR_PROP_FEATURES} ${SET_FEATURES} + ) + endif() +endfunction() + +function(corrosion_link_libraries target_name) + if(TARGET "${target_name}-static") + message(DEBUG "The target ${target_name} builds a static Rust library." + "Calling `target_link_libraries()` instead." + ) + target_link_libraries("${target_name}-static" INTERFACE ${ARGN}) + if(NOT TARGET "${target_name}-shared") + # Early return, since Rust won't invoke the linker for static libraries + return() + endif() + endif() + add_dependencies(_cargo-build_${target_name} ${ARGN}) + foreach(library ${ARGN}) + set_property( + TARGET _cargo-build_${target_name} + APPEND + PROPERTY CARGO_DEPS_LINKER_LANGUAGES + $ + ) + + corrosion_add_target_local_rustflags(${target_name} "-L$") + corrosion_add_target_local_rustflags(${target_name} "-l$") + endforeach() +endfunction() + +function(corrosion_install) + # Default install dirs + include(GNUInstallDirs) + + # Parse arguments to corrosion_install + list(GET ARGN 0 INSTALL_TYPE) + list(REMOVE_AT ARGN 0) + + # The different install types that are supported. Some targets may have more than one of these + # types. For example, on Windows, a shared library will have both an ARCHIVE component and a + # RUNTIME component. 
+ set(INSTALL_TARGET_TYPES ARCHIVE LIBRARY RUNTIME PRIVATE_HEADER PUBLIC_HEADER) + + # Arguments to each install target type + set(OPTIONS) + set(ONE_VALUE_ARGS DESTINATION) + set(MULTI_VALUE_ARGS PERMISSIONS CONFIGURATIONS) + set(TARGET_ARGS ${OPTIONS} ${ONE_VALUE_ARGS} ${MULTI_VALUE_ARGS}) + + if (INSTALL_TYPE STREQUAL "TARGETS") + # corrosion_install(TARGETS ... [EXPORT ] + # [[ARCHIVE|LIBRARY|RUNTIME|PRIVATE_HEADER|PUBLIC_HEADER] + # [DESTINATION ] + # [PERMISSIONS permissions...] + # [CONFIGURATIONS [Debug|Release|...]] + # ] [...]) + + # Extract targets + set(INSTALL_TARGETS) + list(LENGTH ARGN ARGN_LENGTH) + set(DELIMITERS EXPORT ${INSTALL_TARGET_TYPES} ${TARGET_ARGS}) + while(ARGN_LENGTH) + # If we hit another keyword, stop - we've found all the targets + list(GET ARGN 0 FRONT) + if (FRONT IN_LIST DELIMITERS) + break() + endif() + + list(APPEND INSTALL_TARGETS ${FRONT}) + list(REMOVE_AT ARGN 0) + + # Update ARGN_LENGTH + list(LENGTH ARGN ARGN_LENGTH) + endwhile() + + # Check if there are any args left before proceeding + list(LENGTH ARGN ARGN_LENGTH) + if (ARGN_LENGTH) + list(GET ARGN 0 FRONT) + if (FRONT STREQUAL "EXPORT") + list(REMOVE_AT ARGN 0) # Pop "EXPORT" + + list(GET ARGN 0 EXPORT_NAME) + list(REMOVE_AT ARGN 0) # Pop + message(FATAL_ERROR "EXPORT keyword not yet implemented!") + endif() + endif() + + # Loop over all arguments and get options for each install target type + list(LENGTH ARGN ARGN_LENGTH) + while(ARGN_LENGTH) + # Check if we're dealing with arguments for a specific install target type, or with + # default options for all target types. + list(GET ARGN 0 FRONT) + if (FRONT IN_LIST INSTALL_TARGET_TYPES) + set(INSTALL_TARGET_TYPE ${FRONT}) + list(REMOVE_AT ARGN 0) + else() + set(INSTALL_TARGET_TYPE DEFAULT) + endif() + + # Gather the arguments to this install type + set(ARGS) + while(ARGN_LENGTH) + # If the next keyword is an install target type, then break - arguments have been + # gathered. 
+ list(GET ARGN 0 FRONT) + if (FRONT IN_LIST INSTALL_TARGET_TYPES) + break() + endif() + + list(APPEND ARGS ${FRONT}) + list(REMOVE_AT ARGN 0) + + list(LENGTH ARGN ARGN_LENGTH) + endwhile() + + # Parse the arguments and register the file install + cmake_parse_arguments( + COR "${OPTIONS}" "${ONE_VALUE_ARGS}" "${MULTI_VALUE_ARGS}" ${ARGS}) + + if (COR_DESTINATION) + set(COR_INSTALL_${INSTALL_TARGET_TYPE}_DESTINATION ${COR_DESTINATION}) + endif() + + if (COR_PERMISSIONS) + set(COR_INSTALL_${INSTALL_TARGET_TYPE}_PERMISSIONS ${COR_PERMISSIONS}) + endif() + + if (COR_CONFIGURATIONS) + set(COR_INSTALL_${INSTALL_TARGET_TYPE}_CONFIGURATIONS ${COR_CONFIGURATIONS}) + endif() + + # Update ARG_LENGTH + list(LENGTH ARGN ARGN_LENGTH) + endwhile() + + # Default permissions for all files + set(DEFAULT_PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ) + + # Loop through each install target and register file installations + foreach(INSTALL_TARGET ${INSTALL_TARGETS}) + # Don't both implementing target type differentiation using generator expressions since + # TYPE cannot change after target creation + get_property( + TARGET_TYPE + TARGET ${INSTALL_TARGET} PROPERTY TYPE + ) + + # Install executable files first + if (TARGET_TYPE STREQUAL "EXECUTABLE") + if (DEFINED COR_INSTALL_RUNTIME_DESTINATION) + set(DESTINATION ${COR_INSTALL_RUNTIME_DESTINATION}) + elseif (DEFINED COR_INSTALL_DEFAULT_DESTINATION) + set(DESTINATION ${COR_INSTALL_DEFAULT_DESTINATION}) + else() + set(DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() + + if (DEFINED COR_INSTALL_RUNTIME_PERMISSIONS) + set(PERMISSIONS ${COR_INSTALL_RUNTIME_PERMISSIONS}) + elseif (DEFINED COR_INSTALL_DEFAULT_PERMISSIONS) + set(PERMISSIONS ${COR_INSTALL_DEFAULT_PERMISSIONS}) + else() + set( + PERMISSIONS + ${DEFAULT_PERMISSIONS} OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE) + endif() + + if (DEFINED COR_INSTALL_RUNTIME_CONFIGURATIONS) + set(CONFIGURATIONS CONFIGURATIONS ${COR_INSTALL_RUNTIME_CONFIGURATIONS}) + elseif (DEFINED 
COR_INSTALL_DEFAULT_CONFIGURATIONS) + set(CONFIGURATIONS CONFIGURATIONS ${COR_INSTALL_DEFAULT_CONFIGURATIONS}) + else() + set(CONFIGURATIONS) + endif() + + install( + FILES $ + DESTINATION ${DESTINATION} + PERMISSIONS ${PERMISSIONS} + ${CONFIGURATIONS} + ) + endif() + endforeach() + + elseif(INSTALL_TYPE STREQUAL "EXPORT") + message(FATAL_ERROR "install(EXPORT ...) not yet implemented") + endif() +endfunction() + +#[=======================================================================[.md: +** EXPERIMENTAL **: This function is currently still considered experimental + and is not officially released yet. Feedback and Suggestions are welcome. + +ANCHOR: corrosion_add_cxxbridge + +```cmake +corrosion_add_cxxbridge(cxx_target + CRATE + [FILES ] +) +``` + +Adds build-rules to create C++ bindings using the [cxx] crate. + +### Arguments: +* `cxxtarget`: Name of the C++ library target for the bindings, which corrosion will create. +* **FILES**: Input Rust source file containing #[cxx::bridge]. +* **CRATE**: Name of an imported Rust target. Note: Parameter may be renamed before release + +#### Currently missing arguments + +The following arguments to cxxbridge **currently** have no way to be passed by the user: +- `--cfg` +- `--cxx-impl-annotations` +- `--include` + +The created rules approximately do the following: +- Check which version of `cxx` the Rust crate specified by the `CRATE` argument depends on. +- Check if the exact same version of `cxxbridge-cmd` is installed (available in `PATH`) +- If not, create a rule to build the exact same version of `cxxbridge-cmd`. +- Create rules to run `cxxbridge` and generate + - The `rust/cxx.h` header + - A header and source file for each of the files specified in `FILES` +- The generated sources (and header include directories) are added to the `cxxtarget` CMake + library target. 
+ +### Limitations + +We currently require the `CRATE` argument to be a target imported by Corrosion, however, +Corrosion does not import `rlib` only libraries. As a workaround users can add +`staticlib` to their list of crate kinds. In the future this may be solved more properly, +by either adding an option to also import Rlib targets (without build rules) or by +adding a `MANIFEST_PATH` argument to this function, specifying where the crate is. + +### Contributing + +Specifically some more realistic test / demo projects and feedback about limitations would be +welcome. + +[cxx]: https://github.com/dtolnay/cxx + +ANCHOR_END: corrosion_add_cxxbridge +#]=======================================================================] +function(corrosion_add_cxxbridge cxx_target) + set(OPTIONS) + set(ONE_VALUE_KEYWORDS CRATE) + set(MULTI_VALUE_KEYWORDS FILES) + cmake_parse_arguments(PARSE_ARGV 1 _arg "${OPTIONS}" "${ONE_VALUE_KEYWORDS}" "${MULTI_VALUE_KEYWORDS}") + + set(required_keywords CRATE FILES) + foreach(keyword ${required_keywords}) + if(NOT DEFINED "_arg_${keyword}") + message(FATAL_ERROR "Missing required parameter `${keyword}`.") + elseif("${_arg_${keyword}}" STREQUAL "") + message(FATAL_ERROR "Required parameter `${keyword}` may not be set to an empty string.") + endif() + endforeach() + + get_target_property(manifest_path "${_arg_CRATE}" INTERFACE_COR_PACKAGE_MANIFEST_PATH) + + if(NOT EXISTS "${manifest_path}") + message(FATAL_ERROR "Internal error: No package manifest found at ${manifest_path}") + endif() + + get_filename_component(manifest_dir ${manifest_path} DIRECTORY) + + execute_process(COMMAND ${CMAKE_COMMAND} -E env + "CARGO_BUILD_RUSTC=${_CORROSION_RUSTC}" + ${_CORROSION_CARGO} tree -i cxx --depth=0 + WORKING_DIRECTORY "${manifest_dir}" + RESULT_VARIABLE cxx_version_result + OUTPUT_VARIABLE cxx_version_output + ) + if(NOT "${cxx_version_result}" EQUAL "0") + message(FATAL_ERROR "Crate ${_arg_CRATE} does not depend on cxx.") + endif() + 
if(cxx_version_output MATCHES "cxx v([0-9]+\\.[0-9]+\\.[0-9]+)") + set(cxx_required_version "${CMAKE_MATCH_1}") + else() + # Print the captured `cargo tree` output itself, so the user can see what failed to parse. + message(FATAL_ERROR "Failed to parse cxx version from cargo tree output: `${cxx_version_output}`") + endif() + + # First check if a suitable version of cxxbridge is installed + find_program(INSTALLED_CXXBRIDGE cxxbridge PATHS "$ENV{HOME}/.cargo/bin/") + mark_as_advanced(INSTALLED_CXXBRIDGE) + if(INSTALLED_CXXBRIDGE) + execute_process(COMMAND ${INSTALLED_CXXBRIDGE} --version OUTPUT_VARIABLE cxxbridge_version_output) + # The dots are escaped so that only a literal `major.minor.patch` version is accepted. + if(cxxbridge_version_output MATCHES "cxxbridge ([0-9]+\\.[0-9]+\\.[0-9]+)") + set(cxxbridge_version "${CMAKE_MATCH_1}") + else() + set(cxxbridge_version "") + endif() + endif() + + # `cxxbridge` stays empty unless the installed binary has exactly the required version. + set(cxxbridge "") + if(cxxbridge_version) + if(cxxbridge_version VERSION_EQUAL cxx_required_version) + set(cxxbridge "${INSTALLED_CXXBRIDGE}") + if(NOT TARGET "cxxbridge_v${cxx_required_version}") + # Add an empty target. + add_custom_target("cxxbridge_v${cxx_required_version}" + ) + endif() + endif() + endif() + + # No suitable version of cxxbridge was installed, so use custom target to build correct version. 
+ if(NOT cxxbridge) + if(NOT TARGET "cxxbridge_v${cxx_required_version}") + add_custom_command(OUTPUT "${CMAKE_BINARY_DIR}/corrosion/cxxbridge_v${cxx_required_version}/bin/cxxbridge" + COMMAND + ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/corrosion/cxxbridge_v${cxx_required_version}" + COMMAND + ${CMAKE_COMMAND} -E env + "CARGO_BUILD_RUSTC=${_CORROSION_RUSTC}" + ${_CORROSION_CARGO} install + cxxbridge-cmd + --version "${cxx_required_version}" + --root "${CMAKE_BINARY_DIR}/corrosion/cxxbridge_v${cxx_required_version}" + --quiet + # todo: use --target-dir to potentially reuse artifacts + COMMENT "Building cxxbridge (version ${cxx_required_version})" + ) + add_custom_target("cxxbridge_v${cxx_required_version}" + DEPENDS "${CMAKE_BINARY_DIR}/corrosion/cxxbridge_v${cxx_required_version}/bin/cxxbridge" + ) + endif() + set(cxxbridge "${CMAKE_BINARY_DIR}/corrosion/cxxbridge_v${cxx_required_version}/bin/cxxbridge") + endif() + + + # The generated folder structure will be of the following form + # + # CMAKE_CURRENT_BINARY_DIR + # corrosion_generated + # cxxbridge + # + # include + # + # + # rust + # cxx.h + # src + # + # cbindgen + # ... + # other + # ... + + set(corrosion_generated_dir "${CMAKE_CURRENT_BINARY_DIR}/corrosion_generated") + set(generated_dir "${corrosion_generated_dir}/cxxbridge/${cxx_target}") + set(header_placement_dir "${generated_dir}/include/${cxx_target}") + set(source_placement_dir "${generated_dir}/src") + + add_library(${cxx_target} STATIC) + target_include_directories(${cxx_target} + PUBLIC + $ + $ + ) + + # cxx generated code is using c++11 features in headers, so propagate c++11 as minimal requirement + target_compile_features(${cxx_target} PUBLIC cxx_std_11) + + # Todo: target_link_libraries is only necessary for rust2c projects. + # It is possible that checking if the rust crate is an executable is a sufficient check, + # but some more thought may be needed here. 
+ # Maybe we should also let the user do this, since for c2rust, the user also has to call + # corrosion_link_libraries() themselves. + get_target_property(crate_target_type ${_arg_CRATE} TYPE) + if (NOT crate_target_type STREQUAL "EXECUTABLE") + target_link_libraries(${cxx_target} PRIVATE ${_arg_CRATE}) + endif() + + file(MAKE_DIRECTORY "${generated_dir}/include/rust") + add_custom_command( + OUTPUT "${generated_dir}/include/rust/cxx.h" + COMMAND + ${cxxbridge} --header --output "${generated_dir}/include/rust/cxx.h" + DEPENDS "cxxbridge_v${cxx_required_version}" + COMMENT "Generating rust/cxx.h header" + ) + + foreach(filepath ${_arg_FILES}) + get_filename_component(filename ${filepath} NAME_WE) + get_filename_component(directory ${filepath} DIRECTORY) + set(directory_component "") + if(directory) + set(directory_component "${directory}/") + endif() + # todo: convert potentially absolute paths to relative paths.. + set(cxx_header ${directory_component}${filename}.h) + set(cxx_source ${directory_component}${filename}.cpp) + + # todo: not all projects may use the `src` directory. 
+ set(rust_source_path "${manifest_dir}/src/${filepath}") + + file(MAKE_DIRECTORY "${header_placement_dir}/${directory}" "${source_placement_dir}/${directory}") + + add_custom_command( + OUTPUT + "${header_placement_dir}/${cxx_header}" + "${source_placement_dir}/${cxx_source}" + COMMAND + ${cxxbridge} ${rust_source_path} --header --output "${header_placement_dir}/${cxx_header}" + COMMAND + ${cxxbridge} ${rust_source_path} + --output "${source_placement_dir}/${cxx_source}" + --include "${cxx_target}/${cxx_header}" + DEPENDS "cxxbridge_v${cxx_required_version}" "${rust_source_path}" + COMMENT "Generating cxx bindings for crate ${_arg_CRATE}" + ) + + target_sources(${cxx_target} + PRIVATE + "${header_placement_dir}/${cxx_header}" + "${generated_dir}/include/rust/cxx.h" + "${source_placement_dir}/${cxx_source}" + ) + endforeach() +endfunction() + +#[=======================================================================[.md: +ANCHOR: corrosion_cbindgen +```cmake +corrosion_cbindgen( + TARGET + HEADER_NAME + [MANIFEST_DIRECTORY ] + [CBINDGEN_VERSION ] + [FLAGS ... ] +) +``` + +A helper function which uses [cbindgen] to generate C/C++ bindings for a Rust crate. +If `cbindgen` is not in `PATH` the helper function will automatically try to download +`cbindgen` and place the built binary into `CMAKE_BINARY_DIR`. The binary is shared +between multiple invocations of this function. + + +* **TARGET**: The name of an imported Rust library target (crate), for which bindings should be generated. + If the target was not previously imported by Corrosion, because the crate only produces an + `rlib`, you must additionally specify `MANIFEST_DIRECTORY`. + +* **MANIFEST_DIRECTORY**: Directory of the package defining the library crate bindings should be generated for. + If you want to avoid specifying `MANIFEST_DIRECTORY` you could add a `staticlib` target to your package + manifest as a workaround to make corrosion import the crate. 
+ +* **HEADER_NAME**: The name of the generated header file. This will be the name which you include in your C/C++ code + (e.g. `#include "myproject/myheader.h" if you specify `HEADER_NAME "myproject/myheader.h"`. +* **CBINDGEN_VERSION**: Version requirement for cbindgen. Exact semantics to be specified. Currently not implemented. +* **FLAGS**: Arbitrary other flags for `cbindgen`. Run `cbindgen --help` to see the possible flags. + +[cbindgen]: https://github.com/eqrion/cbindgen + +ANCHOR_END: corrosion_cbindgen +#]=======================================================================] +function(corrosion_experimental_cbindgen) + set(OPTIONS "") + set(ONE_VALUE_KEYWORDS TARGET MANIFEST_DIRECTORY HEADER_NAME CBINDGEN_VERSION) + set(MULTI_VALUE_KEYWORDS "FLAGS") + cmake_parse_arguments(PARSE_ARGV 0 CCN "${OPTIONS}" "${ONE_VALUE_KEYWORDS}" "${MULTI_VALUE_KEYWORDS}") + + set(required_keywords TARGET HEADER_NAME) + foreach(keyword ${required_keywords}) + if(NOT DEFINED "CCN_${keyword}") + message(FATAL_ERROR "Missing required parameter `${keyword}`.") + elseif("${CCN_${keyword}}" STREQUAL "") + message(FATAL_ERROR "Required parameter `${keyword}` may not be set to an empty string.") + endif() + endforeach() + set(rust_target "${CCN_TARGET}") + unset(package_manifest_dir) + + + set(hostbuild_override "$>") + set(cbindgen_target_triple "$") + + if(TARGET "${rust_target}") + get_target_property(package_manifest_path "${rust_target}" INTERFACE_COR_PACKAGE_MANIFEST_PATH) + if(NOT EXISTS "${package_manifest_path}") + message(FATAL_ERROR "Internal error: No package manifest found at ${package_manifest_path}") + endif() + get_filename_component(package_manifest_dir "${package_manifest_path}" DIRECTORY) + # todo: as an optimization we could cache the cargo metadata output (but --no-deps makes that slightly more complicated) + else() + if(NOT DEFINED CCN_MANIFEST_DIRECTORY) + message(FATAL_ERROR + "`${rust_target}` is not a target imported by corrosion and `MANIFEST_DIRECTORY` 
was not provided." + ) + else() + set(package_manifest_dir "${CCN_MANIFEST_DIRECTORY}") + endif() + endif() + + unset(rust_cargo_package) + if(NOT DEFINED CCN_CARGO_PACKAGE) + get_target_property(rust_cargo_package "${rust_target}" INTERFACE_COR_CARGO_PACKAGE_NAME ) + if(NOT rust_cargo_package) + message(FATAL_ERROR "Could not determine cargo package name for cbindgen!") + endif() + else() + set(rust_cargo_package "${CCN_CARGO_PACKAGE}") + endif() + message(STATUS "Using package ${rust_cargo_package} as crate for cbindgen") + + + set(output_header_name "${CCN_HEADER_NAME}") + + find_program(installed_cbindgen cbindgen) + + # Install the newest cbindgen version into our build tree. + if(installed_cbindgen) + set(cbindgen "${installed_cbindgen}") + else() + set(local_cbindgen_install_dir "${CMAKE_BINARY_DIR}/corrosion/cbindgen") + unset(executable_postfix) + if(Rust_CARGO_HOST_OS STREQUAL "windows") + set(executable_postfix ".exe") + endif() + set(cbindgen "${local_cbindgen_install_dir}/bin/cbindgen${executable_postfix}") + if(NOT TARGET "_corrosion_cbindgen") + file(MAKE_DIRECTORY "${local_cbindgen_install_dir}") + add_custom_command(OUTPUT "${cbindgen}" + COMMAND ${CMAKE_COMMAND} + -E env + "CARGO_BUILD_RUSTC=${_CORROSION_RUSTC}" + ${_CORROSION_CARGO} install + cbindgen + --root "${local_cbindgen_install_dir}" + ${_CORROSION_QUIET_OUTPUT_FLAG} + COMMENT "Building cbindgen" + ) + add_custom_target("_corrosion_cbindgen" + DEPENDS "${cbindgen}" + ) + endif() + endif() + + set(corrosion_generated_dir "${CMAKE_CURRENT_BINARY_DIR}/corrosion_generated") + set(generated_dir "${corrosion_generated_dir}/cbindgen/${rust_target}") + set(header_placement_dir "${generated_dir}/include/") + set(depfile_placement_dir "${generated_dir}/depfile") + set(generated_depfile "${depfile_placement_dir}/${output_header_name}.d") + set(generated_header "${header_placement_dir}/${output_header_name}") + message(STATUS "rust target is ${rust_target}") + if(CMAKE_VERSION VERSION_GREATER_EQUAL 
"3.23") + target_sources(${rust_target} + INTERFACE + FILE_SET HEADERS + BASE_DIRS "${header_placement_dir}" + FILES "${header_placement_dir}/${output_header_name}" + ) + else() + # Note: not clear to me how install would best work before CMake 3.23 + target_include_directories(${rust_target} + INTERFACE + $ + $ + ) + endif() + + # This may be different from $header_placement_dir since the user specified HEADER_NAME may contain + # relative directories. + get_filename_component(generated_header_dir "${generated_header}" DIRECTORY) + file(MAKE_DIRECTORY "${generated_header_dir}") + + unset(depfile_cbindgen_arg) + unset(depfile_cmake_arg) + get_filename_component(generated_depfile_dir "${generated_depfile}" DIRECTORY) + file(MAKE_DIRECTORY "${generated_depfile_dir}") + set(depfile_cbindgen_arg "--depfile=${generated_depfile}") + + # Users might want to call cbindgen multiple times, e.g. to generate separate C++ and C header files. + string(MAKE_C_IDENTIFIER "${output_header_name}" header_identifier ) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.22") + add_custom_command( + OUTPUT + "${generated_header}" + COMMAND + "${CMAKE_COMMAND}" -E env + TARGET="${cbindgen_target_triple}" + "${cbindgen}" + --output "${generated_header}" + --crate "${rust_cargo_package}" + ${depfile_cbindgen_arg} + ${CCN_FLAGS} + COMMENT "Generate cbindgen bindings for package ${rust_cargo_package} and output header ${generated_header}" + DEPFILE "${generated_depfile}" + COMMAND_EXPAND_LISTS + WORKING_DIRECTORY "${package_manifest_dir}" + ) + add_custom_target("_corrosion_cbindgen_${rust_target}_bindings_${header_identifier}" + DEPENDS "${generated_header}" + COMMENT "Generate ${generated_header} for ${rust_target}" + ) + else() + add_custom_target("_corrosion_cbindgen_${rust_target}_bindings_${header_identifier}" + "${CMAKE_COMMAND}" -E env + TARGET="${cbindgen_target_triple}" + "${cbindgen}" + --output "${generated_header}" + --crate "${rust_cargo_package}" + ${depfile_cbindgen_arg} + 
${CCN_FLAGS} + COMMENT "Generate ${generated_header} for ${rust_target}" + COMMAND_EXPAND_LISTS + WORKING_DIRECTORY "${package_manifest_dir}" + ) + endif() + + if(NOT installed_cbindgen) + add_custom_command( + OUTPUT "${generated_header}" + APPEND + DEPENDS _corrosion_cbindgen + ) + endif() + + if(NOT TARGET "_corrosion_cbindgen_${rust_target}_bindings") + add_custom_target(_corrosion_cbindgen_${rust_target}_bindings + COMMENT "Generate cbindgen bindings for package ${rust_cargo_package}" + ) + endif() + + add_dependencies("_corrosion_cbindgen_${rust_target}_bindings" "_corrosion_cbindgen_${rust_target}_bindings_${header_identifier}") + add_dependencies(${rust_target} "_corrosion_cbindgen_${rust_target}_bindings") +endfunction() + +# Parse the version of a Rust package from it's package manifest (Cargo.toml) +function(corrosion_parse_package_version package_manifest_path out_package_version) + if(NOT EXISTS "${package_manifest_path}") + message(FATAL_ERROR "Package manifest `${package_manifest_path}` does not exist.") + endif() + + file(READ "${package_manifest_path}" package_manifest) + + # Find the package table. It may contain arrays, so match until \n\[, which should mark the next + # table. Note: backslashes must be doubled to escape the backslash for the bracket. LF is single + # backslash however. On windows the line also ends in \n, so matching against \n\[ is sufficient + # to detect an opening bracket on a new line. 
+ set(package_table_regex "\\[package\\](.*)\n\\[") + + string(REGEX MATCH "${package_table_regex}" _package_table "${package_manifest}") + + if(CMAKE_MATCH_COUNT EQUAL "1") + set(package_table "${CMAKE_MATCH_1}") + else() + message(DEBUG + "Failed to find `[package]` table in package manifest `${package_manifest_path}`.\n" + "Matches: ${CMAKE_MATCH_COUNT}\n" + ) + set(${out_package_version} + "NOTFOUND" + PARENT_SCOPE + ) + # Stop here: `package_table` was not set by this function, so the version lookup below + # has nothing valid to search in. + return() + endif() + # Match `version = "0.3.2"`, `"version" = "0.3.2"`. Contains one matching group for the version + set(version_regex "[\r]?\n[\"']?version[\"']?[ \t]*=[ \t]*[\"']([0-9\.]+)[\"']") + + # `if(... MATCHES ...)` fills CMAKE_MATCH_1 itself, so no separate string(REGEX MATCH) call is needed. + if("${package_table}" MATCHES "${version_regex}") + set(${out_package_version} + "${CMAKE_MATCH_1}" + PARENT_SCOPE + ) + else() + message(DEBUG "Failed to extract package version from manifest `${package_manifest_path}`.") + set(${out_package_version} + "NOTFOUND" + PARENT_SCOPE + ) + endif() +endfunction() + +function(_corrosion_initialize_properties target_name) + set(prefix "") + if(CORROSION_NATIVE_TOOLING) + set(prefix "INTERFACE_") + endif() + # Initialize the `_OUTPUT_DIRECTORY` properties based on `CMAKE__OUTPUT_DIRECTORY`. + foreach(output_var RUNTIME_OUTPUT_DIRECTORY ARCHIVE_OUTPUT_DIRECTORY LIBRARY_OUTPUT_DIRECTORY PDB_OUTPUT_DIRECTORY) + if (DEFINED "CMAKE_${output_var}") + set_property(TARGET ${target_name} PROPERTY "${prefix}${output_var}" "${CMAKE_${output_var}}") + endif() + + foreach(config_type ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER "${config_type}" config_type_upper) + if (DEFINED "CMAKE_${output_var}_${config_type_upper}") + set_property(TARGET ${target_name} PROPERTY "${prefix}${output_var}_${config_type_upper}" "${CMAKE_${output_var}_${config_type_upper}}") + endif() + endforeach() + endforeach() +endfunction() + +# Helper macro to pass through an optional `OPTION` argument parsed via `cmake_parse_arguments` +# to another function that takes the same OPTION. 
+# If the option was set, then the variable will be set to the same option name again, +# otherwise will be unset. +macro(_corrosion_option_passthrough_helper option_name prefix var_name) + if(${${prefix}_${option_name}}) + set("${var_name}" "${option_name}") + else() + unset("${var_name}") + endif() +endmacro() + +# Helper macro to pass through an optional argument with value(s), parsed via `cmake_parse_arguments`, +# to another function that takes the same keyword + associated values. +# If the argument was given, then the variable will be a list of the argument name and the values, +# which will be expanded, when calling the function (assuming no quotes). +macro(_corrosion_arg_passthrough_helper arg_name prefix var_name) + if(DEFINED "${prefix}_${arg_name}") + set("${var_name}" "${arg_name}" "${${prefix}_${arg_name}}") + else() + unset("${var_name}") + endif() +endmacro() + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) + diff --git a/lib/corrosion/cmake/CorrosionConfig.cmake.in b/lib/corrosion/cmake/CorrosionConfig.cmake.in new file mode 100644 index 000000000..a43a3f6b4 --- /dev/null +++ b/lib/corrosion/cmake/CorrosionConfig.cmake.in @@ -0,0 +1,18 @@ +@PACKAGE_INIT@ + +if (Corrosion_FOUND) + return() +endif() + +list(APPEND CMAKE_MODULE_PATH "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_DATADIR@/cmake") + +set(CORROSION_NATIVE_TOOLING_INSTALLED @CORROSION_NATIVE_TOOLING@) +if(CORROSION_NATIVE_TOOLING_INSTALLED AND NOT TARGET Corrosion::Generator) + add_executable(Corrosion::Generator IMPORTED GLOBAL) + + set_property( + TARGET Corrosion::Generator + PROPERTY IMPORTED_LOCATION "@CMAKE_INSTALL_FULL_LIBEXECDIR@/corrosion-generator") +endif() + +include(Corrosion) diff --git a/lib/corrosion/cmake/CorrosionGenerator.cmake b/lib/corrosion/cmake/CorrosionGenerator.cmake new file mode 100644 index 000000000..9acdcf613 --- /dev/null +++ b/lib/corrosion/cmake/CorrosionGenerator.cmake @@ -0,0 +1,313 @@ +function(_cargo_metadata out manifest) + set(OPTIONS LOCKED FROZEN) + 
set(ONE_VALUE_KEYWORDS "") + set(MULTI_VALUE_KEYWORDS "") + cmake_parse_arguments(PARSE_ARGV 2 CM "${OPTIONS}" "${ONE_VALUE_KEYWORDS}" "${MULTI_VALUE_KEYWORDS}") + + list(APPEND CMAKE_MESSAGE_CONTEXT "_cargo_metadata") + + if(DEFINED CM_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Internal error - unexpected arguments: ${CM_UNPARSED_ARGUMENTS}") + elseif(DEFINED CM_KEYWORDS_MISSING_VALUES) + message(FATAL_ERROR "Internal error - the following keywords had no associated value(s):" + "${CM_KEYWORDS_MISSING_VALUES}") + endif() + + set(cargo_locked "") + set(cargo_frozen "") + # cmake_parse_arguments() stores the parsed options under the `CM` prefix, so the + # LOCKED / FROZEN options must be read as CM_LOCKED / CM_FROZEN. + if(CM_LOCKED) + set(cargo_locked "--locked") + endif() + if(CM_FROZEN) + set(cargo_frozen "--frozen") + endif() + execute_process( + COMMAND + ${CMAKE_COMMAND} -E env + "CARGO_BUILD_RUSTC=${_CORROSION_RUSTC}" + "${_CORROSION_CARGO}" + metadata + --manifest-path "${manifest}" + --format-version 1 + # We don't care about non-workspace dependencies + --no-deps + ${cargo_locked} + ${cargo_frozen} + + OUTPUT_VARIABLE json + COMMAND_ERROR_IS_FATAL ANY + ) + + set(${out} "${json}" PARENT_SCOPE) +endfunction() + +# Add targets (crates) of one package +function(_generator_add_package_targets) + set(OPTIONS NO_LINKER_OVERRIDE) + set(ONE_VALUE_KEYWORDS WORKSPACE_MANIFEST_PATH PACKAGE_MANIFEST_PATH PACKAGE_NAME PACKAGE_VERSION TARGETS_JSON OUT_CREATED_TARGETS) + set(MULTI_VALUE_KEYWORDS CRATE_TYPES) + cmake_parse_arguments(PARSE_ARGV 0 GAPT "${OPTIONS}" "${ONE_VALUE_KEYWORDS}" "${MULTI_VALUE_KEYWORDS}") + + if(DEFINED GAPT_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Internal error - unexpected arguments: ${GAPT_UNPARSED_ARGUMENTS}") + elseif(DEFINED GAPT_KEYWORDS_MISSING_VALUES) + message(FATAL_ERROR "Internal error - the following keywords had no associated value(s):" + "${GAPT_KEYWORDS_MISSING_VALUES}") + endif() + + _corrosion_option_passthrough_helper(NO_LINKER_OVERRIDE GAPT no_linker_override) + + set(workspace_manifest_path "${GAPT_WORKSPACE_MANIFEST_PATH}") + set(package_manifest_path 
"${GAPT_PACKAGE_MANIFEST_PATH}") + set(package_name "${GAPT_PACKAGE_NAME}") + set(package_version "${GAPT_PACKAGE_VERSION}") + set(targets "${GAPT_TARGETS_JSON}") + set(out_created_targets "${GAPT_OUT_CREATED_TARGETS}") + set(crate_types "${GAPT_CRATE_TYPES}") + + set(corrosion_targets "") + + file(TO_CMAKE_PATH "${package_manifest_path}" manifest_path) + + string(JSON targets_len LENGTH "${targets}") + math(EXPR targets_len-1 "${targets_len} - 1") + + message(DEBUG "Found ${targets_len} targets in package ${package_name}") + + foreach(ix RANGE ${targets_len-1}) + string(JSON target GET "${targets}" ${ix}) + string(JSON target_name GET "${target}" "name") + string(JSON target_kind GET "${target}" "kind") + string(JSON target_kind_len LENGTH "${target_kind}") + + math(EXPR target_kind_len-1 "${target_kind_len} - 1") + set(kinds) + foreach(ix RANGE ${target_kind_len-1}) + string(JSON kind GET "${target_kind}" ${ix}) + if(NOT crate_types OR ${kind} IN_LIST crate_types) + list(APPEND kinds ${kind}) + endif() + endforeach() + + if(TARGET "${target_name}" + AND ("staticlib" IN_LIST kinds OR "cdylib" IN_LIST kinds OR "bin" IN_LIST kinds) + ) + message(WARNING "Failed to import Rust crate ${target_name} (kind: `${target_kind}`) because a target " + "with the same name already exists. Skipping this target.\n" + "Help: If you are importing a package which exposes both a `lib` and " + "a `bin` target, please consider explicitly naming the targets in your `Cargo.toml` manifest.\n" + "Note: If you have multiple different packages which have targets with the same name, please note that " + "this is currently not supported by Corrosion. Feel free to open an issue on Github to request " + "supporting this scenario." + ) + # Skip this target to prevent a hard error. + continue() + endif() + + if("staticlib" IN_LIST kinds OR "cdylib" IN_LIST kinds) + # Explicitly set library names have always been forbidden from using dashes (by cargo). 
+ # Starting with Rust 1.79, names inherited from the package name will have dashes replaced + # by underscores too. Corrosion will thus replace dashes with underscores, to make the target + # name consistent independent of the Rust version. `bin` target names are not affected. + # See https://github.com/corrosion-rs/corrosion/issues/501 for more details. + string(REPLACE "\-" "_" target_name "${target_name}") + + set(archive_byproducts "") + set(shared_lib_byproduct "") + set(pdb_byproduct "") + + add_library(${target_name} INTERFACE) + _corrosion_initialize_properties(${target_name}) + _corrosion_add_library_target( + WORKSPACE_MANIFEST_PATH "${workspace_manifest_path}" + TARGET_NAME "${target_name}" + LIB_KINDS ${kinds} + OUT_ARCHIVE_OUTPUT_BYPRODUCTS archive_byproducts + OUT_SHARED_LIB_BYPRODUCTS shared_lib_byproduct + OUT_PDB_BYPRODUCT pdb_byproduct + ) + + set(byproducts "") + list(APPEND byproducts "${archive_byproducts}" "${shared_lib_byproduct}" "${pdb_byproduct}") + + set(cargo_build_out_dir "") + _add_cargo_build( + cargo_build_out_dir + PACKAGE ${package_name} + TARGET ${target_name} + MANIFEST_PATH "${manifest_path}" + WORKSPACE_MANIFEST_PATH "${workspace_manifest_path}" + TARGET_KINDS "${kinds}" + BYPRODUCTS "${byproducts}" + # Optional + ${no_linker_override} + ) + if(archive_byproducts) + _corrosion_copy_byproducts( + ${target_name} ARCHIVE_OUTPUT_DIRECTORY "${cargo_build_out_dir}" "${archive_byproducts}" + ) + endif() + if(shared_lib_byproduct) + _corrosion_copy_byproducts( + ${target_name} LIBRARY_OUTPUT_DIRECTORY "${cargo_build_out_dir}" "${shared_lib_byproduct}" + ) + endif() + if(pdb_byproduct) + _corrosion_copy_byproducts( + ${target_name} PDB_OUTPUT_DIRECTORY "${cargo_build_out_dir}" "${pdb_byproduct}" + ) + endif() + list(APPEND corrosion_targets ${target_name}) + set_property(TARGET "${target_name}" PROPERTY INTERFACE_COR_CARGO_PACKAGE_NAME "${package_name}" ) + # Note: "bin" is mutually exclusive with "staticlib/cdylib", since `bin`s are 
seperate crates from libraries. + elseif("bin" IN_LIST kinds) + set(bin_byproduct "") + set(pdb_byproduct "") + add_executable(${target_name} IMPORTED GLOBAL) + _corrosion_initialize_properties(${target_name}) + _corrosion_add_bin_target("${workspace_manifest_path}" "${target_name}" + "bin_byproduct" "pdb_byproduct" + ) + + set(byproducts "") + list(APPEND byproducts "${bin_byproduct}" "${pdb_byproduct}") + + set(cargo_build_out_dir "") + _add_cargo_build( + cargo_build_out_dir + PACKAGE "${package_name}" + TARGET "${target_name}" + MANIFEST_PATH "${manifest_path}" + WORKSPACE_MANIFEST_PATH "${workspace_manifest_path}" + TARGET_KINDS "bin" + BYPRODUCTS "${byproducts}" + # Optional + ${no_linker_override} + ) + _corrosion_copy_byproducts( + ${target_name} RUNTIME_OUTPUT_DIRECTORY "${cargo_build_out_dir}" "${bin_byproduct}" + ) + if(pdb_byproduct) + _corrosion_copy_byproducts( + ${target_name} PDB_OUTPUT_DIRECTORY "${cargo_build_out_dir}" "${pdb_byproduct}" + ) + endif() + list(APPEND corrosion_targets ${target_name}) + set_property(TARGET "${target_name}" PROPERTY INTERFACE_COR_CARGO_PACKAGE_NAME "${package_name}" ) + else() + # ignore other kinds (like examples, tests, build scripts, ...) + endif() + endforeach() + + if(NOT corrosion_targets) + message(DEBUG "No relevant targets found in package ${package_name} - Ignoring") + else() + set_target_properties(${corrosion_targets} PROPERTIES INTERFACE_COR_PACKAGE_MANIFEST_PATH "${package_manifest_path}") + endif() + set(${out_created_targets} "${corrosion_targets}" PARENT_SCOPE) + +endfunction() + +# Add all cargo targets defined in the packages defined in the Cargo.toml manifest at +# `MANIFEST_PATH`. 
function(_generator_add_cargo_targets)
    # Imports the cargo targets of every selected workspace package as CMake targets.
    #
    # Options:
    #   NO_LINKER_OVERRIDE  forwarded unchanged to `_generator_add_package_targets`.
    # One-value arguments:
    #   MANIFEST_PATH       manifest handed to `cargo metadata`; also used as the workspace manifest.
    #   IMPORTED_CRATES     optional name of a variable in the caller's scope that receives the
    #                       list of created targets.
    # Multi-value arguments:
    #   CRATES              if given, only packages whose name is in this list are imported.
    #   CRATE_TYPES         forwarded unchanged to `_generator_add_package_targets`.
    #
    # Raises a FATAL_ERROR if no target was created at all.
    set(options NO_LINKER_OVERRIDE)
    set(one_value_args MANIFEST_PATH IMPORTED_CRATES)
    set(multi_value_args CRATES CRATE_TYPES)
    cmake_parse_arguments(
        GGC
        "${options}"
        "${one_value_args}"
        "${multi_value_args}"
        ${ARGN}
    )
    list(APPEND CMAKE_MESSAGE_CONTEXT "_add_cargo_targets")

    _corrosion_option_passthrough_helper(NO_LINKER_OVERRIDE GGC no_linker_override)
    _corrosion_arg_passthrough_helper(CRATE_TYPES GGC crate_types)

    _cargo_metadata(json "${GGC_MANIFEST_PATH}")
    string(JSON packages GET "${json}" "packages")
    string(JSON workspace_members GET "${json}" "workspace_members")

    string(JSON pkgs_len LENGTH "${packages}")
    math(EXPR pkgs_len-1 "${pkgs_len} - 1")

    # Quoted like every other JSON argument, so the array is always passed as a single argument.
    string(JSON ws_mems_len LENGTH "${workspace_members}")
    math(EXPR ws_mems_len-1 "${ws_mems_len} - 1")

    set(created_targets "")
    set(available_package_names "")
    # `foreach(RANGE -1)` is not a valid range, so an empty package list skips the loop and
    # falls through to the "Found no targets" error below.
    if(pkgs_len GREATER 0)
        foreach(pkg_ix RANGE ${pkgs_len-1})
            string(JSON pkg GET "${packages}" ${pkg_ix})
            string(JSON pkg_id GET "${pkg}" "id")
            string(JSON pkg_name GET "${pkg}" "name")
            string(JSON pkg_manifest_path GET "${pkg}" "manifest_path")
            string(JSON pkg_version GET "${pkg}" "version")
            list(APPEND available_package_names "${pkg_name}")

            if(DEFINED GGC_CRATES)
                if(NOT pkg_name IN_LIST GGC_CRATES)
                    continue()
                endif()
            endif()

            # probably this loop is not necessary at all, since when using --no-deps, the
            # contents of packages should already be only workspace members!
            unset(pkg_is_ws_member)
            if(ws_mems_len GREATER 0)
                # Separate loop variable, so the package index is not shadowed.
                foreach(ws_ix RANGE ${ws_mems_len-1})
                    string(JSON ws_mem GET "${workspace_members}" ${ws_ix})
                    if(ws_mem STREQUAL pkg_id)
                        set(pkg_is_ws_member YES)
                        break()
                    endif()
                endforeach()
            endif()

            if(NOT DEFINED pkg_is_ws_member)
                # Since we pass `--no-deps` to cargo metadata now, I think this situation can't happen, but lets check for
                # it anyway, just to discover any potential issues.
                # If nobody complains for a while, it should be safe to remove this check and the previous loop, which
                # should speed up the configuration process.
                message(WARNING "The package `${pkg_name}` unexpectedly is not part of the workspace. "
                    "Please open an issue at corrosion with some background information on the package"
                )
            endif()

            string(JSON targets GET "${pkg}" "targets")

            _generator_add_package_targets(
                WORKSPACE_MANIFEST_PATH "${GGC_MANIFEST_PATH}"
                PACKAGE_MANIFEST_PATH "${pkg_manifest_path}"
                PACKAGE_NAME "${pkg_name}"
                PACKAGE_VERSION "${pkg_version}"
                TARGETS_JSON "${targets}"
                OUT_CREATED_TARGETS curr_created_targets
                ${no_linker_override}
                ${crate_types}
            )
            list(APPEND created_targets "${curr_created_targets}")
        endforeach()
    endif()

    if(NOT created_targets)
        set(crates_error_message "")
        if(DEFINED GGC_CRATES)
            # Build ONE string: expanding a list unquoted inside message() would split at the
            # semicolons of a multi-entry CRATES list and print the crate names glued together.
            string(CONCAT crates_error_message
                "\n`corrosion_import_crate()` was called with the `CRATES` "
                "parameter set to `${GGC_CRATES}`. Corrosion will only attempt to import packages matching "
                "names from this list."
            )
        endif()
        message(FATAL_ERROR
            "Found no targets in ${pkgs_len} packages."
            "${crates_error_message}"
            "\nPlease keep in mind that corrosion will only import Rust `bin` targets or "
            "`staticlib` or `cdylib` library targets."
            "\nThe following packages were found in the Manifest: ${available_package_names}"
        )
    else()
        message(DEBUG "Corrosion created the following CMake targets: ${created_targets}")
    endif()

    if(GGC_IMPORTED_CRATES)
        set(${GGC_IMPORTED_CRATES} "${created_targets}" PARENT_SCOPE)
    endif()
endfunction()
diff --git a/lib/corrosion/cmake/FindRust.cmake b/lib/corrosion/cmake/FindRust.cmake
new file mode 100644
index 000000000..fccfac37a
--- /dev/null
+++ b/lib/corrosion/cmake/FindRust.cmake
@@ -0,0 +1,833 @@
#[=======================================================================[.rst:
FindRust
--------

Find Rust

This module finds an installed rustc compiler and the cargo build tool. If Rust
is managed by rustup it determines the available toolchains and returns a
concrete Rust version, not a rustup proxy.

#]=======================================================================]

cmake_minimum_required(VERSION 3.12)

# search for Cargo here and set up a bunch of cool flags and stuff
include(FindPackageHandleStandardArgs)

list(APPEND CMAKE_MESSAGE_CONTEXT "FindRust")

# Print error message and return. Should not be used from inside functions
#
# The message is the concatenation of all arguments. It is fatal if the package was REQUIRED,
# a warning unless QUIET was requested, and silent otherwise. `Rust_FOUND` is set to an empty
# string. Because this is a macro, the trailing `return()` runs in the scope of the caller.
macro(_findrust_failed)
    if("${Rust_FIND_REQUIRED}")
        message(FATAL_ERROR ${ARGN})
    elseif(NOT "${Rust_FIND_QUIETLY}")
        message(WARNING ${ARGN})
    endif()
    set(Rust_FOUND "")
    return()
endmacro()

# Checks if the actual version of a Rust toolchain matches the VERSION requirements specified in find_package.
function(_findrust_version_ok ACTUAL_VERSION OUT_IS_OK)
    # ACTUAL_VERSION: the toolchain version to check. For backwards compatibility the name of a
    #                 variable holding the version is accepted too and is dereferenced here.
    # OUT_IS_OK:      name of the variable in the caller's scope that is set to TRUE or FALSE.
    set(actual_version "${ACTUAL_VERSION}")
    if(NOT "${ACTUAL_VERSION}" STREQUAL "")
        if(DEFINED "${ACTUAL_VERSION}")
            set(actual_version "${${ACTUAL_VERSION}}")
        endif()
    endif()

    if(DEFINED Rust_FIND_VERSION_RANGE)
        # `<Pkg>_FIND_VERSION_RANGE_MAX` only says whether the upper end point is included
        # (INCLUDE / EXCLUDE). The bounds themselves are `<Pkg>_FIND_VERSION_MIN` / `_MAX`.
        if(Rust_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE")
            set(upper_bound_operator VERSION_LESS_EQUAL)
        elseif(Rust_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE")
            set(upper_bound_operator VERSION_LESS)
        else()
            message(FATAL_ERROR "Unexpected value in `_FIND_VERSION_RANGE_MAX`: "
                    "`${Rust_FIND_VERSION_RANGE_MAX}`.")
        endif()
        # The operator must be expanded unquoted: quoted arguments are never treated as keywords.
        if(("${actual_version}" VERSION_GREATER_EQUAL "${Rust_FIND_VERSION_MIN}")
            AND
            ("${actual_version}" ${upper_bound_operator} "${Rust_FIND_VERSION_MAX}")
        )
            set("${OUT_IS_OK}" TRUE PARENT_SCOPE)
        else()
            set("${OUT_IS_OK}" FALSE PARENT_SCOPE)
        endif()
    elseif(DEFINED Rust_FIND_VERSION)
        # find_package() reports the EXACT keyword through `<Pkg>_FIND_VERSION_EXACT`.
        if(Rust_FIND_VERSION_EXACT)
            set(comparison_operator VERSION_EQUAL)
        else()
            set(comparison_operator VERSION_GREATER_EQUAL)
        endif()
        if("${actual_version}" ${comparison_operator} "${Rust_FIND_VERSION}")
            set("${OUT_IS_OK}" TRUE PARENT_SCOPE)
        else()
            set("${OUT_IS_OK}" FALSE PARENT_SCOPE)
        endif()
    else()
        # if no VERSION requirement was specified, the version is always okay.
        set("${OUT_IS_OK}" TRUE PARENT_SCOPE)
    endif()
endfunction()

function(_corrosion_strip_target_triple input_triple_or_path output_triple)
    # If the target_triple is a path to a custom target specification file, then strip everything
    # except the filename from `target_triple`.
    #
    # input_triple_or_path: a target triple or a path to a `.json` target specification file.
    # output_triple:        name of the variable in the caller's scope receiving the result.
    get_filename_component(target_triple_ext "${input_triple_or_path}" EXT)
    set(target_triple "${input_triple_or_path}")
    if(target_triple_ext)
        if(target_triple_ext STREQUAL ".json")
            get_filename_component(target_triple "${input_triple_or_path}" NAME_WE)
        endif()
    endif()
    set(${output_triple} "${target_triple}" PARENT_SCOPE)
endfunction()

function(_corrosion_parse_target_triple target_triple out_arch out_vendor out_os out_env)
    # Splits a Rust target triple into architecture, vendor, OS and environment and stores the
    # parts in the caller-scope variables named by `out_arch`, `out_vendor`, `out_os`, `out_env`.
    # Quoted, so that a path to a target specification file is passed as a single argument.
    _corrosion_strip_target_triple("${target_triple}" target_triple)

    # The vendor part may be left out from the target triple, and since `env` is also optional,
    # we determine if vendor is present by matching against a list of known vendors.
    set(known_vendors
        "apple"
        "esp[a-z0-9]*" # espressif, e.g. riscv32imc-esp-espidf or xtensa-esp32s3-none-elf
        "fortanix"
        "kmc"
        "pc"
        "nintendo"
        "nvidia"
        "openwrt"
        "alpine"
        "chimera"
        "unikraft"
        "unknown"
        "uwp" # aarch64-uwp-windows-msvc
        "wrs" # e.g. aarch64-wrs-vxworks
        "sony"
        "sun"
    )
    # todo: allow users to add additional vendors to the list via a cmake variable.
    list(JOIN known_vendors "|" known_vendors_joined)
    # vendor is optional - We detect if vendor is present by matching against a known list of
    # vendors. The next field is the OS, which we assume to always be present, while the last field
    # is again optional and contains the environment.
    string(REGEX MATCH
            "^([a-z0-9_\.]+)-((${known_vendors_joined})-)?([a-z0-9_]+)(-([a-z0-9_]+))?$"
            whole_match
            "${target_triple}"
    )
    if((NOT whole_match) AND (NOT CORROSION_NO_WARN_PARSE_TARGET_TRIPLE_FAILED))
        message(WARNING "Failed to parse target-triple `${target_triple}`."
            "Corrosion determines some information about the output artifacts based on OS "
            "specified in the Rust target-triple.\n"
            "Currently this is relevant for windows and darwin (mac) targets, since file "
            "extensions differ.\n"
            "Note: If you are targeting a different OS you can suppress this warning by"
            " setting the CMake cache variable "
            "`CORROSION_NO_WARN_PARSE_TARGET_TRIPLE_FAILED`."
            "Please consider opening an issue on github if you you need to add a new vendor to the list."
        )
    endif()

    message(DEBUG "Parsed Target triple: arch: ${CMAKE_MATCH_1}, vendor: ${CMAKE_MATCH_3}, "
            "OS: ${CMAKE_MATCH_4}, env: ${CMAKE_MATCH_6}")

    set("${out_arch}" "${CMAKE_MATCH_1}" PARENT_SCOPE)
    set("${out_vendor}" "${CMAKE_MATCH_3}" PARENT_SCOPE)
    set("${out_os}" "${CMAKE_MATCH_4}" PARENT_SCOPE)
    set("${out_env}" "${CMAKE_MATCH_6}" PARENT_SCOPE)
endfunction()

function(_corrosion_determine_libs_new target_triple out_libs out_flags)
    # Builds a throw-away `staticlib` crate for `target_triple` and parses the
    # `native-static-libs` note cargo prints on stderr.
    #
    # out_libs:  name of the caller-scope variable receiving the libraries to link.
    # out_flags: name of the caller-scope variable receiving linker flags (MSVC `/...` options).
    # Neither output is set if the build fails or the note is not found.
    set(package_dir "${CMAKE_BINARY_DIR}/corrosion/required_libs")
    # Cleanup on reconfigure to get a clean state (in case we change something in the future)
    file(REMOVE_RECURSE "${package_dir}")
    file(MAKE_DIRECTORY "${package_dir}")
    set(manifest "[package]\nname = \"required_libs\"\nedition = \"2018\"\nversion = \"0.1.0\"\n")
    string(APPEND manifest "\n[lib]\ncrate-type=[\"staticlib\"]\npath = \"lib.rs\"\n")
    string(APPEND manifest "\n[workspace]\n")
    file(WRITE "${package_dir}/Cargo.toml" "${manifest}")
    file(WRITE "${package_dir}/lib.rs" "pub fn add(left: usize, right: usize) -> usize {left + right}\n")

    execute_process(
        COMMAND ${CMAKE_COMMAND} -E env
            "CARGO_BUILD_RUSTC=${Rust_COMPILER_CACHED}"
            ${Rust_CARGO_CACHED} rustc --verbose --color never --target=${target_triple} -- --print=native-static-libs
        WORKING_DIRECTORY "${package_dir}"
        RESULT_VARIABLE cargo_build_result
        ERROR_VARIABLE cargo_build_error_message
    )
    if(cargo_build_result)
        message(DEBUG "Determining required native libraries - failed: ${cargo_build_result}.")
        message(TRACE "The cargo build error was: ${cargo_build_error_message}")
        message(DEBUG "Note: This is expected for Rust targets without std support")
        return()
    else()
        # The pattern starts with `native-static-libs:` and goes to the end of the line.
        if(cargo_build_error_message MATCHES "native-static-libs: ([^\r\n]+)\r?\n")
            string(REPLACE " " ";" "libs_list" "${CMAKE_MATCH_1}")
            set(stripped_lib_list "")
            set(flag_list "")

            set(was_last_framework OFF)
            foreach(lib ${libs_list})
                # merge -framework;lib -> "-framework lib" as CMake does de-duplication of link libraries, and -framework prefix is required
                if (lib STREQUAL "-framework")
                    set(was_last_framework ON)
                    continue()
                endif()
                if (was_last_framework)
                    list(APPEND stripped_lib_list "-framework ${lib}")
                    set(was_last_framework OFF)
                    continue()
                endif()

                # Flags start with / for MSVC
                # The triple is quoted so it is compared as a string, never looked up as a variable.
                if (lib MATCHES "^/" AND "${target_triple}" MATCHES "msvc$")
                    list(APPEND flag_list "${lib}")
                else()
                    # Strip leading `-l` (unix) and potential .lib suffix (windows)
                    string(REGEX REPLACE "^-l" "" "stripped_lib" "${lib}")
                    # `\\.` reaches the regex engine as `\.`; a single backslash would be consumed
                    # by CMake and leave `.lib$`, which also matches names such as `glib`.
                    string(REGEX REPLACE "\\.lib$" "" "stripped_lib" "${stripped_lib}")
                    list(APPEND stripped_lib_list "${stripped_lib}")
                endif()
            endforeach()
            set(libs_list "${stripped_lib_list}")
            # Special case `msvcrt` to link with the debug version in Debug mode.
            list(TRANSFORM libs_list REPLACE "^msvcrt$" "\$<\$<CONFIG:Debug>:msvcrtd>")
        else()
            message(DEBUG "Determining required native libraries - failed: Regex match failure.")
            message(DEBUG "`native-static-libs` not found in: `${cargo_build_error_message}`")
            return()
        endif()
    endif()
    set("${out_libs}" "${libs_list}" PARENT_SCOPE)
    set("${out_flags}" "${flag_list}" PARENT_SCOPE)
endfunction()

if (NOT "${Rust_TOOLCHAIN}" STREQUAL "$CACHE{Rust_TOOLCHAIN}")
    # Promote Rust_TOOLCHAIN to a cache variable if it is not already a cache variable
    set(Rust_TOOLCHAIN ${Rust_TOOLCHAIN} CACHE STRING "Requested rustup toolchain" FORCE)
endif()

set(_RESOLVE_RUSTUP_TOOLCHAINS_DESC "Indicates whether to descend into the toolchain pointed to by rustup")
set(Rust_RESOLVE_RUSTUP_TOOLCHAINS ON CACHE BOOL ${_RESOLVE_RUSTUP_TOOLCHAINS_DESC})

# This block checks to see if we're prioritizing a rustup-managed toolchain.
if (DEFINED Rust_TOOLCHAIN)
    # If the user specifies `Rust_TOOLCHAIN`, then look for `rustup` first, rather than `rustc`.
    find_program(Rust_RUSTUP rustup PATHS "$ENV{HOME}/.cargo/bin")
    if(NOT Rust_RUSTUP)
        if(NOT "${Rust_FIND_QUIETLY}")
            message(
                WARNING "CMake variable `Rust_TOOLCHAIN` specified, but `rustup` was not found. "
                "Ignoring toolchain and looking for a Rust toolchain not managed by rustup.")
        endif()
    endif()
else()
    # If we aren't definitely using a rustup toolchain, look for rustc first - the user may have
    # a toolchain installed via a method other than rustup higher in the PATH, which should be
    # preferred. However, if the first-found rustc is a rustup proxy, then we'll revert to
    # finding the preferred toolchain via rustup.

    # Uses `Rust_COMPILER` to let user-specified `rustc` win. But we will still "override" the
    # user's setting if it is pointing to `rustup`. Default rustup install path is provided as a
    # backup if a toolchain cannot be found in the user's PATH.

    if (DEFINED Rust_COMPILER)
        set(_Rust_COMPILER_TEST "${Rust_COMPILER}")
        set(_USER_SPECIFIED_RUSTC ON)
        if(NOT (EXISTS "${_Rust_COMPILER_TEST}" AND NOT IS_DIRECTORY "${_Rust_COMPILER_TEST}"))
            set(_ERROR_MESSAGE "Rust_COMPILER was set to `${Rust_COMPILER}`, but this file does "
                "not exist."
            )
            _findrust_failed(${_ERROR_MESSAGE})
            return()
        endif()
    else()
        find_program(_Rust_COMPILER_TEST rustc PATHS "$ENV{HOME}/.cargo/bin")
        if(NOT EXISTS "${_Rust_COMPILER_TEST}")
            set(_ERROR_MESSAGE "`rustc` not found in PATH or `$ENV{HOME}/.cargo/bin`.\n"
                    "Hint: Check if `rustc` is in PATH or manually specify the location "
                    "by setting `Rust_COMPILER` to the path to `rustc`.")
            _findrust_failed(${_ERROR_MESSAGE})
        endif()
    endif()

    # Check if the discovered rustc is actually a "rustup" proxy.
    execute_process(
        COMMAND
            ${CMAKE_COMMAND} -E env
                RUSTUP_FORCE_ARG0=rustup
            "${_Rust_COMPILER_TEST}" --version
        OUTPUT_VARIABLE _RUSTC_VERSION_RAW
        ERROR_VARIABLE _RUSTC_VERSION_STDERR
        RESULT_VARIABLE _RUSTC_VERSION_RESULT
    )

    if(NOT (_RUSTC_VERSION_RESULT EQUAL "0"))
        _findrust_failed("`${_Rust_COMPILER_TEST} --version` failed with ${_RUSTC_VERSION_RESULT}\n"
            "rustc stderr:\n${_RUSTC_VERSION_STDERR}"
            )
    endif()

    if (_RUSTC_VERSION_RAW MATCHES "rustup [0-9\\.]+")
        if (_USER_SPECIFIED_RUSTC)
            message(
                WARNING "User-specified Rust_COMPILER pointed to rustup's rustc proxy. Corrosion's "
                "FindRust will always try to evaluate to an actual Rust toolchain, and so the "
                "user-specified Rust_COMPILER will be discarded in favor of the default "
                "rustup-managed toolchain."
            )

            unset(Rust_COMPILER)
            unset(Rust_COMPILER CACHE)
        endif()

        # Get `rustup` next to the `rustc` proxy
        get_filename_component(_RUST_PROXIES_PATH "${_Rust_COMPILER_TEST}" DIRECTORY)
        find_program(Rust_RUSTUP rustup HINTS "${_RUST_PROXIES_PATH}" NO_DEFAULT_PATH)
    endif()

    unset(_Rust_COMPILER_TEST CACHE)
endif()

# At this point, the only thing we should have evaluated is a path to `rustup` _if that's what the
# best source for a Rust toolchain was determined to be_.
if (NOT Rust_RUSTUP)
    set(Rust_RESOLVE_RUSTUP_TOOLCHAINS OFF CACHE BOOL ${_RESOLVE_RUSTUP_TOOLCHAINS_DESC} FORCE)
endif()

# List of user variables that will override any toolchain-provided setting
set(_Rust_USER_VARS Rust_COMPILER Rust_CARGO Rust_CARGO_TARGET Rust_CARGO_HOST_TARGET)
foreach(_VAR ${_Rust_USER_VARS})
    if (DEFINED "${_VAR}")
        set(${_VAR}_CACHED "${${_VAR}}" CACHE INTERNAL "Internal cache of ${_VAR}")
    else()
        unset(${_VAR}_CACHED CACHE)
    endif()
endforeach()

# Discover what toolchains are installed by rustup, if the discovered `rustc` is a proxy from
# `rustup` and the user hasn't explicitly requested to override this behavior, then select either
# the default toolchain, or the requested toolchain Rust_TOOLCHAIN
if (Rust_RESOLVE_RUSTUP_TOOLCHAINS)
    execute_process(
        COMMAND
            "${Rust_RUSTUP}" toolchain list --verbose
        OUTPUT_VARIABLE _TOOLCHAINS_RAW
    )

    string(REPLACE "\n" ";" _TOOLCHAINS_RAW "${_TOOLCHAINS_RAW}")
    set(_DISCOVERED_TOOLCHAINS "")
    set(_DISCOVERED_TOOLCHAINS_RUSTC_PATH "")
    set(_DISCOVERED_TOOLCHAINS_CARGO_PATH "")
    set(_DISCOVERED_TOOLCHAINS_VERSION "")

    foreach(_TOOLCHAIN_RAW ${_TOOLCHAINS_RAW})
        if (_TOOLCHAIN_RAW MATCHES "([a-zA-Z0-9\\._\\-]+)[ \t\r\n]?(\\(active\\)|\\(active, default\\)|\\(default\\) \\(override\\)|\\(default\\)|\\(override\\))?[ \t\r\n]+(.+)")
            set(_TOOLCHAIN "${CMAKE_MATCH_1}")
            set(_TOOLCHAIN_TYPE "${CMAKE_MATCH_2}")

            set(_TOOLCHAIN_PATH "${CMAKE_MATCH_3}")
            set(_TOOLCHAIN_${_TOOLCHAIN}_PATH "${CMAKE_MATCH_3}")

            if (_TOOLCHAIN_TYPE MATCHES ".*\\((active, )?default\\).*")
                set(_TOOLCHAIN_DEFAULT "${_TOOLCHAIN}")
            endif()

            if (_TOOLCHAIN_TYPE MATCHES ".*\\((active|override)\\).*")
                set(_TOOLCHAIN_OVERRIDE "${_TOOLCHAIN}")
            endif()

            execute_process(
                COMMAND
                    "${_TOOLCHAIN_PATH}/bin/rustc" --version
                OUTPUT_VARIABLE _TOOLCHAIN_RAW_VERSION
            )
            if (_TOOLCHAIN_RAW_VERSION MATCHES "rustc ([0-9]+)\\.([0-9]+)\\.([0-9]+)(-nightly)?")
                list(APPEND _DISCOVERED_TOOLCHAINS "${_TOOLCHAIN}")
                list(APPEND _DISCOVERED_TOOLCHAINS_RUSTC_PATH "${_TOOLCHAIN_PATH}/bin/rustc")
                list(APPEND _DISCOVERED_TOOLCHAINS_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")

                # We need this variable to determine the default toolchain, since `foreach(... IN ZIP_LISTS ...)`
                # requires CMake 3.17. As a workaround we define this variable to lookup the version when iterating
                # through the `_DISCOVERED_TOOLCHAINS` lists.
                set(_TOOLCHAIN_${_TOOLCHAIN}_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}")
                if(CMAKE_MATCH_4)
                    set(_TOOLCHAIN_${_TOOLCHAIN}_IS_NIGHTLY "TRUE")
                else()
                    set(_TOOLCHAIN_${_TOOLCHAIN}_IS_NIGHTLY "FALSE")
                endif()
                if(EXISTS "${_TOOLCHAIN_PATH}/bin/cargo")
                    list(APPEND _DISCOVERED_TOOLCHAINS_CARGO_PATH "${_TOOLCHAIN_PATH}/bin/cargo")
                else()
                    list(APPEND _DISCOVERED_TOOLCHAINS_CARGO_PATH "NOTFOUND")
                endif()
            else()
                message(AUTHOR_WARNING "Unexpected output from `rustc --version` for Toolchain `${_TOOLCHAIN}`: "
                        "`${_TOOLCHAIN_RAW_VERSION}`.\n"
                        "Ignoring this toolchain."
                )
            endif()
        else()
            message(AUTHOR_WARNING "Didn't recognize toolchain: ${_TOOLCHAIN_RAW}. Ignoring this toolchain.\n"
                "Rustup toolchain list output( `${Rust_RUSTUP} toolchain list --verbose`):\n"
                "${_TOOLCHAINS_RAW}"
            )
        endif()
    endforeach()

    # Expose a list of available rustup toolchains.
    list(LENGTH _DISCOVERED_TOOLCHAINS _toolchain_len)
    list(LENGTH _DISCOVERED_TOOLCHAINS_RUSTC_PATH _toolchain_rustc_len)
    list(LENGTH _DISCOVERED_TOOLCHAINS_CARGO_PATH _toolchain_cargo_len)
    list(LENGTH _DISCOVERED_TOOLCHAINS_VERSION _toolchain_version_len)
    if(NOT
        (_toolchain_len EQUAL _toolchain_rustc_len
            AND _toolchain_cargo_len EQUAL _toolchain_version_len
            AND _toolchain_len EQUAL _toolchain_cargo_len)
        )
        message(FATAL_ERROR "Internal error - list length mismatch."
            "List lengths: ${_toolchain_len} toolchains, ${_toolchain_rustc_len} rustc, ${_toolchain_cargo_len} cargo,"
            " ${_toolchain_version_len} version. The lengths should be the same."
        )
    endif()

    # The cache signature is `set(<var> <value> CACHE <type> <docstring>)`. With the value placed
    # after the docstring, `CACHE` is not recognised and no cache entry is created at all.
    set(Rust_RUSTUP_TOOLCHAINS "${_DISCOVERED_TOOLCHAINS}"
        CACHE INTERNAL "List of available Rustup toolchains")
    set(Rust_RUSTUP_TOOLCHAINS_RUSTC_PATH "${_DISCOVERED_TOOLCHAINS_RUSTC_PATH}"
        CACHE INTERNAL
        "List of the rustc paths corresponding to the toolchain at the same index in `Rust_RUSTUP_TOOLCHAINS`."
    )
    set(Rust_RUSTUP_TOOLCHAINS_CARGO_PATH "${_DISCOVERED_TOOLCHAINS_CARGO_PATH}"
        CACHE INTERNAL
        "List of the cargo paths corresponding to the toolchain at the same index in `Rust_RUSTUP_TOOLCHAINS`. \
        May also be `NOTFOUND` if the toolchain does not have a cargo executable."
    )
    set(Rust_RUSTUP_TOOLCHAINS_VERSION "${_DISCOVERED_TOOLCHAINS_VERSION}"
        CACHE INTERNAL
        "List of the rust toolchain version corresponding to the toolchain at the same index in \
        `Rust_RUSTUP_TOOLCHAINS`."
    )

    # Rust_TOOLCHAIN is preferred over a requested version if it is set.
    if (NOT DEFINED Rust_TOOLCHAIN)
        if (NOT DEFINED _TOOLCHAIN_OVERRIDE)
            set(_TOOLCHAIN_SELECTED "${_TOOLCHAIN_DEFAULT}")
        else()
            set(_TOOLCHAIN_SELECTED "${_TOOLCHAIN_OVERRIDE}")
        endif()
        # Check default toolchain first.
        # Pass the version itself, not the name of the variable that holds it.
        _findrust_version_ok("${_TOOLCHAIN_${_TOOLCHAIN_SELECTED}_VERSION}" _VERSION_OK)
        if(NOT "${_VERSION_OK}")
            # `IN LISTS` visits every toolchain; a quoted list would be one single item.
            foreach(_TOOLCHAIN IN LISTS _DISCOVERED_TOOLCHAINS)
                _findrust_version_ok("${_TOOLCHAIN_${_TOOLCHAIN}_VERSION}" _VERSION_OK)
                if("${_VERSION_OK}")
                    set(_TOOLCHAIN_SELECTED "${_TOOLCHAIN}")
                    break()
                endif()
            endforeach()
            # Check if we found a suitable version in the for loop.
            if(NOT "${_VERSION_OK}")
                string(REPLACE ";" "\n" _DISCOVERED_TOOLCHAINS "${_DISCOVERED_TOOLCHAINS}")
                _findrust_failed("Failed to find a Rust toolchain matching the version requirements of "
                        "${Rust_FIND_VERSION}. Available toolchains: ${_DISCOVERED_TOOLCHAINS}")
            endif()
        endif()
    endif()

    set(Rust_TOOLCHAIN "${_TOOLCHAIN_SELECTED}" CACHE STRING "The rustup toolchain to use")
    set_property(CACHE Rust_TOOLCHAIN PROPERTY STRINGS "${_DISCOVERED_TOOLCHAINS}")

    if(NOT Rust_FIND_QUIETLY)
        message(STATUS "Rust Toolchain: ${Rust_TOOLCHAIN}")
    endif()

    if (NOT Rust_TOOLCHAIN IN_LIST _DISCOVERED_TOOLCHAINS)
        # If the precise toolchain wasn't found, try appending the default host
        execute_process(
            COMMAND
                "${Rust_RUSTUP}" show
            RESULT_VARIABLE _SHOW_RESULT
            OUTPUT_VARIABLE _SHOW_RAW
        )
        if(NOT "${_SHOW_RESULT}" EQUAL "0")
            _findrust_failed("Command `${Rust_RUSTUP} show` failed")
        endif()

        if (_SHOW_RAW MATCHES "Default host: ([a-zA-Z0-9_\\-]*)\n")
            set(_DEFAULT_HOST "${CMAKE_MATCH_1}")
        else()
            _findrust_failed("Failed to parse \"Default host\" from `${Rust_RUSTUP} show`. Got: ${_SHOW_RAW}")
        endif()

        if (NOT "${Rust_TOOLCHAIN}-${_DEFAULT_HOST}" IN_LIST _DISCOVERED_TOOLCHAINS)
            set(_NOT_FOUND_MESSAGE "Could not find toolchain '${Rust_TOOLCHAIN}'\n"
                "Available toolchains:\n"
            )
            foreach(_TOOLCHAIN ${_DISCOVERED_TOOLCHAINS})
                list(APPEND _NOT_FOUND_MESSAGE "  `${_TOOLCHAIN}`\n")
            endforeach()
            _findrust_failed(${_NOT_FOUND_MESSAGE})
        endif()

        set(_RUSTUP_TOOLCHAIN_FULL "${Rust_TOOLCHAIN}-${_DEFAULT_HOST}")
    else()
        set(_RUSTUP_TOOLCHAIN_FULL "${Rust_TOOLCHAIN}")
    endif()

    set(_RUST_TOOLCHAIN_PATH "${_TOOLCHAIN_${_RUSTUP_TOOLCHAIN_FULL}_PATH}")
    if(NOT "${Rust_FIND_QUIETLY}")
        message(VERBOSE "Rust toolchain ${_RUSTUP_TOOLCHAIN_FULL}")
        message(VERBOSE "Rust toolchain path ${_RUST_TOOLCHAIN_PATH}")
    endif()

    # Is overridden if the user specifies `Rust_COMPILER` explicitly.
    find_program(
        Rust_COMPILER_CACHED
        rustc
            HINTS "${_RUST_TOOLCHAIN_PATH}/bin"
            NO_DEFAULT_PATH)
elseif (Rust_RUSTUP)
    get_filename_component(_RUST_TOOLCHAIN_PATH "${Rust_RUSTUP}" DIRECTORY)
    get_filename_component(_RUST_TOOLCHAIN_PATH "${_RUST_TOOLCHAIN_PATH}" DIRECTORY)
    find_program(
        Rust_COMPILER_CACHED
        rustc
            HINTS "${_RUST_TOOLCHAIN_PATH}/bin"
            NO_DEFAULT_PATH)
else()
    find_program(Rust_COMPILER_CACHED rustc)
    if (EXISTS "${Rust_COMPILER_CACHED}")
        # rustc is expected to be at `<toolchain_path>/bin/rustc`.
        get_filename_component(_RUST_TOOLCHAIN_PATH "${Rust_COMPILER_CACHED}" DIRECTORY)
        get_filename_component(_RUST_TOOLCHAIN_PATH "${_RUST_TOOLCHAIN_PATH}" DIRECTORY)
    endif()
endif()

if (NOT EXISTS "${Rust_COMPILER_CACHED}")
    set(_NOT_FOUND_MESSAGE "The rustc executable was not found. "
        "Rust not installed or ~/.cargo/bin not added to path?\n"
        "Hint: Consider setting `Rust_COMPILER` to the absolute path of `rustc`."
    )
    _findrust_failed(${_NOT_FOUND_MESSAGE})
endif()

if (Rust_RESOLVE_RUSTUP_TOOLCHAINS)
    set(_NOT_FOUND_MESSAGE "Rust was detected to be managed by rustup, but failed to find `cargo` "
        "next to `rustc` in `${_RUST_TOOLCHAIN_PATH}/bin`. This can happen for custom toolchains, "
        "if cargo was not built. "
        "Please manually specify the path to a compatible `cargo` by setting `Rust_CARGO`."
    )
    find_program(
        Rust_CARGO_CACHED
        cargo
            HINTS "${_RUST_TOOLCHAIN_PATH}/bin"
            NO_DEFAULT_PATH
    )
    # note: maybe can use find_package_handle_standard_args here, if we remove the _CACHED postfix.
    # not sure why that is here...
    if(NOT EXISTS "${Rust_CARGO_CACHED}")
        _findrust_failed(${_NOT_FOUND_MESSAGE})
    endif()
    set(Rust_TOOLCHAIN_IS_RUSTUP_MANAGED TRUE CACHE INTERNAL "" FORCE)
else()
    set(_NOT_FOUND_MESSAGE "Failed to find `cargo` in PATH and `${_RUST_TOOLCHAIN_PATH}/bin`.\n"
        "Please ensure cargo is in PATH or manually specify the path to a compatible `cargo` by "
        "setting `Rust_CARGO`."
    )
    # On some systems (e.g. NixOS) cargo is not managed by rustup and also not next to rustc.
    find_program(
        Rust_CARGO_CACHED
        cargo
            HINTS "${_RUST_TOOLCHAIN_PATH}/bin"
    )
    # note: maybe can use find_package_handle_standard_args here, if we remove the _CACHED postfix.
    # not sure why that is here...
    if(NOT EXISTS "${Rust_CARGO_CACHED}")
        _findrust_failed(${_NOT_FOUND_MESSAGE})
    endif()
endif()

execute_process(
    COMMAND "${Rust_CARGO_CACHED}" --version --verbose
    OUTPUT_VARIABLE _CARGO_VERSION_RAW
    RESULT_VARIABLE _CARGO_VERSION_RESULT
)
# todo: check if cargo is a required component!
if(NOT ( "${_CARGO_VERSION_RESULT}" EQUAL "0" ))
    _findrust_failed("Failed to get cargo version.\n"
        "`${Rust_CARGO_CACHED} --version` failed with error: `${_CARGO_VERSION_RESULT}`"
)
endif()

# todo: don't set cache variables here, but let find_package_handle_standard_args do the promotion
# later.
+if (_CARGO_VERSION_RAW MATCHES "cargo ([0-9]+)\\.([0-9]+)\\.([0-9]+)")  # Split the `cargo --version` output into major/minor/patch.
+    set(Rust_CARGO_VERSION_MAJOR "${CMAKE_MATCH_1}" CACHE INTERNAL "" FORCE)
+    set(Rust_CARGO_VERSION_MINOR "${CMAKE_MATCH_2}" CACHE INTERNAL "" FORCE)
+    set(Rust_CARGO_VERSION_PATCH "${CMAKE_MATCH_3}" CACHE INTERNAL "" FORCE)
+    set(Rust_CARGO_VERSION "${Rust_CARGO_VERSION_MAJOR}.${Rust_CARGO_VERSION_MINOR}.${Rust_CARGO_VERSION_PATCH}" CACHE INTERNAL "" FORCE)
+# Workaround for the version strings where the `cargo ` prefix is missing.
+elseif(_CARGO_VERSION_RAW MATCHES "([0-9]+)\\.([0-9]+)\\.([0-9]+)")
+    set(Rust_CARGO_VERSION_MAJOR "${CMAKE_MATCH_1}" CACHE INTERNAL "" FORCE)
+    set(Rust_CARGO_VERSION_MINOR "${CMAKE_MATCH_2}" CACHE INTERNAL "" FORCE)
+    set(Rust_CARGO_VERSION_PATCH "${CMAKE_MATCH_3}" CACHE INTERNAL "" FORCE)
+    set(Rust_CARGO_VERSION "${Rust_CARGO_VERSION_MAJOR}.${Rust_CARGO_VERSION_MINOR}.${Rust_CARGO_VERSION_PATCH}" CACHE INTERNAL "" FORCE)
+else()
+    _findrust_failed(
+        "Failed to parse cargo version. `cargo --version` evaluated to (${_CARGO_VERSION_RAW}). "
+        "Expected a <Major>.<Minor>.<Patch> version triple."
+    )
+endif()
+
+execute_process(
+    COMMAND "${Rust_COMPILER_CACHED}" --version --verbose
+    OUTPUT_VARIABLE _RUSTC_VERSION_RAW
+    RESULT_VARIABLE _RUSTC_VERSION_RESULT
+)
+
+if(NOT ( "${_RUSTC_VERSION_RESULT}" EQUAL "0" ))  # Any result other than 0 from the rustc invocation is treated as a failure.
+    _findrust_failed("Failed to get rustc version.\n"
+        "${Rust_COMPILER_CACHED} --version failed with error: `${_RUSTC_VERSION_RESULT}`")
+endif()
+
+if (_RUSTC_VERSION_RAW MATCHES "rustc ([0-9]+)\\.([0-9]+)\\.([0-9]+)(-nightly)?")  # Version triple plus an optional `-nightly` suffix (match 4).
+    set(Rust_VERSION_MAJOR "${CMAKE_MATCH_1}" CACHE INTERNAL "" FORCE)
+    set(Rust_VERSION_MINOR "${CMAKE_MATCH_2}" CACHE INTERNAL "" FORCE)
+    set(Rust_VERSION_PATCH "${CMAKE_MATCH_3}" CACHE INTERNAL "" FORCE)
+    set(Rust_VERSION "${Rust_VERSION_MAJOR}.${Rust_VERSION_MINOR}.${Rust_VERSION_PATCH}" CACHE INTERNAL "" FORCE)
+    if(CMAKE_MATCH_4)
+        set(Rust_IS_NIGHTLY 1 CACHE INTERNAL "" FORCE)
+    else()
+        set(Rust_IS_NIGHTLY 0 CACHE INTERNAL "" FORCE)
+    endif()
+else()
+    _findrust_failed("Failed to parse rustc version. `${Rust_COMPILER_CACHED} --version --verbose` "
+        "evaluated to:\n`${_RUSTC_VERSION_RAW}`"
+    )
+endif()
+
+if (_RUSTC_VERSION_RAW MATCHES "host: ([a-zA-Z0-9_\\-]*)\n")  # The `host:` line supplies the default host target triple.
+    set(Rust_DEFAULT_HOST_TARGET "${CMAKE_MATCH_1}")
+    set(Rust_CARGO_HOST_TARGET_CACHED "${Rust_DEFAULT_HOST_TARGET}" CACHE STRING "Host triple")
+else()
+    _findrust_failed(
+        "Failed to parse rustc host target. `rustc --version --verbose` evaluated to:\n${_RUSTC_VERSION_RAW}"
+    )
+endif()
+
+if (_RUSTC_VERSION_RAW MATCHES "LLVM version: ([0-9]+)\\.([0-9]+)(\\.([0-9]+))?")  # A missing LLVM version only produces a warning below.
+    set(Rust_LLVM_VERSION_MAJOR "${CMAKE_MATCH_1}" CACHE INTERNAL "" FORCE)
+    set(Rust_LLVM_VERSION_MINOR "${CMAKE_MATCH_2}" CACHE INTERNAL "" FORCE)
+    # With the Rust toolchain 1.44.1 the reported LLVM version is 9.0, i.e. without a patch version.
+    # Since cmake regex does not support non-capturing groups, just ignore Match 3.
+    set(Rust_LLVM_VERSION_PATCH "${CMAKE_MATCH_4}" CACHE INTERNAL "" FORCE)
+    set(Rust_LLVM_VERSION "${Rust_LLVM_VERSION_MAJOR}.${Rust_LLVM_VERSION_MINOR}.${Rust_LLVM_VERSION_PATCH}" CACHE INTERNAL "" FORCE)
+elseif(NOT Rust_FIND_QUIETLY)
+    message(
+        WARNING
+        "Failed to parse rustc LLVM version. `rustc --version --verbose` evaluated to:\n${_RUSTC_VERSION_RAW}"
+    )
+endif()
+
+if (NOT Rust_CARGO_TARGET_CACHED)  # Infer the cargo target triple only when none is cached yet.
+    unset(_CARGO_ARCH)
+    unset(_CARGO_ABI)
+    if (WIN32)
+        if (CMAKE_VS_PLATFORM_NAME)
+            string(TOLOWER "${CMAKE_VS_PLATFORM_NAME}" LOWER_VS_PLATFORM_NAME)
+            if ("${LOWER_VS_PLATFORM_NAME}" STREQUAL "win32")
+                set(_CARGO_ARCH i686)
+            elseif("${LOWER_VS_PLATFORM_NAME}" STREQUAL "x64")
+                set(_CARGO_ARCH x86_64)
+            elseif("${LOWER_VS_PLATFORM_NAME}" STREQUAL "arm64")
+                set(_CARGO_ARCH aarch64)
+            else()
+                message(WARNING "VS Platform '${CMAKE_VS_PLATFORM_NAME}' not recognized")
+            endif()
+        endif()
+        # Fallback path
+        if(NOT DEFINED _CARGO_ARCH)
+            # Possible values for windows when not cross-compiling taken from here:
+            # https://learn.microsoft.com/en-us/windows/win32/winprog64/wow64-implementation-details
+            # When cross-compiling the user is expected to supply the value, so we match more variants.
+            if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(AMD64|amd64|x86_64)$")
+                set(_CARGO_ARCH x86_64)
+            elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(ARM64|arm64|aarch64)$")
+                set(_CARGO_ARCH aarch64)
+            elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(X86|x86|i686)$")
+                set(_CARGO_ARCH i686)
+            elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "i586")
+                set(_CARGO_ARCH i586)
+            elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "IA64")
+                message(FATAL_ERROR "No rust target for Intel Itanium.")
+            elseif(NOT CMAKE_SYSTEM_PROCESSOR)  # Unset or empty. The quoted form `NOT "${...}"` was true for every non-boolean string, so the else() branch was never reached.
+                message(WARNING "Failed to detect target architecture. Please set `CMAKE_SYSTEM_PROCESSOR`"
+                    " to your target architecture or set `Rust_CARGO_TARGET` to your cargo target triple."
+                )
+            else()
+                message(WARNING "Failed to detect target architecture. Please set "
+                    "`Rust_CARGO_TARGET` to your cargo target triple."
+                )
+            endif()
+        endif()
+
+        set(_CARGO_VENDOR "pc-windows")
+
+        # The MSVC Generators will always target the msvc ABI.
+        # For other generators we check the compiler ID and compiler target (if present)
+        # If no compiler is set and we are not cross-compiling then we just choose the
+        # default rust host target.
+        if(DEFINED MSVC
+            OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC"
+            OR "${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC"
+            OR "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "-msvc$"
+            OR "${CMAKE_C_COMPILER_TARGET}" MATCHES "-msvc$"
+        )
+            set(_CARGO_ABI msvc)
+        elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"
+            OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU"
+            OR "${CMAKE_CXX_COMPILER_TARGET}" MATCHES "-gnu$"
+            OR "${CMAKE_C_COMPILER_TARGET}" MATCHES "-gnu$"
+            OR (NOT CMAKE_CROSSCOMPILING AND "${Rust_DEFAULT_HOST_TARGET}" MATCHES "-gnu$")
+        )
+            set(_CARGO_ABI gnu)
+        elseif(NOT "${CMAKE_CROSSCOMPILING}" AND "${Rust_DEFAULT_HOST_TARGET}" MATCHES "-msvc$")
+            # We first check if the gnu branch matches to ensure this fallback is only used
+            # if no compiler is enabled.
+            set(_CARGO_ABI msvc)
+        else()
+            message(WARNING "Could not determine the target ABI. Please specify `Rust_CARGO_TARGET` manually.")
+        endif()
+
+        if(DEFINED _CARGO_ARCH AND DEFINED _CARGO_VENDOR AND DEFINED _CARGO_ABI)
+            set(Rust_CARGO_TARGET_CACHED "${_CARGO_ARCH}-${_CARGO_VENDOR}-${_CARGO_ABI}"
+                CACHE STRING "Target triple")
+        endif()
+    elseif (ANDROID)
+        if (CMAKE_ANDROID_ARCH_ABI STREQUAL armeabi-v7a)
+            if (CMAKE_ANDROID_ARM_MODE)
+                set(_Rust_ANDROID_TARGET armv7-linux-androideabi)
+            else ()
+                set(_Rust_ANDROID_TARGET thumbv7neon-linux-androideabi)
+            endif()
+        elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL arm64-v8a)
+            set(_Rust_ANDROID_TARGET aarch64-linux-android)
+        elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL x86)
+            set(_Rust_ANDROID_TARGET i686-linux-android)
+        elseif (CMAKE_ANDROID_ARCH_ABI STREQUAL x86_64)
+            set(_Rust_ANDROID_TARGET x86_64-linux-android)
+        endif()
+
+        if (_Rust_ANDROID_TARGET)
+            set(Rust_CARGO_TARGET_CACHED "${_Rust_ANDROID_TARGET}" CACHE STRING "Target triple")
+        endif()
+    elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "OHOS")
+        if(CMAKE_OHOS_ARCH_ABI STREQUAL arm64-v8a)
+            set(_RUST_OHOS_TARGET aarch64-unknown-linux-ohos)
+        elseif(CMAKE_OHOS_ARCH_ABI STREQUAL armeabi-v7a)
+            set(_RUST_OHOS_TARGET armv7-unknown-linux-ohos)
+        elseif(CMAKE_OHOS_ARCH_ABI STREQUAL x86_64)
+            set(_RUST_OHOS_TARGET x86_64-unknown-linux-ohos)
+        else()
+            message(WARNING "unrecognized OHOS architecture: ${OHOS_ARCH}")  # NOTE(review): prints OHOS_ARCH although the branches test CMAKE_OHOS_ARCH_ABI - confirm which is intended.
+        endif()
+        if(_RUST_OHOS_TARGET)
+            set(Rust_CARGO_TARGET_CACHED "${_RUST_OHOS_TARGET}" CACHE STRING "Target triple")
+        endif()
+    endif()
+    # Fallback to the default host target
+    if(NOT Rust_CARGO_TARGET_CACHED)
+        if(CMAKE_CROSSCOMPILING)
+            message(WARNING "CMake is in cross-compiling mode, but the cargo target-triple could not be inferred. "
+                "Falling back to the default host target. Please consider manually setting `Rust_CARGO_TARGET`."
+            )
+        endif()
+        set(Rust_CARGO_TARGET_CACHED "${Rust_DEFAULT_HOST_TARGET}" CACHE STRING "Target triple")
+    endif()
+
+    message(STATUS "Rust Target: ${Rust_CARGO_TARGET_CACHED}")
+endif()
+
+if(Rust_CARGO_TARGET_CACHED STREQUAL Rust_DEFAULT_HOST_TARGET)  # Cross-compiling means the target triple differs from the rustc host triple.
+    set(Rust_CROSSCOMPILING FALSE CACHE INTERNAL "Rust is configured for cross-compiling")
+else()
+    set(Rust_CROSSCOMPILING TRUE CACHE INTERNAL "Rust is configured for cross-compiling")
+endif()
+
+_corrosion_parse_target_triple("${Rust_CARGO_TARGET_CACHED}" rust_arch rust_vendor rust_os rust_env)
+_corrosion_parse_target_triple("${Rust_CARGO_HOST_TARGET_CACHED}" rust_host_arch rust_host_vendor rust_host_os rust_host_env)
+
+set(Rust_CARGO_TARGET_ARCH "${rust_arch}" CACHE INTERNAL "Target architecture")
+set(Rust_CARGO_TARGET_VENDOR "${rust_vendor}" CACHE INTERNAL "Target vendor")
+set(Rust_CARGO_TARGET_OS "${rust_os}" CACHE INTERNAL "Target Operating System")
+set(Rust_CARGO_TARGET_ENV "${rust_env}" CACHE INTERNAL "Target environment")
+
+set(Rust_CARGO_HOST_ARCH "${rust_host_arch}" CACHE INTERNAL "Host architecture")
+set(Rust_CARGO_HOST_VENDOR "${rust_host_vendor}" CACHE INTERNAL "Host vendor")
+set(Rust_CARGO_HOST_OS "${rust_host_os}" CACHE INTERNAL "Host Operating System")
+set(Rust_CARGO_HOST_ENV "${rust_host_env}" CACHE INTERNAL "Host environment")
+
+if(NOT DEFINED CACHE{Rust_CARGO_TARGET_LINK_NATIVE_LIBS})  # Skipped once the cache entry exists, so the probe is not repeated.
+    message(STATUS "Determining required link libraries for target ${Rust_CARGO_TARGET_CACHED}")
+    unset(required_native_libs)
+    _corrosion_determine_libs_new("${Rust_CARGO_TARGET_CACHED}" required_native_libs required_link_flags)
+    if(DEFINED required_native_libs)
+        message(STATUS "Required static libs for target ${Rust_CARGO_TARGET_CACHED}: ${required_native_libs}" )
+    endif()
+    if(DEFINED required_link_flags)
+        message(STATUS "Required link flags for target ${Rust_CARGO_TARGET_CACHED}: ${required_link_flags}" )
+    endif()
+    # In very recent corrosion versions it is possible to override the rust compiler version
+    # per target, so to be totally correct we would need to determine the libraries for
+    # every installed Rust version, that the user could choose from.
+    # In practice there aren't likely going to be any major differences, so we just do it once
+    # for the target and once for the host target (if cross-compiling).
+    set(Rust_CARGO_TARGET_LINK_NATIVE_LIBS "${required_native_libs}" CACHE INTERNAL
+        "Required native libraries when linking Rust static libraries")
+    set(Rust_CARGO_TARGET_LINK_OPTIONS "${required_link_flags}" CACHE INTERNAL
+        "Required link flags when linking Rust static libraries")
+endif()
+
+if(Rust_CROSSCOMPILING AND NOT DEFINED CACHE{Rust_CARGO_HOST_TARGET_LINK_NATIVE_LIBS})  # Same probe for the host triple, only when cross-compiling.
+    message(STATUS "Determining required link libraries for target ${Rust_CARGO_HOST_TARGET_CACHED}")
+    unset(host_libs)
+    _corrosion_determine_libs_new("${Rust_CARGO_HOST_TARGET_CACHED}" host_libs host_flags)
+    if(DEFINED host_libs)
+        message(STATUS "Required static libs for host target ${Rust_CARGO_HOST_TARGET_CACHED}: ${host_libs}" )
+    endif()
+    set(Rust_CARGO_HOST_TARGET_LINK_NATIVE_LIBS "${host_libs}" CACHE INTERNAL
+        "Required native libraries when linking Rust static libraries for the host target")
+    set(Rust_CARGO_HOST_TARGET_LINK_OPTIONS "${host_flags}" CACHE INTERNAL
+        "Required linker flags when linking Rust static libraries for the host target")
+endif()
+
+# Set the input variables as non-cache variables so that the variables are available after
+# `find_package`, even if the values were evaluated to defaults.
+foreach(_VAR ${_Rust_USER_VARS}) + set(${_VAR} "${${_VAR}_CACHED}") + # Ensure cached variables have type INTERNAL + set(${_VAR}_CACHED "${${_VAR}_CACHED}" CACHE INTERNAL "Internal cache of ${_VAR}") +endforeach() + +find_package_handle_standard_args( + Rust + REQUIRED_VARS Rust_COMPILER Rust_VERSION Rust_CARGO Rust_CARGO_VERSION Rust_CARGO_TARGET Rust_CARGO_HOST_TARGET + VERSION_VAR Rust_VERSION +) + + +if(NOT TARGET Rust::Rustc) + add_executable(Rust::Rustc IMPORTED GLOBAL) + set_property( + TARGET Rust::Rustc + PROPERTY IMPORTED_LOCATION "${Rust_COMPILER_CACHED}" + ) + + add_executable(Rust::Cargo IMPORTED GLOBAL) + set_property( + TARGET Rust::Cargo + PROPERTY IMPORTED_LOCATION "${Rust_CARGO_CACHED}" + ) + set(Rust_FOUND true) +endif() + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/lib/corrosion/doc/.gitignore b/lib/corrosion/doc/.gitignore new file mode 100644 index 000000000..7585238ef --- /dev/null +++ b/lib/corrosion/doc/.gitignore @@ -0,0 +1 @@ +book diff --git a/lib/corrosion/doc/book.toml b/lib/corrosion/doc/book.toml new file mode 100644 index 000000000..bb294c056 --- /dev/null +++ b/lib/corrosion/doc/book.toml @@ -0,0 +1,5 @@ +[book] +language = "en" +multilingual = false +src = "src" +title = "Corrosion v0.5 documentation" diff --git a/lib/corrosion/doc/src/SUMMARY.md b/lib/corrosion/doc/src/SUMMARY.md new file mode 100644 index 000000000..9f877b928 --- /dev/null +++ b/lib/corrosion/doc/src/SUMMARY.md @@ -0,0 +1,9 @@ +# Summary + +- [Introduction](./introduction.md) +- [Quick Start](./quick_start.md) +- [Setup Corrosion](./setup_corrosion.md) +- [Usage](./usage.md) +- [Advanced](./advanced.md) +- [FFI binding integrations](./ffi_bindings.md) +- [Common Issues](./common_issues.md) diff --git a/lib/corrosion/doc/src/advanced.md b/lib/corrosion/doc/src/advanced.md new file mode 100644 index 000000000..203c48bf1 --- /dev/null +++ b/lib/corrosion/doc/src/advanced.md @@ -0,0 +1,100 @@ +## What does corrosion do? 
+ +The specifics of what corrosion does should be regarded as an implementation detail and not relied on +when writing user code. However, a basic understanding of what corrosion does may be helpful when investigating +issues. + +### FindRust + +Corrosion maintains a CMake module `FindRust` which is executed when Corrosion is loaded, i.e. at the time +of `find_package(corrosion)`, `FetchContent_MakeAvailable(corrosion)` or `add_subdirectory(corrosion)` depending +on the method used to include Corrosion. + +`FindRust` will search for installed rust toolchains, respecting the options prefixed with `Rust_` documented in +the [Usage](usage.md#corrosion-options) chapter. +It will select _one_ Rust toolchain to be used for the compilation of Rust code. Toolchains managed by `rustup` +will be resolved and corrosion will always select a specific toolchain, not a `rustup` proxy. + + +### Importing Rust crates + +Corrosion's main function is `corrosion_import_crate`, which internally will call `cargo metadata` to provide +structured information based on the `Cargo.toml` manifest. +Corrosion will then iterate over all workspace and/or package members and find all rust crates that are either +a static (`staticlib`) or shared (`cdylib`) library or a `bin` target and create CMake targets matching the +crate name. Additionally, a build target is created for each imported target, containing the required build +command to create the imported artifact. This build command can be influenced by various arguments to +`corrosion_import_crate` as well as corrosion specific target properties which are documented in the +[Usage](usage.md) chapter. +Corrosion adds the necessary dependencies and also copies the target artifacts out of the cargo build tree +to standard CMake locations, even respecting `OUTPUT_DIRECTORY` target properties if set. + +### Linking + +Depending on the type of the crate the linker will either be invoked by CMake or by `rustc`. 
+Rust `staticlib`s are linked into C/C++ code via `target_link_libraries()` and the linker is +invoked by CMake. +For rust `cdylib`s and `bin`s, the linker is invoked via `rustc` and CMake just gets the final artifact. + +#### CMake invokes the linker + +When CMake invokes the linker, everything is as usual. CMake will call the linker with +the compiler as the linker driver and users can just use the regular CMake functions to +modify linking behaviour. `corrosion_set_linker()` has **no effect**. +As a convenience, `corrosion_link_libraries()` will forward its arguments to `target_link_libraries()`. + +#### Rustc invokes the linker + +Rust `cdylib`s and `bin`s are linked via `rustc`. Corrosion provides several helper functions +to influence the linker invocation for such targets. + +`corrosion_link_libraries()` is a limited version of `target_link_libraries()` +for rust `cdylib` or `bin` targets. +Under the hood this function passes `-l` and `-L` flags to the linker invocation and +ensures the linked libraries are built first. +Much of the advanced functionality available in `target_link_libraries()` is not implemented yet, +but pull-requests are welcome! In the meantime, users may want to use +`corrosion_add_target_local_rustflags()` to pass customized linking flags. + +`corrosion_set_linker()` can be used to specify a custom linker, in case the default one +chosen by corrosion is not what you want. +Corrosion currently instructs `rustc` to use the C/C++ compiler as the linker driver. +This is done because: +- For C++ code we must link with `libstdc++` or `libc++` (depending on the compiler), so we must + either specify the library on the link line or use a `c++` compiler as the linker driver. +- `Rustc`s default linker selection currently is not so great. For a number of platforms + `rustc` will fallback to `cc` as the linker driver. When cross-compiling, this leads + to linking failures, since the linker driver is for the host architecture. 
+ Corrosion avoids this by specifying the C/C++ compiler as the linker driver. + + +In some cases, especially in older rust versions (pre 1.68), the linker flavor detection +of `rustc` is also not correct, so when setting a custom linker you may want to pass the +[`-C linker-flavor`](https://doc.rust-lang.org/rustc/codegen-options/index.html#linker-flavor) +rustflag via `corrosion_add_target_local_rustflags()`. + +## FFI bindings + +For interaction between Rust and other languages there need to be some FFI bindings of some sort. +For simple cases manually defining the interfaces may be sufficient, but in many cases users +wish to use tools like [bindgen], [cbindgen], [cxx] or [autocxx] to automate the generating of +bindings. + +In principle there are two different ways to generate the bindings: +- use a `build.rs` script to generate the bindings when cargo is invoked, using + library versions of the tools to generate the bindings. +- use the cli versions of the tools and setup custom CMake targets/commands to + generate the bindings. This approach should be preferred if the bindings are needed + by the C/C++ side. + +Corrosion currently provides 2 experimental functions to integrate cbindgen and cxx into +the build process. They are not 100% production ready yet, but should work well as a +template on how to integrate generating bindings into your build process. + +Todo: expand this documentation and link to other resources. 
+ +[bindgen]: https://rust-lang.github.io/rust-bindgen/ +[cbindgen]: https://github.com/eqrion/cbindgen +[cxx]: https://cxx.rs/ +[autocxx]: https://google.github.io/autocxx/index.html + \ No newline at end of file diff --git a/lib/corrosion/doc/src/common_issues.md b/lib/corrosion/doc/src/common_issues.md new file mode 100644 index 000000000..62d3d3d95 --- /dev/null +++ b/lib/corrosion/doc/src/common_issues.md @@ -0,0 +1,88 @@ +# Commonly encountered (Non-Corrosion) Issues + +## Table of Contents + +- [Linking Debug C/C++ libraries into Rust fails on Windows MSVC targets](#linking-debug-cc-libraries-into-rust-fails-on-windows-msvc-targets) +- [Linking Rust static libraries into Debug C/C++ binaries fails on Windows MSVC targets](#linking-rust-static-libraries-into-debug-cc-binaries-fails-on-windows-msvc-targets) +- [Missing `soname` on Linux for `cdylibs`](#missing-soname-on-linux-for-cdylibs) +- [Missing `install_name` on MacOS for `ccdylibs` / Hardcoded references to the build-directory](#missing-installname-on-macos-for-ccdylibs--hardcoded-references-to-the-build-directory) + +## Linking Debug C/C++ libraries into Rust fails on Windows MSVC targets + +`rustc` always links against the non-debug Windows runtime on `*-msvc` targets. +This is tracked [in this issue](https://github.com/rust-lang/rust/issues/39016) +and could be fixed upstream. 
+ +A typical error message for this issue is: + +``` + Compiling rust_bin v0.1.0 (D:\a\corrosion\corrosion\test\cxxbridge\cxxbridge_cpp2rust\rust) +error: linking with `link.exe` failed: exit code: 1319 +[ redacted ] + = note: cxxbridge-cpp.lib(lib.cpp.obj) : error LNK2038: mismatch detected for '_ITERATOR_DEBUG_LEVEL': value '2' doesn't match value '0' in libcxx-bafec361a1a30317.rlib(cxx.o) + + cxxbridge-cpp.lib(lib.cpp.obj) : error LNK2038: mismatch detected for 'RuntimeLibrary': value 'MDd_DynamicDebug' doesn't match value 'MD_DynamicRelease' in libcxx-bafec361a1a30317.rlib(cxx.o) + + cpp_lib.lib(cpplib.cpp.obj) : error LNK2038: mismatch detected for '_ITERATOR_DEBUG_LEVEL': value '2' doesn't match value '0' in libcxx-bafec361a1a30317.rlib(cxx.o) + + cpp_lib.lib(cpplib.cpp.obj) : error LNK2038: mismatch detected for 'RuntimeLibrary': value 'MDd_DynamicDebug' doesn't match value 'MD_DynamicRelease' in libcxx-bafec361a1a30317.rlib(cxx.o) + + msvcrt.lib(initializers.obj) : warning LNK4098: defaultlib 'msvcrtd.lib' conflicts with use of other libs; use /NODEFAULTLIB:library +``` + +### Solutions + +One solution is to also use the non-debug version when building the C/C++ libraries. +You can set the [MSVC_RUNTIME_LIBRARY] target properties of your C/C++ libraries to the non-debug variants. +By default you will probably want to select the `MultiThreadedDLL` variant, unless you specified +[`-Ctarget-feature=+crt-static`](https://rust-lang.github.io/rfcs/1721-crt-static.html) in your +`RUSTFLAGS`. + + +[MSVC_RUNTIME_LIBRARY]: https://cmake.org/cmake/help/latest/prop_tgt/MSVC_RUNTIME_LIBRARY.html#prop_tgt:MSVC_RUNTIME_LIBRARY + +## Linking Rust static libraries into Debug C/C++ binaries fails on Windows MSVC targets + +This issue is quite similar to the previous one, except that this time it's a Rust library being linked +into a C/C++ target. If it's 100% only Rust code you likely won't even have any issues. 
+However, if somewhere in the dependency graph C/C++ code is built and linked into your Rust library, +you will likely encounter this issue. Please note that using [cxx] counts as using C++ code and will +lead to this issue. + +The previous solution should also work for this case, but additionally you [may also +have success](https://github.com/rust-lang/rust/issues/39016#issuecomment-853964918) by using +`corrosion_set_env_vars(your_rust_lib "CFLAGS=-MDd" "CXXFLAGS=-MDd")` (or `-MTd` for a statically linked +runtime). +For debug builds, this is likely to be the preferable solution. It assumes that downstream C/C++ code +is built by the `cc` crate, which respects the `CFLAGS` and `CXXFLAGS` environment variables. + +[cxx]: https://github.com/dtolnay/cxx + + +## Missing `soname` on Linux for `cdylibs` + +Cargo doesn't support setting the `soname` field for cdylib, which may cause issues. +You can set the soname manually by passing a linker-flag such as `-Clink-arg=-Wl,-soname,libyour_crate.so` +to the linker via `corrosion_add_target_local_rustflags()` and additionally setting the `IMPORTED_SONAME` +property on the import CMake target: +``` +set_target_properties(your_crate-shared PROPERTIES IMPORTED_SONAME libyour_crate.so) +``` +Replace `your_crate` with the name of your shared library as defined in the `[lib]` section of your Cargo.toml +Manifest file. + +Attention: The Linux section may not be entirely correct, maybe `$ORIGIN` needs to be added to the linker arguments. +Feel free to open a pull-request with corrections. + +## Missing `install_name` on MacOS for `ccdylibs` / Hardcoded references to the build-directory + +The solution here is essentially the same as in the previous section. 
+``` +corrosion_add_target_local_rustflags(your_crate -Clink-arg=-Wl,-install_name,@rpath/libyour_crate.dylib,-current_version,${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR},-compatibility_version,${PROJECT_VERSION_MAJOR}.0) +set_target_properties(your_crate-shared PROPERTIES IMPORTED_NO_SONAME 0) +set_target_properties(your_crate-shared PROPERTIES IMPORTED_SONAME libyour_crate.dylib) +``` +When building binaries using this shared library, you should set the build rpath to the output directory of +your shared library, e.g. by setting `set(CMAKE_BUILD_RPATH ${YOUR_CUSTOM_OUTPUT_DIRECTORY})` before adding +executables. +For a practical example, you may look at [Slint PR 2455](https://github.com/slint-ui/slint/pull/2455). diff --git a/lib/corrosion/doc/src/ffi_bindings.md b/lib/corrosion/doc/src/ffi_bindings.md new file mode 100644 index 000000000..dfb4f4468 --- /dev/null +++ b/lib/corrosion/doc/src/ffi_bindings.md @@ -0,0 +1,43 @@ +# Integrating Automatically Generated FFI Bindings + +There are a number of tools to automatically generate bindings between Rust and different +foreign languages. + +1. [bindgen](#bindgen) +2. [cbindgen](#cbindgen-integration) +3. [cxx](#cxx-integration) + +## bindgen + +[bindgen] is a tool to automatically generate Rust bindings from C headers. +As such, integrating bindgen [via a build-script](https://rust-lang.github.io/rust-bindgen/library-usage.html) +works well and there doesn't seem to be a need to create CMake rules for +generating the bindings. + +[bindgen]: https://github.com/rust-lang/rust-bindgen + +## cbindgen integration + +⚠️⚠️⚠️ **EXPERIMENTAL** ⚠️⚠️⚠️ + +[cbindgen] is a tool that generates C/C++ headers from Rust code. When compiling C/C++ +code that `#include`s such generated headers the buildsystem must be aware of the dependencies. +Generating the headers via a build-script is possible, but Corrosion offers no guidance here. 
+ +Instead, Corrosion offers an experimental function to add CMake rules using cbindgen to generate +the headers. +This is not available on a stable released version yet, and the details are subject to change. +{{#include ../../cmake/Corrosion.cmake:corrosion_cbindgen}} + +### Current limitations + +- The current version regenerates the bindings more often than necessary to be on the safe side, + but an upstream PR is open to solve this in a future cbindgen version. + +## cxx integration + +⚠️⚠️⚠️ **EXPERIMENTAL** ⚠️⚠️⚠️ + +[cxx] is a tool which generates bindings for C++/Rust interop. + +{{#include ../../cmake/Corrosion.cmake:corrosion_add_cxxbridge}} diff --git a/lib/corrosion/doc/src/introduction.md b/lib/corrosion/doc/src/introduction.md new file mode 100644 index 000000000..e44401d68 --- /dev/null +++ b/lib/corrosion/doc/src/introduction.md @@ -0,0 +1,19 @@ +## About Corrosion + +Corrosion, formerly known as cmake-cargo, is a tool for integrating Rust into an existing CMake +project. Corrosion is capable of automatically importing executables, static libraries, and +dynamic libraries from a Rust package or workspace as CMake targets. + +The imported static and dynamic library types can be linked into C/C++ CMake targets using the usual +CMake functions such as [`target_link_libraries()`]. +For rust executables and dynamic libraries corrosion provides a `corrosion_link_libraries` +helper function to conveniently add the necessary flags to link C/C++ libraries into +the rust target. + +You are currently viewing the documentation of the stable v0.5 release branch. + +[`target_link_libraries()`]: https://cmake.org/cmake/help/latest/command/target_link_libraries.html + +## Requirements + +Corrosion v0.5 requires CMake 3.15 or newer and Rust 1.46 or newer. 
\ No newline at end of file diff --git a/lib/corrosion/doc/src/quick_start.md b/lib/corrosion/doc/src/quick_start.md new file mode 100644 index 000000000..3e5b83921 --- /dev/null +++ b/lib/corrosion/doc/src/quick_start.md @@ -0,0 +1,36 @@ +# Quick Start + +You can add corrosion to your project via the `FetchContent` CMake module or one of the other methods +described in the [Setup chapter](setup_corrosion.md). +Afterwards you can import Rust targets defined in a `Cargo.toml` manifest file by using +`corrosion_import_crate`. This will add CMake targets with names matching the crate names defined +in the Cargo.toml manifest. These targets can then subsequently be used, e.g. to link the imported +target into a regular C/C++ target. + +The example below shows how to add Corrosion to your project via `FetchContent` +and how to import a rust library and link it into a regular C/C++ CMake target. + +```cmake +include(FetchContent) + +FetchContent_Declare( + Corrosion + GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git + GIT_TAG v0.5 # Optionally specify a commit hash, version tag or branch here +) +# Set any global configuration variables such as `Rust_TOOLCHAIN` before this line! +FetchContent_MakeAvailable(Corrosion) + +# Import targets defined in a package or workspace manifest `Cargo.toml` file +corrosion_import_crate(MANIFEST_PATH rust-lib/Cargo.toml) + +add_executable(your_cool_cpp_bin main.cpp) + +# In this example the the `Cargo.toml` file passed to `corrosion_import_crate` is assumed to have +# defined a static (`staticlib`) or shared (`cdylib`) rust library with the name "rust-lib". +# A target with the same name is now available in CMake and you can use it to link the rust library into +# your C/C++ CMake target(s). +target_link_libraries(your_cool_cpp_bin PUBLIC rust-lib) +``` + +Please see the [Usage chapter](usage.md) for a complete discussion of possible configuration options. 
diff --git a/lib/corrosion/doc/src/setup_corrosion.md b/lib/corrosion/doc/src/setup_corrosion.md new file mode 100644 index 000000000..509be0544 --- /dev/null +++ b/lib/corrosion/doc/src/setup_corrosion.md @@ -0,0 +1,89 @@ +# Adding Corrosion to your project + +There are two fundamental installation methods that are supported by Corrosion - installation as a +CMake package or using it as a subdirectory in an existing CMake project. For CMake versions below +3.19 Corrosion strongly recommends installing the package, either via a package manager or manually +using CMake's installation facilities. +If you have CMake 3.19 or newer, we recommend to use either the [FetchContent](#fetchcontent) or the +[Subdirectory](#subdirectory) method to integrate Corrosion. + +## FetchContent +If you are using CMake >= 3.19 or installation is difficult or not feasible in +your environment, you can use the +[FetchContent](https://cmake.org/cmake/help/latest/module/FetchContent.html) module to include +Corrosion. This will download Corrosion and use it as if it were a subdirectory at configure time. + +In your CMakeLists.txt: +```cmake +include(FetchContent) + +FetchContent_Declare( + Corrosion + GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git + GIT_TAG v0.5 # Optionally specify a commit hash, version tag or branch here +) +# Set any global configuration variables such as `Rust_TOOLCHAIN` before this line! +FetchContent_MakeAvailable(Corrosion) +``` + +## Subdirectory +Corrosion can also be used directly as a subdirectory. This solution may work well for small +projects, but it's discouraged for large projects with many dependencies, especially those which may +themselves use Corrosion. Either copy the Corrosion library into your source tree, being sure to +preserve the `LICENSE` file, or add this repository as a git submodule: +```bash +git submodule add https://github.com/corrosion-rs/corrosion.git +``` + +From there, using Corrosion is easy. 
In your CMakeLists.txt: +```cmake +add_subdirectory(path/to/corrosion) +``` + +## Installation + + +Installation will pre-build all of Corrosion's native tooling (required only for CMake versions +below 3.19) and install it together with Corrosion's CMake files into a standard location. +On CMake >= 3.19 installing Corrosion does not offer any speed advantages, unless the native +tooling option is explicitly enabled. + +### Install from source + +First, download and install Corrosion: +```bash +git clone https://github.com/corrosion-rs/corrosion.git +# Optionally, specify -DCMAKE_INSTALL_PREFIX=<target-install-path> to specify a +# custom installation directory +cmake -Scorrosion -Bbuild -DCMAKE_BUILD_TYPE=Release +cmake --build build --config Release +# This next step may require sudo or admin privileges if you're installing to a system location, +# which is the default. +cmake --install build --config Release +``` + +You'll want to ensure that the install directory is available in your `PATH` or `CMAKE_PREFIX_PATH` +environment variable. This is likely to already be the case by default on a Unix system, but on +Windows it will install to `C:\Program Files (x86)\Corrosion` by default, which will not be in your +`PATH` or `CMAKE_PREFIX_PATH` by default. + +Once Corrosion is installed, and you've ensured the package is available in your `PATH`, you +can use it from your own project like any other package from your CMakeLists.txt: +```cmake +find_package(Corrosion REQUIRED) +``` + +### Package Manager + +#### Homebrew (unofficial) + +Corrosion is available via Homebrew and can be installed via + +```bash +brew install corrosion +``` + +Please note that this package is community maintained. Please also keep in mind that Corrosion follows +semantic versioning and minor version bumps (e.g. `0.3` -> `0.4`) may contain breaking changes, while +Corrosion is still pre `1.0`. +Please read the release notes when upgrading Corrosion. 
diff --git a/lib/corrosion/doc/src/usage.md b/lib/corrosion/doc/src/usage.md new file mode 100644 index 000000000..19bf950d1 --- /dev/null +++ b/lib/corrosion/doc/src/usage.md @@ -0,0 +1,320 @@ +## Usage + +### Automatically import crate targets with `corrosion_import_crate` + +In order to integrate a Rust crate into CMake, you first need to import Rust crates from +a [package] or [workspace]. Corrosion provides `corrosion_import_crate()` to automatically import +crates defined in a Cargo.toml Manifest file: + +{{#include ../../cmake/Corrosion.cmake:corrosion-import-crate}} + +Corrosion will use `cargo metadata` to add a cmake target for each crate defined in the Manifest file +and add the necessary rules to build the targets. +For Rust executables an [`IMPORTED`] executable target is created with the same name as defined in the `[[bin]]` +section of the Manifest corresponding to this target. +If no such name was defined the target name defaults to the Rust package name. +For Rust library targets an [`INTERFACE`] library target is created with the same name as defined in the `[lib]` +section of the Manifest. This `INTERFACE` library links an internal corrosion target, which is either a +`SHARED` or `STATIC` `IMPORTED` library, depending on the Rust crate type (`cdylib` vs `staticlib`). + +The created library targets can be linked into other CMake targets by simply using [target_link_libraries]. + +Corrosion will by default copy the produced Rust artifacts into `${CMAKE_CURRENT_BINARY_DIR}`. The target location +can be changed by setting the CMake `OUTPUT_DIRECTORY` target properties on the imported Rust targets. +See the [OUTPUT_DIRECTORY](#cmake-output_directory-target-properties-and-imported_location) section for more details. + +Many of the options available for `corrosion_import_crate` can also be individually set per +target, see [Per Target options](#per-target-options) for details. 
+ +[package]: https://doc.rust-lang.org/book/ch07-01-packages-and-crates.html +[workspace]: https://doc.rust-lang.org/cargo/reference/workspaces.html +[`IMPORTED`]: https://cmake.org/cmake/help/latest/prop_tgt/IMPORTED.html +[`INTERFACE`]: https://cmake.org/cmake/help/latest/command/add_library.html#interface-libraries +[target_link_libraries]: https://cmake.org/cmake/help/latest/command/target_link_libraries.html + +### Per Target options + +Some configuration options can be specified individually for each target. You can set them via the +`corrosion_set_xxx()` functions specified below: + +- `corrosion_set_env_vars(<target_name> <key1=value1> [... <keyN=valueN>])`: Define environment variables + that should be set during the invocation of `cargo build` for the specified target. Please note that + the environment variable will only be set for direct builds of the target via cmake, and not for any + build where cargo built the crate in question as a dependency for another target. + The environment variables may contain generator expressions. +- `corrosion_add_target_rustflags(<target_name> <rustflag> [... <rustflagN>])`: When building the target, + the `RUSTFLAGS` environment variable will contain the flags added via this function. Please note that any + dependencies (built by cargo) will also see these flags. See also: `corrosion_add_target_local_rustflags`. +- `corrosion_add_target_local_rustflags(target_name rustc_flag [more_flags ...])`: Support setting + rustflags for only the main target (crate) and none of its dependencies. + This is useful in cases where you only need rustflags on the main-crate, but need to set different + flags for different targets. Without "local" Rustflags this would require rebuilds of the + dependencies when switching targets. +- `corrosion_set_hostbuild(<target_name>)`: The target should be compiled for the Host target and ignore any + cross-compile configuration. +- `corrosion_set_features(<target_name> [ALL_FEATURES <Bool>] [NO_DEFAULT_FEATURES] [FEATURES <feature1> ... 
])`: + For a given target, enable specific features via `FEATURES`, toggle `ALL_FEATURES` on or off or disable all features + via `NO_DEFAULT_FEATURES`. For more information on features, please see also the + [cargo reference](https://doc.rust-lang.org/cargo/reference/features.html). +- `corrosion_set_cargo_flags(<target_name> <flag1> [<flag2> ...])`: + For a given target, add options and flags at the end of `cargo build` invocation. This will be appended after any + arguments passed through the `FLAGS` during the crate import. +- `corrosion_set_linker(target_name linker)`: Use `linker` to link the target. + Please note that this only has an effect for targets where the final linker invocation is done + by cargo, i.e. targets where foreign code is linked into rust code and not the other way around. + Please also note that if you are cross-compiling and specify a linker such as `clang`, you are + responsible for also adding a rustflag which adds the necessary `--target=<target-triple>` argument for the + linker. + + +### Global Corrosion Options +All of the following variables are evaluated automatically in most cases. In typical cases you +shouldn't need to alter any of these. If you do want to specify them manually, make sure to set +them **before** `find_package(Corrosion REQUIRED)`. + +- `Rust_TOOLCHAIN:STRING` - Specify a named rustup toolchain to use. Changes to this variable + reset all other options. Default: If the first-found `rustc` is a `rustup` proxy, then the default + rustup toolchain (see `rustup show`) is used. Otherwise, the variable is unset by default. +- `Rust_ROOT:STRING` - CMake provided. Path to a Rust toolchain to use. This is an alternative if + you want to select a specific Rust toolchain, but it's not managed by rustup. Default: Nothing +- `Rust_COMPILER:STRING` - Path to `rustc`, which should be used for compiling or for toolchain + detection (if it is a `rustup` proxy). 
Default: The `rustc` in the first-found toolchain, either + from `rustup`, or from a toolchain available in the user's `PATH`. +- `Rust_RESOLVE_RUSTUP_TOOLCHAINS:BOOL` - If the found `rustc` is a `rustup` proxy, resolve a + concrete path to a specific toolchain managed by `rustup`, according to the `rustup` toolchain + selection rules and other options detailed here. If this option is turned off, the found `rustc` + will be used as-is to compile, even if it is a `rustup` proxy, which might increase compilation + time. Default: `ON` if the found `rustc` is a rustup proxy or a `rustup` managed toolchain was + requested, `OFF` otherwise. Forced `OFF` if `rustup` was not found. +- `Rust_CARGO:STRING` - Path to `cargo`. Default: the `cargo` installed next to `${Rust_COMPILER}`. +- `Rust_CARGO_TARGET:STRING` - The default target triple to build for. Alter for cross-compiling. + Default: On Visual Studio Generator, the matching triple for `CMAKE_VS_PLATFORM_NAME`. Otherwise, + the default target triple reported by `${Rust_COMPILER} --version --verbose`. +- `CORROSION_NATIVE_TOOLING:BOOL` - Use a native tool (written in Rust) as part of Corrosion. This + option increases the configure-time significantly unless Corrosion is installed. + Default: `OFF` if CMake >= 3.19.0. Forced `ON` for CMake < 3.19. + + +#### Developer/Maintainer Options +These options are not used in the course of normal Corrosion usage, but are used to configure how +Corrosion is built and installed. Only applies to Corrosion builds and subdirectory uses. + +- `CORROSION_DEV_MODE:BOOL` - Indicates that Corrosion is being actively developed. Default: `OFF` + if Corrosion is a subdirectory, `ON` if it is the top-level project +- `CORROSION_BUILD_TESTS:BOOL` - Build the Corrosion tests. Default: `Off` if Corrosion is a + subdirectory, `ON` if it is the top-level project +- `CORROSION_GENERATOR_EXECUTABLE:STRING` - Specify a path to the corrosion-generator executable. 
+ This is to support scenarios where it's easier to build corrosion-generator outside of the normal + bootstrap path, such as in the case of package managers that make it very easy to import Rust + crates for fully reproducible, offline builds. +- `CORROSION_INSTALL_EXECUTABLE:BOOL` - Controls whether corrosion-generator is installed with the + package. Default: `ON` with `CORROSION_GENERATOR_EXECUTABLE` unset, otherwise `OFF` + + +### Information provided by Corrosion + +For your convenience, Corrosion sets a number of variables which contain information about the version of the rust +toolchain. You can use the CMake version comparison operators +(e.g. [`VERSION_GREATER_EQUAL`](https://cmake.org/cmake/help/latest/command/if.html#version-comparisons)) on the main +variable (e.g. `if(Rust_VERSION VERSION_GREATER_EQUAL "1.57.0")`), or you can inspect the major, minor and patch +versions individually. +- `Rust_VERSION<_MAJOR|_MINOR|_PATCH>` - The version of rustc. +- `Rust_CARGO_VERSION<_MAJOR|_MINOR|_PATCH>` - The cargo version. +- `Rust_LLVM_VERSION<_MAJOR|_MINOR|_PATCH>` - The LLVM version used by rustc. +- `Rust_IS_NIGHTLY` - 1 if a nightly toolchain is used, otherwise 0. Useful for selecting an unstable feature for a + crate, that is only available on nightly toolchains. +- Cache variables containing information based on the target triple for the selected target + as well as the default host target: + - `Rust_CARGO_TARGET_ARCH`, `Rust_CARGO_HOST_ARCH`: e.g. `x86_64` or `aarch64` + - `Rust_CARGO_TARGET_VENDOR`, `Rust_CARGO_HOST_VENDOR`: e.g. `apple`, `pc`, `unknown` etc. + - `Rust_CARGO_TARGET_OS`, `Rust_CARGO_HOST_OS`: e.g. `darwin`, `linux`, `windows`, `none` + - `Rust_CARGO_TARGET_ENV`, `Rust_CARGO_HOST_ENV`: e.g. `gnu`, `musl` + + + + +### Selecting a custom cargo profile + +[Rust 1.57](https://blog.rust-lang.org/2021/12/02/Rust-1.57.0.html) stabilized the support for custom +[profiles](https://doc.rust-lang.org/cargo/reference/profiles.html). 
If you are using a sufficiently new rust toolchain, +you may select a custom profile by adding the optional argument `PROFILE ` to +`corrosion_import_crate()`. If you do not specify a profile, or you use an older toolchain, corrosion will select +the standard `dev` profile if the CMake config is either `Debug` or unspecified. In all other cases the `release` +profile is chosen for cargo. + +### Importing C-Style Libraries Written in Rust +Corrosion makes it completely trivial to import a crate into an existing CMake project. Consider +a project called [rust2cpp](test/rust2cpp/rust2cpp) with the following file structure: +``` +rust2cpp/ + rust/ + src/ + lib.rs + Cargo.lock + Cargo.toml + CMakeLists.txt + main.cpp +``` + +This project defines a simple Rust lib crate, like so, in [`rust2cpp/rust/Cargo.toml`](test/rust2cpp/rust2cpp/rust/Cargo.toml): +```toml +[package] +name = "rust-lib" +version = "0.1.0" +authors = ["Andrew Gaspar "] +license = "MIT" +edition = "2018" + +[dependencies] + +[lib] +crate-type=["staticlib"] +``` + +In addition to `"staticlib"`, you can also use `"cdylib"`. In fact, you can define both with a +single crate and switch between which is used using the standard +[`BUILD_SHARED_LIBS`](https://cmake.org/cmake/help/latest/variable/BUILD_SHARED_LIBS.html) variable. + +This crate defines a simple crate called `rust-lib`. Importing this crate into your +[CMakeLists.txt](test/rust2cpp/CMakeLists.txt) is trivial: +```cmake +# Note: you must have already included Corrosion for `corrosion_import_crate` to be available. See # the `Installation` section above. + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml) +``` + +Now that you've imported the crate into CMake, all of the executables, static libraries, and dynamic +libraries defined in the Rust can be directly referenced. 
So, merely define your C++ executable as +normal in CMake and add your crate's library using target_link_libraries: +```cmake +add_executable(cpp-exe main.cpp) +target_link_libraries(cpp-exe PUBLIC rust-lib) +``` + +That's it! You're now linking your Rust library to your C++ executable. + +#### Generate Bindings to Rust Library Automatically + +Currently, you must manually declare bindings in your C or C++ program to the exported routines and +types in your Rust project. You can see both sides of this in +[the Rust code](test/rust2cpp/rust2cpp/rust/src/lib.rs) and in [the C++ code](test/rust2cpp/rust2cpp/main.cpp). + +Integration with [cbindgen](https://github.com/eqrion/cbindgen) is +planned for the future. + +### Importing Libraries Written in C and C++ Into Rust + +The rust targets can be imported with `corrosion_import_crate()` into CMake. +For targets where the linker should be invoked by Rust, Corrosion provides +`corrosion_link_libraries()` to link your C/C++ libraries with the Rust target. +For additional linker flags you may use `corrosion_add_target_local_rustflags()` +and pass linker arguments via the `-Clink-args` flag to rustc. These flags will +only be passed to the final rustc invocation and not affect any rust dependencies. + +C bindings can be generated via [bindgen](https://github.com/rust-lang/rust-bindgen). +Corrosion does not offer any direct integration yet, but you can either generate the +bindings in the build-script of your crate, or generate the bindings as a CMake build step +(e.g. a custom target) and add a dependency from `cargo-prebuild_<target_name>` to your +custom target for generating the bindings. + +Example: + +```cmake +# Import your Rust targets +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml) +# Link C/C++ libraries with your Rust target +corrosion_link_libraries(target_name c_library) +# Optionally explicitly define which linker to use. 
+corrosion_set_linker(target_name your_custom_linker) +# Optionally set linker arguments +corrosion_add_target_local_rustflags(target_name "-Clink-args=<linker arguments>") +# Optionally tell CMake that the rust crate depends on another target (e.g. a code generator) +add_dependencies(cargo-prebuild_<target_name> custom_bindings_target) +``` + +### Cross Compiling +Corrosion attempts to support cross-compiling as generally as possible, though not all +configurations are tested. Cross-compiling is explicitly supported in the following scenarios. + +In all cases, you will need to install the standard library for the Rust target triple. When using +Rustup, you can use it to install the target standard library: + +```bash +rustup target add <target-rust-triple> +``` + +If the target triple is automatically derived, Corrosion will print the target during configuration. +For example: + +``` +-- Rust Target: aarch64-linux-android +``` + +#### Windows-to-Windows +Corrosion supports cross-compiling between arbitrary Windows architectures using the Visual Studio +Generator. For example, to cross-compile for ARM64 from any platform, simply set the `-A` +architecture flag: + +```bash +cmake -S. -Bbuild-arm64 -A ARM64 +cmake --build build-arm64 +``` + +Please note that for projects containing a build-script at least Rust 1.54 is required due to a bug +in previous cargo versions, which causes the build-script to incorrectly be built for the target +platform. + +#### Linux-to-Linux +In order to cross-compile on Linux, you will need to install a cross-compiler. For example, on +Ubuntu, to cross compile for 64-bit Little-Endian PowerPC, install +`g++-powerpc64le-linux-gnu` from apt-get: + +```bash +sudo apt install g++-powerpc64le-linux-gnu +``` + +Currently, Corrosion does not automatically determine the target triple while cross-compiling on +Linux, so you'll need to specify a matching `Rust_CARGO_TARGET`. + +```bash +cmake -S. 
-Bbuild-ppc64le -DRust_CARGO_TARGET=powerpc64le-unknown-linux-gnu -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++ +cmake --build build-ppc64le +``` + +#### Android + +Cross-compiling for Android is supported on all platforms with the Makefile and Ninja generators, +and the Rust target triple will automatically be selected. The CMake +[cross-compiling instructions for Android](https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html#cross-compiling-for-android) +apply here. For example, to build for ARM64: + +```bash +cmake -S. -Bbuild-android-arm64 -GNinja -DCMAKE_SYSTEM_NAME=Android \ + -DCMAKE_ANDROID_NDK=/path/to/android-ndk-rxxd -DCMAKE_ANDROID_ARCH_ABI=arm64-v8a +``` + +**Important note:** The Android SDK ships with CMake 3.10 at newest, which Android Studio will +prefer over any CMake you've installed locally. CMake 3.10 is insufficient for using Corrosion, +which requires a minimum of CMake 3.15. If you're using Android Studio to build your project, +follow the instructions in the Android Studio documentation for +[using a specific version of CMake](https://developer.android.com/studio/projects/install-ndk#vanilla_cmake). + + +### CMake `OUTPUT_DIRECTORY` target properties and `IMPORTED_LOCATION` + +Corrosion respects the following `OUTPUT_DIRECTORY` target properties on CMake >= 3.19: +- [ARCHIVE_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/ARCHIVE_OUTPUT_DIRECTORY.html) +- [LIBRARY_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/LIBRARY_OUTPUT_DIRECTORY.html) +- [RUNTIME_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/RUNTIME_OUTPUT_DIRECTORY.html) +- [PDB_OUTPUT_DIRECTORY](https://cmake.org/cmake/help/latest/prop_tgt/PDB_OUTPUT_DIRECTORY.html) + +If the target property is set (e.g. by defining the `CMAKE_XYZ_OUTPUT_DIRECTORY` variable before calling +`corrosion_import_crate()`), corrosion will copy the built rust artifacts to the location defined in the +target property. 
+Due to limitations in CMake these target properties are evaluated in a deferred manner, to +support the user setting the target properties after the call to `corrosion_import_crate()`. +This has the side effect that the `IMPORTED_LOCATION` property will be set late, and users should not +use `get_property` to read `IMPORTED_LOCATION` at configure time. Instead, generator expressions +should be used to get the location of the target artifact. +If `IMPORTED_LOCATION` is needed at configure time users may use `cmake_language(DEFER CALL ...)` to defer +evaluation to after the `IMPORTED_LOCATION` property is set. diff --git a/lib/corrosion/generator/CMakeLists.txt b/lib/corrosion/generator/CMakeLists.txt new file mode 100644 index 000000000..c3f9a329b --- /dev/null +++ b/lib/corrosion/generator/CMakeLists.txt @@ -0,0 +1,64 @@ +message(STATUS "Building CMake Generator for Corrosion - This may take a while") + +set(generator_src "${CMAKE_CURRENT_BINARY_DIR}/legacy_generator_src") +set(generator_destination "${CMAKE_CURRENT_BINARY_DIR}/legacy_generator") +set(generator_build_quiet "") + +file(MAKE_DIRECTORY "${generator_src}") +file(COPY src DESTINATION "${generator_src}") +if(Rust_VERSION VERSION_LESS "1.56") + message(STATUS "Corrosion Generator: Using Compatibility lock file, due to rust version less than 1.56") + file(COPY Compat.Cargo.lock Compat.Cargo.toml DESTINATION "${generator_src}") + file(RENAME "${generator_src}/Compat.Cargo.lock" "${generator_src}/Cargo.lock") + file(RENAME "${generator_src}/Compat.Cargo.toml" "${generator_src}/Cargo.toml") +else() + file(COPY Cargo.lock Cargo.toml DESTINATION "${generator_src}") +endif() + +# Using cargo install has the advantage of caching the build in the user .cargo directory, +# so likely the rebuild will be very cheap even after deleting the build directory. 
+execute_process( + COMMAND ${CMAKE_COMMAND} + -E env + # If the Generator is built at configure of a project (instead of being pre-installed) + # We don't want environment variables like `RUSTFLAGS` affecting the Generator build. + --unset=RUSTFLAGS + "CARGO_BUILD_RUSTC=${RUSTC_EXECUTABLE}" + "${CARGO_EXECUTABLE}" install + --path "." + --root "${generator_destination}" + --locked + ${_CORROSION_QUIET_OUTPUT_FLAG} + WORKING_DIRECTORY "${generator_src}" + RESULT_VARIABLE generator_build_failed +) +if(generator_build_failed) + message(FATAL_ERROR "Building CMake Generator for Corrosion - failed") +else() + message(STATUS "Building CMake Generator for Corrosion - done") +endif() +set(host_executable_suffix "") +if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") + set(host_executable_suffix ".exe") +endif() + +set(_CORROSION_GENERATOR_EXE + "${generator_destination}/bin/corrosion-generator${host_executable_suffix}" +) + +add_executable(Corrosion::Generator IMPORTED GLOBAL) +set_property( + TARGET Corrosion::Generator + PROPERTY IMPORTED_LOCATION "${_CORROSION_GENERATOR_EXE}") + +if (CORROSION_DEV_MODE) + # If you're developing Corrosion, you want to make sure to re-configure whenever the + # generator changes. + file(GLOB_RECURSE _RUST_FILES CONFIGURE_DEPENDS generator/src/*.rs) + file(GLOB _CARGO_FILES CONFIGURE_DEPENDS generator/Cargo.*) + set_property( + DIRECTORY APPEND + PROPERTY CMAKE_CONFIGURE_DEPENDS + ${_RUST_FILES} ${_CARGO_FILES}) +endif() + diff --git a/lib/corrosion/generator/Cargo.lock b/lib/corrosion/generator/Cargo.lock new file mode 100644 index 000000000..26457a0a6 --- /dev/null +++ b/lib/corrosion/generator/Cargo.lock @@ -0,0 +1,183 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "camino" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59e92b5a388f549b863a7bea62612c09f24c8393560709a54558a9abdfb3b9c" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cfa25e60aea747ec7e1124f238816749faa93759c6ff5b31f1ccdda137f4479" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7daec1a2a2129eeba1644b220b4647ec537b0b5d4bfd6876fcc5a540056b592" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "corrosion-generator" +version = "0.1.0" +dependencies = [ + "cargo_metadata", + "clap", + "serde", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "semver" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.188" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" +dependencies = [ + 
"thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" diff --git a/lib/corrosion/generator/Cargo.toml b/lib/corrosion/generator/Cargo.toml new file mode 100644 index 000000000..e22689fab --- /dev/null +++ b/lib/corrosion/generator/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "corrosion-generator" +version = "0.1.0" +authors = ["Andrew Gaspar "] +license = "MIT" +edition = "2018" + +[dependencies] +cargo_metadata = "0.17" +serde = { version = " 1.0.186", features = ["derive"] } + +[dependencies.clap] +version = "2.34" +default-features = false +# Make sure this crate still compiles while it is checked out +# in a sub-directory of a repository that has a Cargo.toml. +[workspace] diff --git a/lib/corrosion/generator/Compat.Cargo.lock b/lib/corrosion/generator/Compat.Cargo.lock new file mode 100644 index 000000000..02274a27c --- /dev/null +++ b/lib/corrosion/generator/Compat.Cargo.lock @@ -0,0 +1,184 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "camino" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c530edf18f37068ac2d977409ed5cd50d53d73bc653c7647b48eb78976ac9ae2" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo-platform" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbdb825da8a5df079a43676dbe042702f1707b1109f713a01420fbb4cc71fa27" +dependencies = [ + "serde", +] + +[[package]] +name = "cargo_metadata" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08a1ec454bc3eead8719cb56e15dbbfecdbc14e4b3a3ae4936cc6e31f5fc0d07" +dependencies = [ + "camino", + "cargo-platform", + "semver", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "corrosion-generator" +version = "0.1.0" +dependencies = [ + "cargo_metadata", + "clap", + "serde", + "thiserror", +] + +[[package]] +name = "itoa" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" + +[[package]] +name = "proc-macro2" +version = "1.0.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d0e1ae9e836cc3beddd63db0df682593d7e2d3d891ae8c9083d2113e1744224" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" + +[[package]] +name = "semver" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.156" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.156" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" +dependencies = [ + 
"thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" diff --git a/lib/corrosion/generator/Compat.Cargo.toml b/lib/corrosion/generator/Compat.Cargo.toml new file mode 100644 index 000000000..4097d9699 --- /dev/null +++ b/lib/corrosion/generator/Compat.Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "corrosion-generator" +version = "0.1.0" +authors = ["Andrew Gaspar "] +license = "MIT" +edition = "2018" + +[dependencies] +cargo_metadata = "0.15" +# The crates below are indirect dependencies of cargo metadata, +# We explicitly specify maximum versions to allow building the generator +# with older toolchains. +# Version 1.0.157 upgrades to syn 2.0 and raises MSRV to 1.56 +serde = { version = ">=1, <1.0.157", default-features=false } +# Version 1.0.40 upgrades to syn 2.0 and raises MSRV to 1.56 +thiserror = { version = ">=1, <1.0.40", default-features=false } + +[dependencies.clap] +version = "2.34" +default-features = false +# Make sure this crate still compiles while it is checked out +# in a sub-directory of a repository that has a Cargo.toml. 
+[workspace] diff --git a/lib/corrosion/generator/src/main.rs b/lib/corrosion/generator/src/main.rs new file mode 100644 index 000000000..df21133e8 --- /dev/null +++ b/lib/corrosion/generator/src/main.rs @@ -0,0 +1,95 @@ +use std::path::PathBuf; + +use cargo_metadata::Metadata; +use clap::{App, Arg}; + +mod subcommands { + pub mod gen_cmake; +} + +use subcommands::*; + +// common options +const MANIFEST_PATH: &str = "manifest-path"; +const CARGO_EXECUTABLE: &str = "cargo-executable"; +const VERBOSE: &str = "verbose"; +const LOCKED: &str = "locked"; +const FROZEN: &str = "frozen"; + +pub struct GeneratorSharedArgs { + pub manifest_path: PathBuf, + pub cargo_executable: PathBuf, + pub metadata: Metadata, + pub verbose: bool, +} + +fn main() -> Result<(), Box> { + let matches = App::new("CMake Generator for Cargo") + .version("0.1") + .author("Andrew Gaspar ") + .about("Generates CMake files for Cargo projects") + .arg( + Arg::with_name(MANIFEST_PATH) + .long("manifest-path") + .value_name("Cargo.toml") + .help("Specifies the target Cargo project") + .required(true) + .takes_value(true), + ) + .arg( + Arg::with_name(CARGO_EXECUTABLE) + .long("cargo") + .value_name("EXECUTABLE") + .required(true) + .help("Path to the cargo executable to use"), + ) + .arg( + Arg::with_name(VERBOSE) + .long("verbose") + .help("Request verbose output"), + ) + .arg( + Arg::with_name(LOCKED) + .long("locked") + .help("Pass --locked to cargo invocations"), + ) + .arg( + Arg::with_name(FROZEN) + .long("frozen") + .help("Pass --frozen to cargo invocations"), + ) + .subcommand(gen_cmake::subcommand()) + .get_matches(); + + let mut cmd = cargo_metadata::MetadataCommand::new(); + cmd.no_deps(); + if matches.is_present(LOCKED) { + cmd.other_options(["--locked".into()]); + } + if matches.is_present(FROZEN) { + cmd.other_options(["--frozen".into()]); + } + + let manifest_path = matches.value_of(MANIFEST_PATH).unwrap(); + let cargo_executable = matches.value_of(CARGO_EXECUTABLE).unwrap(); + + 
cmd.manifest_path(manifest_path); + cmd.cargo_path(cargo_executable); + + let metadata = cmd.exec()?; + + let shared_args = GeneratorSharedArgs { + manifest_path: manifest_path.into(), + cargo_executable: cargo_executable.into(), + metadata, + verbose: matches.is_present(VERBOSE), + }; + + match matches.subcommand() { + (gen_cmake::GEN_CMAKE, Some(matches)) => gen_cmake::invoke(&shared_args, matches)?, + _ => unreachable!(), + }; + + // We should never reach this statement + std::process::exit(1); +} diff --git a/lib/corrosion/generator/src/subcommands/gen_cmake.rs b/lib/corrosion/generator/src/subcommands/gen_cmake.rs new file mode 100644 index 000000000..416d522b5 --- /dev/null +++ b/lib/corrosion/generator/src/subcommands/gen_cmake.rs @@ -0,0 +1,160 @@ +use std::{ + fs::{create_dir_all, File}, + io::{stdout, Write}, + path::Path, + rc::Rc, +}; + +use clap::{App, Arg, ArgMatches, SubCommand}; + +mod target; + +// Command name +pub const GEN_CMAKE: &str = "gen-cmake"; + +// Options +const OUT_FILE: &str = "out-file"; +const CONFIGURATION_ROOT: &str = "configuration-root"; +const CRATES: &str = "crates"; +const IMPORTED_CRATES: &str = "imported-crates"; +const CRATE_TYPE: &str = "crate-type"; +const PASSTHROUGH_ADD_CARGO_BUILD: &str = "passthrough-acb"; + +pub fn subcommand() -> App<'static, 'static> { + SubCommand::with_name(GEN_CMAKE) + .arg( + Arg::with_name(CONFIGURATION_ROOT) + .long("configuration-root") + .value_name("DIRECTORY") + .takes_value(true) + .help( + "Specifies a root directory for configuration folders. E.g. 
Win32 \ + in VS Generator.", + ), + ) + .arg( + Arg::with_name(CRATES) + .long("crates") + .value_name("crates") + .takes_value(true) + .multiple(true) + .require_delimiter(true) + .help("Specifies which crates of the workspace to import"), + ) + .arg( + Arg::with_name(CRATE_TYPE) + .long(CRATE_TYPE) + .value_name("kind") + .possible_values(&["staticlib", "cdylib", "bin"]) + .multiple(true) + .value_delimiter(";") + .help("Only import the specified crate types") + ) + .arg( + Arg::with_name(OUT_FILE) + .short("o") + .long("out-file") + .value_name("FILE") + .help("Output CMake file name. Defaults to stdout."), + ) + .arg( + Arg::with_name(IMPORTED_CRATES) + .long(IMPORTED_CRATES) + .value_name("variable_name") + .takes_value(true) + .help("Save a list of the imported target names into c CMake variable with the given name"), + ) + .arg( + Arg::with_name(PASSTHROUGH_ADD_CARGO_BUILD) + .long(PASSTHROUGH_ADD_CARGO_BUILD) + .takes_value(true) + .multiple(true) + .value_delimiter(std::char::from_u32(0x1f).unwrap().to_string().as_str()) + .help("Passthrough arguments to the _add_cargo_build invocation(s) in CMake") + ) +} + +pub fn invoke( + args: &crate::GeneratorSharedArgs, + matches: &ArgMatches, +) -> Result<(), Box> { + let mut out_file: Box = if let Some(path) = matches.value_of(OUT_FILE) { + let path = Path::new(path); + if let Some(parent) = path.parent() { + create_dir_all(parent).expect("Failed to create directory!"); + } + let file = File::create(path).expect("Unable to open out-file!"); + Box::new(file) + } else { + Box::new(stdout()) + }; + + writeln!( + out_file, + "\ +cmake_minimum_required(VERSION 3.15) +" + )?; + + let crates = matches + .values_of(CRATES) + .map_or(Vec::new(), |c| c.collect()); + let crate_kinds: Option> = matches.values_of(CRATE_TYPE).map(|c| c.collect()); + let workspace_manifest_path = Rc::new(args.manifest_path.clone()); + let targets: Vec<_> = args + .metadata + .packages + .iter() + .filter(|p| { + 
args.metadata.workspace_members.contains(&p.id) + && (crates.is_empty() || crates.contains(&p.name.as_str())) + }) + .cloned() + .map(Rc::new) + .flat_map(|package| { + package + .targets + .iter() + .filter_map(|t| { + target::CargoTarget::from_metadata( + package.clone(), + t.clone(), + workspace_manifest_path.clone(), + &crate_kinds, + ) + }) + .collect::>() + }) + .collect(); + + let passthrough_args: Vec = matches + .values_of(PASSTHROUGH_ADD_CARGO_BUILD) + .map(|values| { + // Add quotes around each argument for CMake to preserve which arguments belong together. + values + .filter(|val| !val.is_empty()) + .map(|val| format!("\"{}\"", val)) + .collect() + }) + .unwrap_or_default(); + let passthrough_str = passthrough_args.join(" "); + + for target in &targets { + target + .emit_cmake_target(&mut out_file, &passthrough_str) + .unwrap(); + } + if let Some(imported_crate_list_name) = matches.value_of(IMPORTED_CRATES) { + let imported_targets: Vec<_> = targets.iter().map(|target| target.target_name()).collect(); + let imported_targets_list = imported_targets.join(";"); + writeln!( + out_file, + "set({} \"{}\")", + imported_crate_list_name, imported_targets_list + )?; + } + + writeln!(out_file)?; + + std::process::exit(0); +} diff --git a/lib/corrosion/generator/src/subcommands/gen_cmake/target.rs b/lib/corrosion/generator/src/subcommands/gen_cmake/target.rs new file mode 100644 index 000000000..489540e6a --- /dev/null +++ b/lib/corrosion/generator/src/subcommands/gen_cmake/target.rs @@ -0,0 +1,226 @@ +use std::error::Error; +use std::path::PathBuf; +use std::rc::Rc; + +#[derive(Clone)] +pub enum CargoTargetType { + Executable, + Library { + has_staticlib: bool, + has_cdylib: bool, + }, +} + +#[derive(Clone)] +pub struct CargoTarget { + cargo_package: Rc, + cargo_target: cargo_metadata::Target, + target_type: CargoTargetType, + workspace_manifest_path: Rc, +} + +impl CargoTargetType { + fn to_string(&self) -> String { + let mut s = String::new(); + match self { + 
Self::Executable => { + s.push_str("bin"); + } + Self::Library { + has_staticlib, + has_cdylib, + } => { + if *has_staticlib { + s.push_str("staticlib") + } + if *has_cdylib { + s.push_str(" cdylib") + } + } + } + s + } +} + +impl CargoTarget { + pub fn from_metadata( + cargo_package: Rc, + cargo_target: cargo_metadata::Target, + workspace_manifest_path: Rc, + // If Some, only import crates if the kind variant is given in crate_kinds. + crate_kinds: &Option>, + ) -> Option { + let filtered_kinds: Vec = cargo_target + .kind + .clone() + .into_iter() + .filter(|kind| match crate_kinds { + None => true, + Some(allowed_kinds_subset) => allowed_kinds_subset.contains(&&**kind), + }) + .collect(); + + let target_type = if filtered_kinds + .iter() + .any(|k| k.as_str() == "staticlib" || k.as_str() == "cdylib") + { + CargoTargetType::Library { + has_staticlib: filtered_kinds.iter().any(|k| k == "staticlib"), + has_cdylib: filtered_kinds.iter().any(|k| k == "cdylib"), + } + } else if filtered_kinds.iter().any(|k| k == "bin") { + CargoTargetType::Executable + } else { + return None; + }; + + Some(Self { + cargo_package, + cargo_target, + target_type, + workspace_manifest_path, + }) + } + + /// Cargo / Rust 1.78 and newer replace dashes with underscores in libraries + /// To make the names consistent across versions we also do the replacement here. + pub(crate) fn target_name(&self) -> String { + match self.target_type { + CargoTargetType::Library { .. 
} => self.cargo_target.name.replace("-", "_"), + _ => self.cargo_target.name.to_string(), + } + } + + pub fn emit_cmake_target( + &self, + out_file: &mut dyn std::io::Write, + passthrough_add_cargo_build: &str, + ) -> Result<(), Box> { + writeln!( + out_file, + "set(byproducts \"\") + set(cargo_build_out_dir \"\") + set(archive_byproducts \"\") + set(shared_lib_byproduct \"\") + set(pdb_byproduct \"\") + set(bin_byproduct \"\") + " + )?; + let ws_manifest = self + .workspace_manifest_path + .to_str() + .expect("Non-utf8 path encountered") + .replace("\\", "/"); + + match self.target_type { + CargoTargetType::Library { + has_staticlib, + has_cdylib, + } => { + assert!(has_staticlib || has_cdylib); + let ws_manifest = self + .workspace_manifest_path + .to_str() + .expect("Non-utf8 path encountered") + .replace("\\", "/"); + let mut lib_kinds = if has_staticlib { "staticlib" } else { "" }.to_string(); + if has_cdylib { + if has_staticlib { + lib_kinds.push(' '); + } + lib_kinds.push_str("cdylib") + } + + writeln!( + out_file, + " + add_library({target_name} INTERFACE) + _corrosion_initialize_properties({target_name}) + _corrosion_add_library_target( + WORKSPACE_MANIFEST_PATH \"{workspace_manifest_path}\" + TARGET_NAME \"{target_name}\" + LIB_KINDS {lib_kinds} + OUT_ARCHIVE_OUTPUT_BYPRODUCTS archive_byproducts + OUT_SHARED_LIB_BYPRODUCTS shared_lib_byproduct + OUT_PDB_BYPRODUCT pdb_byproduct + ) + list(APPEND byproducts + \"${{archive_byproducts}}\" + \"${{shared_lib_byproduct}}\" + \"${{pdb_byproduct}}\" + ) + ", + workspace_manifest_path = ws_manifest, + target_name = self.target_name(), + lib_kinds = lib_kinds, + )?; + } + CargoTargetType::Executable => { + writeln!( + out_file, + " + add_executable({target_name} IMPORTED GLOBAL) + _corrosion_initialize_properties({target_name}) + _corrosion_add_bin_target(\"{workspace_manifest_path}\" \"{target_name}\" + bin_byproduct pdb_byproduct + ) + set(byproducts \"\") + list(APPEND byproducts \"${{bin_byproduct}}\" 
\"${{pdb_byproduct}}\") + ", + workspace_manifest_path = ws_manifest, + target_name = self.target_name(), + )?; + } + }; + let target_kinds = self.target_type.to_string(); + writeln!(out_file, + " + set(cargo_build_out_dir \"\") + _add_cargo_build( + cargo_build_out_dir + PACKAGE \"{package_name}\" + TARGET \"{target_name}\" + MANIFEST_PATH \"{package_manifest_path}\" + WORKSPACE_MANIFEST_PATH \"{workspace_manifest_path}\" + TARGET_KINDS {target_kinds} + BYPRODUCTS \"${{byproducts}}\" + {passthrough_add_cargo_build} + ) + + set_target_properties({target_name} PROPERTIES + INTERFACE_COR_PACKAGE_MANIFEST_PATH \"{package_manifest_path}\" + ) + + if(archive_byproducts) + _corrosion_copy_byproducts( + {target_name} INTERFACE_ARCHIVE_OUTPUT_DIRECTORY \"${{cargo_build_out_dir}}\" \"${{archive_byproducts}}\" FALSE + ) + endif() + if(shared_lib_byproduct) + _corrosion_copy_byproducts( + {target_name} INTERFACE_LIBRARY_OUTPUT_DIRECTORY \"${{cargo_build_out_dir}}\" \"${{shared_lib_byproduct}}\" FALSE + ) + endif() + if(pdb_byproduct) + _corrosion_copy_byproducts( + {target_name} INTERFACE_PDB_OUTPUT_DIRECTORY \"${{cargo_build_out_dir}}\" \"${{pdb_byproduct}}\" FALSE + ) + endif() + if(bin_byproduct) + _corrosion_copy_byproducts( + {target_name} INTERFACE_RUNTIME_OUTPUT_DIRECTORY \"${{cargo_build_out_dir}}\" \"${{bin_byproduct}}\" TRUE + ) + endif() + set_property(TARGET {target_name} PROPERTY INTERFACE_COR_CARGO_PACKAGE_NAME {package_name} ) + ", + package_name = self.cargo_package.name, + target_name = self.target_name(), + package_manifest_path = self.cargo_package.manifest_path.as_str().replace("\\", "/"), + workspace_manifest_path = ws_manifest, + target_kinds = target_kinds, + passthrough_add_cargo_build = passthrough_add_cargo_build, + + )?; + Ok(()) + } +} diff --git a/lib/corrosion/test/CMakeLists.txt b/lib/corrosion/test/CMakeLists.txt new file mode 100644 index 000000000..fd036e70d --- /dev/null +++ b/lib/corrosion/test/CMakeLists.txt @@ -0,0 +1,183 @@ +# This 
# CORROSION_TESTS is currently used to prevent recursion: the nested configure
# of Corrosion registered further down in this file passes -DCORROSION_TESTS=OFF.
option(CORROSION_TESTS "Enable Corrosion tests" ON)
mark_as_advanced(CORROSION_TESTS)
if(NOT CORROSION_TESTS)
    return()
endif()

option(CORROSION_TESTS_CXXBRIDGE
    "Build cxxbridge tests which requires cxxbridge executable being available"
    OFF)
option(CORROSION_TESTS_KEEP_BUILDDIRS
    "By default corrosion tests will cleanup after themselves. This option limits the cleaning up to the
    target directories and will keep the build directories, which may be useful for caching."
    OFF)
# Mark the option declared above as advanced. The previous call named
# CORROSION_TESTS_NO_CLEANUP, which is not declared anywhere in this file.
mark_as_advanced(CORROSION_TESTS_KEEP_BUILDDIRS)

set(test_install_path "${CMAKE_CURRENT_BINARY_DIR}/test-install-corrosion")

# Header included by every test project. Depending on
# CORROSION_TESTS_FIND_CORROSION it either finds the Corrosion installation in
# `test_install_path` or adds the parent directory as a subdirectory.
set(test_header_contents
    "option(CORROSION_TESTS_FIND_CORROSION \"Use Corrosion as a subdirectory\" OFF)"
    "if (CORROSION_TESTS_FIND_CORROSION)"
    "    set(CMAKE_PREFIX_PATH \"${test_install_path}\" CACHE INTERNAL \"\" FORCE)"
    "    find_package(Corrosion REQUIRED PATHS \"${test_install_path}\" NO_CMAKE_SYSTEM_PATH)"
    "else()"
    "    add_subdirectory(\"${CMAKE_CURRENT_SOURCE_DIR}/..\" corrosion)"
    "endif()"
)

# The list elements above are the individual lines of the header.
string(REPLACE ";" "\n" test_header_contents "${test_header_contents}")

# Relative path on purpose: the test projects pull this file in with
# `include(../../test_header.cmake)` relative to their own source directory.
file(WRITE test_header.cmake "${test_header_contents}")

option(CORROSION_TESTS_INSTALL_CORROSION
    "Install Corrosion to a test directory and let tests use the installed Corrosion"
    OFF)
# Registers the CTest entries for one test project:
#   <test_name>_build      configures and builds the project through
#                          ConfigureAndBuild.cmake in script mode,
#   <test_name>_run_<bin>  runs each executable listed in `bin_names`,
#   <test_name>_cleanup*   removes the build output afterwards.
#
# Optional keyword: TEST_SRC_DIR <dir> - source directory of the test project,
# relative to the calling list file. Defaults to `test_name`.
# All remaining arguments are forwarded to ConfigureAndBuild.cmake unchanged.
function(corrosion_tests_add_test test_name bin_names)
    cmake_parse_arguments(PARSE_ARGV 2 TST "" "TEST_SRC_DIR" "")
    set(pass_through_arguments "${TST_UNPARSED_ARGUMENTS}")

    # In the future we could add multiple tests here for different configurations
    # (generator, build mode, rust version ...) which would allow us to simplify
    # the github job matrix
    set(test_dir "${test_name}")
    if(TST_TEST_SRC_DIR)
        set(test_dir "${TST_TEST_SRC_DIR}")
    endif()

    set(test_build_dir "${CMAKE_CURRENT_BINARY_DIR}/build-${test_name}")
    set(test_build_fixture "build_fixture_${test_name}")

    # Optional KEY VALUE pairs, only passed on when the value is known.
    if(CMAKE_CROSSCOMPILING)
        set(TEST_SYSTEM_NAME SYSTEM_NAME "${CMAKE_SYSTEM_NAME}")
    endif()
    if(CMAKE_C_COMPILER)
        set(TEST_C_COMPILER "C_COMPILER" "${CMAKE_C_COMPILER}")
    endif()
    if(CMAKE_CXX_COMPILER)
        set(TEST_CXX_COMPILER "CXX_COMPILER" "${CMAKE_CXX_COMPILER}")
    endif()
    if(CMAKE_GENERATOR_PLATFORM)
        set(TEST_GENERATOR_PLATFORM "GENERATOR_PLATFORM" "${CMAKE_GENERATOR_PLATFORM}")
    endif()
    if(CORROSION_GENERATOR_EXECUTABLE)
        # Mainly used in CI to build the native generator once and then reuse it for all tests
        set(TEST_GENERATOR_BIN EXTERNAL_CORROSION_GENERATOR "${CORROSION_GENERATOR_EXECUTABLE}")
    endif()

    add_test(NAME "${test_name}_build"
        COMMAND
            ${CMAKE_COMMAND}
            -P "${CMAKE_SOURCE_DIR}/test/ConfigureAndBuild.cmake"
            SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/${test_dir}"
            BINARY_DIR "${test_build_dir}"
            GENERATOR "${CMAKE_GENERATOR}"
            RUST_TOOLCHAIN "${Rust_TOOLCHAIN}"
            CARGO_TARGET "${Rust_CARGO_TARGET}"
            "${TEST_SYSTEM_NAME}"
            "${TEST_C_COMPILER}"
            "${TEST_CXX_COMPILER}"
            "${TEST_GENERATOR_PLATFORM}"
            "${TEST_GENERATOR_BIN}"
            ${pass_through_arguments}

        COMMAND_EXPAND_LISTS
    )
    set_tests_properties("${test_name}_build" PROPERTIES FIXTURES_SETUP "${test_build_fixture}")
    if(CORROSION_TESTS_INSTALL_CORROSION)
        set_tests_properties("${test_name}_build" PROPERTIES FIXTURES_REQUIRED "fixture_corrosion_install")
    endif()

    foreach(bin ${bin_names})
        set(bin_filename "${bin}")
        if(WIN32)
            string(APPEND bin_filename ".exe")
        endif()
        set(run_test "${test_name}_run_${bin}")
        add_test(NAME "${run_test}" COMMAND "${test_build_dir}/${bin_filename}")
        set_tests_properties("${run_test}" PROPERTIES FIXTURES_REQUIRED "${test_build_fixture}")
        # CMAKE_CROSSCOMPILING is not set when cross-compiling with VS (via -A flag).
        # Todo: We could run x86 binaries on x64 hosts.
        if(CMAKE_CROSSCOMPILING OR CMAKE_VS_PLATFORM_NAME)
            # Todo: In the future we could potentially run some tests with qemu.
            set_tests_properties("${run_test}" PROPERTIES DISABLED TRUE)
        endif()
    endforeach()

    if(CORROSION_TESTS_KEEP_BUILDDIRS)
        # Keep the build directory, but clean the build artifacts and remove the cargo directory.
        add_test(NAME "${test_name}_cleanup_artifacts"
            COMMAND "${CMAKE_COMMAND}" --build "${test_build_dir}" --target clean
        )
        add_test(NAME "${test_name}_cleanup_cargo"
            COMMAND "${CMAKE_COMMAND}" -E remove_directory "${test_build_dir}/cargo"
        )
        set_tests_properties(
            "${test_name}_cleanup_artifacts"
            "${test_name}_cleanup_cargo"
            PROPERTIES FIXTURES_CLEANUP "${test_build_fixture}"
        )
    else()
        add_test(NAME "${test_name}_cleanup"
            COMMAND "${CMAKE_COMMAND}" -E remove_directory "${test_build_dir}"
        )
        set_tests_properties("${test_name}_cleanup" PROPERTIES FIXTURES_CLEANUP "${test_build_fixture}")
    endif()
endfunction()
# CMake script to configure and build a test project.
#
# Run in script mode:
#   cmake -P ConfigureAndBuild.cmake SOURCE_DIR <dir> BINARY_DIR <dir> GENERATOR <generator> [...]
# The supported keywords are listed in `options`, `oneValueArgs` and
# `multiValueArgs` below.

set(TEST_ARG_LIST)

# Expect actual arguments to start at index 3 (cmake -P <script>).
# foreach(RANGE) includes the stop value, so the last valid index is
# CMAKE_ARGC - 1. The guard avoids an inverted range when no arguments follow
# the script name.
if(CMAKE_ARGC GREATER 3)
    math(EXPR TEST_LAST_ARG_INDEX "${CMAKE_ARGC} - 1")
    foreach(ARG_INDEX RANGE 3 ${TEST_LAST_ARG_INDEX})
        list(APPEND TEST_ARG_LIST "${CMAKE_ARGV${ARG_INDEX}}")
    endforeach()
endif()

set(options "USE_INSTALLED_CORROSION")
set(oneValueArgs
    SOURCE_DIR
    BINARY_DIR
    GENERATOR
    GENERATOR_PLATFORM
    RUST_TOOLCHAIN
    CARGO_TARGET
    C_COMPILER
    CXX_COMPILER
    SYSTEM_NAME
    EXTERNAL_CORROSION_GENERATOR
    CARGO_PROFILE
)
set(multiValueArgs "PASS_THROUGH_ARGS")
cmake_parse_arguments(TEST "${options}" "${oneValueArgs}"
                      "${multiValueArgs}" ${TEST_ARG_LIST} )

# The binary directory is removed recursively below, so refuse to continue
# without explicit directories.
if("${TEST_SOURCE_DIR}" STREQUAL "" OR "${TEST_BINARY_DIR}" STREQUAL "")
    message(FATAL_ERROR "SOURCE_DIR and BINARY_DIR must both be specified.")
endif()

# Translate the parsed values into command line arguments for the configure
# step. Variables that stay unset expand to nothing in the command below.
if(TEST_CARGO_TARGET)
    set(TEST_Rust_CARGO_TARGET "-DRust_CARGO_TARGET=${TEST_CARGO_TARGET}")
endif()
if(TEST_USE_INSTALLED_CORROSION)
    set(TEST_CORROSION_INSTALL "-DCORROSION_TESTS_FIND_CORROSION=ON")
endif()
if(TEST_GENERATOR_PLATFORM)
    set(TEST_GENERATOR_PLATFORM "-A${TEST_GENERATOR_PLATFORM}")
endif()
if(TEST_C_COMPILER)
    set(TEST_C_COMPILER "-DCMAKE_C_COMPILER=${TEST_C_COMPILER}")
endif()
if(TEST_CXX_COMPILER)
    set(TEST_CXX_COMPILER "-DCMAKE_CXX_COMPILER=${TEST_CXX_COMPILER}")
endif()
if(TEST_SYSTEM_NAME)
    set(TEST_SYSTEM_NAME "-DCMAKE_SYSTEM_NAME=${TEST_SYSTEM_NAME}")
endif()
if(TEST_EXTERNAL_CORROSION_GENERATOR)
    set(TEST_EXTERNAL_CORROSION_GENERATOR
        "-DCORROSION_GENERATOR_EXECUTABLE=${TEST_EXTERNAL_CORROSION_GENERATOR}"
    )
endif()
if(TEST_CARGO_PROFILE)
    set(TEST_CARGO_PROFILE "-DCARGO_PROFILE=${TEST_CARGO_PROFILE}")
endif()

# Remove old binary directory
file(REMOVE_RECURSE "${TEST_BINARY_DIR}")

file(MAKE_DIRECTORY "${TEST_BINARY_DIR}")

message(STATUS "TEST_BINARY_DIRECTORY: ${TEST_BINARY_DIR}")

execute_process(
    COMMAND
        "${CMAKE_COMMAND}"
            "-G${TEST_GENERATOR}"
            "-DRust_TOOLCHAIN=${TEST_RUST_TOOLCHAIN}"
            --log-level Debug
            ${TEST_Rust_CARGO_TARGET}
            ${TEST_CORROSION_INSTALL}
            ${TEST_GENERATOR_PLATFORM}
            ${TEST_C_COMPILER}
            ${TEST_CXX_COMPILER}
            ${TEST_SYSTEM_NAME}
            ${TEST_EXTERNAL_CORROSION_GENERATOR}
            ${TEST_CARGO_PROFILE}
            ${TEST_PASS_THROUGH_ARGS}
            -S "${TEST_SOURCE_DIR}"
            -B "${TEST_BINARY_DIR}"
    COMMAND_ECHO STDOUT
    RESULT_VARIABLE EXIT_CODE
)

if (NOT "${EXIT_CODE}" EQUAL 0)
    message(FATAL_ERROR "Configure step failed. Exit code: `${EXIT_CODE}`")
endif()

# For these generators every listed configuration is built in turn.
if ("${TEST_GENERATOR}" STREQUAL "Ninja Multi-Config"
    OR "${TEST_GENERATOR}" MATCHES "Visual Studio"
    )
    foreach(config Debug Release RelWithDebInfo)
        execute_process(
            COMMAND "${CMAKE_COMMAND}"
                --build "${TEST_BINARY_DIR}"
                --config "${config}"
            COMMAND_ECHO STDOUT
            RESULT_VARIABLE EXIT_CODE
        )
        if (NOT "${EXIT_CODE}" EQUAL 0)
            message(FATAL_ERROR "Build step failed for config `${config}`. "
                    "Exit code: `${EXIT_CODE}`")
        endif()
    endforeach()
else()
    execute_process(
        COMMAND "${CMAKE_COMMAND}" --build "${TEST_BINARY_DIR}"
        COMMAND_ECHO STDOUT
        RESULT_VARIABLE EXIT_CODE
    )
    if (NOT "${EXIT_CODE}" EQUAL 0)
        message(FATAL_ERROR "Build step failed. Exit code: `${EXIT_CODE}`")
    endif()
endif()
# Test project for passing cargo flags. The Rust crate refuses to compile
# unless the features `one`, `two` and `three` are all enabled; each of them
# is supplied through a different mechanism below.
cmake_minimum_required(VERSION 3.15)
project(test_project VERSION 0.1.0)
include(../../test_header.cmake)

# Feature `one`: passed directly when importing the crate.
corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml FLAGS --features one)

add_executable(flags-exe main.cpp)
target_link_libraries(flags-exe PUBLIC flags_lib)
# Feature `two`: plain flags set on the imported target.
corrosion_set_cargo_flags(flags_lib --features two)
# Feature `three`: supplied through a generator expression that reads the
# custom `more_flags` target property populated below.
corrosion_set_cargo_flags(flags_lib $<TARGET_PROPERTY:flags_lib,more_flags>)

set_property(
    TARGET flags_lib
    APPEND
    PROPERTY more_flags --features three
)
+version = 3 + +[[package]] +name = "flags-lib" +version = "0.1.0" diff --git a/lib/corrosion/test/cargo_flags/cargo_flags/rust/Cargo.toml b/lib/corrosion/test/cargo_flags/cargo_flags/rust/Cargo.toml new file mode 100644 index 000000000..479ff413f --- /dev/null +++ b/lib/corrosion/test/cargo_flags/cargo_flags/rust/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "flags-lib" +version = "0.1.0" +edition = "2018" + +[lib] +crate-type=["staticlib"] + +[features] + +one = [] +two = [] +three = [] diff --git a/lib/corrosion/test/cargo_flags/cargo_flags/rust/src/lib.rs b/lib/corrosion/test/cargo_flags/cargo_flags/rust/src/lib.rs new file mode 100644 index 000000000..7afea31a0 --- /dev/null +++ b/lib/corrosion/test/cargo_flags/cargo_flags/rust/src/lib.rs @@ -0,0 +1,14 @@ +use std::os::raw::c_char; + +#[no_mangle] +pub extern "C" fn rust_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I am Rust!", name); + + #[cfg(not(feature = "one"))] + compile_error!("Feature one is not enabled"); + #[cfg(not(feature = "two"))] + compile_error!("Feature two is not enabled"); + #[cfg(not(feature = "three"))] + compile_error!("Feature three is not enabled"); +} diff --git a/lib/corrosion/test/cbindgen/CMakeLists.txt b/lib/corrosion/test/cbindgen/CMakeLists.txt new file mode 100644 index 000000000..86cabf5b1 --- /dev/null +++ b/lib/corrosion/test/cbindgen/CMakeLists.txt @@ -0,0 +1,9 @@ +corrosion_tests_add_test(cbindgen_rust2cpp "cpp-exe" TEST_SRC_DIR rust2cpp) + +set_tests_properties(cbindgen_rust2cpp_run_cpp-exe PROPERTIES PASS_REGULAR_EXPRESSION + "^add_point Result: Point { x: 100, y: 100 }\r?\n$" +) +# Todo: We also should add a cpp2rust test with the following setup: +# - A rust lib that is used by a rust executable +# - cbindgen creates bindings for the rust-lib +# - c++ code uses the rust lib and is used in turn by the rust bin. 
diff --git a/lib/corrosion/test/cbindgen/rust2cpp/CMakeLists.txt b/lib/corrosion/test/cbindgen/rust2cpp/CMakeLists.txt new file mode 100644 index 000000000..71967796c --- /dev/null +++ b/lib/corrosion/test/cbindgen/rust2cpp/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml) +corrosion_experimental_cbindgen(TARGET rust_lib HEADER_NAME "rust-lib.h") + +add_executable(cpp-exe main.cpp) +set_property(TARGET cpp-exe PROPERTY CXX_STANDARD 11) +target_link_libraries(cpp-exe PUBLIC rust_lib) diff --git a/lib/corrosion/test/cbindgen/rust2cpp/main.cpp b/lib/corrosion/test/cbindgen/rust2cpp/main.cpp new file mode 100644 index 000000000..1085c4afb --- /dev/null +++ b/lib/corrosion/test/cbindgen/rust2cpp/main.cpp @@ -0,0 +1,19 @@ +#include "rust-lib.h" +#include + +int main(int argc, char **argv) { + assert(is_magic_number(MAGIC_NUMBER)); + struct Point p1, p2; + p1.x = 54; + p2.x = 46; + p1.y = 34; + p2.y = 66; + add_point(&p1, &p2); + assert(p1.x == 100); + assert(p2.x == 46); + assert(p1.y == 100); + assert(p2.y == 66); + add_point(&p1, NULL); + assert(p1.x == 100); + assert(p1.y == 100); +} diff --git a/lib/corrosion/test/cbindgen/rust2cpp/rust/Cargo.lock b/lib/corrosion/test/cbindgen/rust2cpp/rust/Cargo.lock new file mode 100644 index 000000000..5dc1732e2 --- /dev/null +++ b/lib/corrosion/test/cbindgen/rust2cpp/rust/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
/// Constant that the C++ side passes back to `is_magic_number`.
pub const MAGIC_NUMBER: u64 = 0xABCD_EFAB;

/// Two-dimensional point with a C-compatible layout.
#[derive(Debug)]
#[repr(C)]
pub struct Point {
    x: u64,
    y: u64,
}

impl Point {
    /// Adds `rhs` to `self` component-wise, wrapping around on overflow.
    pub(crate) fn add(&mut self, rhs: &Point) {
        let Point { x: dx, y: dy } = *rhs;
        self.x = self.x.wrapping_add(dx);
        self.y = self.y.wrapping_add(dy);
    }
}

/// Adds `rhs` to `lhs` in place and prints the result.
///
/// Does nothing if either argument is `None`.
#[no_mangle]
pub extern "C" fn add_point(lhs: Option<&mut Point>, rhs: Option<&Point>) {
    let (target, delta) = match (lhs, rhs) {
        (Some(target), Some(delta)) => (target, delta),
        _ => return,
    };
    target.add(delta);
    // Print something so we can let Ctest assert the output.
    println!("add_point Result: {:?}", target);
}

/// Returns `true` if `num` equals `MAGIC_NUMBER`.
///
/// Simple test of whether the constant was exported by cbindgen correctly.
#[no_mangle]
pub extern "C" fn is_magic_number(num: u64) -> bool {
    MAGIC_NUMBER == num
}
#include <iostream>
#include <stdint.h>

// Prints a greeting addressed to `name`, identifying this library as number 2.
extern "C" void cpp_function2(char const *name) {
    std::cout << "Hello, " << name << "! I am Cpp library Number 2!\n";
}

// Returns the constant 42. The Rust dependency crate declares this function
// in an `extern "C"` block and asserts on the returned value.
extern "C" uint32_t get_42() {
    uint32_t v = 42;
    return v;
}
+version = 3 + +[[package]] +name = "rust-dependency" +version = "0.1.0" + +[[package]] +name = "rust-exe" +version = "0.1.0" +dependencies = [ + "rust-dependency", +] diff --git a/lib/corrosion/test/cpp2rust/cpp2rust/rust/Cargo.toml b/lib/corrosion/test/cpp2rust/cpp2rust/rust/Cargo.toml new file mode 100644 index 000000000..21bfb64fc --- /dev/null +++ b/lib/corrosion/test/cpp2rust/cpp2rust/rust/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "rust-exe" +version = "0.1.0" +authors = ["Andrew Gaspar "] +license = "MIT" +edition = "2018" + +[dependencies] +rust-dependency = { path = "rust_dependency" } diff --git a/lib/corrosion/test/cpp2rust/cpp2rust/rust/build.rs b/lib/corrosion/test/cpp2rust/cpp2rust/rust/build.rs new file mode 100644 index 000000000..9dfeaa0be --- /dev/null +++ b/lib/corrosion/test/cpp2rust/cpp2rust/rust/build.rs @@ -0,0 +1,4 @@ +// Build-scripts also need to be linked, so just add a dummy buildscript ensuring this works. +fn main() { + println!("Build-script is running.") +} diff --git a/lib/corrosion/test/cpp2rust/cpp2rust/rust/rust_dependency/Cargo.toml b/lib/corrosion/test/cpp2rust/cpp2rust/rust/rust_dependency/Cargo.toml new file mode 100644 index 000000000..89acbbeab --- /dev/null +++ b/lib/corrosion/test/cpp2rust/cpp2rust/rust/rust_dependency/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "rust-dependency" +version = "0.1.0" +license = "MIT" +edition = "2018" + +[dependencies] + diff --git a/lib/corrosion/test/cpp2rust/cpp2rust/rust/rust_dependency/src/lib.rs b/lib/corrosion/test/cpp2rust/cpp2rust/rust/rust_dependency/src/lib.rs new file mode 100644 index 000000000..304cfb900 --- /dev/null +++ b/lib/corrosion/test/cpp2rust/cpp2rust/rust/rust_dependency/src/lib.rs @@ -0,0 +1,8 @@ + +extern "C" { + fn get_42() -> u32; +} +pub fn calls_ffi() { + let res = unsafe { get_42()}; + assert_eq!(res, 42); +} diff --git a/lib/corrosion/test/cpp2rust/cpp2rust/rust/src/bin/rust-exe.rs b/lib/corrosion/test/cpp2rust/cpp2rust/rust/src/bin/rust-exe.rs new file 
mode 100644 index 000000000..43950e7ec --- /dev/null +++ b/lib/corrosion/test/cpp2rust/cpp2rust/rust/src/bin/rust-exe.rs @@ -0,0 +1,27 @@ +use std::os::raw::c_char; + +extern "C" { + fn cpp_function(name: *const c_char); + fn cpp_function2(name: *const c_char); + fn cpp_function3(name: *const c_char); + +} + +fn greeting(name: &str) { + let name = std::ffi::CString::new(name).unwrap(); + unsafe { + cpp_function(name.as_ptr()); + cpp_function2(name.as_ptr()); + cpp_function3(name.as_ptr()); + } +} + +fn main() { + let args = std::env::args().skip(1).collect::>(); + if args.len() >= 1 { + greeting(&args[0]); + } else { + greeting("Rust"); + } + rust_dependency::calls_ffi(); +} diff --git a/lib/corrosion/test/crate_type/CMakeLists.txt b/lib/corrosion/test/crate_type/CMakeLists.txt new file mode 100644 index 000000000..771cc1976 --- /dev/null +++ b/lib/corrosion/test/crate_type/CMakeLists.txt @@ -0,0 +1,6 @@ +corrosion_tests_add_test(crate_type "cpp-exe") + + +set_tests_properties("crate_type_run_cpp-exe" PROPERTIES PASS_REGULAR_EXPRESSION + "Hello from lib 1!\r?\nHello from lib 2!" 
+ ) diff --git a/lib/corrosion/test/crate_type/crate_type/CMakeLists.txt b/lib/corrosion/test/crate_type/crate_type/CMakeLists.txt new file mode 100644 index 000000000..281d5c4f7 --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +# Add --crate-type to ensure that only the specified type of library is built and no error is thrown +corrosion_import_crate(MANIFEST_PATH proj1/Cargo.toml CRATE_TYPES staticlib FLAGS --crate-type=staticlib) +corrosion_import_crate(MANIFEST_PATH proj2/Cargo.toml CRATE_TYPES cdylib FLAGS --crate-type=cdylib) + +add_executable(cpp-exe main.cpp) +target_link_libraries(cpp-exe proj1) +target_link_libraries(cpp-exe proj2) diff --git a/lib/corrosion/test/crate_type/crate_type/main.cpp b/lib/corrosion/test/crate_type/crate_type/main.cpp new file mode 100644 index 000000000..65113193a --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/main.cpp @@ -0,0 +1,8 @@ +extern "C" void rust_function1(); +extern "C" void rust_function2(); + +int main() { + rust_function1(); + rust_function2(); + return 0; +} diff --git a/lib/corrosion/test/crate_type/crate_type/proj1/Cargo.lock b/lib/corrosion/test/crate_type/crate_type/proj1/Cargo.lock new file mode 100644 index 000000000..dfcd813dc --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/proj1/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "proj1" +version = "0.1.0" diff --git a/lib/corrosion/test/crate_type/crate_type/proj1/Cargo.toml b/lib/corrosion/test/crate_type/crate_type/proj1/Cargo.toml new file mode 100644 index 000000000..81ad88ffe --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/proj1/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "proj1" +version = "0.1.0" +edition = "2018" + +[dependencies] + +[lib] +crate-type=["staticlib", "cdylib"] + diff --git a/lib/corrosion/test/crate_type/crate_type/proj1/src/lib.rs b/lib/corrosion/test/crate_type/crate_type/proj1/src/lib.rs new file mode 100644 index 000000000..433521903 --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/proj1/src/lib.rs @@ -0,0 +1,4 @@ +#[no_mangle] +pub extern "C" fn rust_function1() { + println!("Hello from lib 1!"); +} diff --git a/lib/corrosion/test/crate_type/crate_type/proj2/Cargo.lock b/lib/corrosion/test/crate_type/crate_type/proj2/Cargo.lock new file mode 100644 index 000000000..780ef1f49 --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/proj2/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "proj2" +version = "0.1.0" diff --git a/lib/corrosion/test/crate_type/crate_type/proj2/Cargo.toml b/lib/corrosion/test/crate_type/crate_type/proj2/Cargo.toml new file mode 100644 index 000000000..74c7d6175 --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/proj2/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "proj2" +version = "0.1.0" +edition = "2018" + +[dependencies] + +[lib] +crate-type=["staticlib", "cdylib"] diff --git a/lib/corrosion/test/crate_type/crate_type/proj2/src/lib.rs b/lib/corrosion/test/crate_type/crate_type/proj2/src/lib.rs new file mode 100644 index 000000000..23048680c --- /dev/null +++ b/lib/corrosion/test/crate_type/crate_type/proj2/src/lib.rs @@ -0,0 +1,4 @@ +#[no_mangle] +pub extern "C" fn rust_function2() { + println!("Hello from lib 2!"); +} diff --git a/lib/corrosion/test/custom_profiles/CMakeLists.txt b/lib/corrosion/test/custom_profiles/CMakeLists.txt new file mode 100644 index 000000000..e0d76d893 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/CMakeLists.txt @@ -0,0 +1,27 @@ +# The tests in this folder test specifying the cargo profile name via the --profile option. +# The built-in `test` and `bench` profiles are _not_ supported, because they output +# artifacts to a different location and add a hash to the artifact name. +if(Rust_VERSION VERSION_GREATER_EQUAL 1.57.0) + + corrosion_tests_add_test(custom_profiles_global "custom-profile-exe" TEST_SRC_DIR custom_profiles) + corrosion_tests_add_test(custom_profiles_target_specific "custom-profile-exe" + TEST_SRC_DIR custom_profiles + PASS_THROUGH_ARGS -DCORROSION_TEST_USE_TARGET_SPECIFIC_OVERRIDE=ON + ) + corrosion_tests_add_test(dev_profile "dev_bin" TEST_SRC_DIR basic_profiles CARGO_PROFILE dev) + corrosion_tests_add_test(release_profile "release_bin" TEST_SRC_DIR basic_profiles CARGO_PROFILE release) + + set_tests_properties("custom_profiles_global_run_custom-profile-exe" PROPERTIES PASS_REGULAR_EXPRESSION + "^Hello, Cpp! 
I'm Rust!\r?\n$" + ) + set_tests_properties("custom_profiles_target_specific_run_custom-profile-exe" PROPERTIES PASS_REGULAR_EXPRESSION + "^Hello, Cpp! I'm Rust!\r?\n$" + ) + set_tests_properties("dev_profile_run_dev_bin" PROPERTIES PASS_REGULAR_EXPRESSION + "^Hello, Cpp! I'm Rust!\r?\n$" + ) + set_tests_properties("release_profile_run_release_bin" PROPERTIES PASS_REGULAR_EXPRESSION + "^Hello, Cpp! I'm Rust!\r?\n$" + ) + +endif() diff --git a/lib/corrosion/test/custom_profiles/basic_profiles/CMakeLists.txt b/lib/corrosion/test/custom_profiles/basic_profiles/CMakeLists.txt new file mode 100644 index 000000000..d020df4f9 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/basic_profiles/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +if(NOT DEFINED CARGO_PROFILE) + message(FATAL_ERROR "Test internal error. The test should be called with the CARGO_PROFILE parameter.") +endif() + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml PROFILE ${CARGO_PROFILE}) + +add_executable(${CARGO_PROFILE}_bin main.cpp) +target_link_libraries(${CARGO_PROFILE}_bin PUBLIC cargo_profiles_lib) diff --git a/lib/corrosion/test/custom_profiles/basic_profiles/main.cpp b/lib/corrosion/test/custom_profiles/basic_profiles/main.cpp new file mode 100644 index 000000000..b714e76ee --- /dev/null +++ b/lib/corrosion/test/custom_profiles/basic_profiles/main.cpp @@ -0,0 +1,6 @@ +extern "C" void rust_function(char const *name); + + +int main(int argc, char **argv) { + rust_function("Cpp"); +} diff --git a/lib/corrosion/test/custom_profiles/basic_profiles/rust/Cargo.lock b/lib/corrosion/test/custom_profiles/basic_profiles/rust/Cargo.lock new file mode 100644 index 000000000..9b747a8a4 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/basic_profiles/rust/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "cargo-profiles-lib" +version = "0.1.0" diff --git a/lib/corrosion/test/custom_profiles/basic_profiles/rust/Cargo.toml b/lib/corrosion/test/custom_profiles/basic_profiles/rust/Cargo.toml new file mode 100644 index 000000000..282029f42 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/basic_profiles/rust/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "cargo-profiles-lib" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type=["staticlib"] diff --git a/lib/corrosion/test/custom_profiles/basic_profiles/rust/src/lib.rs b/lib/corrosion/test/custom_profiles/basic_profiles/rust/src/lib.rs new file mode 100644 index 000000000..1a51950ab --- /dev/null +++ b/lib/corrosion/test/custom_profiles/basic_profiles/rust/src/lib.rs @@ -0,0 +1,8 @@ +use std::os::raw::c_char; + +#[no_mangle] +pub extern "C" fn rust_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust!", name); +} + diff --git a/lib/corrosion/test/custom_profiles/custom_profiles/CMakeLists.txt b/lib/corrosion/test/custom_profiles/custom_profiles/CMakeLists.txt new file mode 100644 index 000000000..c9b5cd2e1 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/custom_profiles/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +set(_release_profile $,release-without-dbg,custom-without-dbg>) +set(custom_profile $,dev-without-dbg,${_release_profile}>) + +if(CORROSION_TEST_USE_TARGET_SPECIFIC_OVERRIDE) + # Select "wrong" profile here on purpose. 
+ corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml PROFILE dev) + set_target_properties(custom_profiles_lib + PROPERTIES + INTERFACE_CORROSION_CARGO_PROFILE "${custom_profile}" + ) +else() + corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml PROFILE ${custom_profile}) +endif() + +add_executable(custom-profile-exe main.cpp) +target_link_libraries(custom-profile-exe PUBLIC custom_profiles_lib) diff --git a/lib/corrosion/test/custom_profiles/custom_profiles/main.cpp b/lib/corrosion/test/custom_profiles/custom_profiles/main.cpp new file mode 100644 index 000000000..b714e76ee --- /dev/null +++ b/lib/corrosion/test/custom_profiles/custom_profiles/main.cpp @@ -0,0 +1,6 @@ +extern "C" void rust_function(char const *name); + + +int main(int argc, char **argv) { + rust_function("Cpp"); +} diff --git a/lib/corrosion/test/custom_profiles/custom_profiles/rust/Cargo.lock b/lib/corrosion/test/custom_profiles/custom_profiles/rust/Cargo.lock new file mode 100644 index 000000000..6c1abd7f5 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/custom_profiles/rust/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "custom-profiles-lib" +version = "0.1.0" diff --git a/lib/corrosion/test/custom_profiles/custom_profiles/rust/Cargo.toml b/lib/corrosion/test/custom_profiles/custom_profiles/rust/Cargo.toml new file mode 100644 index 000000000..93e3c7592 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/custom_profiles/rust/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "custom-profiles-lib" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type=["staticlib"] + +# Test if neither release or debug where selected by only disabling debug-assertions in the inherited profile. 
+[profile.release] +debug-assertions = true + +[profile.dev-without-dbg] +inherits = "dev" +debug-assertions = false + +[profile.release-without-dbg] +inherits = "release" +debug-assertions = false + +[profile.custom-without-dbg] +inherits = "release" +opt-level = 1 +debug-assertions = false diff --git a/lib/corrosion/test/custom_profiles/custom_profiles/rust/src/lib.rs b/lib/corrosion/test/custom_profiles/custom_profiles/rust/src/lib.rs new file mode 100644 index 000000000..ee3f4b016 --- /dev/null +++ b/lib/corrosion/test/custom_profiles/custom_profiles/rust/src/lib.rs @@ -0,0 +1,11 @@ +use std::os::raw::c_char; + +#[no_mangle] +pub extern "C" fn rust_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust!", name); +} + + +#[cfg(debug_assertions)] +const _: () = assert!(false, "Debug assertions where not disabled via custom profile!"); diff --git a/lib/corrosion/test/cxxbridge/CMakeLists.txt b/lib/corrosion/test/cxxbridge/CMakeLists.txt new file mode 100644 index 000000000..23c34ca93 --- /dev/null +++ b/lib/corrosion/test/cxxbridge/CMakeLists.txt @@ -0,0 +1,20 @@ +if(CORROSION_TESTS_CXXBRIDGE) + corrosion_tests_add_test(cxxbridge_cpp2rust_1 "rust_bin" + TEST_SRC_DIR cxxbridge_cpp2rust + PASS_THROUGH_ARGS -DTEST_CXXBRIDGE_VARIANT1=ON + ) + corrosion_tests_add_test(cxxbridge_cpp2rust_2 "rust_bin" + TEST_SRC_DIR cxxbridge_cpp2rust + PASS_THROUGH_ARGS -DTEST_CXXBRIDGE_VARIANT2=ON + ) + corrosion_tests_add_test(cxxbridge_rust2cpp "cxxbridge-exe") + + set_tests_properties("cxxbridge_cpp2rust_1_run_rust_bin" + PROPERTIES PASS_REGULAR_EXPRESSION + "main function" + ) + set_tests_properties("cxxbridge_rust2cpp_run_cxxbridge-exe" + PROPERTIES PASS_REGULAR_EXPRESSION + "Hello cxxbridge from lib.rs! \\[4, 5, 6\\]\r?\nHello cxxbridge from foo/mod.rs! 
\\[4, 5, 6\\]" + ) +endif() diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/CMakeLists.txt b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/CMakeLists.txt new file mode 100644 index 000000000..b063b1734 --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/CMakeLists.txt @@ -0,0 +1,39 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0 LANGUAGES CXX) +include(../../test_header.cmake) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED 1) + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml) +corrosion_add_cxxbridge(cxxbridge-cpp CRATE rust_bin FILES lib.rs) +target_include_directories(cxxbridge-cpp PRIVATE "include") + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux" + OR (CMAKE_SYSTEM_NAME STREQUAL "Windows" + AND (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + ) +) + corrosion_add_target_local_rustflags(rust_bin "-Clink-arg=-fuse-ld=lld") +endif() + +if(MSVC) + set_target_properties(cxxbridge-cpp PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreadedDLL") +endif() + +if(TEST_CXXBRIDGE_VARIANT1) + # Variant 1: Merge the C++ User sources into the generated library target. 
+ target_sources(cxxbridge-cpp PRIVATE cpplib.cpp) + corrosion_link_libraries(rust_bin cxxbridge-cpp) +elseif(TEST_CXXBRIDGE_VARIANT2) + # Variant 2: Create a separate C++ library and link both the User library and + # the generated library into rust + add_library(cpp_lib STATIC cpplib.cpp) + target_include_directories(cpp_lib PUBLIC "${CMAKE_CURRENT_LIST_DIR}/include") + target_link_libraries(cpp_lib PUBLIC cxxbridge-cpp) + corrosion_link_libraries(rust_bin cpp_lib cxxbridge-cpp) + if(MSVC) + set_target_properties(cpp_lib PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreadedDLL") + endif() +else() + message(FATAL_ERROR "Internal test error - required option not defined") +endif() diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/cpplib.cpp b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/cpplib.cpp new file mode 100644 index 000000000..14dd49fc0 --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/cpplib.cpp @@ -0,0 +1,15 @@ +#include +#include "cpplib.h" +#include "cxxbridge-cpp/lib.h" +#include "rust/cxx.h" + +RsImage read_image(rust::Str path) { + std::cout << "read_image called" << std::endl; + std::cout << path << std::endl; + Rgba c = { 1.0, 2.0, 3.0, 4.0}; + RsImage v = { 1, 1, c}; + return v; +} +void write_image(::rust::Str path, ::RsImage const & image) { + +} diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/include/cpplib.h b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/include/cpplib.h new file mode 100644 index 000000000..3e96dffc1 --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/include/cpplib.h @@ -0,0 +1,5 @@ +#pragma once +#include "cxxbridge-cpp/lib.h" + +::RsImage read_image(::rust::Str path); +void write_image(::rust::Str path, ::RsImage const & image); diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/Cargo.lock b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/Cargo.lock new file mode 100644 index 000000000..4c72e50f7 --- /dev/null +++ 
b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/Cargo.lock @@ -0,0 +1,89 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "cc" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" + +[[package]] +name = "cxx" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d1075c37807dcf850c379432f0df05ba52cc30f279c5cfc43cc221ce7f8579" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61b50bc93ba22c27b0d31128d2d130a0a6b3d267ae27ef7e4fae2167dfe8781c" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e61fda7e62115119469c7b3591fd913ecca96fb766cfd3f2e2502ab7bc87a5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] + +[[package]] +name = "proc-macro2" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rust_bin" +version = "0.1.0" +dependencies = [ + "cxx", +] + +[[package]] +name = "syn" +version = "1.0.107" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/Cargo.toml b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/Cargo.toml new file mode 100644 index 000000000..d2cf479dc --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "rust_bin" +version = "0.1.0" +edition = "2018" + +[lib] +name = "cxxbridge_lib" + +[dependencies] +cxx = "1.0" diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/src/lib.rs b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/src/lib.rs new file mode 100644 index 000000000..db946caf5 --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/src/lib.rs @@ -0,0 +1,26 @@ +#[cxx::bridge] +pub mod ffi +{ + #[derive(Debug, PartialEq)] + pub struct Rgba + { + r: f32, + g: f32, + b: f32, + a: f32, + } + + #[derive(Debug,PartialEq)] + pub struct RsImage + { + width: usize, + height: usize, + raster: Rgba, + } + unsafe extern "C++" + { + include!("cpplib.h"); + pub fn read_image(path: &str) -> RsImage; + fn write_image(path: &str, image: &RsImage); + } +} \ No newline at end of file diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/src/main.rs b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/src/main.rs new file mode 100644 index 000000000..f9ae26e6d --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_cpp2rust/rust/src/main.rs @@ -0,0 +1,14 @@ +use cxxbridge_lib::ffi::{RsImage,Rgba,read_image}; + +fn main() { + println!("main function"); + let expected = RsImage { width: 1, height: 1, raster: Rgba { + r: 1.0, + 
g: 2.0, + b: 3.0, + a: 4.0, + }}; + let actual = read_image("dummy path"); + println!("returned from C++"); + assert_eq!(actual, expected) +} \ No newline at end of file diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/CMakeLists.txt b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/CMakeLists.txt new file mode 100644 index 000000000..b8979b740 --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED 1) + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml) +corrosion_add_cxxbridge(cxxbridge-cpp CRATE cxxbridge_crate MANIFEST_PATH rust FILES lib.rs foo/mod.rs) + +add_executable(cxxbridge-exe main.cpp) +target_link_libraries(cxxbridge-exe PUBLIC cxxbridge-cpp) + +if(MSVC) + # Note: This is required because we use `cxx` which uses `cc` to compile and link C++ code. + corrosion_set_env_vars(cxxbridge_crate "CFLAGS=-MDd" "CXXFLAGS=-MDd") +endif() diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/main.cpp b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/main.cpp new file mode 100644 index 000000000..e512ce44a --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/main.cpp @@ -0,0 +1,11 @@ +#include +#include +#include + +int main(int argc, char **argv) +{ + std::vector input = { 4, 5, 6}; + rust::Slice slice{input.data(), input.size()}; + lib::print(slice); + foo::print(slice); +} diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/Cargo.lock b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/Cargo.lock new file mode 100644 index 000000000..ff091252f --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/Cargo.lock @@ -0,0 +1,89 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "cc" +version = "1.0.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d" + +[[package]] +name = "cxx" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d1075c37807dcf850c379432f0df05ba52cc30f279c5cfc43cc221ce7f8579" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxxbridge-crate" +version = "0.1.0" +dependencies = [ + "cxx", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61b50bc93ba22c27b0d31128d2d130a0a6b3d267ae27ef7e4fae2167dfe8781c" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e61fda7e62115119469c7b3591fd913ecca96fb766cfd3f2e2502ab7bc87a5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] + +[[package]] +name = "proc-macro2" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + 
"quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/Cargo.toml b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/Cargo.toml new file mode 100644 index 000000000..3c53c25eb --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "cxxbridge-crate" +version = "0.1.0" +edition = "2018" + +[lib] +crate-type = ["staticlib"] + +[dependencies] +cxx = "1.0" diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/src/foo/mod.rs b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/src/foo/mod.rs new file mode 100644 index 000000000..cbeae92dd --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/src/foo/mod.rs @@ -0,0 +1,10 @@ +#[cxx::bridge(namespace = "foo")] +mod bridge { + extern "Rust" { + fn print(slice: &[u64]); + } +} + +fn print(slice: &[u64]) { + println!("Hello cxxbridge from foo/mod.rs! {:?}", slice); +} diff --git a/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/src/lib.rs b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/src/lib.rs new file mode 100644 index 000000000..0a2f41031 --- /dev/null +++ b/lib/corrosion/test/cxxbridge/cxxbridge_rust2cpp/rust/src/lib.rs @@ -0,0 +1,12 @@ +mod foo; + +#[cxx::bridge(namespace = "lib")] +mod bridge { + extern "Rust" { + fn print(slice: &[u64]); + } +} + +fn print(slice: &[u64]) { + println!("Hello cxxbridge from lib.rs! 
{:?}", slice); +} diff --git a/lib/corrosion/test/envvar/CMakeLists.txt b/lib/corrosion/test/envvar/CMakeLists.txt new file mode 100644 index 000000000..8762eda62 --- /dev/null +++ b/lib/corrosion/test/envvar/CMakeLists.txt @@ -0,0 +1,5 @@ +corrosion_tests_add_test(envvar "program_requiring_rust_lib_with_envvar") + +set_tests_properties("envvar_run_program_requiring_rust_lib_with_envvar" PROPERTIES PASS_REGULAR_EXPRESSION + "Ok" + ) diff --git a/lib/corrosion/test/envvar/envvar/.cargo/config.toml b/lib/corrosion/test/envvar/envvar/.cargo/config.toml new file mode 100644 index 000000000..0d9ab8c81 --- /dev/null +++ b/lib/corrosion/test/envvar/envvar/.cargo/config.toml @@ -0,0 +1,2 @@ +[env] +COR_CONFIG_TOML_ENV_VAR = "EnvVariableSetViaConfig.toml" diff --git a/lib/corrosion/test/envvar/envvar/CMakeLists.txt b/lib/corrosion/test/envvar/envvar/CMakeLists.txt new file mode 100644 index 000000000..2e0eeabdd --- /dev/null +++ b/lib/corrosion/test/envvar/envvar/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH Cargo.toml) + +corrosion_set_env_vars(rust_lib_requiring_envvar + "ANOTHER_VARIABLE=ANOTHER_VALUE" + "$" + "COR_CARGO_VERSION_MAJOR=${Rust_CARGO_VERSION_MAJOR}" + "COR_CARGO_VERSION_MINOR=${Rust_CARGO_VERSION_MINOR}" +) + +add_executable(program_requiring_rust_lib_with_envvar main.cpp) + +set_property( + TARGET program_requiring_rust_lib_with_envvar + APPEND + PROPERTY INDIRECT_VAR_TEST + "REQUIRED_VARIABLE=EXPECTED_VALUE" +) + +target_link_libraries(program_requiring_rust_lib_with_envvar PUBLIC rust_lib_requiring_envvar) diff --git a/lib/corrosion/test/envvar/envvar/Cargo.lock b/lib/corrosion/test/envvar/envvar/Cargo.lock new file mode 100644 index 000000000..c079aa709 --- /dev/null +++ b/lib/corrosion/test/envvar/envvar/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. 
+# It is not intended for manual editing. +[[package]] +name = "rust-lib-requiring-envvar" +version = "0.1.0" diff --git a/lib/corrosion/test/envvar/envvar/Cargo.toml b/lib/corrosion/test/envvar/envvar/Cargo.toml new file mode 100644 index 000000000..e74e08bf4 --- /dev/null +++ b/lib/corrosion/test/envvar/envvar/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "rust-lib-requiring-envvar" +version = "0.1.0" +authors = ["Olivier Goffart "] +edition = "2018" +build = "build.rs" + +[lib] +crate-type = [ "lib", "cdylib" ] diff --git a/lib/corrosion/test/envvar/envvar/build.rs b/lib/corrosion/test/envvar/envvar/build.rs new file mode 100644 index 000000000..4c173db36 --- /dev/null +++ b/lib/corrosion/test/envvar/envvar/build.rs @@ -0,0 +1,18 @@ +fn main() { + assert_eq!(env!("REQUIRED_VARIABLE"), "EXPECTED_VALUE"); + assert_eq!(std::env::var("ANOTHER_VARIABLE").unwrap(), "ANOTHER_VALUE"); + let cargo_major = env!("COR_CARGO_VERSION_MAJOR") + .parse::() + .expect("Invalid Major version"); + let cargo_minor = env!("COR_CARGO_VERSION_MINOR") + .parse::() + .expect("Invalid Minor version"); + + // The `[env]` section in `.cargo/config.toml` was added in version 1.56. + if cargo_major > 1 || (cargo_major == 1 && cargo_minor >= 56) { + // Check if cargo picks up the config.toml, which sets this additional env variable. + let env_value = option_env!("COR_CONFIG_TOML_ENV_VAR") + .expect("Test failure! 
Cargo >= 1.56.0 should set this environment variable"); + assert_eq!(env_value, "EnvVariableSetViaConfig.toml"); + } +} diff --git a/lib/corrosion/test/envvar/envvar/main.cpp b/lib/corrosion/test/envvar/envvar/main.cpp new file mode 100644 index 000000000..5af405444 --- /dev/null +++ b/lib/corrosion/test/envvar/envvar/main.cpp @@ -0,0 +1,5 @@ +#include + +int main() { + std::cout << "Ok"; +} diff --git a/lib/corrosion/test/envvar/envvar/src/lib.rs b/lib/corrosion/test/envvar/envvar/src/lib.rs new file mode 100644 index 000000000..31e1bb209 --- /dev/null +++ b/lib/corrosion/test/envvar/envvar/src/lib.rs @@ -0,0 +1,7 @@ +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} diff --git a/lib/corrosion/test/external_corrosion_generator/CMakeLists.txt b/lib/corrosion/test/external_corrosion_generator/CMakeLists.txt new file mode 100644 index 000000000..954f21f80 --- /dev/null +++ b/lib/corrosion/test/external_corrosion_generator/CMakeLists.txt @@ -0,0 +1,14 @@ +if(CORROSION_TESTS_INSTALL_CORROSION) + add_test(NAME "ExternalCorrosionGenerator" + COMMAND + ${CMAKE_COMMAND} + -P "${CMAKE_SOURCE_DIR}/test/ConfigureAndBuild.cmake" + SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/ExternalCorrosionGenerator" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/build" + GENERATOR "${CMAKE_GENERATOR}" + RUST_TOOLCHAIN "${Rust_TOOLCHAIN}" + EXTERNAL_CORROSION_GENERATOR "${test_install_path}/libexec/corrosion-generator" + COMMAND_EXPAND_LISTS + ) + set_tests_properties("ExternalCorrosionGenerator" PROPERTIES FIXTURES_REQUIRED "fixture_corrosion_install") +endif() \ No newline at end of file diff --git a/lib/corrosion/test/external_corrosion_generator/ExternalCorrosionGenerator/CMakeLists.txt b/lib/corrosion/test/external_corrosion_generator/ExternalCorrosionGenerator/CMakeLists.txt new file mode 100644 index 000000000..69f9b2834 --- /dev/null +++ b/lib/corrosion/test/external_corrosion_generator/ExternalCorrosionGenerator/CMakeLists.txt @@ -0,0 +1,18 @@ 
+cmake_minimum_required(VERSION 3.15) +project(ExternalCorrosionGenerator LANGUAGES C) + +add_subdirectory(../../.. corrosion) + +get_property( + GENERATOR_EXE_LOCATION + TARGET Corrosion::Generator PROPERTY IMPORTED_LOCATION +) + +if (NOT GENERATOR_EXE_LOCATION STREQUAL CORROSION_GENERATOR_EXECUTABLE) + message( + FATAL_ERROR + "\ +Corrosion Generator not Imported Correctly: + Corrosion::Generator IMPORTED_LOCATION: ${GENERATOR_EXE_LOCATION} + CORROSION_GENERATOR_EXECUTABLE: ${CORROSION_GENERATOR_EXECUTABLE}") +endif() diff --git a/lib/corrosion/test/external_corrosion_generator/ExternalCorrosionGenerator/Test.cmake b/lib/corrosion/test/external_corrosion_generator/ExternalCorrosionGenerator/Test.cmake new file mode 100644 index 000000000..e92b812ec --- /dev/null +++ b/lib/corrosion/test/external_corrosion_generator/ExternalCorrosionGenerator/Test.cmake @@ -0,0 +1,14 @@ +set(CORROSION_DIR ${CMAKE_ARGV3}) +set(CORROSION_INSTALL ${CMAKE_ARGV4}) + +execute_process( + COMMAND + ${CMAKE_COMMAND} . + -DCORROSION_GENERATOR_EXECUTABLE=${CORROSION_INSTALL}/libexec/corrosion-generator + COMMAND_ECHO STDOUT + RESULT_VARIABLE SUCCESS +) + +if (NOT SUCCESS EQUAL 0) + message(FATAL_ERROR) +endif() diff --git a/lib/corrosion/test/features/CMakeLists.txt b/lib/corrosion/test/features/CMakeLists.txt new file mode 100644 index 000000000..d1d0a7a05 --- /dev/null +++ b/lib/corrosion/test/features/CMakeLists.txt @@ -0,0 +1,5 @@ +corrosion_tests_add_test(features "features-cpp-exe") + +set_tests_properties("features_run_features-cpp-exe" PROPERTIES PASS_REGULAR_EXPRESSION + "Hello, Cpp! I'm Rust!\r?\nHello, Cpp again! I'm Rust again!\r?\nHello, Cpp again! I'm Rust again, third time the charm!" 
+    )
diff --git a/lib/corrosion/test/features/features/CMakeLists.txt b/lib/corrosion/test/features/features/CMakeLists.txt
new file mode 100644
index 000000000..7b3767950
--- /dev/null
+++ b/lib/corrosion/test/features/features/CMakeLists.txt
@@ -0,0 +1,27 @@
+cmake_minimum_required(VERSION 3.15)
+project(test_project VERSION 0.1.0)
+include(../../test_header.cmake)
+
+corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml FEATURES thirdfeature ALL_FEATURES)
+
+add_executable(features-cpp-exe main.cpp)
+target_link_libraries(features-cpp-exe PUBLIC rust_feature_lib)
+
+corrosion_set_features(rust_feature_lib
+        ALL_FEATURES OFF
+        NO_DEFAULT_FEATURES
+        FEATURES
+            $<TARGET_PROPERTY:features-cpp-exe,app_features>
+)
+
+set_property(
+        TARGET features-cpp-exe
+        APPEND
+        PROPERTY app_features myfeature
+)
+set_property(
+        TARGET features-cpp-exe
+        APPEND
+        PROPERTY app_features secondfeature
+)
+
diff --git a/lib/corrosion/test/features/features/main.cpp b/lib/corrosion/test/features/features/main.cpp
new file mode 100644
index 000000000..0b9b1b92a
--- /dev/null
+++ b/lib/corrosion/test/features/features/main.cpp
@@ -0,0 +1,13 @@
+extern "C" void rust_function(char const *name);
+extern "C" void rust_second_function(char const *name);
+extern "C" void rust_third_function(char const *name);
+
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        rust_function("Cpp");
+        rust_second_function("Cpp again");
+        rust_third_function("Cpp again");
+    } else {
+        rust_function(argv[1]);
+    }
+}
diff --git a/lib/corrosion/test/features/features/rust/Cargo.lock b/lib/corrosion/test/features/features/rust/Cargo.lock
new file mode 100644
index 000000000..5cbbeb270
--- /dev/null
+++ b/lib/corrosion/test/features/features/rust/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3 + +[[package]] +name = "rust-feature-lib" +version = "0.1.0" diff --git a/lib/corrosion/test/features/features/rust/Cargo.toml b/lib/corrosion/test/features/features/rust/Cargo.toml new file mode 100644 index 000000000..977dc534d --- /dev/null +++ b/lib/corrosion/test/features/features/rust/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "rust-feature-lib" +version = "0.1.0" +authors = ["Andrew Gaspar "] +license = "MIT" +edition = "2018" + +[dependencies] + +[lib] +crate-type=["staticlib"] + +[features] +default = ["compile-breakage"] +myfeature = [] +secondfeature = [] +thirdfeature = [] +compile-breakage = [] \ No newline at end of file diff --git a/lib/corrosion/test/features/features/rust/src/lib.rs b/lib/corrosion/test/features/features/rust/src/lib.rs new file mode 100644 index 000000000..7c5c6d648 --- /dev/null +++ b/lib/corrosion/test/features/features/rust/src/lib.rs @@ -0,0 +1,26 @@ +#[cfg(feature = "myfeature")] +use std::os::raw::c_char; + +#[no_mangle] +#[cfg(feature = "myfeature")] +pub extern "C" fn rust_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust!", name); +} + +#[no_mangle] +#[cfg(feature = "secondfeature")] +pub extern "C" fn rust_second_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust again!", name); +} + +#[no_mangle] +#[cfg(feature = "thirdfeature")] +pub extern "C" fn rust_third_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! 
I'm Rust again, third time the charm!", name); +} + +#[cfg(feature = "compile-breakage")] +const _: [(); 1] = [(); 2]; // Trigger a compile error to make sure that we succeeded in de-activating this feature diff --git a/lib/corrosion/test/find_rust/CMakeLists.txt b/lib/corrosion/test/find_rust/CMakeLists.txt new file mode 100644 index 000000000..1dd963238 --- /dev/null +++ b/lib/corrosion/test/find_rust/CMakeLists.txt @@ -0,0 +1,3 @@ +corrosion_tests_add_test(find_rust "") +corrosion_tests_add_test(rustup_proxy "") + diff --git a/lib/corrosion/test/find_rust/find_rust/CMakeLists.txt b/lib/corrosion/test/find_rust/find_rust/CMakeLists.txt new file mode 100644 index 000000000..bdd0cb7ee --- /dev/null +++ b/lib/corrosion/test/find_rust/find_rust/CMakeLists.txt @@ -0,0 +1,9 @@ + +cmake_minimum_required(VERSION 3.15) +project(FindRust LANGUAGES CXX) + +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/../../../cmake" ${CMAKE_MODULE_PATH}) + +# make sure find_package(Rust) can be used more than once +find_package(Rust REQUIRED) +find_package(Rust REQUIRED) diff --git a/lib/corrosion/test/find_rust/rustup_proxy/CMakeLists.txt b/lib/corrosion/test/find_rust/rustup_proxy/CMakeLists.txt new file mode 100644 index 000000000..7d0440f4f --- /dev/null +++ b/lib/corrosion/test/find_rust/rustup_proxy/CMakeLists.txt @@ -0,0 +1,47 @@ +cmake_minimum_required(VERSION 3.15) +project(RustupProxy LANGUAGES CXX) + +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/../../../cmake" ${CMAKE_MODULE_PATH}) + +function(_assert_is_rustup_proxy executable_path) + execute_process( + COMMAND + ${CMAKE_COMMAND} -E env + RUSTUP_FORCE_ARG0=rustup + "${executable_path}" --version + OUTPUT_VARIABLE _VERSION_RAW + ERROR_VARIABLE _VERSION_STDERR + RESULT_VARIABLE _VERSION_RESULT + ) + + if(NOT _VERSION_RESULT EQUAL "0") + message(FATAL_ERROR "`${executable_path} --version` failed with ${_VERSION_RESULT}\n" + "stderr:\n${_VERSION_STDERR}" + ) + endif() + + if (NOT _VERSION_RAW MATCHES "rustup [0-9\\.]+") + 
message(FATAL_ERROR "`${executable_path} --version` output does not match rustup: ${_VERSION_RAW}\n") + endif() +endfunction() + +set(Rust_RESOLVE_RUSTUP_TOOLCHAINS OFF CACHE BOOL "" FORCE) +find_package(Rust REQUIRED) + +if (NOT Rust_FOUND) + message(FATAL_ERROR "Rustup not found") +endif() + +get_property( + RUSTC_EXECUTABLE + TARGET Rust::Rustc PROPERTY IMPORTED_LOCATION +) + +_assert_is_rustup_proxy(${RUSTC_EXECUTABLE}) + +get_property( + CARGO_EXECUTABLE + TARGET Rust::Cargo PROPERTY IMPORTED_LOCATION +) + +_assert_is_rustup_proxy(${CARGO_EXECUTABLE}) diff --git a/lib/corrosion/test/gensource/CMakeLists.txt b/lib/corrosion/test/gensource/CMakeLists.txt new file mode 100644 index 000000000..b38a98776 --- /dev/null +++ b/lib/corrosion/test/gensource/CMakeLists.txt @@ -0,0 +1,5 @@ +corrosion_tests_add_test(gensource "") + +#set_tests_properties("features_run_features-cpp-exe" PROPERTIES PASS_REGULAR_EXPRESSION +# "Hello, Cpp! I'm Rust!\r?\nHello, Cpp again! I'm Rust again!\r?\nHello, Cpp again! I'm Rust again, third time the charm!" 
+#        )
\ No newline at end of file
diff --git a/lib/corrosion/test/gensource/gensource/.gitignore b/lib/corrosion/test/gensource/gensource/.gitignore
new file mode 100644
index 000000000..b4878a28e
--- /dev/null
+++ b/lib/corrosion/test/gensource/gensource/.gitignore
@@ -0,0 +1 @@
+src/foo.rs
diff --git a/lib/corrosion/test/gensource/gensource/CMakeLists.txt b/lib/corrosion/test/gensource/gensource/CMakeLists.txt
new file mode 100644
index 000000000..ea348d253
--- /dev/null
+++ b/lib/corrosion/test/gensource/gensource/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.15)
+project(test_project VERSION 0.1.0)
+include(../../test_header.cmake)
+
+add_subdirectory(generator)
+
+add_custom_command(
+    OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/src/foo.rs"
+    DEPENDS $<TARGET_FILE:srcgen>
+    COMMAND $<TARGET_FILE:srcgen> "${CMAKE_CURRENT_SOURCE_DIR}/src/foo.rs"
+)
+
+add_custom_target(after_generation DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/foo.rs")
+add_custom_target(genexdebug COMMAND ${CMAKE_COMMAND} -E echo "Config DEBUG: $<CONFIG:Debug> Config Release: $<CONFIG:Release> IMPORTED_LOCATION: $<TARGET_PROPERTY:srcgen,IMPORTED_LOCATION>")
+
+corrosion_import_crate(MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Cargo.toml)
+add_dependencies(cargo-prebuild_generated after_generation)
+
+# Simple test for corrosion_parse_package_version
+corrosion_parse_package_version("${CMAKE_CURRENT_SOURCE_DIR}/Cargo.toml" srcgen_version)
+if (NOT "${srcgen_version}" VERSION_EQUAL "0.1.0")
+    message(FATAL_ERROR "Test failed to parse expected version")
+endif()
diff --git a/lib/corrosion/test/gensource/gensource/Cargo.lock b/lib/corrosion/test/gensource/gensource/Cargo.lock
new file mode 100644
index 000000000..e22568e8f
--- /dev/null
+++ b/lib/corrosion/test/gensource/gensource/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3 + +[[package]] +name = "generated" +version = "0.1.0" diff --git a/lib/corrosion/test/gensource/gensource/Cargo.toml b/lib/corrosion/test/gensource/gensource/Cargo.toml new file mode 100644 index 000000000..f4c72d391 --- /dev/null +++ b/lib/corrosion/test/gensource/gensource/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "generated" +version = "0.1.0" +edition = "2018" + +[lib] +crate-type = ["lib", "cdylib"] +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/lib/corrosion/test/gensource/gensource/generator/CMakeLists.txt b/lib/corrosion/test/gensource/gensource/generator/CMakeLists.txt new file mode 100644 index 000000000..40ab3a72d --- /dev/null +++ b/lib/corrosion/test/gensource/gensource/generator/CMakeLists.txt @@ -0,0 +1,2 @@ +corrosion_import_crate(MANIFEST_PATH Cargo.toml) +corrosion_set_hostbuild(srcgen) diff --git a/lib/corrosion/test/gensource/gensource/generator/Cargo.lock b/lib/corrosion/test/gensource/gensource/generator/Cargo.lock new file mode 100644 index 000000000..6ad6f0d2a --- /dev/null +++ b/lib/corrosion/test/gensource/gensource/generator/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "srcgen" +version = "0.1.0" diff --git a/lib/corrosion/test/gensource/gensource/generator/Cargo.toml b/lib/corrosion/test/gensource/gensource/generator/Cargo.toml new file mode 100644 index 000000000..8712fea43 --- /dev/null +++ b/lib/corrosion/test/gensource/gensource/generator/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "srcgen" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/lib/corrosion/test/gensource/gensource/generator/src/main.rs b/lib/corrosion/test/gensource/gensource/generator/src/main.rs new file mode 100644 index 000000000..84eef8c7b --- /dev/null +++ b/lib/corrosion/test/gensource/gensource/generator/src/main.rs @@ -0,0 +1,6 @@ +use std::io::Write; +fn main() -> Result<(), std::io::Error> { + let out_name = std::env::args().skip(1).next().unwrap(); + let mut out_file = std::fs::File::create(out_name)?; + Ok(write!(out_file, "const _: () = ();")?) +} diff --git a/lib/corrosion/test/gensource/gensource/src/lib.rs b/lib/corrosion/test/gensource/gensource/src/lib.rs new file mode 100644 index 000000000..b624d3c7c --- /dev/null +++ b/lib/corrosion/test/gensource/gensource/src/lib.rs @@ -0,0 +1,10 @@ +mod foo; + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/lib/corrosion/test/hostbuild/CMakeLists.txt b/lib/corrosion/test/hostbuild/CMakeLists.txt new file mode 100644 index 000000000..d98bba00d --- /dev/null +++ b/lib/corrosion/test/hostbuild/CMakeLists.txt @@ -0,0 +1,11 @@ +# FIXME: ONly test this when cross-compiling? +corrosion_tests_add_test(hostbuild "rust-host-program") + +set_tests_properties("hostbuild_run_rust-host-program" PROPERTIES PASS_REGULAR_EXPRESSION + "^ok\r?\nHello Rust Hostbuild, I am an external C function" + ) +# Run tests are disabled by default when cross-compiling, however we still want to test hostbuild! 
+# So we manually re-enable the test here. +if(CMAKE_CROSSCOMPILING) + set_tests_properties("hostbuild_run_rust-host-program" PROPERTIES DISABLED FALSE) +endif() diff --git a/lib/corrosion/test/hostbuild/hostbuild/CMakeLists.txt b/lib/corrosion/test/hostbuild/hostbuild/CMakeLists.txt new file mode 100644 index 000000000..1e60ff31c --- /dev/null +++ b/lib/corrosion/test/hostbuild/hostbuild/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Cargo.toml) + +corrosion_set_hostbuild(rust-host-program) diff --git a/lib/corrosion/test/hostbuild/hostbuild/Cargo.lock b/lib/corrosion/test/hostbuild/hostbuild/Cargo.lock new file mode 100644 index 000000000..aea4063d5 --- /dev/null +++ b/lib/corrosion/test/hostbuild/hostbuild/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "rust-host-program" +version = "0.1.0" +dependencies = [ + "cc", +] diff --git a/lib/corrosion/test/hostbuild/hostbuild/Cargo.toml b/lib/corrosion/test/hostbuild/hostbuild/Cargo.toml new file mode 100644 index 000000000..935757af0 --- /dev/null +++ b/lib/corrosion/test/hostbuild/hostbuild/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "rust-host-program" +version = "0.1.0" +authors = ["Olivier Goffart "] +edition = "2018" + +[build-dependencies] +cc = "1.0" diff --git a/lib/corrosion/test/hostbuild/hostbuild/build.rs b/lib/corrosion/test/hostbuild/hostbuild/build.rs new file mode 100644 index 000000000..aa725cf5a --- /dev/null +++ b/lib/corrosion/test/hostbuild/hostbuild/build.rs @@ -0,0 +1,10 @@ +fn main() { + let out_dir = std::env::var("OUT_DIR").unwrap(); 
+    cc::Build::new()
+        .file("src/lib.c")
+        .compile("hello");
+
+    println!("cargo:rustc-link-search=native={}", out_dir);
+    println!("cargo:rustc-link-lib=hello");
+    println!("cargo:rerun-if-changed=src/lib.c");
+}
\ No newline at end of file
diff --git a/lib/corrosion/test/hostbuild/hostbuild/src/lib.c b/lib/corrosion/test/hostbuild/hostbuild/src/lib.c
new file mode 100644
index 000000000..4739e9c55
--- /dev/null
+++ b/lib/corrosion/test/hostbuild/hostbuild/src/lib.c
@@ -0,0 +1,5 @@
+#include <stdio.h>
+
+void c_function(char const *name) {
+    printf("Hello %s, I am an external C function\n", name);
+}
diff --git a/lib/corrosion/test/hostbuild/hostbuild/src/main.rs b/lib/corrosion/test/hostbuild/hostbuild/src/main.rs
new file mode 100644
index 000000000..d4ec7cb79
--- /dev/null
+++ b/lib/corrosion/test/hostbuild/hostbuild/src/main.rs
@@ -0,0 +1,13 @@
+use std::os::raw::c_char;
+
+extern "C" {
+    fn c_function(name: *const c_char);
+}
+
+fn main() {
+    println!("ok");
+    let name = b"Rust Hostbuild\0";
+    unsafe {
+        c_function(name.as_ptr() as _);
+    }
+}
diff --git a/lib/corrosion/test/multitarget/CMakeLists.txt b/lib/corrosion/test/multitarget/CMakeLists.txt
new file mode 100644
index 000000000..8f016ec26
--- /dev/null
+++ b/lib/corrosion/test/multitarget/CMakeLists.txt
@@ -0,0 +1,22 @@
+corrosion_tests_add_test(multitarget "bin1;bin2;bin3")
+
+# Don't run this test in parallel with others, since the target directory size may cause issues.
+set_tests_properties("multitarget_build" PROPERTIES RUN_SERIAL TRUE)
+
+set_tests_properties("multitarget_run_bin1" PROPERTIES PASS_REGULAR_EXPRESSION
+        "Hello, world!\r?\nHello, bin1! I'm Cpp!"
+        RUN_SERIAL
+        TRUE
+        )
+
+set_tests_properties("multitarget_run_bin2" PROPERTIES PASS_REGULAR_EXPRESSION
+        "Hello, world!\r?\nHello, bin2! I'm Cpp!"
+        RUN_SERIAL
+        TRUE
+        )
+
+set_tests_properties("multitarget_run_bin3" PROPERTIES PASS_REGULAR_EXPRESSION
+        "Hello, world!\r?\nHello, bin3! I'm Cpp!"
+ RUN_SERIAL + TRUE + ) diff --git a/lib/corrosion/test/multitarget/multitarget/CMakeLists.txt b/lib/corrosion/test/multitarget/multitarget/CMakeLists.txt new file mode 100644 index 000000000..0d7a4102b --- /dev/null +++ b/lib/corrosion/test/multitarget/multitarget/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH Cargo.toml) + +add_library(cpp-lib4 lib.cpp) +target_compile_features(cpp-lib4 PRIVATE cxx_std_14) +set_property(TARGET cpp-lib4 PROPERTY POSITION_INDEPENDENT_CODE ON) +corrosion_link_libraries(bin1 cpp-lib4) +corrosion_link_libraries(bin2 cpp-lib4) +corrosion_link_libraries(bin3 cpp-lib4) diff --git a/lib/corrosion/test/multitarget/multitarget/Cargo.lock b/lib/corrosion/test/multitarget/multitarget/Cargo.lock new file mode 100644 index 000000000..348e15f3b --- /dev/null +++ b/lib/corrosion/test/multitarget/multitarget/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3
+
+[[package]]
+name = "multitarget-crate"
+version = "0.1.0"
diff --git a/lib/corrosion/test/multitarget/multitarget/Cargo.toml b/lib/corrosion/test/multitarget/multitarget/Cargo.toml
new file mode 100644
index 000000000..406e1d004
--- /dev/null
+++ b/lib/corrosion/test/multitarget/multitarget/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "multitarget-crate"
+version = "0.1.0"
+edition = "2018"
+
+[dependencies]
+
+[lib]
+name = "multitarget_lib"
+crate-type=["lib", "staticlib", "cdylib"]
+
+[[bin]]
+name = "bin1"
+
+[[bin]]
+name = "bin2"
+
+[[bin]]
+name = "bin3"
diff --git a/lib/corrosion/test/multitarget/multitarget/lib.cpp b/lib/corrosion/test/multitarget/multitarget/lib.cpp
new file mode 100644
index 000000000..549964a67
--- /dev/null
+++ b/lib/corrosion/test/multitarget/multitarget/lib.cpp
@@ -0,0 +1,5 @@
+#include <iostream>
+
+extern "C" void cpp_function(char const *name) {
+    std::cout << "Hello, " << name << "! I'm Cpp!\n";
+}
diff --git a/lib/corrosion/test/multitarget/multitarget/src/bin/bin1.rs b/lib/corrosion/test/multitarget/multitarget/src/bin/bin1.rs
new file mode 100644
index 000000000..6284966fc
--- /dev/null
+++ b/lib/corrosion/test/multitarget/multitarget/src/bin/bin1.rs
@@ -0,0 +1,8 @@
+use multitarget_lib::hello_world;
+
+fn main() {
+    hello_world();
+    unsafe {
+        multitarget_lib::cpp_function("bin1\0".as_ptr() as *const _);
+    }
+}
diff --git a/lib/corrosion/test/multitarget/multitarget/src/bin/bin2.rs b/lib/corrosion/test/multitarget/multitarget/src/bin/bin2.rs
new file mode 100644
index 000000000..05ddbdb01
--- /dev/null
+++ b/lib/corrosion/test/multitarget/multitarget/src/bin/bin2.rs
@@ -0,0 +1,8 @@
+use multitarget_lib::hello_world;
+
+fn main() {
+    hello_world();
+    unsafe {
+        multitarget_lib::cpp_function("bin2\0".as_ptr() as *const _);
+    }
+}
diff --git a/lib/corrosion/test/multitarget/multitarget/src/bin/bin3.rs b/lib/corrosion/test/multitarget/multitarget/src/bin/bin3.rs
new file mode 100644
index 000000000..238e8b97e
---
/dev/null +++ b/lib/corrosion/test/multitarget/multitarget/src/bin/bin3.rs @@ -0,0 +1,8 @@ +use multitarget_lib::hello_world; + +fn main() { + hello_world(); + unsafe { + multitarget_lib::cpp_function("bin3\0".as_ptr() as *const _); + } +} diff --git a/lib/corrosion/test/multitarget/multitarget/src/lib.rs b/lib/corrosion/test/multitarget/multitarget/src/lib.rs new file mode 100644 index 000000000..1da2d2485 --- /dev/null +++ b/lib/corrosion/test/multitarget/multitarget/src/lib.rs @@ -0,0 +1,9 @@ +use std::os::raw::c_char; + +pub fn hello_world() { + println!("Hello, world!"); +} + +extern "C" { + pub fn cpp_function(name: *const c_char); +} diff --git a/lib/corrosion/test/nostd/CMakeLists.txt b/lib/corrosion/test/nostd/CMakeLists.txt new file mode 100644 index 000000000..be700caa2 --- /dev/null +++ b/lib/corrosion/test/nostd/CMakeLists.txt @@ -0,0 +1 @@ +corrosion_tests_add_test(nostd "") diff --git a/lib/corrosion/test/nostd/nostd/CMakeLists.txt b/lib/corrosion/test/nostd/nostd/CMakeLists.txt new file mode 100644 index 000000000..a533d79a3 --- /dev/null +++ b/lib/corrosion/test/nostd/nostd/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml NO_STD) + +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -nostdlib") +list(REMOVE_ITEM CMAKE_CXX_IMPLICIT_LINK_LIBRARIES stdc++) + +add_library(nostd-cpp-lib STATIC main.cpp) +target_link_libraries(nostd-cpp-lib PUBLIC rust-nostd-lib) diff --git a/lib/corrosion/test/nostd/nostd/main.cpp b/lib/corrosion/test/nostd/nostd/main.cpp new file mode 100644 index 000000000..3cede3a8e --- /dev/null +++ b/lib/corrosion/test/nostd/nostd/main.cpp @@ -0,0 +1,6 @@ +extern "C" void rust_function(); + +extern "C" void cpp_function() { + // Fail on linking issues + rust_function(); +} \ No newline at end of file diff --git a/lib/corrosion/test/nostd/nostd/rust/Cargo.lock 
b/lib/corrosion/test/nostd/nostd/rust/Cargo.lock new file mode 100644 index 000000000..0216008de --- /dev/null +++ b/lib/corrosion/test/nostd/nostd/rust/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "rust-nostd-lib" +version = "0.1.0" diff --git a/lib/corrosion/test/nostd/nostd/rust/Cargo.toml b/lib/corrosion/test/nostd/nostd/rust/Cargo.toml new file mode 100644 index 000000000..e16f8fe83 --- /dev/null +++ b/lib/corrosion/test/nostd/nostd/rust/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "rust-nostd-lib" +version = "0.1.0" +edition = "2015" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[lib] +crate-type=["staticlib"] + +[profile.release] +panic = "abort" + +[profile.dev] +panic = "abort" diff --git a/lib/corrosion/test/nostd/nostd/rust/src/lib.rs b/lib/corrosion/test/nostd/nostd/rust/src/lib.rs new file mode 100644 index 000000000..81443752c --- /dev/null +++ b/lib/corrosion/test/nostd/nostd/rust/src/lib.rs @@ -0,0 +1,10 @@ +#![no_std] +use core::panic::PanicInfo; + +#[no_mangle] +pub extern "C" fn rust_function() {} + +#[panic_handler] +fn panic(_panic: &PanicInfo<'_>) -> ! 
{ + loop {} +} \ No newline at end of file diff --git a/lib/corrosion/test/output directory/CMakeLists.txt b/lib/corrosion/test/output directory/CMakeLists.txt new file mode 100644 index 000000000..4adb6f9a1 --- /dev/null +++ b/lib/corrosion/test/output directory/CMakeLists.txt @@ -0,0 +1,143 @@ +if(CMAKE_C_COMPILER) + set(TEST_C_COMPILER "C_COMPILER" "${CMAKE_C_COMPILER}") +endif() +if(CMAKE_CXX_COMPILER) + set(TEST_CXX_COMPILER "CXX_COMPILER" "${CMAKE_CXX_COMPILER}") +endif() +if(CMAKE_GENERATOR_PLATFORM) + set(TEST_GENERATOR_PLATFORM "GENERATOR_PLATFORM" "${CMAKE_GENERATOR_PLATFORM}") +endif() + +add_test(NAME "output_directory_build" + COMMAND + ${CMAKE_COMMAND} + -P "${CMAKE_SOURCE_DIR}/test/ConfigureAndBuild.cmake" + SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/output directory" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/build" + GENERATOR "${CMAKE_GENERATOR}" + RUST_TOOLCHAIN "${Rust_TOOLCHAIN}" + CARGO_TARGET "${Rust_CARGO_TARGET}" + SYSTEM_NAME "${CMAKE_SYSTEM_NAME}" + "${TEST_C_COMPILER}" + "${TEST_CXX_COMPILER}" + "${TEST_GENERATOR_PLATFORM}" + + COMMAND_EXPAND_LISTS +) +set_tests_properties("output_directory_build" PROPERTIES FIXTURES_SETUP "build_fixture_output_directory") +if(CORROSION_TESTS_INSTALL_CORROSION) + set_tests_properties("output_directory_build" PROPERTIES FIXTURES_REQUIRED "fixture_corrosion_install") +endif() + +set(test_variants "var") +if(NOT CORROSION_NATIVE_TOOLING) + list(APPEND test_variants "targetprop") +endif() + +foreach(output_approach ${test_variants}) + if(output_approach STREQUAL "targetprop") + set(rust_proj_suffix "1") + elseif(output_approach STREQUAL "var") + set(rust_proj_suffix "2") + else() + message(FATAL_ERROR "specify rust project suffix for new output approach ${output_approach}") + endif() + + set(bin_name "rust_bin${rust_proj_suffix}${CMAKE_EXECUTABLE_SUFFIX}") + + add_test(NAME output_directory_bin_${output_approach} + COMMAND + "${CMAKE_COMMAND}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/TestFileExists.cmake" + 
"${CMAKE_CURRENT_BINARY_DIR}/build/custom_bin_${output_approach}/${bin_name}" + ) + set_tests_properties("output_directory_bin_${output_approach}" PROPERTIES FIXTURES_REQUIRED "build_fixture_output_directory") + + set(lib_name "rust_lib${rust_proj_suffix}") + + set(static_lib_name "${CMAKE_STATIC_LIBRARY_PREFIX}${lib_name}${CMAKE_STATIC_LIBRARY_SUFFIX}") + + add_test(NAME output_directory_staticlib_${output_approach} + COMMAND + "${CMAKE_COMMAND}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/TestFileExists.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/build/custom_archive_${output_approach}/${static_lib_name}" + ) + set_tests_properties("output_directory_staticlib_${output_approach}" PROPERTIES FIXTURES_REQUIRED "build_fixture_output_directory") + + if(MINGW) + # Windows-GNU defines "lib" as prefix for DLLs, but cargo creates foo.dll instead of libfoo.dll + set(dynamic_lib_prefix "") + else() + set(dynamic_lib_prefix "${CMAKE_SHARED_LIBRARY_PREFIX}") + endif() + set(dynamic_lib_name "${dynamic_lib_prefix}${lib_name}${CMAKE_SHARED_LIBRARY_SUFFIX}") + + add_test(NAME output_directory_cdylib_${output_approach} + COMMAND + "${CMAKE_COMMAND}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/TestFileExists.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/build/custom_lib_${output_approach}/${dynamic_lib_name}" + ) + set_tests_properties("output_directory_cdylib_${output_approach}" PROPERTIES FIXTURES_REQUIRED "build_fixture_output_directory") + + if(WIN32) + set(implib_name ${CMAKE_IMPORT_LIBRARY_PREFIX}${lib_name}${CMAKE_IMPORT_LIBRARY_SUFFIX}) + + add_test(NAME output_directory_implib_${output_approach} + COMMAND + "${CMAKE_COMMAND}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/TestFileExists.cmake" + # Implib is an ARCHIVE artifact, see: + # https://cmake.org/cmake/help/v3.25/manual/cmake-buildsystem.7.html#archive-output-artifacts + "${CMAKE_CURRENT_BINARY_DIR}/build/custom_archive_${output_approach}/${implib_name}" + ) + set_tests_properties("output_directory_implib_${output_approach}" PROPERTIES FIXTURES_REQUIRED 
"build_fixture_output_directory") + + if(MSVC) + if(output_approach STREQUAL "targetprop") + set(expected_lib_pdb_path "custom_lib_pdb_targetprop") + set(expected_bin_pdb_path "custom_bin_pdb_targetprop") + elseif(output_approach STREQUAL "var") + # When using a CMAKE_ variable instead of a target property, both targets + # end up in the same directory. + set(expected_lib_pdb_path "custom_binlib_pdb_var") + set(expected_bin_pdb_path "custom_binlib_pdb_var") + else() + message(FATAL_ERROR "specify rust project suffix for new output approach ${output_approach}") + endif() + + set(lib_pdb_name "${lib_name}.pdb") + add_test(NAME output_directory_cdylib_pdb_${output_approach} + COMMAND + "${CMAKE_COMMAND}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/TestFileExists.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/build/${expected_lib_pdb_path}/${lib_pdb_name}" + ) + set_tests_properties("output_directory_cdylib_pdb_${output_approach}" PROPERTIES FIXTURES_REQUIRED "build_fixture_output_directory") + + set(bin_pdb_name "rust_bin${rust_proj_suffix}.pdb") + add_test(NAME output_directory_bin_pdb_${output_approach} + COMMAND + "${CMAKE_COMMAND}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/TestFileExists.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/build/${expected_bin_pdb_path}/${bin_pdb_name}" + ) + set_tests_properties("output_directory_bin_pdb_${output_approach}" PROPERTIES FIXTURES_REQUIRED "build_fixture_output_directory") + endif() + endif() + +endforeach() + +if(NOT CORROSION_NATIVE_TOOLING) + add_test(NAME postbuild_custom_command + COMMAND + "${CMAKE_COMMAND}" + -P "${CMAKE_CURRENT_SOURCE_DIR}/TestFileExists.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/build/another_dir/moved_bin" + ) + set_tests_properties("postbuild_custom_command" PROPERTIES FIXTURES_REQUIRED "build_fixture_output_directory") +endif() + +add_test(NAME "output_directory_cleanup" COMMAND "${CMAKE_COMMAND}" -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/build") +set_tests_properties("output_directory_cleanup" PROPERTIES FIXTURES_CLEANUP 
"build_fixture_output_directory") diff --git a/lib/corrosion/test/output directory/TestFileExists.cmake b/lib/corrosion/test/output directory/TestFileExists.cmake new file mode 100644 index 000000000..cfc33f2de --- /dev/null +++ b/lib/corrosion/test/output directory/TestFileExists.cmake @@ -0,0 +1,13 @@ +# CMake script to test if a file exists. Errors if the file does not exist. +# Expect actual arguments to start at index 3 (cmake -P ) + +# Expect one argument +if(NOT (CMAKE_ARGC EQUAL "4")) + message(FATAL_ERROR "Test Internal Error: Unexpected ARGC Value: ${CMAKE_ARGC}.") +endif() + +set(FILE_PATH "${CMAKE_ARGV3}") + +if(NOT ( EXISTS "${FILE_PATH}" )) + message(FATAL_ERROR "Test failed: File `${FILE_PATH}` does not exist.") +endif() diff --git a/lib/corrosion/test/output directory/output directory/CMakeLists.txt b/lib/corrosion/test/output directory/output directory/CMakeLists.txt new file mode 100644 index 000000000..c3704cc87 --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/CMakeLists.txt @@ -0,0 +1,47 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +if(NOT CORROSION_NATIVE_TOOLING) + corrosion_import_crate(MANIFEST_PATH proj1/Cargo.toml) + + # Note: The output directories defined here must be manually kept in sync with the expected test location. 
+    set_target_properties(rust_bin1
+        PROPERTIES
+            RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_bin_targetprop"
+            PDB_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_bin_pdb_targetprop"
+
+    )
+    set_target_properties(rust_lib1 PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_archive_targetprop")
+    set_target_properties(rust_lib1
+        PROPERTIES
+            LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_lib_targetprop"
+            PDB_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_lib_pdb_targetprop"
+    )
+
+    add_custom_command(TARGET cargo-build_rust_bin1 POST_BUILD
+        COMMAND
+            ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/another_dir"
+        COMMAND
+            ${CMAKE_COMMAND} -E copy_if_different
+                "$<TARGET_FILE:rust_bin1>"
+                "${CMAKE_CURRENT_BINARY_DIR}/another_dir/moved_bin"
+        )
+endif()
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_bin_var")
+set(CMAKE_PDB_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_binlib_pdb_var")
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_archive_var")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/custom_lib_var")
+
+corrosion_import_crate(MANIFEST_PATH proj2/Cargo.toml)
+
+unset(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
+unset(CMAKE_PDB_OUTPUT_DIRECTORY)
+unset(CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
+unset(CMAKE_LIBRARY_OUTPUT_DIRECTORY)
+unset(CMAKE_PDB_OUTPUT_DIRECTORY)
+
+add_executable(consumer consumer.cpp)
+add_dependencies(consumer cargo-build_rust_lib2)
+
+target_link_libraries(consumer rust_lib2)
diff --git a/lib/corrosion/test/output directory/output directory/consumer.cpp b/lib/corrosion/test/output directory/output directory/consumer.cpp
new file mode 100644
index 000000000..fa943ba65
--- /dev/null
+++ b/lib/corrosion/test/output directory/output directory/consumer.cpp
@@ -0,0 +1,16 @@
+#include <iostream>
+#include <cstdlib>
+
+extern "C" unsigned int ret_12();
+
+
+int main(int argc, char *argv[])
+{
+    std::cout << "HI\n";
+    unsigned int a = ret_12();
+    if (a !=
12) { + return -1; + } + + return 0; +} diff --git a/lib/corrosion/test/output directory/output directory/proj1/Cargo.lock b/lib/corrosion/test/output directory/output directory/proj1/Cargo.lock new file mode 100644 index 000000000..07e006c26 --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/proj1/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "rust_package1" +version = "0.1.0" diff --git a/lib/corrosion/test/output directory/output directory/proj1/Cargo.toml b/lib/corrosion/test/output directory/output directory/proj1/Cargo.toml new file mode 100644 index 000000000..0f3bc1bcc --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/proj1/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rust_package1" +version = "0.1.0" +edition = "2018" + +[lib] +name = "rust_lib1" +crate-type=["staticlib", "cdylib"] + +[[bin]] +name = "rust_bin1" diff --git a/lib/corrosion/test/output directory/output directory/proj1/src/bin/rust_bin1.rs b/lib/corrosion/test/output directory/output directory/proj1/src/bin/rust_bin1.rs new file mode 100644 index 000000000..11c0291c8 --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/proj1/src/bin/rust_bin1.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world from test rust binary"); +} diff --git a/lib/corrosion/test/output directory/output directory/proj1/src/lib.rs b/lib/corrosion/test/output directory/output directory/proj1/src/lib.rs new file mode 100644 index 000000000..cfadd0f94 --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/proj1/src/lib.rs @@ -0,0 +1,4 @@ +#[no_mangle] +pub extern "C" fn ret_12() -> u32 { + 12 +} diff --git a/lib/corrosion/test/output directory/output directory/proj2/Cargo.lock b/lib/corrosion/test/output directory/output directory/proj2/Cargo.lock new file mode 100644 index 000000000..892d96416 --- /dev/null +++ 
b/lib/corrosion/test/output directory/output directory/proj2/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "rust_package2" +version = "0.1.0" diff --git a/lib/corrosion/test/output directory/output directory/proj2/Cargo.toml b/lib/corrosion/test/output directory/output directory/proj2/Cargo.toml new file mode 100644 index 000000000..63df8d5ea --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/proj2/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rust_package2" +version = "0.1.0" +edition = "2018" + +[lib] +name = "rust_lib2" +crate-type=["staticlib", "cdylib"] + +[[bin]] +name = "rust_bin2" diff --git a/lib/corrosion/test/output directory/output directory/proj2/src/bin/rust_bin2.rs b/lib/corrosion/test/output directory/output directory/proj2/src/bin/rust_bin2.rs new file mode 100644 index 000000000..11c0291c8 --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/proj2/src/bin/rust_bin2.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world from test rust binary"); +} diff --git a/lib/corrosion/test/output directory/output directory/proj2/src/lib.rs b/lib/corrosion/test/output directory/output directory/proj2/src/lib.rs new file mode 100644 index 000000000..cfadd0f94 --- /dev/null +++ b/lib/corrosion/test/output directory/output directory/proj2/src/lib.rs @@ -0,0 +1,4 @@ +#[no_mangle] +pub extern "C" fn ret_12() -> u32 { + 12 +} diff --git a/lib/corrosion/test/parse_target_triple/CMakeLists.txt b/lib/corrosion/test/parse_target_triple/CMakeLists.txt new file mode 100644 index 000000000..f98dd184e --- /dev/null +++ b/lib/corrosion/test/parse_target_triple/CMakeLists.txt @@ -0,0 +1,10 @@ +corrosion_tests_add_test(parse_target_triple "") +corrosion_tests_add_test(parse_target_triple_should_fail "") + +set_tests_properties("parse_target_triple_build" PROPERTIES FAIL_REGULAR_EXPRESSION + "CMake Warning" + ) + 
+set_tests_properties("parse_target_triple_should_fail_build" PROPERTIES PASS_REGULAR_EXPRESSION + "CMake Warning" + ) \ No newline at end of file diff --git a/lib/corrosion/test/parse_target_triple/parse_target_triple/CMakeLists.txt b/lib/corrosion/test/parse_target_triple/parse_target_triple/CMakeLists.txt new file mode 100644 index 000000000..25e9159b2 --- /dev/null +++ b/lib/corrosion/test/parse_target_triple/parse_target_triple/CMakeLists.txt @@ -0,0 +1,106 @@ +# This test is supposed to ensure that the regex in _corrosion_parse_platform works as expected. +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +# Todo: Test if the output matches expectations. +_corrosion_parse_target_triple("../../blah/x86_64-unknown-custom-gnu.json" arch vendor os env) +_corrosion_parse_target_triple("x86_64-unknown-custom-gnu.json" arch vendor os env) +_corrosion_parse_target_triple("/path/to/x86_64-unknown-custom-musl.json" arch vendor os env) +_corrosion_parse_target_triple("../../blah/x86_64-custom_os.json" arch vendor os env) + +# List of builtin targets aquired via `rustup target list` with rust 1.64 on Linux. 
+set(rustup_shipped_targets + "aarch64-apple-darwin" + "aarch64-apple-ios" + "aarch64-apple-ios-sim" + "aarch64-fuchsia" + "aarch64-linux-android" + "aarch64-pc-windows-msvc" + "aarch64-unknown-linux-gnu" + "aarch64-unknown-linux-musl" + "aarch64-unknown-none" + "aarch64-unknown-none-softfloat" + "arm-linux-androideabi" + "arm-unknown-linux-gnueabi" + "arm-unknown-linux-gnueabihf" + "arm-unknown-linux-musleabi" + "arm-unknown-linux-musleabihf" + "armebv7r-none-eabi" + "armebv7r-none-eabihf" + "armv5te-unknown-linux-gnueabi" + "armv5te-unknown-linux-musleabi" + "armv7-linux-androideabi" + "armv7-unknown-linux-gnueabi" + "armv7-unknown-linux-gnueabihf" + "armv7-unknown-linux-musleabi" + "armv7-unknown-linux-musleabihf" + "armv7a-none-eabi" + "armv7r-none-eabi" + "armv7r-none-eabihf" + "asmjs-unknown-emscripten" + "i586-pc-windows-msvc" + "i586-unknown-linux-gnu" + "i586-unknown-linux-musl" + "i686-linux-android" + "i686-pc-windows-gnu" + "i686-pc-windows-msvc" + "i686-unknown-freebsd" + "i686-unknown-linux-gnu" + "i686-unknown-linux-musl" + "mips-unknown-linux-gnu" + "mips-unknown-linux-musl" + "mips64-unknown-linux-gnuabi64" + "mips64-unknown-linux-muslabi64" + "mips64el-unknown-linux-gnuabi64" + "mips64el-unknown-linux-muslabi64" + "mipsel-unknown-linux-gnu" + "mipsel-unknown-linux-musl" + "nvptx64-nvidia-cuda" + "powerpc-unknown-linux-gnu" + "powerpc64-unknown-linux-gnu" + "powerpc64le-unknown-linux-gnu" + "riscv32i-unknown-none-elf" + "riscv32imac-unknown-none-elf" + "riscv32imc-unknown-none-elf" + "riscv64gc-unknown-linux-gnu" + "riscv64gc-unknown-none-elf" + "riscv64imac-unknown-none-elf" + "s390x-unknown-linux-gnu" + "sparc64-unknown-linux-gnu" + "sparcv9-sun-solaris" + "thumbv6m-none-eabi" + "thumbv7em-none-eabi" + "thumbv7em-none-eabihf" + "thumbv7m-none-eabi" + "thumbv7neon-linux-androideabi" + "thumbv7neon-unknown-linux-gnueabihf" + "thumbv8m.base-none-eabi" + "thumbv8m.main-none-eabi" + "thumbv8m.main-none-eabihf" + "wasm32-unknown-emscripten" + 
"wasm32-unknown-unknown" + "wasm32-wasi" + "x86_64-apple-darwin" + "x86_64-apple-ios" + "x86_64-fortanix-unknown-sgx" + "x86_64-fuchsia" + "x86_64-linux-android" + "x86_64-pc-solaris" + "x86_64-pc-windows-gnu" + "x86_64-pc-windows-msvc" + "x86_64-sun-solaris" + "x86_64-unknown-freebsd" + "x86_64-unknown-illumos" + "x86_64-unknown-linux-gnu" + "x86_64-unknown-linux-gnux32" + "x86_64-unknown-linux-musl" + "x86_64-unknown-netbsd" + "x86_64-unknown-none" + "x86_64-unknown-redox" +) +set(other_targets riscv32imc-esp-espidf xtensa-esp32s3-none-elf) + +foreach(target ${rustup_shipped_targets} ${other_targets}) + _corrosion_parse_target_triple("${target}" arch vendor os env) +endforeach() diff --git a/lib/corrosion/test/parse_target_triple/parse_target_triple_should_fail/CMakeLists.txt b/lib/corrosion/test/parse_target_triple/parse_target_triple_should_fail/CMakeLists.txt new file mode 100644 index 000000000..59d90e800 --- /dev/null +++ b/lib/corrosion/test/parse_target_triple/parse_target_triple_should_fail/CMakeLists.txt @@ -0,0 +1,6 @@ +# This test is supposed to ensure that the regex in _corrosion_parse_platform works as expected. +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +_corrosion_parse_target_triple("x86_64-unknown-linux-gnu-toomuch" arch vendor os env) diff --git a/lib/corrosion/test/rust2cpp/CMakeLists.txt b/lib/corrosion/test/rust2cpp/CMakeLists.txt new file mode 100644 index 000000000..4df9b288c --- /dev/null +++ b/lib/corrosion/test/rust2cpp/CMakeLists.txt @@ -0,0 +1,9 @@ +corrosion_tests_add_test(rust2cpp "cpp-exe;cpp-exe-shared") + +set_tests_properties("rust2cpp_run_cpp-exe" PROPERTIES PASS_REGULAR_EXPRESSION + "^Hello, Cpp! I'm Rust!\r?\n$" + ) + +set_tests_properties("rust2cpp_run_cpp-exe-shared" PROPERTIES PASS_REGULAR_EXPRESSION + "^Hello, Cpp! 
I'm Rust!\r?\n$" + ) diff --git a/lib/corrosion/test/rust2cpp/rust2cpp/CMakeLists.txt b/lib/corrosion/test/rust2cpp/rust2cpp/CMakeLists.txt new file mode 100644 index 000000000..e14de93a9 --- /dev/null +++ b/lib/corrosion/test/rust2cpp/rust2cpp/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml) + +add_executable(cpp-exe main.cpp) +target_link_libraries(cpp-exe PUBLIC rust_lib) + +add_executable(cpp-exe-shared main.cpp) +target_link_libraries(cpp-exe-shared + PUBLIC rust_lib-shared) diff --git a/lib/corrosion/test/rust2cpp/rust2cpp/main.cpp b/lib/corrosion/test/rust2cpp/rust2cpp/main.cpp new file mode 100644 index 000000000..785c0bc3a --- /dev/null +++ b/lib/corrosion/test/rust2cpp/rust2cpp/main.cpp @@ -0,0 +1,9 @@ +extern "C" void rust_function(char const *name); + +int main(int argc, char **argv) { + if (argc < 2) { + rust_function("Cpp"); + } else { + rust_function(argv[1]); + } +} diff --git a/lib/corrosion/test/rust2cpp/rust2cpp/rust/Cargo.lock b/lib/corrosion/test/rust2cpp/rust2cpp/rust/Cargo.lock new file mode 100644 index 000000000..5dc1732e2 --- /dev/null +++ b/lib/corrosion/test/rust2cpp/rust2cpp/rust/Cargo.lock @@ -0,0 +1,5 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "rust-lib" +version = "0.1.0" diff --git a/lib/corrosion/test/rust2cpp/rust2cpp/rust/Cargo.toml b/lib/corrosion/test/rust2cpp/rust2cpp/rust/Cargo.toml new file mode 100644 index 000000000..ab91e6204 --- /dev/null +++ b/lib/corrosion/test/rust2cpp/rust2cpp/rust/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rust-lib" +version = "0.1.0" +authors = ["Andrew Gaspar "] +license = "MIT" +edition = "2018" + +[dependencies] + +[lib] +crate-type=["staticlib", "cdylib"] diff --git a/lib/corrosion/test/rust2cpp/rust2cpp/rust/build.rs b/lib/corrosion/test/rust2cpp/rust2cpp/rust/build.rs new file mode 100644 index 000000000..9dfeaa0be --- /dev/null +++ b/lib/corrosion/test/rust2cpp/rust2cpp/rust/build.rs @@ -0,0 +1,4 @@ +// Build-scripts also need to be linked, so just add a dummy buildscript ensuring this works. +fn main() { + println!("Build-script is running.") +} diff --git a/lib/corrosion/test/rust2cpp/rust2cpp/rust/src/lib.rs b/lib/corrosion/test/rust2cpp/rust2cpp/rust/src/lib.rs new file mode 100644 index 000000000..194e56507 --- /dev/null +++ b/lib/corrosion/test/rust2cpp/rust2cpp/rust/src/lib.rs @@ -0,0 +1,7 @@ +use std::os::raw::c_char; + +#[no_mangle] +pub extern "C" fn rust_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust!", name); +} diff --git a/lib/corrosion/test/rustflags/CMakeLists.txt b/lib/corrosion/test/rustflags/CMakeLists.txt new file mode 100644 index 000000000..fd2ea5297 --- /dev/null +++ b/lib/corrosion/test/rustflags/CMakeLists.txt @@ -0,0 +1,7 @@ +corrosion_tests_add_test(rustflags "rustflags-cpp-exe") + +set_tests_properties("rustflags_run_rustflags-cpp-exe" PROPERTIES PASS_REGULAR_EXPRESSION + "Hello, Cpp! I'm Rust!\r?\nHello, Cpp again! I'm Rust in (Debug|Release) mode again!\r?\nHello, Cpp again! 
I'm Rust again, third time the charm!\r?\n$" + ) + +corrosion_tests_add_test(cargo_config_rustflags "cargo_config_rustflags") diff --git a/lib/corrosion/test/rustflags/cargo_config_rustflags/.cargo/config.toml b/lib/corrosion/test/rustflags/cargo_config_rustflags/.cargo/config.toml new file mode 100644 index 000000000..fb683000e --- /dev/null +++ b/lib/corrosion/test/rustflags/cargo_config_rustflags/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["--cfg=some_cargo_config_rustflag"] diff --git a/lib/corrosion/test/rustflags/cargo_config_rustflags/CMakeLists.txt b/lib/corrosion/test/rustflags/cargo_config_rustflags/CMakeLists.txt new file mode 100644 index 000000000..507bde8c6 --- /dev/null +++ b/lib/corrosion/test/rustflags/cargo_config_rustflags/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH Cargo.toml) + +# Do not use `corrosion_add_target_rustflags()` here, since we want to test if the rustflag from `.cargo/config.toml` +# is picked up. + +# Local rustflags should not interfere with `.cargo/config.toml`, so enable one. +corrosion_add_target_local_rustflags(cargo_config_rustflags "--cfg=local_rustflag") diff --git a/lib/corrosion/test/rustflags/cargo_config_rustflags/Cargo.lock b/lib/corrosion/test/rustflags/cargo_config_rustflags/Cargo.lock new file mode 100644 index 000000000..906ca1141 --- /dev/null +++ b/lib/corrosion/test/rustflags/cargo_config_rustflags/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "cargo_config_rustflags" +version = "0.1.0" diff --git a/lib/corrosion/test/rustflags/cargo_config_rustflags/Cargo.toml b/lib/corrosion/test/rustflags/cargo_config_rustflags/Cargo.toml new file mode 100644 index 000000000..298b193e9 --- /dev/null +++ b/lib/corrosion/test/rustflags/cargo_config_rustflags/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "cargo_config_rustflags" +version = "0.1.0" +edition = "2018" diff --git a/lib/corrosion/test/rustflags/cargo_config_rustflags/src/main.rs b/lib/corrosion/test/rustflags/cargo_config_rustflags/src/main.rs new file mode 100644 index 000000000..f4919bb1d --- /dev/null +++ b/lib/corrosion/test/rustflags/cargo_config_rustflags/src/main.rs @@ -0,0 +1,16 @@ + +#[cfg(some_cargo_config_rustflag)] +fn print_line() { + println!("Rustflag is enabled"); +} + +// test that local rustflags don't override global rustflags set via `.cargo/config` +#[cfg(local_rustflag)] +fn test_local_rustflag() { + println!("local_rustflag was enabled"); +} + +fn main() { + print_line(); + test_local_rustflag(); +} diff --git a/lib/corrosion/test/rustflags/rustflags/CMakeLists.txt b/lib/corrosion/test/rustflags/rustflags/CMakeLists.txt new file mode 100644 index 000000000..9a4f25cd3 --- /dev/null +++ b/lib/corrosion/test/rustflags/rustflags/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate(MANIFEST_PATH rust/Cargo.toml) + +add_executable(rustflags-cpp-exe main.cpp) +target_link_libraries(rustflags-cpp-exe PUBLIC rustflag_test_lib) + +# Test --cfg=key="value" rustflag. +corrosion_add_target_rustflags(rustflag_test_lib --cfg=test_rustflag_cfg1="test_rustflag_cfg1_value") + +# Test using a generator expression to produce a rustflag and passing multiple rustflags. 
+corrosion_add_target_rustflags(rustflag_test_lib + --cfg=test_rustflag_cfg2="$,$>,debug,release>" + "--cfg=test_rustflag_cfg3" +) + +corrosion_add_target_local_rustflags(rustflag_test_lib "--cfg=test_local_rustflag1") +corrosion_add_target_local_rustflags(rustflag_test_lib --cfg=test_local_rustflag2="value") diff --git a/lib/corrosion/test/rustflags/rustflags/main.cpp b/lib/corrosion/test/rustflags/rustflags/main.cpp new file mode 100644 index 000000000..0b9b1b92a --- /dev/null +++ b/lib/corrosion/test/rustflags/rustflags/main.cpp @@ -0,0 +1,13 @@ +extern "C" void rust_function(char const *name); +extern "C" void rust_second_function(char const *name); +extern "C" void rust_third_function(char const *name); + +int main(int argc, char **argv) { + if (argc < 2) { + rust_function("Cpp"); + rust_second_function("Cpp again"); + rust_third_function("Cpp again"); + } else { + rust_function(argv[1]); + } +} diff --git a/lib/corrosion/test/rustflags/rustflags/rust/Cargo.lock b/lib/corrosion/test/rustflags/rustflags/rust/Cargo.lock new file mode 100644 index 000000000..333ce249d --- /dev/null +++ b/lib/corrosion/test/rustflags/rustflags/rust/Cargo.lock @@ -0,0 +1,14 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "rustflag-test-lib" +version = "0.1.0" +dependencies = [ + "some_dependency", +] + +[[package]] +name = "some_dependency" +version = "0.1.0" diff --git a/lib/corrosion/test/rustflags/rustflags/rust/Cargo.toml b/lib/corrosion/test/rustflags/rustflags/rust/Cargo.toml new file mode 100644 index 000000000..2bdd26f76 --- /dev/null +++ b/lib/corrosion/test/rustflags/rustflags/rust/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "rustflag-test-lib" +version = "0.1.0" +license = "MIT" +edition = "2018" + +[dependencies] +some_dependency = { path = "some_dependency" } + +[lib] +crate-type=["staticlib"] diff --git a/lib/corrosion/test/rustflags/rustflags/rust/some_dependency/Cargo.toml b/lib/corrosion/test/rustflags/rustflags/rust/some_dependency/Cargo.toml new file mode 100644 index 000000000..94627d036 --- /dev/null +++ b/lib/corrosion/test/rustflags/rustflags/rust/some_dependency/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "some_dependency" +version = "0.1.0" +license = "MIT" +edition = "2018" + diff --git a/lib/corrosion/test/rustflags/rustflags/rust/some_dependency/src/lib.rs b/lib/corrosion/test/rustflags/rustflags/rust/some_dependency/src/lib.rs new file mode 100644 index 000000000..d240a7caa --- /dev/null +++ b/lib/corrosion/test/rustflags/rustflags/rust/some_dependency/src/lib.rs @@ -0,0 +1,10 @@ +//! Test that the local rustflags are only passed to the main crate and not to dependencies. 
+#[cfg(test_local_rustflag1)] +const _: [(); 1] = [(); 2]; + +#[cfg(test_local_rustflag2 = "value")] +const _: [(); 1] = [(); 2]; + +pub fn some_function() -> u32 { + 42 +} diff --git a/lib/corrosion/test/rustflags/rustflags/rust/src/lib.rs b/lib/corrosion/test/rustflags/rustflags/rust/src/lib.rs new file mode 100644 index 000000000..f6da1f6e1 --- /dev/null +++ b/lib/corrosion/test/rustflags/rustflags/rust/src/lib.rs @@ -0,0 +1,40 @@ +#[cfg(test_rustflag_cfg1 = "test_rustflag_cfg1_value")] +use std::os::raw::c_char; + +#[no_mangle] +#[cfg(test_rustflag_cfg1 = "test_rustflag_cfg1_value")] +pub extern "C" fn rust_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust!", name); +} + +#[no_mangle] +#[cfg(all(debug_assertions, test_rustflag_cfg2 = "debug"))] +pub extern "C" fn rust_second_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust in Debug mode again!", name); +} + +#[no_mangle] +#[cfg(all(not(debug_assertions), test_rustflag_cfg2 = "release"))] +pub extern "C" fn rust_second_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! I'm Rust in Release mode again!", name); +} + +#[no_mangle] +#[cfg(test_rustflag_cfg3)] +pub extern "C" fn rust_third_function(name: *const c_char) { + let name = unsafe { std::ffi::CStr::from_ptr(name).to_str().unwrap() }; + println!("Hello, {}! 
I'm Rust again, third time the charm!", name); + assert_eq!(some_dependency::some_function(), 42); +} + +#[cfg(not(test_rustflag_cfg3))] +const _: [(); 1] = [(); 2]; + +#[cfg(not(test_local_rustflag1))] +const _: [(); 1] = [(); 2]; + +#[cfg(not(test_local_rustflag2 = "value"))] +const _: [(); 1] = [(); 2]; diff --git a/lib/corrosion/test/workspace/CMakeLists.txt b/lib/corrosion/test/workspace/CMakeLists.txt new file mode 100644 index 000000000..39bec842c --- /dev/null +++ b/lib/corrosion/test/workspace/CMakeLists.txt @@ -0,0 +1,6 @@ +corrosion_tests_add_test(workspace "my_program") + +set_tests_properties("workspace_run_my_program" PROPERTIES PASS_REGULAR_EXPRESSION + "^Ok\r?\n$" + ) + diff --git a/lib/corrosion/test/workspace/workspace/CMakeLists.txt b/lib/corrosion/test/workspace/workspace/CMakeLists.txt new file mode 100644 index 000000000..71a8c7d8f --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.15) +project(test_project VERSION 0.1.0) +include(../../test_header.cmake) + +corrosion_import_crate( + MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Cargo.toml + CRATES member1 member2 + IMPORTED_CRATES imported_crate_list +) + +#NOTE: member3 also contains a binary called my_program, but that shouldn't be a problem since it is not imported +add_executable(my_program main.cpp) +target_link_libraries(my_program PUBLIC member1 member2) + +# Test that the list of imported crates matches our expectations. +if(NOT DEFINED imported_crate_list) + message(FATAL_ERROR "Corrosion failed to set the variable passed via IMPORTED_CRATES.") +endif() +set(expected_crates member1 member2) +foreach(crate ${expected_crates}) + if(NOT "${crate}" IN_LIST imported_crate_list) + message(FATAL_ERROR "Expected ${crate} to be imported, but it wasn't. 
Imported crate list:\n" + "${imported_crate_list}" + ) + endif() +endforeach() +set(additional_crates ${imported_crate_list}) +list(REMOVE_ITEM additional_crates ${expected_crates}) +if(additional_crates) + message(FATAL_ERROR "Corrosion unexpectedly imported the following crates: ${additional_crates}") +endif() + diff --git a/lib/corrosion/test/workspace/workspace/Cargo.lock b/lib/corrosion/test/workspace/workspace/Cargo.lock new file mode 100644 index 000000000..d99a3234e --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/Cargo.lock @@ -0,0 +1,18 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "member1" +version = "0.1.0" + +[[package]] +name = "member2" +version = "0.1.0" + +[[package]] +name = "member3" +version = "0.1.0" +dependencies = [ + "member1", +] diff --git a/lib/corrosion/test/workspace/workspace/Cargo.toml b/lib/corrosion/test/workspace/workspace/Cargo.toml new file mode 100644 index 000000000..997ca5e24 --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/Cargo.toml @@ -0,0 +1,5 @@ +[workspace] +members=["member1", "member2", "member3"] + +[workspace.package] +version = "0.1.0" diff --git a/lib/corrosion/test/workspace/workspace/main.cpp b/lib/corrosion/test/workspace/workspace/main.cpp new file mode 100644 index 000000000..5102b28e6 --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/main.cpp @@ -0,0 +1,4 @@ +#include +int main() { + std::cout << "Ok"; +} diff --git a/lib/corrosion/test/workspace/workspace/member1/Cargo.toml b/lib/corrosion/test/workspace/workspace/member1/Cargo.toml new file mode 100644 index 000000000..b8a4d0a6d --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/member1/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "member1" +version = "0.1.0" +edition = "2018" +description = "descr;\"hello\\" + +[lib] +crate-type = [ "lib", "cdylib" ] diff --git a/lib/corrosion/test/workspace/workspace/member1/src/lib.rs 
b/lib/corrosion/test/workspace/workspace/member1/src/lib.rs new file mode 100644 index 000000000..31e1bb209 --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/member1/src/lib.rs @@ -0,0 +1,7 @@ +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} diff --git a/lib/corrosion/test/workspace/workspace/member2/Cargo.toml b/lib/corrosion/test/workspace/workspace/member2/Cargo.toml new file mode 100644 index 000000000..9fd2fe423 --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/member2/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "member2" +version = "0.1.0" +authors = ["Olivier Goffart "] +edition = "2018" + +[lib] +crate-type = ["staticlib"] diff --git a/lib/corrosion/test/workspace/workspace/member2/src/lib.rs b/lib/corrosion/test/workspace/workspace/member2/src/lib.rs new file mode 100644 index 000000000..31e1bb209 --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/member2/src/lib.rs @@ -0,0 +1,7 @@ +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} diff --git a/lib/corrosion/test/workspace/workspace/member3/Cargo.toml b/lib/corrosion/test/workspace/workspace/member3/Cargo.toml new file mode 100644 index 000000000..ac7ef1f58 --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/member3/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "member3" +version = "0.1.0" +authors = ["Olivier Goffart "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[[bin]] +name = "my_program" +path = "src/main.rs" + +[dependencies] +member1 = { path = "../member1" } diff --git a/lib/corrosion/test/workspace/workspace/member3/src/main.rs b/lib/corrosion/test/workspace/workspace/member3/src/main.rs new file mode 100644 index 000000000..e7a11a969 --- /dev/null +++ b/lib/corrosion/test/workspace/workspace/member3/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/src/CMakeLists.txt 
b/src/CMakeLists.txt index 89bc7f31b..893d8ad29 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -138,7 +138,7 @@ else () message("-- OMPTL sorting fallback") endif () -target_link_libraries(mmseqs-framework tinyexpr ${ZSTD_LIBRARIES} microtar tantan) +target_link_libraries(mmseqs-framework tinyexpr ${ZSTD_LIBRARIES} microtar tantan block_aligner_c) # if (CYGWIN) # target_link_libraries(mmseqs-framework nedmalloc) # endif () diff --git a/src/alignment/Alignment.cpp b/src/alignment/Alignment.cpp index 59625c842..d1f4db067 100644 --- a/src/alignment/Alignment.cpp +++ b/src/alignment/Alignment.cpp @@ -292,7 +292,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con std::vector swResults; swResults.reserve(300); - Matcher matcher(querySeqType, targetSeqType, maxMatcherSeqLen, m, &evaluer, compBiasCorrection, compBiasCorrectionScale, gapOpen, gapExtend, correlationScoreWeight, zdrop); + Matcher matcher(querySeqType, maxMatcherSeqLen, m, &evaluer, compBiasCorrection, compBiasCorrectionScale, gapOpen, gapExtend, correlationScoreWeight, zdrop); std::vector swRealignResults; Matcher *realigner = NULL; @@ -300,7 +300,7 @@ void Alignment::run(const std::string &outDB, const std::string &outDBIndex, con swRealignResults.reserve(300); realigner = &matcher; if (realign_m != NULL) { - realigner = new Matcher(querySeqType, targetSeqType, maxMatcherSeqLen, realign_m, &evaluer, compBiasCorrection, compBiasCorrectionScale, gapOpen, gapExtend, 0.0, zdrop); + realigner = new Matcher(querySeqType, maxMatcherSeqLen, realign_m, &evaluer, compBiasCorrection, compBiasCorrectionScale, gapOpen, gapExtend, 0.0, zdrop); } } diff --git a/src/alignment/EvalueComputation.h b/src/alignment/EvalueComputation.h index ff31cd17e..2b95fa31c 100644 --- a/src/alignment/EvalueComputation.h +++ b/src/alignment/EvalueComputation.h @@ -60,7 +60,12 @@ class EvalueComputation { 5.0543294182155085, 15.130999712620039, 5.0543294182155085, 15.130999712620039, 
5.0543962679167036, 15.129930117400917}}, - + {"nucleotide.out", 5, 2, true, {0.62092274139392822363, 0.35177597988201619872, + 0.74528059208662511548, -0.71027220445456995535, + 0.74528059208662511548, -0.71027220445456995535, + 1.0135243407674570104, -2.5226486486783059604, + 1.0135243407674570104, -2.5226486486783059604, + 1.0031949332622873694, -2.3780369436059309862 }}, {"blosum62.out", 11, 1, true, {0.27359865037097330642, 0.044620920658722244834, 1.5938724404943873658, -19.959867650284412122, 1.5938724404943873658, -19.959867650284412122, @@ -139,7 +144,6 @@ class EvalueComputation { // evaluer.parameters().beta_I<<"\t" << // evaluer.parameters().sigma<<"\t" << // evaluer.parameters().tau<<"\t" << std::endl; - } delete [] tmpMatData; delete [] tmpMat; diff --git a/src/alignment/Matcher.cpp b/src/alignment/Matcher.cpp index efce6684e..34d9a7214 100644 --- a/src/alignment/Matcher.cpp +++ b/src/alignment/Matcher.cpp @@ -7,18 +7,17 @@ #include -Matcher::Matcher(int querySeqType, int targetSeqType, int maxSeqLen, BaseMatrix *m, EvalueComputation * evaluer, +Matcher::Matcher(int querySeqType, int maxSeqLen, BaseMatrix *m, EvalueComputation * evaluer, bool aaBiasCorrection, float aaBiasCorrectionScale, int gapOpen, int gapExtend, float correlationScoreWeight, int zdrop) - : gapOpen(gapOpen), gapExtend(gapExtend), correlationScoreWeight(correlationScoreWeight), m(m), evaluer(evaluer), tinySubMat(NULL) { - setSubstitutionMatrix(m); - + : gapOpen(gapOpen), gapExtend(gapExtend), correlationScoreWeight(correlationScoreWeight), m(m), evaluer(evaluer), tinySubMat(NULL) { if (Parameters::isEqualDbtype(querySeqType, Parameters::DBTYPE_NUCLEOTIDES)) { nuclaligner = new BandedNucleotideAligner(m, maxSeqLen, gapOpen, gapExtend, zdrop); aligner = NULL; } else { nuclaligner = NULL; aligner = new SmithWaterman(maxSeqLen, m->alphabetSize, aaBiasCorrection, - aaBiasCorrectionScale, targetSeqType); + aaBiasCorrectionScale, (SubstitutionMatrix*) m); + setSubstitutionMatrix(m); } 
//std::cout << "lambda=" << lambdaLog2 << " logKLog2=" << logKLog2 << std::endl; } @@ -77,10 +76,10 @@ Matcher::result_t Matcher::getSWResult(Sequence* dbSeq, const int diagonal, bool alignmentMode = Matcher::SCORE_COV_SEQID; } else { if (isIdentity == false) { - alignment = aligner->ssw_align(dbSeq->numSequence, dbSeq->numConsensusSequence, - dbSeq->getAlignmentProfile(), dbSeq->L, backtrace, + alignment = aligner->ssw_align(dbSeq->numSequence, + dbSeq->L, backtrace, gapOpen, gapExtend, alignmentMode, evalThr, evaluer, covMode, - covThr, correlationScoreWeight, maskLen, dbSeq->getId()); + covThr, correlationScoreWeight, maskLen); } else { alignment = aligner->scoreIdentical(dbSeq->numSequence, dbSeq->L, evaluer, alignmentMode, backtrace); } @@ -106,7 +105,7 @@ Matcher::result_t Matcher::getSWResult(Sequence* dbSeq, const int diagonal, bool // try to estimate sequence id if(alignmentMode == Matcher::SCORE_COV_SEQID){ // compute sequence id - if(alignment.cigar){ + if (backtrace.size() > 0) { // OVERWRITE alnLength with gapped value alnLength = backtrace.size(); } diff --git a/src/alignment/Matcher.h b/src/alignment/Matcher.h index a9c28beb4..4deab4392 100644 --- a/src/alignment/Matcher.h +++ b/src/alignment/Matcher.h @@ -142,7 +142,7 @@ class Matcher{ } }; - Matcher(int querySeqType, int targetSeqType, int maxSeqLen, BaseMatrix *m, + Matcher(int querySeqType, int maxSeqLen, BaseMatrix *m, EvalueComputation * evaluer, bool aaBiasCorrection, float aaBiasCorrectionScale, int gapOpen, int gapExtend, float correlationScoreWeight, int zdrop); diff --git a/src/alignment/MultipleAlignment.cpp b/src/alignment/MultipleAlignment.cpp index efe957538..e36a3b6e0 100644 --- a/src/alignment/MultipleAlignment.cpp +++ b/src/alignment/MultipleAlignment.cpp @@ -4,6 +4,7 @@ #include "Sequence.h" #include "SubstitutionMatrix.h" #include "Util.h" +#include "Orf.h" MultipleAlignment::MultipleAlignment(size_t maxSeqLen, SubstitutionMatrix *subMat) : subMat(subMat), maxSeqLen(maxSeqLen), 
maxMsaSeqLen(maxSeqLen * 2) { @@ -91,17 +92,29 @@ size_t MultipleAlignment::updateGapsInCenterSequence(char **msaSequence, Sequenc } return centerSeqPos; } - void MultipleAlignment::updateGapsInSequenceSet(char **msaSequence, size_t centerSeqSize, const std::vector> &seqs, const std::vector &alignmentResults, unsigned int *queryGaps, bool noDeletionMSA) { for(size_t i = 0; i < seqs.size(); i++) { const Matcher::result_t& result = alignmentResults[i]; - const std::string& bt = result.backtrace; - char *edgeSeqMSA = msaSequence[i+1]; - const std::vector &edgeSeq = seqs[i]; unsigned int queryPos = result.qStartPos; + unsigned int queryEndPos = result.qEndPos; unsigned int targetPos = result.dbStartPos; + unsigned int targetEndPos = result.dbEndPos; + std::string bt = result.backtrace; + bool reverse = false; + if (queryPos > queryEndPos){ + // swap start and end position of query and db + std::swap(queryPos, queryEndPos); + std::swap(targetPos, targetEndPos); + // flip backtrace + std::reverse(bt.begin(), bt.end()); + reverse = true; + } + + char *edgeSeqMSA = msaSequence[i+1]; + const std::vector &edgeSeq = seqs[i]; + // HACK: score was 0 and sequence was rejected, so we fill in an empty gap sequence // Needed for pairaln with dummy sequences if(targetPos == UINT_MAX) { @@ -114,7 +127,7 @@ void MultipleAlignment::updateGapsInSequenceSet(char **msaSequence, size_t cente } size_t bufferPos = 0; // fill initial positions with gaps (local alignment) - for(int pos = 0; pos < result.qStartPos; pos++){ + for(unsigned int pos = 0; pos < queryPos; pos++){ edgeSeqMSA[bufferPos] = '-'; bufferPos++; } @@ -132,10 +145,12 @@ void MultipleAlignment::updateGapsInSequenceSet(char **msaSequence, size_t cente if(bt.at(alnPos) == 'D'){ while (alnPos < bt.size() && bt.at(alnPos) == 'D') { if(noDeletionMSA == false) { - edgeSeqMSA[bufferPos] = subMat->num2aa[edgeSeq[targetPos]]; + unsigned char letter = (reverse == true) ? 
Orf::complement(subMat->num2aa[edgeSeq[targetPos]]) : + subMat->num2aa[edgeSeq[targetPos]]; + edgeSeqMSA[bufferPos] = letter; bufferPos++; } - targetPos++; + targetPos += (reverse == true) ? -1 : 1; alnPos++; } if(alnPos >= bt.size()){ @@ -145,10 +160,12 @@ void MultipleAlignment::updateGapsInSequenceSet(char **msaSequence, size_t cente bufferPos++; queryPos++; } else if(bt.at(alnPos) == 'M'){ - edgeSeqMSA[bufferPos] = subMat->num2aa[edgeSeq[targetPos]]; + unsigned char letter = (reverse == true) ? Orf::complement(subMat->num2aa[edgeSeq[targetPos]]) : + subMat->num2aa[edgeSeq[targetPos]]; + edgeSeqMSA[bufferPos] = letter; bufferPos++; queryPos++; - targetPos++; + targetPos += (reverse == true) ? -1 : 1; } continue; }else if(bt.at(alnPos) == 'M'){ @@ -161,11 +178,13 @@ void MultipleAlignment::updateGapsInSequenceSet(char **msaSequence, size_t cente } } // M state - edgeSeqMSA[bufferPos] = subMat->num2aa[edgeSeq[targetPos]]; + unsigned char letter = (reverse == true) ? Orf::complement(subMat->num2aa[edgeSeq[targetPos]]) : + subMat->num2aa[edgeSeq[targetPos]]; + edgeSeqMSA[bufferPos] = letter; bufferPos++; queryPos++; - targetPos++; + targetPos += (reverse == true) ? 
-1 : 1; } } } diff --git a/src/alignment/StripedSmithWaterman.cpp b/src/alignment/StripedSmithWaterman.cpp index d0d80e4c5..ffe0744b0 100644 --- a/src/alignment/StripedSmithWaterman.cpp +++ b/src/alignment/StripedSmithWaterman.cpp @@ -30,26 +30,33 @@ #include "Util.h" #include "SubstitutionMatrix.h" #include "Debug.h" +#include "block_aligner.h" #include +#define MAX_SIZE 4096 + +struct s_block{ + PaddedBytes* query; + PosBias* query_bias; + AAMatrix* mat_aa; + BlockHandle block_trace; + int16_t* query_bias_arr; +}; + SmithWaterman::SmithWaterman(size_t maxSequenceLength, int aaSize, bool aaBiasCorrection, - float aaBiasCorrectionScale, int targetSeqType) { + float aaBiasCorrectionScale, SubstitutionMatrix * subMat) { maxSequenceLength += 1; + this->subMat = subMat; this->aaBiasCorrectionScale = aaBiasCorrectionScale; this->aaBiasCorrection = aaBiasCorrection; - int segmentSize = (maxSequenceLength+7)/8; - segSize = segmentSize; + // int32_t alignment needs larger seqSize, was +7/8 for word before + segSize = (maxSequenceLength+3)/4; vHStore = (simd_int*) mem_align(ALIGN_INT, segSize * sizeof(simd_int)); vHLoad = (simd_int*) mem_align(ALIGN_INT, segSize * sizeof(simd_int)); vE = (simd_int*) mem_align(ALIGN_INT, segSize * sizeof(simd_int)); vHmax = (simd_int*) mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - // setting up target - target_profile_byte = (simd_int*) mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); - - - isTargetProfile = Parameters::isEqualDbtype(targetSeqType, Parameters::DBTYPE_HMM_PROFILE); isQueryProfile = false; // setting up query @@ -57,55 +64,43 @@ SmithWaterman::SmithWaterman(size_t maxSequenceLength, int aaSize, bool aaBiasCo // query profile profile->profile_byte = (simd_int*)mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); profile->profile_word = (simd_int*)mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); - profile->profile_rev_byte = (simd_int*)mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); + 
profile->profile_int = (simd_int*)mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); + + profile->profile_rev_byte = (simd_int*)mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); profile->profile_rev_word = (simd_int*)mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); -#ifdef GAP_POS_SCORING - profile->profile_gDelOpen_byte = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gDelOpen_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gDelClose_byte = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gDelClose_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gIns_byte = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gIns_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gDelOpen_rev_byte = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gDelOpen_rev_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gDelClose_rev_byte = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gDelClose_rev_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gIns_rev_byte = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->profile_gIns_rev_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->gDelOpen = new uint8_t[maxSequenceLength]; - profile->gDelClose = new uint8_t[maxSequenceLength]; - profile->gDelOpen_rev = new uint8_t[maxSequenceLength]; - profile->gDelClose_rev = new uint8_t[maxSequenceLength]; - profile->gIns_rev = new uint8_t[maxSequenceLength]; -#endif - // query consensus profile - profile->consens_byte = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->consens_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->consens_rev_byte 
= (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - profile->consens_rev_word = (simd_int*)mem_align(ALIGN_INT, segSize * sizeof(simd_int)); - // query sequence + profile->profile_rev_int = (simd_int*)mem_align(ALIGN_INT, aaSize * segSize * sizeof(simd_int)); + + // query sequence profile->query_sequence = new int8_t[maxSequenceLength]; profile->query_rev_sequence = new int8_t[maxSequenceLength]; - profile->query_consens_sequence = new int8_t[maxSequenceLength]; - profile->query_consens_rev_sequence = new int8_t[maxSequenceLength]; profile->composition_bias = new int8_t[maxSequenceLength]; profile->composition_bias_rev = new int8_t[maxSequenceLength]; profile->profile_word_linear = new short*[aaSize]; profile_word_linear_data = new short[aaSize*maxSequenceLength]; - profile->mat_rev = new int8_t[std::max(maxSequenceLength, (size_t)aaSize) * aaSize * 2]; + profile->profile_int_linear = new int32_t*[aaSize]; + profile_int_linear_data = new int32_t[aaSize*maxSequenceLength]; + profile->mat_rev = new int8_t[std::max(maxSequenceLength, (size_t)aaSize) * aaSize * 2]; // why multiply 2? 
profile->mat = new int8_t[std::max(maxSequenceLength, (size_t)aaSize) * aaSize * 2]; tmp_composition_bias = new float[maxSequenceLength]; scorePerCol = new int8_t[maxSequenceLength]; /* array to record the largest score of each reference position */ - maxColumn = new uint8_t[maxSequenceLength*sizeof(uint16_t)]; - memset(maxColumn, 0, maxSequenceLength*sizeof(uint16_t)); + maxColumn = new uint8_t[maxSequenceLength*sizeof(uint32_t)]; + memset(maxColumn, 0, maxSequenceLength*sizeof(uint32_t)); memset(profile->query_sequence, 0, maxSequenceLength * sizeof(int8_t)); memset(profile->query_rev_sequence, 0, maxSequenceLength * sizeof(int8_t)); - memset(profile->query_consens_sequence, 0, maxSequenceLength * sizeof(int8_t)); - memset(profile->query_consens_rev_sequence, 0, maxSequenceLength * sizeof(int8_t)); memset(profile->mat_rev, 0, maxSequenceLength * aaSize); memset(profile->composition_bias, 0, maxSequenceLength * sizeof(int8_t)); memset(profile->composition_bias_rev, 0, maxSequenceLength * sizeof(int8_t)); + + // blockaligner + block = new s_block(); + block->query = block_new_padded_aa(maxSequenceLength, MAX_SIZE); + block->query_bias = block_new_pos_bias(maxSequenceLength, MAX_SIZE); + block->mat_aa = block_new_simple_aamatrix(1, -1); + block->block_trace = block_new_aa_trace_xdrop(maxSequenceLength, maxSequenceLength, MAX_SIZE); + block->query_bias_arr = new int16_t[maxSequenceLength]; + + profile->pos_aa_rev = new int8_t[maxSequenceLength * 32]; } SmithWaterman::~SmithWaterman(){ @@ -113,53 +108,39 @@ SmithWaterman::~SmithWaterman(){ free(vHLoad); free(vE); free(vHmax); - free(target_profile_byte); free(profile->profile_byte); free(profile->profile_word); + free(profile->profile_int); free(profile->profile_rev_byte); free(profile->profile_rev_word); - free(profile->consens_byte); - free(profile->consens_word); - free(profile->consens_rev_byte); - free(profile->consens_rev_word); -#ifdef GAP_POS_SCORING - free(profile->profile_gDelOpen_byte); - 
free(profile->profile_gDelOpen_word); - free(profile->profile_gDelClose_byte); - free(profile->profile_gDelClose_word); - free(profile->profile_gIns_byte); - free(profile->profile_gIns_word); - free(profile->profile_gDelOpen_rev_byte); - free(profile->profile_gDelOpen_rev_word); - free(profile->profile_gDelClose_rev_byte); - free(profile->profile_gDelClose_rev_word); - free(profile->profile_gIns_rev_byte); - free(profile->profile_gIns_rev_word); - delete[] profile->gDelOpen; - delete[] profile->gDelClose; - delete[] profile->gDelOpen_rev; - delete[] profile->gDelClose_rev; - delete[] profile->gIns_rev; -#endif + free(profile->profile_rev_int); delete [] profile->query_rev_sequence; delete [] profile->query_sequence; - delete [] profile->query_consens_sequence; - delete [] profile->query_consens_rev_sequence; delete [] profile->composition_bias; delete [] profile->composition_bias_rev; delete [] profile->profile_word_linear; delete [] profile_word_linear_data; + delete [] profile->profile_int_linear; + delete [] profile_int_linear_data; delete [] profile->mat_rev; delete [] profile->mat; delete [] tmp_composition_bias; delete [] scorePerCol; delete [] maxColumn; + delete [] profile->pos_aa_rev; delete profile; + + block_free_padded_aa(block->query); + block_free_pos_bias(block->query_bias); + block_free_aamatrix(block->mat_aa); + block_free_aa_trace_xdrop(block->block_trace); + delete [] block->query_bias_arr; + delete block; } /* Generate query profile rearrange query sequence & calculate the weight of match/mismatch. 
*/ -template +template void SmithWaterman::createQueryProfile(simd_int *profile, const int8_t *query_sequence, const int8_t * composition_bias, const int8_t *mat, const int32_t query_length, const int32_t aaSize, uint8_t bias, const int32_t offset, const int32_t entryLength) { const int32_t segLen = (query_length + Elements - 1) / Elements; @@ -171,7 +152,15 @@ void SmithWaterman::createQueryProfile(simd_int *profile, const int8_t *query_se // if will be optmized out by compiler if(type == SUBSTITUTIONMATRIX) { // substitution score for query_seq constrained by nt // query_sequence starts from 1 to n - *t++ = ( j >= query_length) ? bias : mat[nt * aaSize + query_sequence[j + offset ]] + composition_bias[j + offset] + bias; // mat[nt][q[j]] mat eq 20*20 + // *t++ = ( j >= query_length) ? bias : mat[nt * aaSize + query_sequence[j + offset ]] + composition_bias[j + offset] + bias; // mat[nt][q[j]] mat eq 20*20 + if (j >= query_length) { + *t++ = bias; + } else { + const int q = query_sequence[j + offset]; + const float cb = composition_bias[j + offset]; + + *t++ = mat[nt * aaSize + q] + cb + bias; + } } if(type == PROFILE) { // profile starts by 0 // *t++ = (j >= query_length) ? bias : (mat[nt * entryLength + (j + (offset - 1))] + bias); //mat eq L*20 // mat[nt][j] @@ -182,38 +171,11 @@ void SmithWaterman::createQueryProfile(simd_int *profile, const int8_t *query_se } j += segLen; } + // std::cout << std::endl; } } } -template -void SmithWaterman::createConsensProfile(simd_int *profile, const int8_t *consens_sequence, const int32_t query_length, const int32_t offset) { - const int32_t segLen = (query_length + Elements - 1) / Elements; - T* t = (T*) profile; - for (int32_t i = 0; i < segLen; i++) { - int32_t j = i; - for (size_t segNum = 0; LIKELY(segNum < Elements); segNum++) { - // beyond the length of query so pad with neutral consensus values - *t++ = (j >= query_length) ? 20 : consens_sequence[j + offset]; -// *t++ = (j >= query_length) ? 
20 : consens_sequence[j + (offset - 1)]; - j += segLen; - } - } - -} - - -void SmithWaterman::createTargetProfile(simd_int *profile, const int8_t *mat, const int target_length, - const int32_t aaSize, uint8_t bias) { - const int32_t segSize = 32; - int8_t* t = (int8_t*) profile; - for (int i = 0; i < target_length; i++) { - for (int j = 0; j < segSize; j++) { - // beyond the length of amino acids so pad with neutral weights - *t++ = (j >= aaSize) ? bias : mat[i + j * target_length] + bias; - } - } -} template void SmithWaterman::updateQueryProfile(simd_int *profile, const int32_t query_length, const int32_t aaSize, uint8_t shift) { @@ -254,31 +216,8 @@ void SmithWaterman::reverseMat(int8_t *mat_rev, const int8_t *mat, const int32_t } } -#ifdef GAP_POS_SCORING -template -void createGapProfile(simd_int* profile_gDelOpen, simd_int* profile_gDelClose, simd_int* profile_gIns, - const uint8_t* gDelOpen, const uint8_t* gDelClose, const uint8_t* gIns, - const int32_t query_length, const int32_t offset) { - const int32_t segLen = (query_length - offset + Elements - 1) / Elements; - T* delOpen = (T*) profile_gDelOpen; - T* delClose = (T*) profile_gDelClose; - T* ins = (T*) profile_gIns; - for (int32_t i = 0; LIKELY(i < segLen); ++i) { - int32_t j = i; - for (size_t segNum = 0; LIKELY(segNum < Elements); ++segNum) { - *delOpen++ = (j < query_length) ? gDelOpen[j + offset + 1] : 0; // offset + 1 because it calculates F for the next column - *delClose++ = (j < query_length) ? gDelClose[j + offset + 1] : 0; - *ins++ = (j < query_length) ? 
gIns[j + offset] : 0; - j += segLen; - } - } -} -#endif - s_align SmithWaterman::ssw_align ( const unsigned char *db_num_sequence, - const unsigned char *db_consens_sequence, - const int8_t *db_mat, int32_t db_length, std::string & backtrace, const uint8_t gap_open, @@ -287,92 +226,121 @@ s_align SmithWaterman::ssw_align ( const double evalueThr, EvalueComputation * evaluer, const int covMode, const float covThr, const float correlationScoreWeight, - const int32_t maskLen, const size_t id) { + const int32_t maskLen) { s_align alignment; // check if both query and target are profiles - if (isQueryProfile && isTargetProfile) { - alignment = ssw_align_private(db_consens_sequence, db_mat, db_length, backtrace, gap_open, - gap_extend, alignmentMode, evalueThr, evaluer, covMode, covThr, correlationScoreWeight, maskLen, id); - } else if (isQueryProfile && !isTargetProfile) { - alignment = ssw_align_private(db_num_sequence, db_mat, db_length, backtrace, gap_open, - gap_extend, alignmentMode, evalueThr, evaluer, covMode, covThr, correlationScoreWeight, maskLen, id); - } else if (!isQueryProfile && isTargetProfile) { - alignment = ssw_align_private(db_num_sequence, db_mat, db_length, backtrace, gap_open, - gap_extend, alignmentMode, evalueThr, evaluer, covMode, covThr, correlationScoreWeight, maskLen, id); + if (profile->isProfile) { + alignment = ssw_align_private(db_num_sequence, db_length, backtrace, gap_open, + gap_extend, alignmentMode, evalueThr, evaluer, covMode, covThr, correlationScoreWeight, maskLen); } else { - alignment = ssw_align_private(db_num_sequence, db_mat, db_length, backtrace, gap_open, - gap_extend, alignmentMode, evalueThr, evaluer, covMode, covThr, correlationScoreWeight, maskLen, id); + alignment = ssw_align_private(db_num_sequence, db_length, backtrace, gap_open, + gap_extend, alignmentMode, evalueThr, evaluer, covMode, covThr, correlationScoreWeight, maskLen); } return alignment; } -template + +template s_align SmithWaterman::ssw_align_private ( + 
const unsigned char *db_sequence, + int32_t db_length, + std::string & backtrace, + const uint8_t gap_open, + const uint8_t gap_extend, + const uint8_t alignmentMode, // (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled + const double evalueThr, + EvalueComputation * evaluer, + const int covMode, const float covThr, const float correlationScoreWeight, + const int32_t maskLen) { + + int32_t query_length = profile->query_length; + + // find the alignment position + s_align align = alignScoreEndPos(db_sequence, db_length, gap_open, gap_extend, maskLen); + + // no residue could be aligned + if (align.dbEndPos1 == -1) { + return align; + } + + align.qCov = computeCov(0, align.qEndPos1, query_length); + align.tCov = computeCov(0, align.dbEndPos1, db_length); + + bool hasLowerCoverage = !(Util::hasCoverage(covThr, covMode, align.qCov, align.tCov)); + align.evalue = evaluer->computeEvalue(align.score1, query_length); + bool hasLowerEvalue = align.evalue > evalueThr; + + if (alignmentMode == 0 || ((alignmentMode == 2 || alignmentMode == 1) && (hasLowerEvalue || hasLowerCoverage))) { + return align; + } + + // run very shot and long overflowing alignments with SW instead of block aligner + // short alignments are very fast with byte SW, long alignments produce slightly different scores FIXME + if (align.word != 1) { + return alignStartPosBacktrace(db_sequence, db_length, gap_open, gap_extend, alignmentMode, backtrace, align, evaluer, covMode, covThr, correlationScoreWeight, maskLen); + } + + bool blockAlignFailed = false; + s_align alignTmp = alignStartPosBacktraceBlock(db_sequence, db_length, gap_open, gap_extend, backtrace, align); + if (align.score1 == UINT32_MAX) { + blockAlignFailed = true; + } else { + align = 
alignTmp; + } + + if (blockAlignFailed) { + Debug(Debug::WARNING) << "Block alignment failed, falling back to Smith-Waterman\n"; + align = alignStartPosBacktrace(db_sequence, db_length, gap_open, gap_extend, alignmentMode, backtrace, align, evaluer, covMode, covThr, correlationScoreWeight, maskLen); + } + + // Check is needed (Below is for alignStartPosBacktraceBlock not for alignStartPosBacktrace since as it's already handled internally.) + // align.qCov = computeCov(align.qStartPos1, align.qEndPos1, query_length); + // align.tCov = computeCov(align.dbStartPos1, align.dbEndPos1, db_length); + // hasLowerCoverage = !(Util::hasCoverage(covThr, covMode, align.qCov, align.tCov)); + + return align; +} + +template +s_align SmithWaterman::alignScoreEndPos ( const unsigned char *db_sequence, - const int8_t *db_mat, int32_t db_length, - std::string & backtrace, - const uint8_t gap_open, + const uint8_t gap_open, const uint8_t gap_extend, - const uint8_t alignmentMode, // (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled - const double evalueThr, - EvalueComputation * evaluer, - const int covMode, const float covThr, const float correlationScoreWeight, - const int32_t maskLen, const size_t id) { + const int32_t maskLen) { + int32_t query_length = profile->query_length; - target_id = id; - int32_t word = 0, query_length = profile->query_length; - int32_t band_width = 0; - cigar* path; s_align r; r.dbStartPos1 = -1; r.qStartPos1 = -1; r.cigar = 0; r.cigarLen = 0; - std::pair bests; - std::pair bests_reverse; - - simd_int* db_profile_byte = target_profile_byte; - - const int32_t qry_n = profile->query_length; - const int32_t db_n = db_length; - const unsigned char * db_consens_seq = db_sequence; - const int8_t *db_matrix = 
db_mat; + std::pair bests; + if (!profile->profile_byte) { + Debug(Debug::ERROR) << "Not initialized profile\n"; + } + // 1. byte + bests = sw_sse2_byte(db_sequence, 0, db_length, query_length, gap_open, gap_extend, + profile->profile_byte, UCHAR_MAX, profile->bias, maskLen); + r.word = 0; + // 2. word + if (bests.first.score == 255) { + bests = sw_sse2_word(db_sequence, 0, db_length, query_length, gap_open, gap_extend, + profile->profile_word, USHRT_MAX, maskLen); + r.word = 1; + } + // 3. int + // Comment out int32_t now for benchmark + if (bests.first.score == INT16_MAX) { + bests = sw_sse2_int(db_sequence, 0, db_length, query_length, gap_open, gap_extend, + profile->profile_int, USHRT_MAX, maskLen); + r.word = 2; + } - // find the alignment position - if (profile->profile_byte) { - if (type == PROFILE_PROFILE) { - uint8_t db_bias = computeBias(db_length, db_mat, profile->alphabetSize); - if (db_bias > profile->bias) { - uint8_t shift = abs(profile->bias - db_bias); - updateQueryProfile(profile->profile_byte, profile->query_length, profile->alphabetSize, shift); - } - profile->bias = std::max(db_bias, profile->bias); - createTargetProfile(db_profile_byte, db_mat, db_length, profile->alphabetSize - 1, profile->bias); - } - bests = sw_sse2_byte(db_sequence, db_profile_byte, 0, db_length, query_length, gap_open, gap_extend, - profile->profile_byte, profile->consens_byte, -#ifdef GAP_POS_SCORING - profile->profile_gDelOpen_byte, profile->profile_gDelClose_byte, profile->profile_gIns_byte, -#endif - UCHAR_MAX, profile->bias, maskLen); - if (bests.first.score == 255) { - bests = sw_sse2_word(db_sequence, db_profile_byte, 0, db_length, query_length, gap_open, gap_extend, - profile->profile_word, profile->consens_word, -#ifdef GAP_POS_SCORING - profile->profile_gDelOpen_word, profile->profile_gDelClose_word, profile->profile_gIns_word, -#endif - USHRT_MAX, profile->bias, maskLen); - word = 1; - } - } else { - fprintf(stderr, "Please call the function ssw_init before 
ssw_align.\n"); - EXIT(EXIT_FAILURE); - } r.score1 = bests.first.score; - r.dbEndPos1 = bests.first.ref; - r.qEndPos1 = bests.first.read; + r.dbEndPos1 = bests.first.ref; + r.qEndPos1 = bests.first.read; if (maskLen >= 15) { r.score2 = bests.second.score; @@ -381,142 +349,300 @@ s_align SmithWaterman::ssw_align_private ( r.score2 = 0; r.ref_end2 = -1; } + return r; +} - // no residue could be aligned - if (r.dbEndPos1 == -1) { - return r; - } - int32_t queryOffset = query_length - r.qEndPos1 -1; - r.evalue = evaluer->computeEvalue(r.score1, query_length); - bool hasLowerEvalue = r.evalue > evalueThr; - r.qCov = computeCov(0, r.qEndPos1, query_length); - r.tCov = computeCov(0, r.dbEndPos1, db_length); - bool hasLowerCoverage = !(Util::hasCoverage(covThr, covMode, r.qCov, r.tCov)); +template +s_align SmithWaterman::alignStartPosBacktraceBlock( + const unsigned char *db_sequence, + int32_t db_length, + const uint8_t gap_open, + const uint8_t gap_extend, + std::string & backtrace, + s_align r) { + size_t query_len = profile->query_length; + size_t target_len = db_length; + Gaps gaps; + gaps.open = -gap_open; + gaps.extend = -gap_extend; + + int32_t target_score = r.score1; + AAProfile* queryProfile = nullptr; + PosBias* target_bias = nullptr; + + // set query + int32_t queryAlnLen = r.qEndPos1 + 1; + int32_t queryStartPos = query_len - queryAlnLen; + if (type == PROFILE_SEQ) { + queryProfile = block_new_aaprofile(queryAlnLen, MAX_SIZE, gaps.extend); + // Fill pos_aa_block and aa_pos_block with the relevant range(0-qEndPos, queryAlnLen) + // extracted from the matrix initialized in ssw_init(0-qLen) for blockaligner + // Replaced block_set_aaprofile, which set every position independently + int8_t* pos_aa_block = aaprofile_pos_aa(queryProfile); + int16_t* aa_pos_block = aaprofile_aa_pos(queryProfile); + size_t curr_len_block = block_get_curr_len_aaprofile(queryProfile); + + for (int i = 0; i < queryAlnLen; i++) { + // source: &profile->pos_aa_rev[(queryStartPos + i) * 
32] + // dest: &pos_aa_block[(i + 1) * 32] + memcpy( + &pos_aa_block[(i + 1) * 32], + &profile->pos_aa_rev[(queryStartPos + i) * 32], + subMat->alphabetSize + ); + } + // transpose pos_aa_block to aa_pos_block + for (int i = 0; i <= queryAlnLen; i++) { + for (int b = 0; b < subMat->alphabetSize; b++) { // or 32 'A'-'Z' + int8_t val = pos_aa_block[i * 32 + b]; + aa_pos_block[b * curr_len_block + i] = static_cast(val); + } + } + // set all gap open and close values, including the costs of padding + block_set_all_gap_open_C_aaprofile(queryProfile, gaps.open); + block_set_all_gap_close_C_aaprofile(queryProfile, 0); + block_set_all_gap_open_R_aaprofile(queryProfile, gaps.open); + } else if (type == SEQ_SEQ) { + // Since we use num in traceback, we don't need to convert num to aa. + // block_set_bytes_padded_aa(block->query, (const uint8_t*) (block->query_sequence_str.data() + queryStartPos), queryAlnLen, MAX_SIZE); + block_set_bytes_padded_aa_numsequence(block->query, (const uint8_t*) (profile->query_rev_sequence + queryStartPos), queryAlnLen, MAX_SIZE); + block_set_pos_bias(block->query_bias, block->query_bias_arr + queryStartPos, queryAlnLen); + } + // set target + int32_t targetAlnLen = r.dbEndPos1 + 1; + int32_t targetStartPos = target_len - targetAlnLen; + PaddedBytes* target = block_new_padded_aa(target_len, MAX_SIZE); + + // Since we use num in traceback, we don't need to convert num to aa. 
+ // std::string db_sequence_str; + // // copy this db_sequence,db_sequence + r.dbEndPos1 + 1 in reverse order to db_sequence_str and mappping to ascii using subMat->num2aa + // for(int i = targetAlnLen - 1; i >= 0; i--){ + // db_sequence_str.push_back(subMat->num2aa[db_sequence[i]]); + // } + // block_set_bytes_padded_aa(target, (const uint8_t*) db_sequence_str.data(), targetAlnLen, MAX_SIZE); + int8_t* db_rev_sequence = new int8_t[target_len]; + std::reverse_copy(db_sequence, db_sequence + db_length, db_rev_sequence); + block_set_bytes_padded_aa_numsequence(target, (const uint8_t*) (db_rev_sequence+targetStartPos), targetAlnLen, MAX_SIZE); + + if (type == SEQ_SEQ){ + target_bias = block_new_pos_bias(targetAlnLen, MAX_SIZE); // fill 0 + } - if (alignmentMode == 0 || ((alignmentMode == 2 || alignmentMode == 1) && (hasLowerEvalue || hasLowerCoverage))) { - return r; + // substitute profile values to queryProfile + AlignResult res; + size_t min_size = 32; + res.score = -1000000000; + res.query_idx = -1; + res.reference_idx = -1; + + if (type == SEQ_SEQ){ + while (min_size <= MAX_SIZE && res.score < target_score) { + // allow max block size to grow + SizeRange range; + range.min = min_size; + range.max = MAX_SIZE; + // estimated x-drop threshold + int32_t x_drop = -(min_size * gaps.extend + gaps.open); + block_align_aa_trace_xdrop_posbias(block->block_trace, block->query, block->query_bias, target, target_bias, + block->mat_aa, gaps, range, x_drop); + res = block_res_aa_trace_xdrop(block->block_trace); + min_size *= 2; + } + } else if (type == PROFILE_SEQ) { + while (min_size <= MAX_SIZE && res.score < target_score) { + // allow max block size to grow + SizeRange range; + range.min = min_size; + range.max = MAX_SIZE; + // estimated x-drop threshold + int32_t x_drop = -(min_size * gaps.extend + gaps.open); + block_align_profile_aa_trace_xdrop(block->block_trace, target, queryProfile, range, x_drop); + res = block_res_aa_trace_xdrop(block->block_trace); + min_size *= 2; 
+ } } - if (word == 0) { - if (type == PROFILE_SEQ || type == PROFILE_PROFILE) { - createQueryProfile(profile->profile_rev_byte, profile->query_rev_sequence, NULL, profile->mat_rev, - r.qEndPos1 + 1, profile->alphabetSize, profile->bias, queryOffset, profile->query_length); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - createGapProfile(profile->profile_gDelOpen_rev_byte, profile->profile_gDelClose_rev_byte, profile->profile_gIns_rev_byte, - profile->gDelOpen_rev, profile->gDelClose_rev, profile->gIns_rev, profile->query_length, queryOffset); + size_t cigar_len, queryPos, targetPos; + uint32_t aaIds; + + Cigar* cigar = block_new_cigar(res.query_idx, res.reference_idx); + // char ops_char[] = {' ', 'M', '=', 'X', 'I', 'D'}; + if (res.score != target_score && !(target_score == INT16_MAX && res.score >= target_score)) { + r.score1 = UINT32_MAX; + goto cleanup; + } + + block_cigar_aa_trace_xdrop(block->block_trace, res.query_idx, res.reference_idx, cigar); + cigar_len = block_len_cigar(cigar); + + // Note: 'M' signals either query match or mismatch + aaIds = 0; + queryPos = 0; + targetPos = 0; + + for (size_t i = 0; i < cigar_len; i++) { + OpLen o = block_get_cigar(cigar, i); + if(o.op == 1){ + for(size_t j = 0; j < o.len; j++){ + // change traceback with int not char + if(profile->query_rev_sequence[queryPos + j + queryStartPos] == db_rev_sequence[targetPos + j + targetStartPos]){ + aaIds++; + } } -#endif - if (type == PROFILE_PROFILE) { - createConsensProfile(profile->consens_rev_byte, profile->query_consens_rev_sequence, - r.qEndPos1 + 1, queryOffset); - } - } else { - createQueryProfile(profile->profile_rev_byte, - profile->query_rev_sequence, - profile->composition_bias_rev, - profile->mat,r.qEndPos1 + 1, - profile->alphabetSize, profile->bias, - queryOffset, 0); - } - bests_reverse = sw_sse2_byte(db_sequence, db_profile_byte, 1, r.dbEndPos1 + 1, r.qEndPos1 + 1, gap_open, - gap_extend, profile->profile_rev_byte, profile->consens_rev_byte, -#ifdef 
GAP_POS_SCORING - profile->profile_gDelOpen_rev_byte, profile->profile_gDelClose_rev_byte, profile->profile_gIns_rev_byte, -#endif - r.score1, profile->bias, maskLen); - } else { - if (type == PROFILE_SEQ || type == PROFILE_PROFILE) { - createQueryProfile(profile->profile_rev_word, - profile->query_rev_sequence, NULL, profile->mat_rev, - r.qEndPos1 + 1, profile->alphabetSize, 0, queryOffset, - profile->query_length); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - createGapProfile(profile->profile_gDelOpen_rev_word, - profile->profile_gDelClose_rev_word, - profile->profile_gIns_rev_word, profile->gDelOpen_rev, - profile->gDelClose_rev, profile->gIns_rev, - profile->query_length, queryOffset); + queryPos += o.len; + targetPos += o.len; + backtrace.append(o.len,'M'); + }else if(o.op == 4){ + switch (type) { + case SEQ_SEQ: + queryPos += o.len; + backtrace.append(o.len,'I'); + break; + case PROFILE_SEQ: + targetPos += o.len; + backtrace.append(o.len,'D'); + break; } -#endif - if (type == PROFILE_PROFILE) { - createConsensProfile(profile->consens_rev_word, - profile->query_consens_rev_sequence, - r.qEndPos1 + 1, queryOffset); - } - } else { - createQueryProfile(profile->profile_rev_word, - profile->query_rev_sequence, - profile->composition_bias_rev, - profile->mat, - r.qEndPos1 + 1, profile->alphabetSize, 0, - queryOffset, 0); - } - bests_reverse = sw_sse2_word(db_sequence, db_profile_byte, 1, r.dbEndPos1 + 1, r.qEndPos1 + 1, gap_open, - gap_extend, - profile->profile_rev_word, profile->consens_rev_word, -#ifdef GAP_POS_SCORING - profile->profile_gDelOpen_rev_word, profile->profile_gDelClose_rev_word, profile->profile_gIns_rev_word, -#endif - r.score1, profile->bias, maskLen); - } + }else if(o.op == 5){ + switch (type) { + case SEQ_SEQ: + targetPos += o.len; + backtrace.append(o.len,'D'); + break; + case PROFILE_SEQ: + queryPos += o.len; + backtrace.append(o.len,'I'); + break; + } + } + } + r.identicalAACnt = aaIds; + + //reverse backtrace + 
std::reverse(backtrace.begin(), backtrace.end()); + r.qStartPos1 = (r.qEndPos1 + 1) - queryPos; + r.dbStartPos1 = (r.dbEndPos1 + 1) - targetPos; + r.qCov = computeCov(r.qStartPos1, r.qEndPos1, query_len); + r.tCov = computeCov(r.dbStartPos1, r.dbEndPos1, db_length); - if(bests_reverse.first.score != r.score1){ - fprintf(stderr, "Score of forward/backward SW differ: %d %d. Q: %lu T: %lu.\n", r.score1, bests_reverse.first.score, query_id, target_id); - fprintf(stderr, "Start: Q: %d, T: %d. End: Q: %d, T %d\n", r.qEndPos1 - bests_reverse.first.read, bests_reverse.first.ref, r.qEndPos1, r.dbEndPos1); - // if qry is not a profile, just exit - if (!(type == PROFILE_SEQ) || !(type == PROFILE_PROFILE)) { +cleanup: + block_free_padded_aa(target); + block_free_cigar(cigar); + if (type == PROFILE_SEQ) { + block_free_aaprofile(queryProfile); + } else if (type == SEQ_SEQ) { + block_free_pos_bias(target_bias); + } + delete [] db_rev_sequence; + return r; +} + +template +s_align SmithWaterman::alignStartPosBacktrace ( + const unsigned char *db_sequence, + int32_t db_length, + const uint8_t gap_open, + const uint8_t gap_extend, + const uint8_t alignmentMode, // (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled + std::string & backtrace, + s_align r, + EvalueComputation * evaluer, + const int covMode, const float covThr, + const float correlationScoreWeight, + const int32_t maskLen) { + int32_t query_length = profile->query_length; + int32_t queryOffset = query_length - r.qEndPos1 - 1; + + std::pair bests_reverse; + + // Find the beginning position of the best alignment. 
+ if (r.word == 0) { + if (type == PROFILE_SEQ) { // or type == PROFILE_SEQ + createQueryProfile(profile->profile_rev_byte, profile->query_rev_sequence, NULL, profile->mat_rev, + r.qEndPos1 + 1, profile->alphabetSize, profile->bias, queryOffset, profile->query_length); + } else if (type == SEQ_SEQ) { // or type == SEQ_SEQ + createQueryProfile(profile->profile_rev_byte, profile->query_rev_sequence, profile->composition_bias_rev, profile->mat, + r.qEndPos1 + 1, profile->alphabetSize, profile->bias, queryOffset, 0); + } else { + fprintf(stderr, "Unknown type in alignStartPosBacktrace: %d\n", type); + EXIT(EXIT_FAILURE); + } + bests_reverse = sw_sse2_byte(db_sequence, 1, r.dbEndPos1 + 1, r.qEndPos1 + 1, gap_open, + gap_extend, profile->profile_rev_byte, + r.score1, profile->bias, maskLen); + } else if (r.word == 1) { + if (type == PROFILE_SEQ) { + createQueryProfile(profile->profile_rev_word, profile->query_rev_sequence, NULL, profile->mat_rev, + r.qEndPos1 + 1, profile->alphabetSize, 0, queryOffset, profile->query_length); + } else if (type == SEQ_SEQ) { + createQueryProfile(profile->profile_rev_word, profile->query_rev_sequence, profile->composition_bias_rev, profile->mat, + r.qEndPos1 + 1, profile->alphabetSize, 0, queryOffset, 0); + } else { + fprintf(stderr, "Unknown type in alignStartPosBacktrace: %d\n", type); + EXIT(EXIT_FAILURE); + } + bests_reverse = sw_sse2_word(db_sequence, 1, r.dbEndPos1 + 1, r.qEndPos1 + 1, gap_open, + gap_extend, profile->profile_rev_word, + r.score1, maskLen); + } + // Comment out int32_t now for benchmark + else if (r.word == 2) { + if ((type == PROFILE_SEQ)) { + createQueryProfile(profile->profile_rev_int, profile->query_rev_sequence, NULL, profile->mat_rev, + r.qEndPos1 + 1, profile->alphabetSize, 0, queryOffset, profile->query_length); + } else if (type == SEQ_SEQ) { + createQueryProfile(profile->profile_rev_int, profile->query_rev_sequence, profile->composition_bias_rev, profile->mat, + r.qEndPos1 + 1, profile->alphabetSize, 0, 
queryOffset, 0); + } + bests_reverse = sw_sse2_int(db_sequence, 1, r.dbEndPos1 + 1, r.qEndPos1 + 1, gap_open, + gap_extend, profile->profile_rev_int, + r.score1, maskLen); + } + + if(bests_reverse.first.score != r.score1){ + Debug(Debug::ERROR) << "r.word: " << r.word << "\n"; + Debug(Debug::ERROR) << "bests_reverse.first.score: " << bests_reverse.first.score << "\n"; + Debug(Debug::ERROR) << "r.score1: " << r.score1 << "\n"; + Debug(Debug::ERROR) << "Score of forward/backward SW differ. This should not happen.\n"; + Debug(Debug::ERROR) << "Start: Q: " << (r.qEndPos1 - bests_reverse.first.read) << ", T: " << bests_reverse.first.ref << ". End: Q: " << r.qEndPos1 << ", T " << r.dbEndPos1 << "\n"; + // if qry is not a profile, just exit + if (!(type == PROFILE_SEQ)) { EXIT(EXIT_FAILURE); } - } + } - r.dbStartPos1 = bests_reverse.first.ref; - r.qStartPos1 = r.qEndPos1 - bests_reverse.first.read; + r.dbStartPos1 = bests_reverse.first.ref; + r.qStartPos1 = r.qEndPos1 - bests_reverse.first.read; if (r.dbStartPos1 == -1) { fprintf(stderr, "Target start position is -1. This should not happen.\n"); EXIT(EXIT_FAILURE); } - r.qCov = computeCov(r.qStartPos1, r.qEndPos1, query_length); - r.tCov = computeCov(r.dbStartPos1, r.dbEndPos1, db_length); - hasLowerCoverage = !(Util::hasCoverage(covThr, covMode, r.qCov, r.tCov)); + r.qCov = computeCov(r.qStartPos1, r.qEndPos1, query_length); + r.tCov = computeCov(r.dbStartPos1, r.dbEndPos1, db_length); + bool hasLowerCoverage = !(Util::hasCoverage(covThr, covMode, r.qCov, r.tCov)); + // only start and end point are needed if (alignmentMode == 1 || hasLowerCoverage) { return r; } - // Generate cigar. -// db_length and query_length updated + // Generate cigar. 
db_length = r.dbEndPos1 - r.dbStartPos1 + 1; query_length = r.qEndPos1 - r.qStartPos1 + 1; - band_width = abs(db_length - query_length) + 1; - + int32_t band_width = abs(db_length - query_length) + 1; - // TODO: fix banded_sw - if (type == PROFILE_PROFILE) { - path = banded_sw(db_consens_seq + r.dbStartPos1, profile->query_sequence + r.qStartPos1, profile->query_consens_sequence + r.qStartPos1, NULL, db_length, - query_length, r.qStartPos1, r.dbStartPos1, r.score1, gap_open, gap_extend, -#ifdef GAP_POS_SCORING - profile->gDelOpen + r.qStartPos1, profile->gDelClose + r.qStartPos1, profile->gIns + r.qStartPos1, -#endif - band_width, profile->mat, db_matrix, qry_n, db_n); - } else if (type == PROFILE_SEQ) { - path = banded_sw(db_sequence + r.dbStartPos1, profile->query_sequence + r.qStartPos1, NULL, profile->composition_bias + r.qStartPos1, db_length, - query_length, r.qStartPos1, r.dbStartPos1, r.score1, gap_open, gap_extend, -#ifdef GAP_POS_SCORING - profile->gDelOpen + r.qStartPos1, profile->gDelClose + r.qStartPos1, profile->gIns + r.qStartPos1, -#endif - band_width, profile->mat, NULL, profile->query_length, 0); + cigar* path; + if (type == PROFILE_SEQ) { + path = banded_sw(db_sequence + r.dbStartPos1, profile->query_sequence + r.qStartPos1, profile->composition_bias + r.qStartPos1, db_length, + query_length, r.qStartPos1, r.score1, gap_open, gap_extend, + band_width, profile->mat, profile->query_length); } else { - path = banded_sw(db_sequence + r.dbStartPos1, profile->query_sequence + r.qStartPos1, NULL, profile->composition_bias + r.qStartPos1, db_length, - query_length, r.qStartPos1, r.dbStartPos1, r.score1, gap_open, gap_extend, -#ifdef GAP_POS_SCORING - nullptr, nullptr, nullptr, -#endif - band_width, profile->mat, NULL, profile->alphabetSize, 0); + path = banded_sw(db_sequence + r.dbStartPos1, profile->query_sequence + r.qStartPos1, profile->composition_bias + r.qStartPos1, db_length, + query_length, r.qStartPos1, r.score1, gap_open, gap_extend, + 
band_width, profile->mat, profile->alphabetSize); db_length = r.dbEndPos1 - r.dbStartPos1 + 1; query_length = r.qEndPos1 - r.qStartPos1 + 1; band_width = abs(db_length - query_length) + 1; @@ -526,25 +652,43 @@ s_align SmithWaterman::ssw_align_private ( r.cigar = path->seq; r.cigarLen = path->length; } - delete path; uint32_t aaIds = 0; size_t mStateCnt = 0; - if (type == PROFILE_PROFILE) { - computerBacktrace(profile, db_sequence, r, backtrace, aaIds, scorePerCol, mStateCnt); - }else{ - computerBacktrace(profile, db_sequence, r, backtrace, aaIds, scorePerCol, mStateCnt); - } + // Need check below for Profile_seq + computerBacktrace(profile, db_sequence, r, backtrace, aaIds, scorePerCol, mStateCnt); r.identicalAACnt = aaIds; - if(correlationScoreWeight > 0.0){ + if(correlationScoreWeight > 0.0){ int correlationScore = computeCorrelationScore(scorePerCol, mStateCnt); r.score1 += static_cast(correlationScore) * correlationScoreWeight; r.evalue = evaluer->computeEvalue(r.score1, query_length); + } + if(path != NULL) { + delete path; } return r; } -template +template +s_align SmithWaterman::ssw_align_private(const unsigned char*, int32_t, std::string&, const uint8_t, const uint8_t, const uint8_t, const double, EvalueComputation*, const int, const float, const float, const int32_t); +template +s_align SmithWaterman::ssw_align_private(const unsigned char*, int32_t, std::string&, const uint8_t, const uint8_t, const uint8_t, const double, EvalueComputation*, const int, const float, const float, const int32_t); + +template +s_align SmithWaterman::alignScoreEndPos(const unsigned char*, int32_t, const uint8_t, const uint8_t, const int32_t); +template +s_align SmithWaterman::alignScoreEndPos(const unsigned char*, int32_t, const uint8_t, const uint8_t, const int32_t); + +template +s_align SmithWaterman::alignStartPosBacktraceBlock(const unsigned char*, int32_t, const uint8_t, const uint8_t, std::string&, s_align); +template +s_align 
SmithWaterman::alignStartPosBacktraceBlock(const unsigned char*, int32_t, const uint8_t, const uint8_t, std::string&, s_align); + +template +s_align SmithWaterman::alignStartPosBacktrace(const unsigned char*, int32_t, const uint8_t, const uint8_t, const uint8_t, std::string&, s_align, EvalueComputation*, const int, const float, const float, const int32_t); +template +s_align SmithWaterman::alignStartPosBacktrace(const unsigned char*, int32_t, const uint8_t, const uint8_t, const uint8_t, std::string&, s_align, EvalueComputation*, const int, const float, const float, const int32_t); + void SmithWaterman::computerBacktrace(s_profile * query, const unsigned char * db_sequence, s_align & alignment, std::string & backtrace, uint32_t & aaIds, int8_t * scorePerCol, size_t & mStatesCnt){ @@ -556,12 +700,7 @@ void SmithWaterman::computerBacktrace(s_profile * query, const unsigned char * d for (uint32_t i = 0; i < length; ++i){ if (letter == 'M') { aaIds += (db_sequence[targetPos] == query->query_sequence[queryPos]); - if(type == PROFILE){ - scorePerCol[mStatesCnt] = query->mat[db_sequence[targetPos] * query->query_length + queryPos]; - } - if(type == SUBSTITUTIONMATRIX){ - scorePerCol[mStatesCnt] = query->mat[query->query_sequence[queryPos] * query->alphabetSize + db_sequence[targetPos]] + query->composition_bias[queryPos]; - } + scorePerCol[mStatesCnt] = query->mat[query->query_sequence[queryPos] * query->alphabetSize + db_sequence[targetPos]] + query->composition_bias[queryPos]; ++mStatesCnt; ++queryPos; ++targetPos; @@ -635,29 +774,21 @@ int SmithWaterman::computeCorrelationScore(int8_t * scorePreCol, size_t length){ } -template +template std::pair SmithWaterman::sw_sse2_byte ( const unsigned char *db_sequence, - const simd_int* db_profile_byte, int8_t ref_dir, // 0: forward ref; 1: reverse ref int32_t db_length, int32_t query_length, const uint8_t gap_open, /* will be used as - */ const uint8_t gap_extend, /* will be used as - */ const simd_int* query_profile_byte, /* 
profile_byte loaded in ssw_init */ - const simd_int* query_consens_byte, /* profile_consens_byte loaded in ssw_init */ -#ifdef GAP_POS_SCORING - const simd_int *gap_open_del, - const simd_int *gap_close_del, - const simd_int *gap_open_ins, -#endif uint8_t terminate, /* the best alignment score: used to terminate the matrix calculation when locating the alignment beginning point. If this score is set to 0, it will not be used */ uint8_t bias, /* Shift 0 point to a positive value. */ int32_t maskLen) { - uint8_t max = 0; /* the max alignment score */ int32_t end_query = query_length - 1; int32_t end_db = -1; /* 0_based best alignment ending point; Initialized as isn't aligned -1. */ @@ -703,13 +834,7 @@ std::pair SmithWater step = -1; } -#ifndef AVX2 - const simd_int sixten = simdi8_set(16); - const simd_int fiveten = simdi8_set(15); -#endif - // store the query consensus profile - const simd_int* vQueryCons = query_consens_byte; for (i = begin; LIKELY(i != end); i += step) { // cnt = i; simd_int e, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0. @@ -720,20 +845,6 @@ std::pair SmithWater vH = simdi8_shiftl (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */ const simd_int* vP = query_profile_byte + db_sequence[i] * segLen; /* Right part of the query_profile_byte */ -#ifdef AVX2 - simd_int target_scores1 = simdi8_set(0); - if (type == PROFILE_PROFILE) { - target_scores1 = simdi_load(&db_profile_byte[i]); - } -#else - simd_int target_scores1 = simdi8_set(0); - simd_int target_scores2 = simdi8_set(0); - if (type == PROFILE_PROFILE) { - target_scores1 = simdi_load(&db_profile_byte[i]); - target_scores2 = simdi_load(&db_profile_byte[i + 16]); - } -#endif - /* Swap the 2 H buffers. 
*/ simd_int* pv = pvHLoad; pvHLoad = pvHStore; @@ -741,26 +852,7 @@ std::pair SmithWater /* inner loop to process the query sequence */ for (j = 0; LIKELY(j < segLen); ++j) { - simd_int score = simdi8_set(0); - if (type == PROFILE_PROFILE) { -#ifdef AVX2 - __m256i scoreLookup = UngappedAlignment::Shuffle(target_scores1, simdi_load(vQueryCons + j)); -#else - const __m128i vQueryConsJ = _mm_load_si128(vQueryCons + j); - __m128i score01 = _mm_shuffle_epi8(target_scores1, vQueryConsJ); - __m128i score16 = _mm_shuffle_epi8(target_scores2, vQueryConsJ); - __m128i lookup_mask01 = _mm_cmplt_epi8(vQueryConsJ, sixten); - __m128i lookup_mask16 = _mm_cmplt_epi8(fiveten, vQueryConsJ); - score01 = _mm_and_si128(lookup_mask01, score01); - score16 = _mm_and_si128(lookup_mask16, score16); - __m128i scoreLookup = _mm_add_epi8(score01, score16); -#endif - //score = simdui8_max(scoreLookup, simdi_load(vP + j)); - score = simdui8_avg(scoreLookup, simdi_load(vP + j)); - } else { - score = simdi_load(vP + j); - } - + simd_int score = simdi_load(vP + j); vH = simdui8_adds(vH, score); vH = simdui8_subs(vH, vBias); /* vH will be always > 0 */ @@ -768,52 +860,23 @@ std::pair SmithWater /* Get max from vH, vE and vF. */ e = simdi_load(pvE + j); vH = simdui8_max(vH, e); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vH = simdui8_max(vH, simdui8_subs(vF, simdi_load(gap_close_del + j))); - } else { -#endif - vH = simdui8_max(vH, vF); -#ifdef GAP_POS_SCORING - } -#endif + vH = simdui8_max(vH, vF); vMaxColumn = simdui8_max(vMaxColumn, vH); /* Save vH values. */ simdi_store(pvHStore + j, vH); /* Update vE value. 
*/ -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - // copy vH for update of vF - vTemp = vH; - vH = simdui8_subs(vH, simdi_load(gap_open_ins + j)); /* saturation arithmetic, result >= 0 */ - } else { -#endif - vH = simdui8_subs(vH, vGapO); /* saturation arithmetic, result >= 0 */ -#ifdef GAP_POS_SCORING - } -#endif - + vH = simdui8_subs(vH, vGapO); /* saturation arithmetic, result >= 0 */ e = simdui8_subs(e, vGapE); e = simdui8_max(e, vH); simdi_store(pvE + j, e); /* Update vF value. */ vF = simdui8_subs(vF, vGapE); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vF = simdui8_max(vF, simdui8_subs(vTemp, simdi_load(gap_open_del + j))); - } else { -#endif - vF = simdui8_max(vF, vH); -#ifdef GAP_POS_SCORING - } -#endif - + vF = simdui8_max(vF, vH); /* Load the next vH. */ vH = simdi_load(pvHLoad + j); - } /* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */ @@ -824,28 +887,10 @@ std::pair SmithWater /* we are at the end, we need to shift the vF value over */ /* to the next column. 
*/ vF = simdi8_shiftl (vF, 1); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vTemp = simdui8_subs(vH, simdi_load(gap_open_del + j)); - } else { -#endif - vTemp = simdui8_subs(vH, vGapO); -#ifdef GAP_POS_SCORING - } -#endif + vTemp = simdui8_subs(vH, vGapO); vTemp = simdui8_subs (vF, vTemp); while (simd_any(vTemp)) { -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vH = simdui8_max (vH, simdui8_subs(vF, simdi_load(gap_close_del + j))); - simdi_store(pvE + j, simdui8_max(simdi_load(pvE + j), simdui8_subs(vH, simdi_load(gap_open_ins + j)))); - } else { -#endif - vH = simdui8_max (vH, vF); -#ifdef GAP_POS_SCORING - } -#endif - + vH = simdui8_max (vH, vF); vMaxColumn = simdui8_max(vMaxColumn, vH); simdi_store (pvHStore + j, vH); @@ -857,16 +902,7 @@ std::pair SmithWater vF = simdi8_shiftl (vF, 1); } vH = simdi_load (pvHStore + j); - -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vTemp = simdui8_subs(vH, simdi_load(gap_open_del + j)); - } else { -#endif - vTemp = simdui8_subs(vH, vGapO); -#ifdef GAP_POS_SCORING - } -#endif + vTemp = simdui8_subs(vH, vGapO); vTemp = simdui8_subs (vF, vTemp); } @@ -939,23 +975,15 @@ std::pair SmithWater return std::make_pair(best0, best1); } -template +template std::pair SmithWaterman::sw_sse2_word (const unsigned char* db_sequence, - const simd_int* db_profile_byte, int8_t ref_dir, // 0: forward ref; 1: reverse ref int32_t db_length, int32_t query_length, const uint8_t gap_open, /* will be used as - */ const uint8_t gap_extend, /* will be used as - */ const simd_int* query_profile_word, - const simd_int* query_consens_word, -#ifdef GAP_POS_SCORING - const simd_int *gap_open_del, - const simd_int *gap_close_del, - const simd_int *gap_open_ins, -#endif uint16_t terminate, - const uint16_t bias, int32_t maskLen) { uint16_t max = 0; /* the max alignment score */ @@ -986,9 +1014,6 @@ std::pair SmithWater /* 16 byte insertion extension vector */ simd_int vGapE = simdi16_set(gap_extend); - /* 16 byte bias vector */ - simd_int vBias = 
simdi16_set(bias); - //simd_int vBias = simdi16_set(-bias); // set as a negative value for simd use simd_int vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */ simd_int vMaxMark = vZero; /* Trace the highest score till the previous column. */ int32_t edge, begin = 0, end = db_length, step = 1; @@ -1002,13 +1027,6 @@ std::pair SmithWater step = -1; } - // store the query consensus profile - const simd_int* vQueryCons = query_consens_word; - -#ifndef AVX2 - const simd_int sixten = simdi8_set(16); - const simd_int fiveten = simdi8_set(15); -#endif for (i = begin; LIKELY(i != end); i += step) { simd_int e, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0. @@ -1019,98 +1037,34 @@ std::pair SmithWater vH = simdi8_shiftl (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */ const simd_int* vP = query_profile_word + db_sequence[i] * segLen; /* Right part of the query_profile_byte */ -#ifdef AVX2 - simd_int target_scores1 = simdi16_set(0); - if (type == PROFILE_PROFILE) { - target_scores1 = simdi_load(&db_profile_byte[i]); - } -#else - simd_int target_scores1 = simdi16_set(0); - simd_int target_scores2 = simdi16_set(0); - if (type == PROFILE_PROFILE) { - target_scores1 = simdi_load(&db_profile_byte[i]); - target_scores2 = simdi_load(&db_profile_byte[i + 16]); - } -#endif - /* Swap the 2 H buffers. 
*/ simd_int* pv = pvHLoad; pvHLoad = pvHStore; pvHStore = pv; /* inner loop to process the query sequence */ - for (j = 0; LIKELY(j < segLen); j ++) { - simd_int score = simdi16_set(0); - if (type == PROFILE_PROFILE) { -#ifdef AVX2 - __m256i scoreLookup = UngappedAlignment::Shuffle(target_scores1, simdi_load(vQueryCons + j)); -#else - const __m128i vQueryConsJ = _mm_load_si128(vQueryCons + j); - __m128i score01 = _mm_shuffle_epi8(target_scores1, vQueryConsJ); - __m128i score16 = _mm_shuffle_epi8(target_scores2, vQueryConsJ); - __m128i lookup_mask01 = _mm_cmplt_epi8(vQueryConsJ, sixten); - __m128i lookup_mask16 = _mm_cmplt_epi8(fiveten, vQueryConsJ); - score01 = _mm_and_si128(lookup_mask01, score01); - score16 = _mm_and_si128(lookup_mask16, score16); - __m128i scoreLookup = _mm_add_epi8(score01, score16); -#endif - scoreLookup = simdi_and(scoreLookup, simdi16_set(0x00FF)); - score = simdui16_avg(scoreLookup, simdi16_add(simdi_load(vP + j), vBias)); - score = simdi16_sub(score, vBias); - //scoreLookup = simdi16_add(scoreLookup, vBias); - //score = simdi16_max(scoreLookup, simdi_load(vP + j)); - } else { - score = simdi_load(vP + j); - } - + for (j = 0; LIKELY(j < segLen); ++j) { + simd_int score = simdi_load(vP + j); vH = simdi16_adds(vH, score); /* Get max from vH, vE and vF. */ e = simdi_load(pvE + j); vH = simdi16_max(vH, e); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vH = simdi16_max(vH, simdui16_subs(vF, simdi_load(gap_close_del + j))); - } else { -#endif - vH = simdi16_max(vH, vF); -#ifdef GAP_POS_SCORING - } -#endif - + vH = simdi16_max(vH, vF); vMaxColumn = simdi16_max(vMaxColumn, vH); /* Save vH values. */ simdi_store(pvHStore + j, vH); /* Update vE value. 
*/ -#ifdef GAP_POS_SCORING - simd_int vTemp; - if (posSpecificGaps) { - // copy vH for update of vF - vTemp = vH; - vH = simdui16_subs(vH, simdi_load(gap_open_ins + j)); /* saturation arithmetic, result >= 0 */ - } else { -#endif - vH = simdui16_subs(vH, vGapO); /* saturation arithmetic, result >= 0 */ -#ifdef GAP_POS_SCORING - } -#endif + vH = simdui16_subs(vH, vGapO); /* saturation arithmetic, result >= 0 */ e = simdui16_subs(e, vGapE); e = simdi16_max(e, vH); simdi_store(pvE + j, e); /* Update vF value. */ vF = simdui16_subs(vF, vGapE); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vF = simdi16_max(vF, simdui16_subs(vTemp, simdi_load(gap_open_del + j))); - } else { -#endif - vF = simdi16_max(vF, vH); -#ifdef GAP_POS_SCORING - } -#endif + vF = simdi16_max(vF, vH); /* Load the next vH. */ vH = simdi_load(pvHLoad + j); @@ -1121,28 +1075,10 @@ std::pair SmithWater vF = simdi8_shiftl (vF, 2); for (j = 0; LIKELY(j < segLen); ++j) { vH = simdi_load(pvHStore + j); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vH = simdi16_max(vH, simdui16_subs(vF, simdi_load(gap_close_del + j))); - simdi_store(pvE + j, simdi16_max(simdi_load(pvE + j), simdui16_subs(vH, simdi_load(gap_open_ins + j)))); - } else { -#endif - vH = simdi16_max(vH, vF); -#ifdef GAP_POS_SCORING - } -#endif - + vH = simdi16_max(vH, vF); vMaxColumn = simdi16_max(vMaxColumn, vH); //newly added line simdi_store(pvHStore + j, vH); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - vH = simdui16_subs(vH, simdi_load(gap_open_del + j)); - } else { -#endif - vH = simdui16_subs(vH, vGapO); -#ifdef GAP_POS_SCORING - } -#endif + vH = simdui16_subs(vH, vGapO); vF = simdui16_subs(vF, vGapE); if (UNLIKELY(!simd_any(simdi16_gt(vF, vH)))) goto end; } @@ -1213,20 +1149,191 @@ std::pair SmithWater return std::make_pair(best0, best1); } +/* sw_sse2_int: striped Smith-Waterman scan using 32-bit score lanes (simdi32_* operations). Walks db_sequence backward when ref_dir == 1 and forward otherwise, adds the query_profile_int row of each reference residue, subtracts gap_open / gap_extend as penalties, and stores every column maximum in maxColumn. Stops early when a column maximum equals terminate. Returns (best0, best1): best0 holds the overall maximum score with its reference and query end positions; best1 holds the highest column maximum found outside the maskLen window around best0.ref. */ template +std::pair SmithWaterman::sw_sse2_int (const unsigned char* db_sequence, + int8_t ref_dir, // 0: forward ref; 1: reverse ref + int32_t db_length, + int32_t query_length, + 
const uint8_t gap_open, /* will be used as - */ + const uint8_t gap_extend, /* will be used as - */ + const simd_int* query_profile_int, + uint32_t terminate, + int32_t maskLen) { +#define max4(m, vm) ((m) = simdi32_hmax((vm))); + + uint32_t max = 0; /* the max alignment score */ + int32_t end_read = query_length - 1; + int32_t end_ref = 0; /* 1_based best alignment ending point; Initialized as isn't aligned - 0. NOTE(review): it is assigned the loop index i below, which starts at 0 in the forward direction - confirm the 1_based wording. */ + const unsigned int SIMD_SIZE = VECSIZE_INT; + int32_t segLen = (query_length + SIMD_SIZE-1) / SIMD_SIZE; /* number of segments: query_length / SIMD_SIZE, rounded up */ + /* maxColumn records the largest score of each reference position (written by max4(maxColumn[i], vMaxColumn) below). NOTE(review): this clears db_length * sizeof(uint32_t) bytes and then views the buffer as uint32_t - confirm this->maxColumn is allocated at least that large. */ + memset(this->maxColumn, 0, db_length * sizeof(uint32_t)); + uint32_t * maxColumn = (uint32_t *) this->maxColumn; + + /* Define 16 byte 0 vector. */ + simd_int vZero = simdi32_set(0); + simd_int* pvHStore = vHStore; + simd_int* pvHLoad = vHLoad; + simd_int* pvE = vE; + simd_int* pvHmax = vHmax; + memset(pvHStore, 0, segLen*sizeof(simd_int)); + memset(pvHLoad, 0, segLen*sizeof(simd_int)); + memset(pvE, 0, segLen*sizeof(simd_int)); + memset(pvHmax, 0, segLen*sizeof(simd_int)); + + int32_t i, j, k; + /* 16 byte insertion begin vector */ + simd_int vGapO = simdi32_set(gap_open); + + /* 16 byte insertion extension vector */ + simd_int vGapE = simdi32_set(gap_extend); + + simd_int vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */ + simd_int vMaxMark = vZero; /* Trace the highest score till the previous column. */ + simd_int vTemp; + int32_t edge, begin = 0, end = db_length, step = 1; + + /* outer loop to process the reference sequence */ + if (ref_dir == 1) { + begin = db_length - 1; + end = -1; + step = -1; + } + + + for (i = begin; LIKELY(i != end); i += step) { + simd_int e, vF = vZero; + simd_int vMaxColumn = vZero; /* Initialize F value to 0. + Any errors to vH values will be corrected in the Lazy_F loop. 
+ */ + simd_int vH = pvHStore[segLen - 1]; + vH = simdi8_shiftl(vH, 4); /* Shift the value in vH left by 4 bytes, i.e. one 32-bit lane. */ + + /* Swap the 2 H buffers. */ + simd_int* pv = pvHLoad; + + const simd_int* vP = query_profile_int + db_sequence[i] * segLen; /* Start of the query_profile_int row for reference residue db_sequence[i] */ + + pvHLoad = pvHStore; + pvHStore = pv; + + /* inner loop to process the query sequence */ + for (j = 0; LIKELY(j < segLen); ++j) { + simd_int score = simdi_load(vP + j); + // vH = simdi32_adds(vH, score); + vH = simdi32_add(vH, score); + /* Get max from vH, vE and vF. */ + e = simdi_load(pvE + j); + vH = simdi32_max(vH, e); + vH = simdi32_max(vH, vF); + + vMaxColumn = simdi32_max(vMaxColumn, vH); + + /* Save vH values. */ + simdi_store(pvHStore + j, vH); + + /* Update vE value. */ + vH = simdui32_subs(vH, vGapO); /* saturation arithmetic, result >= 0 */ + e = simdui32_subs(e, vGapE); + e = simdi32_max(e, vH); + simdi_store(pvE + j, e); + + /* Update vF value. */ + vF = simdui32_subs(vF, vGapE); + vF = simdi32_max(vF, vH); + + /* Load the next vH. */ + vH = simdi_load(pvHLoad + j); + } + + /* Lazy_F loop: has been revised to disallow adjacent insertion and then deletion, so don't update E(i, j), learn from SWPS3. The loop is left through the goto below as soon as no lane of vF exceeds vH. */ + for (k = 0; LIKELY(k < (int32_t) SIMD_SIZE); ++k) { + vF = simdi8_shiftl(vF, 4); + for (j = 0; LIKELY(j < segLen); ++j) { + vH = simdi_load(pvHStore + j); + vH = simdi32_max(vH, vF); + vMaxColumn = simdi32_max(vMaxColumn, vH); //newly added line + simdi_store(pvHStore + j, vH); + vH = simdui32_subs(vH, vGapO); + vF = simdui32_subs(vF, vGapE); + if (UNLIKELY(! 
simdi8_movemask(simdi32_gt(vF, vH)))) goto end; + } + } + end: + vMaxScore = simdi32_max(vMaxScore, vMaxColumn); + vTemp = simdi32_eq(vMaxMark, vMaxScore); /* marks the lanes whose running maximum did not change in this column */ + uint32_t cmp = simdi8_movemask(vTemp); + if (cmp != SIMD_MOVEMASK_MAX) { + uint32_t temp; + vMaxMark = vMaxScore; + max4(temp, vMaxScore); + vMaxScore = vMaxMark; + + if (LIKELY(temp > max)) { + max = temp; /* new overall best: remember its column and snapshot the H column for the read-end trace below */ + end_ref = i; + for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j]; + } + } + + /* Record the max score of current column. */ + max4(maxColumn[i], vMaxColumn); + if (maxColumn[i] == terminate) break; + } + + /* Trace the alignment ending position on read: flat index i of the striped pvHmax buffer maps to query position i / SIMD_SIZE + (i % SIMD_SIZE) * segLen, and the smallest position whose score equals max is kept. */ + uint32_t *t = (uint32_t*)pvHmax; + int32_t column_len = segLen * SIMD_SIZE; + for (i = 0; LIKELY(i < column_len); ++i, ++t) { + int32_t temp; + if (*t == max) { + temp = i / SIMD_SIZE + i % SIMD_SIZE * segLen; + if (temp < end_read) end_read = temp; + } + } + + /* Find the most likely 2nd best alignment: the highest column maximum outside the window [end_ref - maskLen, end_ref + maskLen). */ + alignment_end best0; + best0.score = max; /* NOTE(review): max is uint32_t - confirm alignment_end::score is wide enough to hold it without truncation */ + best0.ref = end_ref; + best0.read = end_read; + + alignment_end best1; + best1.score = 0; + best1.ref = 0; + best1.read = 0; + + edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0; + for (i = 0; i < edge; i ++) { + if (maxColumn[i] > best1.score) { + best1.score = maxColumn[i]; + best1.ref = i; + } + } + edge = (end_ref + maskLen) > db_length ? 
db_length : (end_ref + maskLen); + for (i = edge; i < db_length; i ++) { + if (maxColumn[i] > best1.score) { + best1.score = maxColumn[i]; + best1.ref = i; + } + } + + return std::make_pair(best0, best1); +#undef max4 +} + void SmithWaterman::ssw_init(const Sequence* q, const int8_t* mat, const BaseMatrix *m) { - - query_id = q->getId(); - profile->bias = 0; - profile->query_length = q->L; - profile->sequence_type = q->getSequenceType(); - isQueryProfile = (Parameters::isEqualDbtype(profile->sequence_type, Parameters::DBTYPE_HMM_PROFILE)); - const int32_t alphabetSize = m->alphabetSize; + //init profile + profile->bias = 0; + profile->query_length = q->L; + const int32_t alphabetSize = m->alphabetSize; profile->alphabetSize = m->alphabetSize; - int32_t compositionBias = 0; bool isProfile = Parameters::isEqualDbtype(q->getSequenceType(), Parameters::DBTYPE_HMM_PROFILE); + profile->isProfile = isProfile; + int32_t compositionBias = 0; if (!isProfile && aaBiasCorrection) { SubstitutionMatrix::calcLocalAaBiasCorrection(m, q->numSequence, q->L, tmp_composition_bias, aaBiasCorrectionScale); for (int i =0; i < q->L; i++) { @@ -1237,47 +1344,20 @@ void SmithWaterman::ssw_init(const Sequence* q, } else { memset(profile->composition_bias, 0, q->L* sizeof(int8_t)); } + // copy memory to local memory - if (Parameters::isEqualDbtype(profile->sequence_type, Parameters::DBTYPE_HMM_PROFILE)) { + if (isProfile) { memcpy(profile->mat, mat, q->L * Sequence::PROFILE_AA_SIZE * sizeof(int8_t)); // set neutral state 'X' (score=0) - memset(profile->mat + ((alphabetSize - 1) * q->L), 0, q->L * sizeof(int8_t )); + memset(profile->mat + ((alphabetSize - 1) * q->L), 0, q->L * sizeof(int8_t)); } else { memcpy(profile->mat, mat, alphabetSize * alphabetSize * sizeof(int8_t)); } memcpy(profile->query_sequence, q->numSequence, q->L); - // numConsensusSequence points to NULL if not profile - if (isQueryProfile) { - memcpy(profile->query_consens_sequence, q->numConsensusSequence, q->L); - } - // 
create gap-penalties profile - if (isProfile) { -#ifdef GAP_POS_SCORING - profile->gIns = q->gIns; - // insertion penalties are shifted by one position for the reverse direction (2nd to last becomes first) - std::reverse_copy(q->gIns, q->gIns + q->L - 1, profile->gIns_rev); - - for (int32_t i = 0; i < q->L; ++i) { - profile->gDelOpen[i] = q->gDel[i] & 0xF; - profile->gDelClose[i] = q->gDel[i] >> 4; - } - profile->gDelClose_rev[0] = 0; - profile->gDelOpen_rev[0] = 0; - std::reverse_copy(profile->gDelOpen + 1, profile->gDelOpen + q->L, profile->gDelClose_rev + 1); - std::reverse_copy(profile->gDelClose + 1, profile->gDelClose + q->L, profile->gDelOpen_rev + 1); -#endif - for (int32_t i = 0; i < alphabetSize; i++) { - const int8_t *startToRead = profile->mat + (i * q->L); - int8_t *startToWrite = profile->mat_rev + (i * q->L); - std::reverse_copy(startToRead, startToRead + q->L, startToWrite); - } - } int32_t bias = 0; - int32_t matSize = alphabetSize * alphabetSize; - if (Parameters::isEqualDbtype(q->getSequenceType(), Parameters::DBTYPE_HMM_PROFILE)) { - matSize = q->L * Sequence::PROFILE_AA_SIZE; - } + int32_t matSize = isProfile ? 
q->L * Sequence::PROFILE_AA_SIZE : alphabetSize * alphabetSize; + for (int32_t i = 0; i < matSize; i++) { if (mat[i] < bias) { bias = mat[i]; @@ -1287,25 +1367,18 @@ void SmithWaterman::ssw_init(const Sequence* q, profile->bias = bias; if (isProfile) { - // offset = 1 when createQueryProfile + createConsensProfile is old versions + // offset = 1 when createQueryProfile // create byte version of profiles createQueryProfile(profile->profile_byte, profile->query_sequence, NULL, - profile->mat, q->L, alphabetSize, profile->bias, 0, q->L); - createConsensProfile(profile->consens_byte, profile->query_consens_sequence, q->L, 0); -#ifdef GAP_POS_SCORING - createGapProfile(profile->profile_gDelOpen_byte, profile->profile_gDelClose_byte, - profile->profile_gIns_byte, profile->gDelOpen, profile->gDelClose, q->gIns, q->L, 0); -#endif - // create word version of profiles - createQueryProfile(profile->profile_word, profile->query_sequence, NULL, - profile->mat, q->L, alphabetSize, 0, 0, q->L); - createConsensProfile(profile->consens_word, profile->query_consens_sequence, q->L, 0); -#ifdef GAP_POS_SCORING - createGapProfile(profile->profile_gDelOpen_word, profile->profile_gDelClose_word, profile->profile_gIns_word, - profile->gDelOpen, profile->gDelClose, q->gIns, q->L, 0); -#endif - // create linear version of word profile - for (int32_t i = 0; i< alphabetSize; i++) { + profile->mat, q->L, alphabetSize, profile->bias, 0, q->L); + // create word version of profiles + createQueryProfile(profile->profile_word, profile->query_sequence, NULL, + profile->mat, q->L, alphabetSize, 0, 0, q->L); + // create int version of profiles + createQueryProfile(profile->profile_int, profile->query_sequence, NULL, + profile->mat, q->L, alphabetSize, 0, 0, q->L); + // create linear version of word profile + for (int32_t i = 0; i< alphabetSize; i++) { profile->profile_word_linear[i] = &profile_word_linear_data[i*q->L]; for (int j = 0; j < q->L; j++) { profile->profile_word_linear[i][j] = mat[i * q->L 
+ j]; @@ -1316,7 +1389,9 @@ void SmithWaterman::ssw_init(const Sequence* q, createQueryProfile(profile->profile_byte, profile->query_sequence, profile->composition_bias, profile->mat, q->L, alphabetSize, bias, 0, 0); // create word version of query profile createQueryProfile(profile->profile_word, profile->query_sequence, profile->composition_bias, profile->mat, q->L, alphabetSize, 0, 0, 0); - // create linear version of word profile + // create int version of query profile + createQueryProfile(profile->profile_int, profile->query_sequence, profile->composition_bias, profile->mat, q->L, alphabetSize, 0, 0, 0); + // create linear version of word profile for (int32_t i = 0; i< alphabetSize; i++) { profile->profile_word_linear[i] = &profile_word_linear_data[i*q->L]; for (int j = 0; j < q->L; j++) { @@ -1326,35 +1401,50 @@ void SmithWaterman::ssw_init(const Sequence* q, } // create reverse structures - if (isProfile) { - std::reverse_copy(profile->query_sequence, profile->query_sequence + q->L, profile->query_rev_sequence); - std::reverse_copy(profile->query_consens_sequence, profile->query_consens_sequence + q->L, profile->query_consens_rev_sequence); - } else { - std::reverse_copy(profile->query_sequence, profile->query_sequence + q->L, profile->query_rev_sequence); - std::reverse_copy(profile->composition_bias, profile->composition_bias + q->L, profile->composition_bias_rev); - } - + std::reverse_copy(profile->query_sequence, profile->query_sequence + q->L, profile->query_rev_sequence); + std::reverse_copy(profile->composition_bias, profile->composition_bias + q->L, profile->composition_bias_rev); if (isProfile) { - for (int32_t i = 0; i < alphabetSize; i++) { + for (int32_t i = 0; i < alphabetSize; i++) { const int8_t *startToRead = profile->mat + (i * q->L); int8_t *startToWrite = profile->mat_rev + (i * q->L); std::reverse_copy(startToRead, startToRead + q->L, startToWrite); } - } + memset(profile->pos_aa_rev, 0x80, q->L*32); // do we need it? 
+ int rowIdx = 0; + for (int i = 0; i < profile->query_length; i++) { + for (int aa = 0; aa < subMat->alphabetSize; aa++) { + int score = profile->mat_rev[aa * profile->query_length + i]; + // int idx = rowIdx + (subMat->num2aa[aa] - 'A'); //orig + int idx = rowIdx + aa; //new + profile->pos_aa_rev[idx] = static_cast(score); + } + rowIdx += 32; + } + } + else { + for (int i = 0; i < q->L; i++) { + block->query_bias_arr[i] = profile->composition_bias_rev[i]; + } + + for (int aa1 = 0; aa1 < subMat->alphabetSize; aa1++) { + for (int aa2 = 0; aa2 < subMat->alphabetSize; aa2++) { + // instead of num2aa, use aa directly + block_set_aamatrix_num(block->mat_aa, aa1, aa2, + subMat->subMatrix[aa1][aa2]); + } + } + } } -template +template SmithWaterman::cigar * SmithWaterman::banded_sw(const unsigned char *db_sequence, const int8_t *query_sequence, - const int8_t *query_consens_sequence, const int8_t * compositionBias, + const int8_t * compositionBias, int32_t db_length, int32_t query_length, int32_t queryStart, - int32_t targetStart, int32_t score, const uint32_t gap_open, + int32_t score, const uint32_t gap_open, const uint32_t gap_extend, -#ifdef GAP_POS_SCORING - uint8_t *gDelOpen, uint8_t *gDelClose, uint8_t *gIns, -#endif - int32_t band_width, const int8_t *mat, const int8_t *db_mat, - const int32_t qry_n, const int32_t tgt_n) { + int32_t band_width, const int8_t *mat, + const int32_t qry_n) { #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) /* Convert the coordinate in the scoring matrix into the coordinate in one line of the band. 
*/ @@ -1375,7 +1465,6 @@ SmithWaterman::cigar * SmithWaterman::banded_sw(const unsigned char *db_sequence e_b = (int32_t*)malloc(s1 * sizeof(int32_t)); h_c = (int32_t*)malloc(s1 * sizeof(int32_t)); direction = (int8_t*)malloc(s2 * sizeof(int8_t)); - int32_t subScore = 0; do { width = band_width * 2 + 3, width_d = band_width * 2 + 1; @@ -1419,57 +1508,23 @@ SmithWaterman::cigar * SmithWaterman::banded_sw(const unsigned char *db_sequence set_d(df, band_width, i, j, 1); set_d(dh, band_width, i, j, 2); -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - temp1 = i == 0 ? -gap_open : h_b[e] - gDelOpen[i]; - } else { -#endif - temp1 = i == 0 ? -gap_open : h_b[e] - gap_open; -#ifdef GAP_POS_SCORING - } -#endif - + temp1 = i == 0 ? -gap_open : h_b[e] - gap_open; temp2 = i == 0 ? -gap_extend : e_b[e] - gap_extend; e_b[u] = temp1 > temp2 ? temp1 : temp2; direction_line[de] = temp1 > temp2 ? 3 : 2; -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - temp1 = h_c[b] - gIns[i]; - } else { -#endif - temp1 = h_c[b] - gap_open; -#ifdef GAP_POS_SCORING - } -#endif - + temp1 = h_c[b] - gap_open; temp2 = f - gap_extend; f = temp1 > temp2 ? temp1 : temp2; direction_line[df] = temp1 > temp2 ? 5 : 4; f1 = f > 0 ? f : 0; -#ifdef GAP_POS_SCORING - if (posSpecificGaps) { - e1 = std::max(0, e_b[u] - gDelClose[i + 1]); - } else { -#endif - e1 = e_b[u] > 0 ? e_b[u] : 0; -#ifdef GAP_POS_SCORING - } -#endif + e1 = e_b[u] > 0 ? e_b[u] : 0; temp1 = e1 > f1 ? 
e1 : f1; //TODO: careful with the variable names - if (type == PROFILE_PROFILE) { - // both db_sequence and query_sequence must be the consensus sequence - int32_t minScore = std::min(mat[db_sequence[j] * qry_n + (queryStart + i)], db_mat[query_consens_sequence[i] * tgt_n + (targetStart + j)]); - int32_t absMinScore = abs(minScore); - int32_t maxScore = std::max(mat[db_sequence[j] * qry_n + (queryStart + i)], db_mat[query_consens_sequence[i] * tgt_n + (targetStart + j)]); - subScore = (absMinScore + minScore) + (absMinScore + maxScore) ; - subScore = ((subScore + 1) / 2) - absMinScore; - temp2 = h_b[d] + subScore; - } else if (type == PROFILE_SEQ) { + if (type == PROFILE_SEQ) { // db_sequence is a numerical sequence temp2 = h_b[d] + mat[db_sequence[j] * qry_n + (queryStart + i)]; } else { diff --git a/src/alignment/StripedSmithWaterman.h b/src/alignment/StripedSmithWaterman.h index d4d3aa555..bbe13d930 100644 --- a/src/alignment/StripedSmithWaterman.h +++ b/src/alignment/StripedSmithWaterman.h @@ -45,6 +45,9 @@ #include "Sequence.h" #include "EvalueComputation.h" + +struct s_block; + typedef struct { short qStartPos; short dbStartPos; @@ -52,7 +55,6 @@ typedef struct { short dbEndPos; } aln_t; - typedef struct { uint32_t score1; uint32_t score2; @@ -67,54 +69,32 @@ typedef struct { int32_t cigarLen; double evalue; uint32_t identicalAACnt; + int word; } s_align; class SmithWaterman{ public: SmithWaterman(size_t maxSequenceLength, int aaSize, bool aaBiasCorrection, - float aaBiasCorrectionScale, int targetSeqType); + float aaBiasCorrectionScale, SubstitutionMatrix * subMat); ~SmithWaterman(); // TODO: private or public? 
struct s_profile{ simd_int* profile_byte; // 0: none simd_int* profile_word; // 0: none + simd_int* profile_int; simd_int* profile_rev_byte; // 0: none simd_int* profile_rev_word; // 0: none -#ifdef GAP_POS_SCORING - // gap penalties - simd_int* profile_gDelOpen_byte; - simd_int* profile_gDelOpen_word; - simd_int* profile_gDelClose_byte; - simd_int* profile_gDelClose_word; - simd_int* profile_gIns_byte; - simd_int* profile_gIns_word; - simd_int* profile_gDelOpen_rev_byte; - simd_int* profile_gDelOpen_rev_word; - simd_int* profile_gDelClose_rev_byte; - simd_int* profile_gDelClose_rev_word; - simd_int* profile_gIns_rev_byte; - simd_int* profile_gIns_rev_word; - uint8_t* gDelOpen; - uint8_t* gDelClose; - uint8_t* gIns; - uint8_t* gDelOpen_rev; - uint8_t* gDelClose_rev; - uint8_t* gIns_rev; -#endif - // profile-profile - simd_int* consens_byte; - simd_int* consens_word; - simd_int* consens_rev_byte; - simd_int* consens_rev_word; + simd_int* profile_rev_int; int8_t* query_sequence; int8_t* query_rev_sequence; - int8_t* query_consens_sequence; - int8_t* query_consens_rev_sequence; int8_t* composition_bias; int8_t* composition_bias_rev; + int8_t* composition_bias_target; + int8_t* composition_bias_target_rev; int8_t* mat; + bool isProfile; // Memory layout of if mat + queryProfile is qL * AA // Query length // A -1 -3 -2 -1 -4 -2 -2 -3 -1 -3 -2 -2 7 -1 -2 -1 -1 -2 -5 -3 @@ -128,12 +108,13 @@ class SmithWaterman{ // ... // Y -1 -3 -2 -1 -4 -2 -2 -3 -1 -3 -2 -2 7 -1 -2 -1 -1 -2 -5 -3 int8_t* mat_rev; // needed for queryProfile + int8_t* pos_aa_rev; int32_t query_length; - int32_t sequence_type; + // int32_t sequence_type; int32_t alphabetSize; uint8_t bias; short ** profile_word_linear; - simd_int *target_profile_byte; + int32_t ** profile_int_linear; }; // prints a __m128 vector containing 8 signed shorts @@ -192,9 +173,40 @@ class SmithWaterman{ while bit 8 is not, the function will return cigar only when both criteria are fulfilled. 
All returned positions are 0-based coordinate. */ + template + s_align alignScoreEndPos ( + const unsigned char *db_sequence, + int32_t db_length, + const uint8_t gap_open, + const uint8_t gap_extend, + const int32_t maskLen); + + template + s_align alignStartPosBacktrace ( + const unsigned char *db_sequence, + int32_t db_length, + + const uint8_t gap_open, + const uint8_t gap_extend, + const uint8_t alignmentMode, + std::string & backtrace, + s_align r, + EvalueComputation * evaluer, + const int covMode, + const float covThr, + const float correlationScoreWeight, + const int32_t maskLen); + + template + s_align alignStartPosBacktraceBlock( + const unsigned char *db_sequence, + int32_t db_length, + const uint8_t gap_open, + const uint8_t gap_extend, + std::string & backtrace, + s_align r); + s_align ssw_align (const unsigned char *db_num_sequence, - const unsigned char *db_consens_sequence, - const int8_t *db_profile, int32_t db_length, std::string &backtrace, const uint8_t gap_open, @@ -203,7 +215,7 @@ class SmithWaterman{ const double filters, EvalueComputation * filterd, const int covMode, const float covThr, const float correlationScoreWeight, - const int32_t maskLen, const size_t id); + const int32_t maskLen); /*! 
@function computed ungapped alignment score @@ -237,7 +249,6 @@ class SmithWaterman{ */ void ssw_init(const Sequence *q, const int8_t *mat, const BaseMatrix *m); - static char cigar_int_to_op (uint32_t cigar_int); static uint32_t cigar_int_to_len (uint32_t cigar_int); @@ -245,6 +256,9 @@ class SmithWaterman{ static float computeCov(unsigned int startPos, unsigned int endPos, unsigned int len); + int isProfileSearch(){ + return profile->isProfile; + } s_align scoreIdentical(unsigned char *dbSeq, int L, EvalueComputation * evaluer, int alignmentMode, std::string &backtrace); static void seq_reverse(int8_t * reverse, const int8_t* seq, int32_t end) /* end is 0-based alignment ending position */ @@ -261,19 +275,17 @@ class SmithWaterman{ static int computeCorrelationScore(int8_t * scorePreCol, size_t length); - template void computerBacktrace(s_profile * query, const unsigned char * db_sequence, s_align & alignment, std::string & backtrace, uint32_t & aaIds, int8_t * scorePerCol, size_t & mStatesCnt); + static uint32_t to_cigar_int (uint32_t length, char op_letter); // ssw_init const static unsigned int SUBSTITUTIONMATRIX = 1; const static unsigned int PROFILE = 2; // ssw_align const static unsigned int SEQ_SEQ = 3; - const static unsigned int SEQ_PROFILE = 4; - const static unsigned int PROFILE_SEQ = 5; - const static unsigned int PROFILE_PROFILE = 6; + const static unsigned int PROFILE_SEQ = 4; private: @@ -284,11 +296,10 @@ class SmithWaterman{ uint8_t * maxColumn; // target variables - simd_int* target_profile_byte; int segSize; // needed for type checking query and target databases - bool isTargetProfile, isQueryProfile; + bool isQueryProfile; typedef struct { uint16_t score; @@ -303,9 +314,8 @@ class SmithWaterman{ } cigar; - template + template s_align ssw_align_private (const unsigned char*db_sequence, - const int8_t *db_profile, int32_t db_length, std::string &backtrace, const uint8_t gap_open, @@ -314,7 +324,7 @@ class SmithWaterman{ const double filters, 
EvalueComputation * filterd, const int covMode, const float covThr, const float correlationScoreWeight, - const int32_t maskLen, const size_t id); + const int32_t maskLen); /* Striped Smith-Waterman Record the highest score of each reference position. @@ -323,21 +333,14 @@ class SmithWaterman{ wight_match > 0, all other weights < 0. The returned positions are 0-based. */ - template + template std::pair sw_sse2_byte(const unsigned char *db_sequence, - const simd_int* db_profile_byte, int8_t ref_dir, // 0: forward ref; 1: reverse ref int32_t db_length, int32_t query_length, const uint8_t gap_open, /* will be used as - */ const uint8_t gap_extend, /* will be used as - */ const simd_int* query_profile_byte, - const simd_int* query_consens_byte, -#ifdef GAP_POS_SCORING - const simd_int* gap_open_del, - const simd_int* gap_close_del, - const simd_int* gap_open_ins, -#endif uint8_t terminate, /* the best alignment score: used to terminate the matrix calculation when locating the alignment beginning point. If this score @@ -345,34 +348,34 @@ class SmithWaterman{ uint8_t bias, /* Shift 0 point to a positive value. 
*/ int32_t maskLen); - template + template std::pair sw_sse2_word (const unsigned char* db_sequence, - const simd_int* db_profile_byte, int8_t ref_dir, // 0: forward ref; 1: reverse ref int32_t db_length, int32_t query_length, const uint8_t gap_open, /* will be used as - */ const uint8_t gap_extend, /* will be used as - */ - const simd_int*query_profile_word, - const simd_int* query_consens_word, -#ifdef GAP_POS_SCORING - const simd_int* gap_open_del, - const simd_int* gap_close_del, - const simd_int* gap_open_ins, -#endif + const simd_int* query_profile_word, uint16_t terminate, - uint16_t bias, int32_t maskLen); - template + template + std::pair sw_sse2_int (const unsigned char* db_sequence, + int8_t ref_dir, // 0: forward ref; 1: reverse ref + int32_t db_length, + int32_t query_length, + const uint8_t gap_open, /* will be used as - */ + const uint8_t gap_extend, /* will be used as - */ + const simd_int* query_profile_int, + uint32_t terminate, + int32_t maskLen); + + template SmithWaterman::cigar *banded_sw(const unsigned char *db_sequence, const int8_t *query_sequence, - const int8_t *query_consens_sequence, const int8_t * compositionBias, - int32_t db_length, int32_t query_length, int32_t queryStart, int32_t targetStart, + const int8_t * compositionBias, + int32_t db_length, int32_t query_length, int32_t queryStart, int32_t score, const uint32_t gap_open, const uint32_t gap_extend, -#ifdef GAP_POS_SCORING - uint8_t *gDelOpen, uint8_t *gDelClose, uint8_t *gIns, -#endif - int32_t band_width, const int8_t *mat, const int8_t *target_mat, const int32_t qry_n, const int32_t tgt_n); + int32_t band_width, const int8_t *mat, const int32_t qry_n); /*! 
@function Produce CIGAR 32-bit unsigned integer from CIGAR operation and CIGAR length @@ -380,13 +383,9 @@ class SmithWaterman{ @param op_letter CIGAR operation character ('M', 'I', etc) @return 32-bit unsigned integer, representing encoded CIGAR operation and length */ - inline uint32_t to_cigar_int (uint32_t length, char op_letter); s_profile* profile; - - size_t query_id; - size_t target_id; - + s_block* block; template void createQueryProfile(simd_int *profile, const int8_t *query_sequence, const int8_t * composition_bias, @@ -395,13 +394,11 @@ class SmithWaterman{ float *tmp_composition_bias; int8_t * scorePerCol; short * profile_word_linear_data; + int32_t * profile_int_linear_data; + bool aaBiasCorrection; float aaBiasCorrectionScale; - - template - void createConsensProfile(simd_int *profile, const int8_t *consens_sequence, const int32_t query_length, const int32_t offset); - - void createTargetProfile(simd_int *profile, const int8_t *mat, const int target_length, const int32_t aaSize, uint8_t bias); + SubstitutionMatrix * subMat; template void updateQueryProfile(simd_int *profile, const int32_t query_length, const int32_t aaSize, uint8_t shift); diff --git a/src/prefiltering/ungappedprefilter.cpp b/src/prefiltering/ungappedprefilter.cpp index 8916adb94..e497835a3 100644 --- a/src/prefiltering/ungappedprefilter.cpp +++ b/src/prefiltering/ungappedprefilter.cpp @@ -359,7 +359,7 @@ void runFilterOnCpu(Parameters & par, BaseMatrix * subMat, int8_t * tinySubMat, Sequence qSeq(par.maxSeqLen, querySeqType, subMat, 0, false, par.compBiasCorrection); Sequence tSeq(par.maxSeqLen, targetSeqType, subMat, 0, false, par.compBiasCorrection); SmithWaterman aligner(par.maxSeqLen, subMat->alphabetSize, - par.compBiasCorrection, par.compBiasCorrectionScale, targetSeqType); + par.compBiasCorrection, par.compBiasCorrectionScale, NULL); std::string resultBuffer; resultBuffer.reserve(262144); @@ -418,8 +418,6 @@ void runFilterOnCpu(Parameters & par, BaseMatrix * subMat, int8_t * 
tinySubMat, } else { res = aligner.ssw_align( tSeq.numSequence, - tSeq.numConsensusSequence, - tSeq.getAlignmentProfile(), tSeq.L, backtrace, par.gapOpen.values.aminoacid(), @@ -430,8 +428,7 @@ void runFilterOnCpu(Parameters & par, BaseMatrix * subMat, int8_t * tinySubMat, par.covMode, par.covThr, par.correlationScoreWeight, - qSeq.L / 2, - tId + qSeq.L / 2 ); } score = res.score1; diff --git a/src/test/TestAlignment.cpp b/src/test/TestAlignment.cpp index 394278e6c..23108b54b 100644 --- a/src/test/TestAlignment.cpp +++ b/src/test/TestAlignment.cpp @@ -67,7 +67,7 @@ int main (int, const char**) { Sequence* dbSeq = new Sequence(10000, 0, &subMat, kmer_size, true, false); //dbSeq->mapSequence(1,"lala2",ref_seq); dbSeq->mapSequence(1,1,tim2.c_str(), tim2.size()); - SmithWaterman aligner(15000, subMat.alphabetSize, true, 1.0, Parameters::DBTYPE_AMINO_ACIDS); + SmithWaterman aligner(15000, subMat.alphabetSize, true, 1.0, &subMat); int8_t * tinySubMat = new int8_t[subMat.alphabetSize*subMat.alphabetSize]; for (int i = 0; i < subMat.alphabetSize; i++) { for (int j = 0; j < subMat.alphabetSize; j++) { @@ -92,8 +92,6 @@ int main (int, const char**) { std::string backtrace; s_align alignment = aligner.ssw_align( dbSeq->numSequence, - dbSeq->numConsensusSequence, - dbSeq->getAlignmentProfile(), dbSeq->L, backtrace, gap_open, gap_extend, @@ -102,8 +100,7 @@ int main (int, const char**) { &evalueComputation, 0, 0.0, 0.0, - maskLen, - dbSeq->getId() + maskLen ); if(alignment.cigar){ std::cout << "Cigar" << std::endl; diff --git a/src/test/TestAlignmentPerformance.cpp b/src/test/TestAlignmentPerformance.cpp index d2b59e9c9..f60b448e1 100644 --- a/src/test/TestAlignmentPerformance.cpp +++ b/src/test/TestAlignmentPerformance.cpp @@ -69,7 +69,7 @@ int main (int, const char**) { Sequence* query = new Sequence(10000, 0, &subMat, kmer_size, true, false); Sequence* dbSeq = new Sequence(10000, 0, &subMat, kmer_size, true, false); //dbSeq->mapSequence(1,"lala2",ref_seq); - SmithWaterman 
aligner(15000, subMat.alphabetSize, false, 1.0, Parameters::DBTYPE_AMINO_ACIDS); + SmithWaterman aligner(15000, subMat.alphabetSize, false, 1.0, &subMat); int8_t * tinySubMat = new int8_t[subMat.alphabetSize*subMat.alphabetSize]; for (int i = 0; i < subMat.alphabetSize; i++) { for (int j = 0; j < subMat.alphabetSize; j++) { @@ -96,8 +96,6 @@ int main (int, const char**) { std::string backtrace; s_align alignment = aligner.ssw_align( dbSeq->numSequence, - dbSeq->numConsensusSequence, - dbSeq->getAlignmentProfile(), dbSeq->L, backtrace, gap_open, gap_extend, @@ -106,8 +104,7 @@ int main (int, const char**) { &evalueComputation, 0, 0.0, 0.0, - maskLen, - dbSeq->getId() + maskLen ); if(mode == 0 ){ cells += query->L * dbSeq->L; diff --git a/src/test/TestAlignmentTraceback.cpp b/src/test/TestAlignmentTraceback.cpp index e002d18c4..924ebb32e 100644 --- a/src/test/TestAlignmentTraceback.cpp +++ b/src/test/TestAlignmentTraceback.cpp @@ -165,7 +165,7 @@ int main(int, const char**) { Sequence* dbSeq = new Sequence(10000, 0, &subMat, kmer_size, true, false); //dbSeq->mapSequence(1,"lala2",ref_seq); dbSeq->mapSequence(1,1,tim2.c_str(), tim2.size()); - SmithWaterman aligner(15000, subMat.alphabetSize, false, 1.0, Parameters::DBTYPE_AMINO_ACIDS); + SmithWaterman aligner(15000, subMat.alphabetSize, false, 1.0, &subMat); int8_t * tinySubMat = new int8_t[subMat.alphabetSize*subMat.alphabetSize]; for (int i = 0; i < subMat.alphabetSize; i++) { for (int j = 0; j < subMat.alphabetSize; j++) { diff --git a/src/test/TestMultipleAlignment.cpp b/src/test/TestMultipleAlignment.cpp index c16c26eef..537251d4e 100644 --- a/src/test/TestMultipleAlignment.cpp +++ b/src/test/TestMultipleAlignment.cpp @@ -33,7 +33,7 @@ int main(int, const char**) { // BaseMatrix::print(subMat.subMatrix, subMat.alphabetSize); std::cout << "\n"; EvalueComputation evaluer(100000, &subMat, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid()); - Matcher * aligner = new 
Matcher(Parameters::DBTYPE_AMINO_ACIDS, Parameters::DBTYPE_AMINO_ACIDS, 10000, &subMat, &evaluer, false, 1.0, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid(), 0.0, 40); + Matcher * aligner = new Matcher(Parameters::DBTYPE_AMINO_ACIDS, 10000, &subMat, &evaluer, false, 1.0, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid(), 0.0, 40); std::vector alnResults; std::vector> seqSet; std::cout << "Sequence (id 0):\n"; diff --git a/src/test/TestProfileAlignment.cpp b/src/test/TestProfileAlignment.cpp index 74a3779c3..5a749f869 100644 --- a/src/test/TestProfileAlignment.cpp +++ b/src/test/TestProfileAlignment.cpp @@ -795,7 +795,7 @@ int main (int, const char**) { const char* sequence2 = "LFILNIISMNKQTKVKGYLLLLLVISSLFISLVGHGYTANKVSAPNPAKEYPQDNLSVIDMKNLPGTQIKSMVKDELQQFLEEQGFRRLKNKSLVDLRRIWLGFMYEDFFYTMHKKTDLPISVIYAFFIIEATNAGIESKLMAKALNPGGIKYRGTGKKMKAMDDCY"; dbSeq->mapSequence(1,1,sequence2, strlen(sequence2)); - SmithWaterman aligner(15000, subMat.alphabetSize, false, 1.0, Parameters::DBTYPE_AMINO_ACIDS); + SmithWaterman aligner(15000, subMat.alphabetSize, false, 1.0, &subMat); int8_t * tinySubMat = new int8_t[subMat.alphabetSize*subMat.alphabetSize]; aligner.ssw_init(s, s->getAlignmentProfile(), &subMat); int32_t maskLen = s->L / 2; @@ -805,8 +805,6 @@ int main (int, const char**) { std::string backtrace; s_align alignment = aligner.ssw_align( dbSeq->numSequence, - dbSeq->numConsensusSequence, - dbSeq->getAlignmentProfile(), dbSeq->L, backtrace, gap_open, gap_extend, @@ -815,8 +813,7 @@ int main (int, const char**) { &evalueComputation, 0, 0.0, 0.0, - maskLen, - dbSeq->getId() + maskLen ); if(alignment.cigar){ std::cout << "Cigar" << std::endl; diff --git a/src/test/TestUngappedCpuPerf.cpp b/src/test/TestUngappedCpuPerf.cpp index accfc340b..c8c6ec490 100644 --- a/src/test/TestUngappedCpuPerf.cpp +++ b/src/test/TestUngappedCpuPerf.cpp @@ -67,7 +67,7 @@ int main (int, const char**) { Util::decomposeDomain(targets, thread_idx, par.threads, &ignore, 
&total); sanityCheck += total; - SmithWaterman aligner(seqLen, subMat.alphabetSize, false, 1.0, Parameters::DBTYPE_AMINO_ACIDS); + SmithWaterman aligner(seqLen, subMat.alphabetSize, false, 1.0, &subMat); Sequence qSeq(seqLen, Parameters::DBTYPE_AMINO_ACIDS, &subMat, 0, false, false); qSeq.mapSequence(0, 0, seq, seqLen); aligner.ssw_init(&qSeq, tinySubMat, &subMat); diff --git a/src/util/alignall.cpp b/src/util/alignall.cpp index 3a07596f6..732c9e144 100644 --- a/src/util/alignall.cpp +++ b/src/util/alignall.cpp @@ -67,7 +67,7 @@ int alignall(int argc, const char **argv, const Command &command) { #ifdef OPENMP thread_idx = (unsigned int) omp_get_thread_num(); #endif - Matcher matcher(targetSeqType, targetSeqType, par.maxSeqLen, subMat, &evaluer, par.compBiasCorrection, par.compBiasCorrectionScale, gapOpen, gapExtend, 0.0, par.zdrop); + Matcher matcher(targetSeqType, par.maxSeqLen, subMat, &evaluer, par.compBiasCorrection, par.compBiasCorrectionScale, gapOpen, gapExtend, 0.0, par.zdrop); Sequence query(par.maxSeqLen, targetSeqType, subMat, 0, false, par.compBiasCorrection); Sequence target(par.maxSeqLen, targetSeqType, subMat, 0, false, par.compBiasCorrection); diff --git a/src/util/proteomecluster.cpp b/src/util/proteomecluster.cpp index 0fb787563..08399b853 100644 --- a/src/util/proteomecluster.cpp +++ b/src/util/proteomecluster.cpp @@ -496,7 +496,7 @@ int proteomecluster(int argc, const char **argv, const Command &command){ #ifdef OPENMP thread_idx = (unsigned int) omp_get_thread_num(); #endif - Matcher matcher(tProteinSeqType, tProteinSeqType, par.maxSeqLen, &subMat, &evaluer, par.compBiasCorrection, par.compBiasCorrectionScale, gapOpen, gapExtend, 0.0, par.zdrop); + Matcher matcher(tProteinSeqType, par.maxSeqLen, &subMat, &evaluer, par.compBiasCorrection, par.compBiasCorrectionScale, gapOpen, gapExtend, 0.0, par.zdrop); Sequence query(par.maxSeqLen, tProteinSeqType, &subMat, 0, false, par.compBiasCorrection); Sequence target(par.maxSeqLen, tProteinSeqType, 
&subMat, 0, false, par.compBiasCorrection); std::vector localsharedEntryCount(proteomeList.size(), 0); diff --git a/src/util/result2msa.cpp b/src/util/result2msa.cpp index b6d969ede..a43722ea4 100644 --- a/src/util/result2msa.cpp +++ b/src/util/result2msa.cpp @@ -150,7 +150,7 @@ int result2msa(int argc, const char **argv, const Command &command) { thread_idx = (unsigned int) omp_get_thread_num(); #endif - Matcher matcher(qDbr->getDbtype(), tDbr->getDbtype(), maxSequenceLength, &subMat, &evalueComputation, par.compBiasCorrection, + Matcher matcher(qDbr->getDbtype(), maxSequenceLength, &subMat, &evalueComputation, par.compBiasCorrection, par.compBiasCorrectionScale, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid(), 0.0, par.zdrop); MultipleAlignment aligner(maxSequenceLength, &subMat); PSSMCalculator calculator( diff --git a/src/util/result2profile.cpp b/src/util/result2profile.cpp index bb3d24bae..301879237 100644 --- a/src/util/result2profile.cpp +++ b/src/util/result2profile.cpp @@ -147,7 +147,7 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret thread_idx = (unsigned int) omp_get_thread_num(); #endif - Matcher matcher(qDbr->getDbtype(), tDbr->getDbtype(), maxSequenceLength, &subMat, &evalueComputation, + Matcher matcher(qDbr->getDbtype(), maxSequenceLength, &subMat, &evalueComputation, par.compBiasCorrection, par.compBiasCorrectionScale, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid(), 0.0, par.zdrop); Masker masker(subMat); diff --git a/src/util/transitivealign.cpp b/src/util/transitivealign.cpp index 847345f54..950c8f015 100644 --- a/src/util/transitivealign.cpp +++ b/src/util/transitivealign.cpp @@ -61,7 +61,7 @@ int transitivealign(int argc, const char **argv, const Command &command) { thread_idx = (unsigned int) omp_get_thread_num(); #endif // TODO: is this right? 
targetSeqType defined as -1 temporarily - Matcher matcher(querySeqType, -1, par.maxSeqLen, subMat, &evaluer, + Matcher matcher(querySeqType, par.maxSeqLen, subMat, &evaluer, par.compBiasCorrection, par.compBiasCorrectionScale, par.gapOpen.values.aminoacid(), par.gapExtend.values.aminoacid(), 0.0, par.zdrop); // Sequence query(par.maxSeqLen, targetSeqType, subMat, par.kmerSize, par.spacedKmer, par.compBiasCorrection); diff --git a/src/workflow/Search.cpp b/src/workflow/Search.cpp index c70c5a82b..0e5ef3c43 100644 --- a/src/workflow/Search.cpp +++ b/src/workflow/Search.cpp @@ -485,6 +485,11 @@ int search(int argc, const char **argv, const Command& command) { par.realign = true; } + // disable realign for iterative nucl search + if (searchMode & Parameters::SEARCH_MODE_FLAG_QUERY_NUCLEOTIDE && searchMode & Parameters::SEARCH_MODE_FLAG_TARGET_NUCLEOTIDE) { + par.realign = false; + } + if (i > 0) { // par.queryProfile = true; par.realign = false; diff --git a/util/build_osx.sh b/util/build_osx.sh index 7a2f51899..b3fe893b1 100755 --- a/util/build_osx.sh +++ b/util/build_osx.sh @@ -55,6 +55,7 @@ cmake \ -DCMAKE_C_FLAGS="-arch x86_64" -DCMAKE_CXX_FLAGS="-arch x86_64" -DCMAKE_ASM_FLAGS="-arch arm64" \ -DBUILD_SHARED_LIBS=OFF -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \ -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP_AMD64}" -DOpenMP_C_LIB_NAMES=omp -DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP_AMD64}" -DOpenMP_CXX_LIB_NAMES=omp -DOpenMP_omp_LIBRARY=${LIBOMP_AMD64}/libomp.a \ + -DRust_CARGO_TARGET=x86_64-apple-darwin \ "$REPO" make -j${CPUS} @@ -94,6 +95,7 @@ cmake \ -DCMAKE_C_FLAGS="-arch arm64" -DCMAKE_CXX_FLAGS="-arch arm64" -DCMAKE_ASM_FLAGS="-arch arm64" \ -DBUILD_SHARED_LIBS=OFF -DCMAKE_FIND_LIBRARY_SUFFIXES=".a" \ -DOpenMP_C_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP_AARCH64}" -DOpenMP_C_LIB_NAMES=omp -DOpenMP_CXX_FLAGS="-Xpreprocessor -fopenmp -I${LIBOMP_AARCH64}" -DOpenMP_CXX_LIB_NAMES=omp -DOpenMP_omp_LIBRARY=${LIBOMP_AARCH64}/libomp.a \ + 
-DRust_CARGO_TARGET=aarch64-apple-darwin \ "$REPO" make -j${CPUS} diff --git a/util/regression b/util/regression index df963bd4b..c96a27e3c 160000 --- a/util/regression +++ b/util/regression @@ -1 +1 @@ -Subproject commit df963bd4ba8ac22fbce2e153a0bfe4780cf1e372 +Subproject commit c96a27e3cb3424798fd3a35974028588c308b86e diff --git a/util/update_blockaligner.sh b/util/update_blockaligner.sh new file mode 100755 index 000000000..cb567c534 --- /dev/null +++ b/util/update_blockaligner.sh @@ -0,0 +1,2 @@ +#!/bin/sh -e +git subtree pull --prefix lib/block-aligner https://github.com/Gyuuul2/block-aligner 3di --squash