From 265f947cf457976d407f7e8cacb466ae9499f5a0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 29 Apr 2025 14:32:26 +0200 Subject: [PATCH 1/4] Hack in ISA-L support for compression --- Makefile | 2 +- bgzf.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++--- configure.ac | 2 +- 3 files changed, 77 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 594f4ab00..1faeee57a 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ AR = ar RANLIB = ranlib # Default libraries to link if configure is not used -htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl +htslib_default_libs = -lz -lm -lbz2 -llzma -lcurl -lisal CPPFLAGS = # TODO: make the 64-bit support for VCF optional via configure, for now add -DVCF_ALLOW_INT64 diff --git a/bgzf.c b/bgzf.c index 3237ae054..39a81d0a4 100644 --- a/bgzf.c +++ b/bgzf.c @@ -41,6 +41,13 @@ #include #endif +/* Should be fixed in the build system later */ +#define HAVE_ISAL 1 +#ifdef HAVE_ISAL +#include +#include +#endif + #include "htslib/hts.h" #include "htslib/bgzf.h" #include "htslib/hfile.h" @@ -547,11 +554,71 @@ BGZF *bgzf_hopen(hFILE *hfp, const char *mode) return fp; } -#ifdef HAVE_LIBDEFLATE +#if HAVE_ISAL +uint32_t hts_crc32(uint32_t crc, const void *buf, size_t len) { + return crc32_gzip_refl(crc, buf, len); +} +#elif HAVE_LIBDEFLATE uint32_t hts_crc32(uint32_t crc, const void *buf, size_t len) { return libdeflate_crc32(crc, buf, len); } +#else +uint32_t hts_crc32(uint32_t crc, const void *buf, size_t len) { + return crc32(crc, buf, len); +} +#endif +#if HAVE_ISAL +/* Compress src into one BGZF block at dst using ISA-L (levels 0-3). On success stores the block length in *dlen and returns 0; returns -1 on any failure. */ +static int _bgzf_compress_isal(uint8_t *dst, size_t *dlen, const void *src, size_t slen, int level) +{ + if (level < 0 || level > 3) { + return -1; + } + struct isal_zstream *z = malloc(sizeof(struct isal_zstream)); + if (z == NULL) { + return -1; + } + static int level_buf_sizes[] = { + ISAL_DEF_LVL0_DEFAULT, + ISAL_DEF_LVL1_DEFAULT, + ISAL_DEF_LVL2_DEFAULT, + ISAL_DEF_LVL3_DEFAULT + }; + size_t level_buf_size = level_buf_sizes[level]; +
uint8_t *level_buf = malloc(level_buf_size); + if (level_buf == NULL) { + free(z); + return -1; + } + isal_deflate_init(z); + z->level = level; + z->level_buf = level_buf; + z->level_buf_size = level_buf_size; + z->gzip_flag = IGZIP_GZIP_NO_HDR; // Also calculates length and CRC + z->next_in = (uint8_t *)src; + z->avail_in = slen; + z->next_out = dst + BLOCK_HEADER_LENGTH; + z->avail_out = *dlen - BLOCK_HEADER_LENGTH; + z->flush = FULL_FLUSH; + z->end_of_stream = 1; + + int ret = isal_deflate(z); + /* COMP_OK is also returned when dst fills up before the stream ends, so require ZSTATE_END too. */ + if (ret != COMP_OK || z->internal_state.state != ZSTATE_END) { + hts_log_error("Call to _bgzf_compress_isal failed"); + free(level_buf); + free(z); + return -1; + } + *dlen = z->next_out - (uint8_t *)dst; + memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block + packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes + return 0; +} +#endif + +#if HAVE_LIBDEFLATE int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) { if (slen == 0) { @@ -575,6 +642,13 @@ int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int le } else { level = level > 0 ? level : 6; // libdeflate doesn't honour -1 as default + + #if HAVE_ISAL + if (level == 1 || level == 2) { + return _bgzf_compress_isal(dst, dlen, src, slen, level); + } + #endif + // NB levels go up to 12 here. 
int lvl_map[] = {0,1,2,3,5,6,7,8,10,12}; level = lvl_map[level>9 ?9 :level]; @@ -611,9 +682,6 @@ int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int le #else -uint32_t hts_crc32(uint32_t crc, const void *buf, size_t len) { - return crc32(crc, buf, len); -} int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) { diff --git a/configure.ac b/configure.ac index d19bea255..f038ccbd5 100644 --- a/configure.ac +++ b/configure.ac @@ -516,7 +516,7 @@ AS_IF([test "x$with_libdeflate" != "xno"], AC_CHECK_LIB([deflate], [libdeflate_deflate_compress],[:],[libdeflate='missing library']) AS_IF([test "$libdeflate" = "ok"], [AC_DEFINE([HAVE_LIBDEFLATE], 1, [Define if libdeflate is available.]) - LIBS="-ldeflate $LIBS" + LIBS="-ldeflate -lisal $LIBS" private_LIBS="$private_LIBS -ldeflate" static_LIBS="$static_LIBS -ldeflate"], [AS_IF([test "x$with_libdeflate" != "xcheck"], From 68ed38229f288ac29fa91c9061ec90717c897b5c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 29 Apr 2025 14:50:19 +0200 Subject: [PATCH 2/4] Fix memory leak --- bgzf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bgzf.c b/bgzf.c index 39a81d0a4..7a071eed0 100644 --- a/bgzf.c +++ b/bgzf.c @@ -609,6 +609,8 @@ static int _bgzf_compress_isal(uint8_t *dst, size_t *dlen, const void *src, size return -1; } *dlen = z->next_out - (uint8_t *)dst; + free(level_buf); + free(z); memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes return 0; From b33d1f1a756ba7086fa96e31ccc38ba5682347e2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 29 Apr 2025 15:01:16 +0200 Subject: [PATCH 3/4] Fix trailing spaces --- bgzf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bgzf.c b/bgzf.c index 7a071eed0..f6c099cae 100644 --- a/bgzf.c +++ b/bgzf.c @@ -562,7 +562,7 @@ uint32_t 
hts_crc32(uint32_t crc, const void *buf, size_t len) { uint32_t hts_crc32(uint32_t crc, const void *buf, size_t len) { return libdeflate_crc32(crc, buf, len); } -#else +#else uint32_t hts_crc32(uint32_t crc, const void *buf, size_t len) { return crc32(crc, buf, len); } @@ -579,9 +579,9 @@ static int _bgzf_compress_isal(uint8_t *dst, size_t *dlen, const void *src, size return -1; } static int level_buf_sizes[] = { - ISAL_DEF_LVL0_DEFAULT, - ISAL_DEF_LVL1_DEFAULT, - ISAL_DEF_LVL2_DEFAULT, + ISAL_DEF_LVL0_DEFAULT, + ISAL_DEF_LVL1_DEFAULT, + ISAL_DEF_LVL2_DEFAULT, ISAL_DEF_LVL3_DEFAULT }; size_t level_buf_size = level_buf_sizes[level]; @@ -593,7 +593,7 @@ static int _bgzf_compress_isal(uint8_t *dst, size_t *dlen, const void *src, size z->level = level; z->level_buf = level_buf; z->level_buf_size = level_buf_size; - z->gzip_flag = IGZIP_GZIP_NO_HDR; // Also calculates length and CRC + z->gzip_flag = IGZIP_GZIP_NO_HDR; // Also calculates length and CRC z->next_in = (uint8_t *)src; z->avail_in = slen; z->next_out = dst + BLOCK_HEADER_LENGTH; @@ -615,7 +615,7 @@ static int _bgzf_compress_isal(uint8_t *dst, size_t *dlen, const void *src, size packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes return 0; } -#endif +#endif #if HAVE_LIBDEFLATE int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int level) @@ -644,10 +644,10 @@ int bgzf_compress(void *_dst, size_t *dlen, const void *src, size_t slen, int le #if HAVE_ISAL if (level == 1 || level == 2) { - return _bgzf_compress_isal(dst, dlen, src, slen, level); + return _bgzf_compress_isal(dst, dlen, src, slen, level); } #endif - + // NB levels go up to 12 here. 
int lvl_map[] = {0,1,2,3,5,6,7,8,10,12}; level = lvl_map[level>9 ?9 :level]; From 18e6f0fb1f96bbeaec51b763b1b9bdf5f9fdbd92 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 29 Apr 2025 15:18:47 +0200 Subject: [PATCH 4/4] Isal decompression --- bgzf.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/bgzf.c b/bgzf.c index f6c099cae..bf251f4e2 100644 --- a/bgzf.c +++ b/bgzf.c @@ -789,7 +789,44 @@ static int deflate_block(BGZF *fp, int block_length) return comp_size; } -#ifdef HAVE_LIBDEFLATE +#ifdef HAVE_ISAL +/* Inflate one block with ISA-L into dst and store the output length in *dlen. Returns 0 on success, -1 on allocation/inflate failure or an unfinished stream (ISAL_DECOMP_OK alone does not imply the stream ended), -2 on CRC mismatch. */ +static int bgzf_uncompress(uint8_t *dst, size_t *dlen, + const uint8_t *src, size_t slen, + uint32_t expected_crc) { + struct inflate_state *z = malloc(sizeof(struct inflate_state)); + if (z == NULL) { + hts_log_error("Allocating isal inflate state failed"); + return -1; + } + isal_inflate_init(z); + z->next_in = (uint8_t *)src; + z->avail_in = slen; + z->next_out = dst; + z->avail_out = *dlen; + z->crc_flag = ISAL_GZIP_NO_HDR; + + int ret = isal_inflate(z); + if (ret != ISAL_DECOMP_OK || z->block_state != ISAL_BLOCK_FINISH) { + hts_log_error("Inflate operation failed: %d", ret); + free(z); + return -1; + } + *dlen = z->next_out - dst; + uint32_t crc = z->crc; + free(z); + #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + // Pretend the CRC was OK so the fuzzer doesn't have to get it right + crc = expected_crc; + #endif + if (crc != expected_crc) { + hts_log_error("CRC32 checksum mismatch"); + return -2; + } + return 0; +} + +#elif HAVE_LIBDEFLATE static int bgzf_uncompress(uint8_t *dst, size_t *dlen, const uint8_t *src, size_t slen,