Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ NONCONFIGURE_OBJS = hfile_libcurl.o
PLUGIN_EXT =
PLUGIN_OBJS =

bgzf_internal_h = bgzf_internal.h $(htslib_bgzf_h)
cram_h = cram/cram.h $(cram_samtools_h) $(header_h) $(cram_structs_h) $(cram_io_h) cram/cram_encode.h cram/cram_decode.h cram/cram_stats.h cram/cram_codecs.h cram/cram_index.h $(htslib_cram_h)
cram_io_h = cram/cram_io.h $(cram_misc_h)
cram_misc_h = cram/misc.h
Expand Down Expand Up @@ -487,7 +488,7 @@ hts-object-files: $(LIBHTS_OBJS)
$(CC) -shared $(LDFLAGS) -o $@ $< hts.dll.a $(LIBS)


bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(htslib_khash_h)
bgzf.o bgzf.pico: bgzf.c config.h $(htslib_hts_h) $(htslib_bgzf_h) $(htslib_hfile_h) $(htslib_thread_pool_h) $(htslib_hts_endian_h) cram/pooled_alloc.h $(hts_internal_h) $(bgzf_internal_h) $(htslib_khash_h)
errmod.o errmod.pico: errmod.c config.h $(htslib_hts_h) $(htslib_ksort_h) $(htslib_hts_os_h)
kstring.o kstring.pico: kstring.c config.h $(htslib_kstring_h)
header.o header.pico: header.c config.h $(textutils_internal_h) $(header_h)
Expand All @@ -499,7 +500,7 @@ hfile_s3.o hfile_s3.pico: hfile_s3.c config.h $(hfile_internal_h) $(htslib_hts_h
hts.o hts.pico: hts.c config.h os/lzma_stub.h $(htslib_hts_h) $(htslib_bgzf_h) $(cram_h) $(htslib_hfile_h) $(htslib_hts_endian_h) version.h config_vars.h $(hts_internal_h) $(hfile_internal_h) $(sam_internal_h) $(htslib_hts_expr_h) $(htslib_hts_os_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_ksort_h) $(htslib_tbx_h) $(htscodecs_htscodecs_h)
hts_expr.o hts_expr.pico: hts_expr.c config.h $(htslib_hts_expr_h) $(htslib_hts_log_h) $(textutils_internal_h)
hts_os.o hts_os.pico: hts_os.c config.h $(htslib_hts_defs_h) os/rand.c
vcf.o vcf.pico: vcf.c config.h $(fuzz_settings_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h)
vcf.o vcf.pico: vcf.c config.h $(fuzz_settings_h) $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_hfile_h) $(hts_internal_h) $(htslib_khash_str2int_h) $(htslib_kstring_h) $(htslib_sam_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_hts_endian_h) $(bgzf_internal_h)
sam.o sam.pico: sam.c config.h $(fuzz_settings_h) $(htslib_hts_defs_h) $(htslib_sam_h) $(htslib_bgzf_h) $(cram_h) $(hts_internal_h) $(sam_internal_h) $(htslib_hfile_h) $(htslib_hts_endian_h) $(htslib_hts_expr_h) $(header_h) $(htslib_khash_h) $(htslib_kseq_h) $(htslib_kstring_h)
sam_mods.o sam_mods.pico: sam_mods.c config.h $(htslib_sam_h) $(textutils_internal_h)
simd.o simd.pico: simd.c config.h $(htslib_sam_h) $(sam_internal_h)
Expand Down
58 changes: 30 additions & 28 deletions bgzf.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,13 @@
#include "htslib/hts_endian.h"
#include "cram/pooled_alloc.h"
#include "hts_internal.h"
#include "bgzf_internal.h"
#include "htslib/khash.h"

#ifndef EFTYPE
#define EFTYPE ENOEXEC
#endif

#define BGZF_CACHE
#define BGZF_MT

#define BLOCK_HEADER_LENGTH 18
Expand All @@ -76,21 +77,15 @@
*/
static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0";

#ifdef BGZF_CACHE
typedef struct {
int size;
uint8_t *block;
int64_t end_offset;
} cache_t;

#include "htslib/khash.h"
KHASH_MAP_INIT_INT64(cache, cache_t)
#endif
KHASH_MAP_INIT_INT64(bgzf_cache, cache_t)

struct bgzf_cache_t {
khash_t(cache) *h;
khint_t last_pos;
};
// struct bgzf_cache_t is defined in bgzf_internal.h

#ifdef BGZF_MT

Expand Down Expand Up @@ -409,20 +404,21 @@ static BGZF *bgzf_read_init(hFILE *hfpr, const char *filename)
errno = EFTYPE;
return NULL;
}
#ifdef BGZF_CACHE

if (!(fp->cache = malloc(sizeof(*fp->cache)))) {
free(fp->uncompressed_block);
free(fp);
return NULL;
}
if (!(fp->cache->h = kh_init(cache))) {
if (!(fp->cache->h = kh_init(bgzf_cache))) {
free(fp->uncompressed_block);
free(fp->cache);
free(fp);
return NULL;
}
fp->cache->last_pos = 0;
#endif
fp->cache->private_data = NULL;
fp->cache->private_data_cleanup = (bgzf_private_data_cleanup_func *) NULL;
return fp;
}

Expand All @@ -442,6 +438,15 @@ static BGZF *bgzf_write_init(const char *mode)
fp = (BGZF*)calloc(1, sizeof(BGZF));
if (fp == NULL) goto mem_fail;
fp->is_write = 1;

fp->cache = malloc(sizeof(bgzf_cache_t));
if (!fp->cache)
goto mem_fail;
fp->cache->h = NULL;
fp->cache->last_pos = 0;
fp->cache->private_data = NULL;
fp->cache->private_data_cleanup = (bgzf_private_data_cleanup_func *) NULL;

int compress_level = mode2level(mode);
if ( compress_level==-2 )
{
Expand Down Expand Up @@ -479,6 +484,7 @@ static BGZF *bgzf_write_init(const char *mode)

fail:
if (fp != NULL) {
free(fp->cache);
free(fp->uncompressed_block);
free(fp->gz_stream);
free(fp);
Expand Down Expand Up @@ -896,15 +902,16 @@ static int check_header(const uint8_t *header)
&& unpackInt16((uint8_t*)&header[14]) == 2) ? 0 : -1;
}

#ifdef BGZF_CACHE
static void free_cache(BGZF *fp)
{
khint_t k;
if (fp->is_write) return;
khash_t(cache) *h = fp->cache->h;
for (k = kh_begin(h); k < kh_end(h); ++k)
if (kh_exist(h, k)) free(kh_val(h, k).block);
kh_destroy(cache, h);
if (fp->cache->h) {
khash_t(bgzf_cache) *h = fp->cache->h;
for (k = kh_begin(h); k < kh_end(h); ++k)
if (kh_exist(h, k)) free(kh_val(h, k).block);
kh_destroy(bgzf_cache, h);
}
bgzf_clear_private_data(fp);
free(fp->cache);
}

Expand All @@ -913,8 +920,8 @@ static int load_block_from_cache(BGZF *fp, int64_t block_address)
khint_t k;
cache_t *p;

khash_t(cache) *h = fp->cache->h;
k = kh_get(cache, h, block_address);
khash_t(bgzf_cache) *h = fp->cache->h;
k = kh_get(bgzf_cache, h, block_address);
if (k == kh_end(h)) return 0;
p = &kh_val(h, k);
if (fp->block_length != 0) fp->block_offset = 0;
Expand All @@ -937,7 +944,7 @@ static void cache_block(BGZF *fp, int size)
uint8_t *block = NULL;
cache_t *p;
//fprintf(stderr, "Cache block at %llx\n", (int)fp->block_address);
khash_t(cache) *h = fp->cache->h;
khash_t(bgzf_cache) *h = fp->cache->h;
if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return;
if (fp->block_length < 0 || fp->block_length > BGZF_MAX_BLOCK_SIZE) return;
if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > (uint32_t)fp->cache_size) {
Expand All @@ -959,13 +966,13 @@ static void cache_block(BGZF *fp, int size)

if (k != k_orig) {
block = kh_val(h, k).block;
kh_del(cache, h, k);
kh_del(bgzf_cache, h, k);
}
} else {
block = (uint8_t*)malloc(BGZF_MAX_BLOCK_SIZE);
}
if (!block) return;
k = kh_put(cache, h, fp->block_address, &ret);
k = kh_put(bgzf_cache, h, fp->block_address, &ret);
if (ret <= 0) { // kh_put failed, or in there already (shouldn't happen)
free(block);
return;
Expand All @@ -976,11 +983,6 @@ static void cache_block(BGZF *fp, int size)
p->block = block;
memcpy(p->block, fp->uncompressed_block, p->size);
}
#else
static void free_cache(BGZF *fp) {}
static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;}
static void cache_block(BGZF *fp, int size) {}
#endif

/*
* Absolute htell in this compressed file.
Expand Down
70 changes: 70 additions & 0 deletions bgzf_internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/* bgzf_internal.h -- internal bgzf functions; not part of the public API.

Copyright (C) 2025 Genome Research Ltd.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */

#ifndef HTSLIB_BGZF_INTERNAL_H
#define HTSLIB_BGZF_INTERNAL_H

#include <assert.h>
#include "htslib/bgzf.h"

/*
 * BGZF private data interface
 * This exists so that we can pass BCF headers into interfaces that have
 * traditionally only taken a BGZF pointer without a corresponding bcf_hdr_t *,
 * notably the bcf_readrec() function used by BCF iterators.
 *
 * To preserve the BGZF API and ABI, this is tagged on to the existing
 * opaque bgzf_cache_t structure.  bgzf_cache_t is now defined here so we can
 * inline lookups.
 */

/*
 * Callback type used to release private data attached to a BGZF handle.
 * It receives the pointer that was passed to bgzf_set_private_data().
 */
typedef void bgzf_private_data_cleanup_func(void *private_data);

/*
 * Forward declaration of the block-cache hash table type.  Only a pointer to
 * it is stored here, so users of this header do not need the full khash
 * definition.
 */
struct kh_bgzf_cache_s;

struct bgzf_cache_t {
    struct kh_bgzf_cache_s *h;      // block cache hash table
    unsigned int last_pos;          // cache position bookkeeping
    void *private_data;             // caller-attached data, or NULL if none
    bgzf_private_data_cleanup_func *private_data_cleanup; // may be NULL
};

/*
 * Set private data.  cleanup will be called on bgzf_close() or
 * bgzf_clear_private_data();
 *
 * @param fp            BGZF handle; fp->cache must not be NULL
 * @param private_data  Pointer to attach to the handle
 * @param fn            Cleanup callback for private_data, or NULL for none
 *
 * Note that any data already attached is simply overwritten; its cleanup
 * function is NOT called here.  Call bgzf_clear_private_data() first if the
 * previous data needs to be released.
 */
static inline void bgzf_set_private_data(BGZF *fp, void *private_data,
                                         bgzf_private_data_cleanup_func *fn) {
    assert(fp->cache != NULL);
    fp->cache->private_data = private_data;
    fp->cache->private_data_cleanup = fn;
}

/*
 * Release the private data attached to a BGZF handle, if any.
 *
 * @param fp  BGZF handle; fp->cache must not be NULL
 *
 * If private data is present, the registered cleanup callback (when there is
 * one) is called on it and the stored data pointer is then reset to NULL.
 * Does nothing when no private data is attached.
 */
static inline void bgzf_clear_private_data(BGZF *fp) {
    assert(fp->cache != NULL);
    if (fp->cache->private_data) {
        if (fp->cache->private_data_cleanup)
            fp->cache->private_data_cleanup(fp->cache->private_data);
        fp->cache->private_data = NULL;
    }
}

/*
 * Get the private data attached to a BGZF handle.
 *
 * @param fp  BGZF handle; fp->cache must not be NULL
 * @return    The stored private data pointer (NULL if none has been set on
 *            an initialised cache)
 */
static inline void * bgzf_get_private_data(BGZF *fp) {
    assert(fp->cache != NULL);
    return fp->cache->private_data;
}

#endif /* HTSLIB_BGZF_INTERNAL_H */
Loading
Loading