Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ Noteworthy changes in release a.b
sam_hdr_str() should be used instead. The old cigar_tab field is
now marked as deprecated; use the new bam_cigar_table[] instead.

- The bam1_core_t structure's l_qname and l_extranul fields have been
rearranged and l_qname enlarged; l_qname still includes the extra NULs.
(Almost all code should use bam_get_qname(), bam_get_cigar(), etc.,
and has no need to use these fields directly.) HTSlib now supports the
SAM specification's full 254-character QNAME length again. (#520)

- bcf_index_load() no longer tries the '.tbi' suffix when looking for
BCF index files (.tbi indexes are for text files, not binary BCF).

Expand Down
2 changes: 0 additions & 2 deletions cram/cram_samtools.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@ int bam_construct_seq(bam_seq_t **bp, size_t extra_len,
//b->l_aux = extra_len; // we fill this out later

qname_nuls = 4 - qname_len%4;
if (qname_len + qname_nuls > 255) // Check for core.l_qname overflow
return -1;
bam_len = qname_len + qname_nuls + ncigar*4 + (len+1)/2 + len + extra_len;
if (b->m_data < bam_len) {
b->m_data = bam_len;
Expand Down
5 changes: 2 additions & 3 deletions htslib/sam.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,9 @@ typedef struct {
int32_t pos;
uint16_t bin;
uint8_t qual;
uint8_t l_qname;
uint16_t flag;
uint8_t unused1;
uint8_t l_extranul;
uint16_t flag;
uint16_t l_qname;
uint32_t n_cigar;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This diff is pretty unhelpfully displayed. Equivalently and more clearly:

     int32_t pos;
     uint16_t bin;
     uint8_t qual;
-    uint8_t l_qname;
+    uint8_t l_extranul;
     uint16_t flag;
-    uint8_t unused1;
-    uint8_t l_extranul;
+    uint16_t l_qname;
     uint32_t n_cigar;
     int32_t l_qseq;
     int32_t mtid;

int32_t l_qseq;
int32_t mtid;
Expand Down
9 changes: 6 additions & 3 deletions sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -545,8 +545,6 @@ int bam_read1(BGZF *fp, bam1_t *b)
c->tid = x[0]; c->pos = x[1];
c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff;
c->l_extranul = (c->l_qname%4 != 0)? (4 - c->l_qname%4) : 0;
if ((uint32_t) c->l_qname + c->l_extranul > 255) // l_qname would overflow
return -4;
c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff;
c->l_qseq = x[4];
c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7];
Expand Down Expand Up @@ -590,6 +588,11 @@ int bam_write1(BGZF *fp, const bam1_t *b)
const bam1_core_t *c = &b->core;
uint32_t x[8], block_len = b->l_data - c->l_extranul + 32, y;
int i, ok;
if (c->l_qname - c->l_extranul > 255) {
hts_log_error("QNAME \"%s\" is longer than 254 characters", bam_get_qname(b));
errno = EOVERFLOW;
return -1;
}
if (c->n_cigar > 0xffff) block_len += 16; // "16" for "CGBI", 4-byte tag length and 8-byte fake CIGAR
x[0] = c->tid;
x[1] = c->pos;
Expand Down Expand Up @@ -1662,7 +1665,7 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b)
q = _read_token(p);

_parse_warn(p - q <= 1, "empty query name");
_parse_err(p - q > 252, "query name too long");
_parse_err(p - q > 255, "query name too long");
// resize large enough for name + extranul
if ((p-q)+4 > SIZE_MAX - s->l || ks_resize(&str, str.l+(p-q)+4) < 0) goto err_ret;
memcpy(str.s+str.l, q, p-q); str.l += p-q;
Expand Down
11 changes: 10 additions & 1 deletion test/sam.c
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,9 @@ static void test_header_updates(void) {
sam_hdr_destroy(header);
}

/* Filler text for building long query names in the test records below.
 * ABC50 is the 25 letters 'a'..'y' written twice (50 characters);
 * ABC250 concatenates five copies (250 characters). The test data appends
 * "", "1", "12", "123" and "1234" to ABC250 to get QNAMEs of 250..254
 * characters. */
#define ABC50 "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxy"
#define ABC250 ABC50 ABC50 ABC50 ABC50 ABC50

static void samrecord_layout(void)
{
static const char qnames[] = "data:,"
Expand All @@ -936,7 +939,13 @@ static void samrecord_layout(void)
"bc\t0\tCHROMOSOME_II\t200\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
"def\t0\tCHROMOSOME_II\t300\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
"ghij\t0\tCHROMOSOME_II\t400\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
"klmno\t0\tCHROMOSOME_II\t500\t10\t4M\t*\t0\t0\tATGC\tqqqq\n";
"klmno\t0\tCHROMOSOME_II\t500\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
ABC250 "\t0\tCHROMOSOME_II\t600\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
ABC250 "1\t0\tCHROMOSOME_II\t650\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
ABC250 "12\t0\tCHROMOSOME_II\t700\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
ABC250 "123\t0\tCHROMOSOME_II\t750\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
ABC250 "1234\t0\tCHROMOSOME_II\t800\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
;

size_t bam1_t_size, bam1_t_size2;

Expand Down