Skip to content

Commit ad77218

Browse files
committed
review update
1 parent aebd6ee commit ad77218

File tree

3 files changed

+38
-171
lines changed

3 files changed

+38
-171
lines changed

htslib/vcf.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1518,21 +1518,6 @@ HTSLIB_EXPORT
15181518
int bcf_format_gt_v2(const bcf_hdr_t *hdr, bcf_fmt_t *fmt, int isample,
15191519
kstring_t *str) HTS_RESULT_USED;
15201520

1521-
1522-
/**
1523-
* update44phasing - converts GT to/from v4.4 way representation
1524-
* @param h - bcf header, to get version
1525-
* @param v - pointer to bcf data
1526-
* @param setreset - whether to set or reset
1527-
* Returns 0 on success and -1 on failure
1528-
* For data read, to be converted to v44, setreset to be 1. For data write, to
1529-
* be converted to v < v44, setreset to be 0.
1530-
* If the version in header is >= 4.4, no change is made. Otherwise 1st phasing
1531-
* is set if there are no other unphased ones.
1532-
*/
1533-
HTSLIB_EXPORT int update44phasing(bcf_hdr_t *h, bcf1_t *b, int setreset)
1534-
HTS_RESULT_USED;
1535-
15361521
static inline int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
15371522
{
15381523
return bcf_format_gt_v2(NULL, fmt, isample, str);

synced_bcf_reader.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -708,11 +708,6 @@ static int _reader_fill_buffer(bcf_srs_t *files, bcf_sr_t *reader)
708708
ret = bcf_itr_next(reader->file, reader->itr, reader->buffer[reader->nbuffer+1]);
709709
if ( ret < -1 ) files->errnum = bcf_read_error;
710710
if ( ret < 0 ) break; // no more lines or an error
711-
//set phasing of 1st value as in vcf v44
712-
if ((ret = update44phasing(reader->header, reader->buffer[reader->nbuffer+1], 1))) {
713-
files->errnum = bcf_read_error;
714-
break;
715-
}
716711
bcf_subset_format(reader->header,reader->buffer[reader->nbuffer+1]);
717712
}
718713

vcf.c

Lines changed: 38 additions & 151 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,135 +1814,6 @@ static void bcf_record_check_err(const bcf_hdr_t *hdr, bcf1_t *rec,
18141814
(*reports)++;
18151815
}
18161816

1817-
/**
1818-
* updatephasing - updates 1st phasing based on other phasing status
1819-
* @param p - pointer to phase value array
1820-
* @param end - end of array
1821-
* @param q - pointer to consumed data
1822-
* @param set - whether to set (while read) or reset (for write)
1823-
* @param samples - no. of samples in array
1824-
* @param ploidy - no. of phasing values per sample
1825-
* @param type - value type (one of BCF_BT_...)
1826-
* Returns 0 on success and 1 on failure
1827-
*/
1828-
static int updatephasing(uint8_t *p, uint8_t *end, uint8_t **q, int set, int samples, int ploidy, int type)
1829-
{
1830-
int j, k, upd = set ? 1 : -1;
1831-
size_t bytes;
1832-
for (j = 0; j < samples; ++j) { //for each sample
1833-
int anyunphased = 0;
1834-
uint8_t *ptr1 = p;
1835-
int32_t al1 = 0, val;
1836-
for (k = 0; k < ploidy; ++k) { //in each ploidy
1837-
switch(type) {
1838-
case BCF_BT_INT8:
1839-
val = *(uint8_t*)p;
1840-
break;
1841-
case BCF_BT_INT16:
1842-
val = *(uint16_t*)p;
1843-
break;
1844-
case BCF_BT_INT32:
1845-
val = *(uint32_t*)p;
1846-
break;
1847-
//wont have anything bigger than 32bit for GT
1848-
default: //invalid
1849-
return 1;
1850-
break;
1851-
}
1852-
if (!k) al1 = val;
1853-
else if (!(val & 1)) {
1854-
anyunphased = 1;
1855-
}
1856-
//get to next phasing or skip the rest for this sample
1857-
bytes = (anyunphased ? ploidy - k : 1) << bcf_type_shift[type];
1858-
if (end - p < bytes)
1859-
return 1;
1860-
p += bytes;
1861-
if (anyunphased) {
1862-
break; //no further check required
1863-
}
1864-
}
1865-
if (!anyunphased && al1 > 1) { //no other unphased
1866-
/*set phased on read or make unphased on write as upto 4.3 1st
1867-
phasing is not described explicitly and has to be inferred*/
1868-
switch(type) {
1869-
case BCF_BT_INT8:
1870-
*(uint8_t*)ptr1 += upd;
1871-
break;
1872-
case BCF_BT_INT16:
1873-
*(uint16_t*)ptr1 += upd;
1874-
break;
1875-
case BCF_BT_INT32:
1876-
*(uint32_t*)ptr1 += upd;
1877-
break;
1878-
}
1879-
}
1880-
}
1881-
*q = p;
1882-
return 0;
1883-
}
1884-
1885-
/**
1886-
* update44phasing - converts GT to/from v4.4 way representation
1887-
* @param h - bcf header, to get version
1888-
* @param v - pointer to bcf data
1889-
* @param setreset - whether to set or reset
1890-
* Returns 0 on success and -1 on failure
1891-
* For data read, to be converted to v44, setreset to be 1. For data write, to
1892-
* be converted to v < v44, setreset to be 0.
1893-
* If the version in header is >= 4.4, no change is made. Otherwise 1st phasing
1894-
* is set if there are no other unphased ones.
1895-
*/
1896-
HTSLIB_EXPORT int update44phasing(bcf_hdr_t *h, bcf1_t *b, int setreset)
1897-
{
1898-
int i, idgt = -1, ver = VCF_DEF, num, type;
1899-
uint8_t *ptr = NULL, *end = NULL;
1900-
if (!b) return 1;
1901-
1902-
ver = bcf_get_version(h, "");
1903-
idgt = h ? bcf_hdr_id2int(h, BCF_DT_ID, "GT") : -1;
1904-
if (ver >= VCF44 || idgt == -1) return 0; //no change required
1905-
1906-
if (b->unpacked & BCF_UN_FMT) { //unpacked, get from decoded data
1907-
for (i=0; i<b->n_fmt; i++)
1908-
{
1909-
if ( b->d.fmt[i].id == idgt ) {
1910-
ptr = b->d.fmt[i].p;
1911-
end = ptr + b->d.fmt[i].p_len;
1912-
num = b->d.fmt[i].n;
1913-
type = b->d.fmt[i].type;
1914-
break;
1915-
}
1916-
}
1917-
} else { //get from indiv.s binary stream
1918-
ptr = (uint8_t *) b->indiv.s;
1919-
end = ptr + b->indiv.l;
1920-
int found = 0;
1921-
for (i = 0; i < b->n_fmt; ++i) {
1922-
int32_t key = -1;
1923-
if (bcf_dec_typed_int1_safe(ptr, end, &ptr, &key) != 0) return 1;
1924-
if (bcf_dec_size_safe(ptr, end, &ptr, &num, &type) != 0) return 1;
1925-
if (type > BCF_BT_CHAR) return 1; //invalid type
1926-
if (idgt == key) {
1927-
found = 1;
1928-
break;
1929-
} else { //skip and check next
1930-
size_t bytes = ((size_t) num << bcf_type_shift[type]) * b->n_sample;
1931-
if (end - ptr < bytes) return 1;
1932-
ptr += bytes;
1933-
}
1934-
}
1935-
if (!found) {
1936-
ptr = end = NULL;
1937-
}
1938-
}
1939-
if (ptr) {
1940-
//with GT and v < v44, need phase conversion
1941-
if (updatephasing(ptr, end, &ptr, setreset, b->n_sample, num, type)) return 1;
1942-
}
1943-
return 0;
1944-
}
1945-
19461817
static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
19471818
uint8_t *ptr, *end;
19481819
size_t bytes;
@@ -1961,10 +1832,6 @@ static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
19611832
(1 << BCF_BT_FLOAT) |
19621833
(1 << BCF_BT_CHAR));
19631834
int32_t max_id = hdr ? hdr->n[BCF_DT_ID] : 0;
1964-
int idgt = -1, ver = VCF_DEF;
1965-
1966-
ver = bcf_get_version(hdr, NULL);
1967-
idgt = hdr ? bcf_hdr_id2int(hdr, BCF_DT_ID, "GT") : -1;
19681835

19691836
// Check for valid contig ID
19701837
if (rec->rid < 0
@@ -2074,16 +1941,9 @@ static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
20741941
bcf_record_check_err(hdr, rec, "type", &reports, type);
20751942
err |= BCF_ERR_TAG_INVALID;
20761943
}
2077-
if (ver < VCF44 && idgt != -1 && idgt == key) {
2078-
//with GT and v < v44, need phase conversion
2079-
if (updatephasing(ptr, end, &ptr, 1, rec->n_sample, num, type)) {
2080-
err |= BCF_ERR_TAG_INVALID;
2081-
}
2082-
} else {
2083-
bytes = ((size_t) num << bcf_type_shift[type]) * rec->n_sample;
2084-
if (end - ptr < bytes) goto bad_indiv;
2085-
ptr += bytes;
2086-
}
1944+
bytes = ((size_t) num << bcf_type_shift[type]) * rec->n_sample;
1945+
if (end - ptr < bytes) goto bad_indiv;
1946+
ptr += bytes;
20871947
}
20881948

20891949
if (!err && rec->rlen < 0) {
@@ -2172,8 +2032,6 @@ int bcf_readrec(BGZF *fp, void *null, void *vv, int *tid, hts_pos_t *beg, hts_po
21722032
if (ret == 0) ret = bcf_record_check(NULL, v);
21732033
if (ret >= 0)
21742034
*tid = v->rid, *beg = v->pos, *end = v->pos + v->rlen;
2175-
/*bcf data read may need conversion to vcf44 phasing format, as header is
2176-
not availble here, it has to be done after this returns the data*/
21772035
return ret;
21782036
}
21792037

@@ -2442,11 +2300,6 @@ int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v)
24422300
return -1;
24432301
}
24442302

2445-
if (update44phasing(h, v, 0)) { //reset phasing update made after read
2446-
hts_log_error("Failed to set prorper phasing at %s:%"PRIhts_pos"", bcf_seqname_safe(h,v), v->pos+1);
2447-
return -1;
2448-
}
2449-
24502303
BGZF *fp = hfp->fp.bgzf;
24512304
uint8_t x[32];
24522305
u32_to_le(v->shared.l + 24, x); // to include six 32-bit integers
@@ -6086,12 +5939,13 @@ int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, c
60865939

60875940
int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type)
60885941
{
6089-
int i,j, tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag);
5942+
int i,j, tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag), gt = 0;
60905943
if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header
60915944
if ( tag[0]=='G' && tag[1]=='T' && tag[2]==0 )
60925945
{
60935946
// Ugly: GT field is considered to be a string by the VCF header but BCF represents it as INT.
60945947
if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2;
5948+
gt = 1;
60955949
}
60965950
else if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=type ) return -2; // expected different type
60975951

@@ -6151,6 +6005,39 @@ int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, v
61516005
default: hts_log_error("Unexpected type %d at %s:%"PRIhts_pos, fmt->type, bcf_seqname_safe(hdr,line), line->pos+1); exit(1);
61526006
}
61536007
#undef BRANCH
6008+
6009+
if (gt && bcf_get_version(hdr, NULL) < VCF44) {
6010+
//convert to v44 phasing if required
6011+
int32_t *v = *dst, *ptr1, val1, anyunphased = 0, end = 0;
6012+
for (i=0; i<nsmpl; i++) {
6013+
anyunphased = 0; val1 = 0, end = 0;
6014+
for (j=0; j<fmt->n; j++) {
6015+
if (*v != bcf_int32_vector_end) {
6016+
if (!j) {
6017+
val1 = *v;
6018+
ptr1 = v;
6019+
} else {
6020+
if (!(*v & 1)) { //unphased or unkonwn
6021+
anyunphased = 1;
6022+
}
6023+
}
6024+
} else {
6025+
end = 1;
6026+
}
6027+
6028+
if (val1 & 1 || anyunphased || end) {
6029+
//phased || an unphased found || end of data, skip sample
6030+
v += (fmt->n - j);
6031+
break;
6032+
}
6033+
++v;
6034+
}
6035+
if (val1 && !(val1 & 1) && !anyunphased) {
6036+
//valid unphased one w/o any other unphased, make phased
6037+
*ptr1 |= 1;
6038+
}
6039+
}
6040+
}
61546041
return nsmpl*fmt->n;
61556042
}
61566043

0 commit comments

Comments
 (0)