@@ -1814,135 +1814,6 @@ static void bcf_record_check_err(const bcf_hdr_t *hdr, bcf1_t *rec,
1814
1814
(* reports )++ ;
1815
1815
}
1816
1816
1817
- /**
1818
- * updatephasing - updates 1st phasing based on other phasing status
1819
- * @param p - pointer to phase value array
1820
- * @param end - end of array
1821
- * @param q - pointer to consumed data
1822
- * @param set - whether to set (while read) or reset (for write)
1823
- * @param samples - no. of samples in array
1824
- * @param ploidy - no. of phasing values per sample
1825
- * @param type - value type (one of BCF_BT_...)
1826
- * Returns 0 on success and 1 on failure
1827
- */
1828
- static int updatephasing (uint8_t * p , uint8_t * end , uint8_t * * q , int set , int samples , int ploidy , int type )
1829
- {
1830
- int j , k , upd = set ? 1 : -1 ;
1831
- size_t bytes ;
1832
- for (j = 0 ; j < samples ; ++ j ) { //for each sample
1833
- int anyunphased = 0 ;
1834
- uint8_t * ptr1 = p ;
1835
- int32_t al1 = 0 , val ;
1836
- for (k = 0 ; k < ploidy ; ++ k ) { //in each ploidy
1837
- switch (type ) {
1838
- case BCF_BT_INT8 :
1839
- val = * (uint8_t * )p ;
1840
- break ;
1841
- case BCF_BT_INT16 :
1842
- val = * (uint16_t * )p ;
1843
- break ;
1844
- case BCF_BT_INT32 :
1845
- val = * (uint32_t * )p ;
1846
- break ;
1847
- //wont have anything bigger than 32bit for GT
1848
- default : //invalid
1849
- return 1 ;
1850
- break ;
1851
- }
1852
- if (!k ) al1 = val ;
1853
- else if (!(val & 1 )) {
1854
- anyunphased = 1 ;
1855
- }
1856
- //get to next phasing or skip the rest for this sample
1857
- bytes = (anyunphased ? ploidy - k : 1 ) << bcf_type_shift [type ];
1858
- if (end - p < bytes )
1859
- return 1 ;
1860
- p += bytes ;
1861
- if (anyunphased ) {
1862
- break ; //no further check required
1863
- }
1864
- }
1865
- if (!anyunphased && al1 > 1 ) { //no other unphased
1866
- /*set phased on read or make unphased on write as upto 4.3 1st
1867
- phasing is not described explicitly and has to be inferred*/
1868
- switch (type ) {
1869
- case BCF_BT_INT8 :
1870
- * (uint8_t * )ptr1 += upd ;
1871
- break ;
1872
- case BCF_BT_INT16 :
1873
- * (uint16_t * )ptr1 += upd ;
1874
- break ;
1875
- case BCF_BT_INT32 :
1876
- * (uint32_t * )ptr1 += upd ;
1877
- break ;
1878
- }
1879
- }
1880
- }
1881
- * q = p ;
1882
- return 0 ;
1883
- }
1884
-
1885
- /**
1886
- * update44phasing - converts GT to/from v4.4 way representation
1887
- * @param h - bcf header, to get version
1888
- * @param v - pointer to bcf data
1889
- * @param setreset - whether to set or reset
1890
- * Returns 0 on success and -1 on failure
1891
- * For data read, to be converted to v44, setreset to be 1. For data write, to
1892
- * be converted to v < v44, setreset to be 0.
1893
- * If the version in header is >= 4.4, no change is made. Otherwise 1st phasing
1894
- * is set if there are no other unphased ones.
1895
- */
1896
- HTSLIB_EXPORT int update44phasing (bcf_hdr_t * h , bcf1_t * b , int setreset )
1897
- {
1898
- int i , idgt = -1 , ver = VCF_DEF , num , type ;
1899
- uint8_t * ptr = NULL , * end = NULL ;
1900
- if (!b ) return 1 ;
1901
-
1902
- ver = bcf_get_version (h , "" );
1903
- idgt = h ? bcf_hdr_id2int (h , BCF_DT_ID , "GT" ) : -1 ;
1904
- if (ver >= VCF44 || idgt == -1 ) return 0 ; //no change required
1905
-
1906
- if (b -> unpacked & BCF_UN_FMT ) { //unpacked, get from decoded data
1907
- for (i = 0 ; i < b -> n_fmt ; i ++ )
1908
- {
1909
- if ( b -> d .fmt [i ].id == idgt ) {
1910
- ptr = b -> d .fmt [i ].p ;
1911
- end = ptr + b -> d .fmt [i ].p_len ;
1912
- num = b -> d .fmt [i ].n ;
1913
- type = b -> d .fmt [i ].type ;
1914
- break ;
1915
- }
1916
- }
1917
- } else { //get from indiv.s binary stream
1918
- ptr = (uint8_t * ) b -> indiv .s ;
1919
- end = ptr + b -> indiv .l ;
1920
- int found = 0 ;
1921
- for (i = 0 ; i < b -> n_fmt ; ++ i ) {
1922
- int32_t key = -1 ;
1923
- if (bcf_dec_typed_int1_safe (ptr , end , & ptr , & key ) != 0 ) return 1 ;
1924
- if (bcf_dec_size_safe (ptr , end , & ptr , & num , & type ) != 0 ) return 1 ;
1925
- if (type > BCF_BT_CHAR ) return 1 ; //invalid type
1926
- if (idgt == key ) {
1927
- found = 1 ;
1928
- break ;
1929
- } else { //skip and check next
1930
- size_t bytes = ((size_t ) num << bcf_type_shift [type ]) * b -> n_sample ;
1931
- if (end - ptr < bytes ) return 1 ;
1932
- ptr += bytes ;
1933
- }
1934
- }
1935
- if (!found ) {
1936
- ptr = end = NULL ;
1937
- }
1938
- }
1939
- if (ptr ) {
1940
- //with GT and v < v44, need phase conversion
1941
- if (updatephasing (ptr , end , & ptr , setreset , b -> n_sample , num , type )) return 1 ;
1942
- }
1943
- return 0 ;
1944
- }
1945
-
1946
1817
static int bcf_record_check (const bcf_hdr_t * hdr , bcf1_t * rec ) {
1947
1818
uint8_t * ptr , * end ;
1948
1819
size_t bytes ;
@@ -1961,10 +1832,6 @@ static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
1961
1832
(1 << BCF_BT_FLOAT ) |
1962
1833
(1 << BCF_BT_CHAR ));
1963
1834
int32_t max_id = hdr ? hdr -> n [BCF_DT_ID ] : 0 ;
1964
- int idgt = -1 , ver = VCF_DEF ;
1965
-
1966
- ver = bcf_get_version (hdr , NULL );
1967
- idgt = hdr ? bcf_hdr_id2int (hdr , BCF_DT_ID , "GT" ) : -1 ;
1968
1835
1969
1836
// Check for valid contig ID
1970
1837
if (rec -> rid < 0
@@ -2074,16 +1941,9 @@ static int bcf_record_check(const bcf_hdr_t *hdr, bcf1_t *rec) {
2074
1941
bcf_record_check_err (hdr , rec , "type" , & reports , type );
2075
1942
err |= BCF_ERR_TAG_INVALID ;
2076
1943
}
2077
- if (ver < VCF44 && idgt != -1 && idgt == key ) {
2078
- //with GT and v < v44, need phase conversion
2079
- if (updatephasing (ptr , end , & ptr , 1 , rec -> n_sample , num , type )) {
2080
- err |= BCF_ERR_TAG_INVALID ;
2081
- }
2082
- } else {
2083
- bytes = ((size_t ) num << bcf_type_shift [type ]) * rec -> n_sample ;
2084
- if (end - ptr < bytes ) goto bad_indiv ;
2085
- ptr += bytes ;
2086
- }
1944
+ bytes = ((size_t ) num << bcf_type_shift [type ]) * rec -> n_sample ;
1945
+ if (end - ptr < bytes ) goto bad_indiv ;
1946
+ ptr += bytes ;
2087
1947
}
2088
1948
2089
1949
if (!err && rec -> rlen < 0 ) {
@@ -2172,8 +2032,6 @@ int bcf_readrec(BGZF *fp, void *null, void *vv, int *tid, hts_pos_t *beg, hts_po
2172
2032
if (ret == 0 ) ret = bcf_record_check (NULL , v );
2173
2033
if (ret >= 0 )
2174
2034
* tid = v -> rid , * beg = v -> pos , * end = v -> pos + v -> rlen ;
2175
- /*bcf data read may need conversion to vcf44 phasing format, as header is
2176
- not availble here, it has to be done after this returns the data*/
2177
2035
return ret ;
2178
2036
}
2179
2037
@@ -2442,11 +2300,6 @@ int bcf_write(htsFile *hfp, bcf_hdr_t *h, bcf1_t *v)
2442
2300
return -1 ;
2443
2301
}
2444
2302
2445
- if (update44phasing (h , v , 0 )) { //reset phasing update made after read
2446
- hts_log_error ("Failed to set prorper phasing at %s:%" PRIhts_pos "" , bcf_seqname_safe (h ,v ), v -> pos + 1 );
2447
- return -1 ;
2448
- }
2449
-
2450
2303
BGZF * fp = hfp -> fp .bgzf ;
2451
2304
uint8_t x [32 ];
2452
2305
u32_to_le (v -> shared .l + 24 , x ); // to include six 32-bit integers
@@ -6086,12 +5939,13 @@ int bcf_get_format_string(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, c
6086
5939
6087
5940
int bcf_get_format_values (const bcf_hdr_t * hdr , bcf1_t * line , const char * tag , void * * dst , int * ndst , int type )
6088
5941
{
6089
- int i ,j , tag_id = bcf_hdr_id2int (hdr , BCF_DT_ID , tag );
5942
+ int i ,j , tag_id = bcf_hdr_id2int (hdr , BCF_DT_ID , tag ), gt = 0 ;
6090
5943
if ( !bcf_hdr_idinfo_exists (hdr ,BCF_HL_FMT ,tag_id ) ) return -1 ; // no such FORMAT field in the header
6091
5944
if ( tag [0 ]== 'G' && tag [1 ]== 'T' && tag [2 ]== 0 )
6092
5945
{
6093
5946
// Ugly: GT field is considered to be a string by the VCF header but BCF represents it as INT.
6094
5947
if ( bcf_hdr_id2type (hdr ,BCF_HL_FMT ,tag_id )!= BCF_HT_STR ) return -2 ;
5948
+ gt = 1 ;
6095
5949
}
6096
5950
else if ( bcf_hdr_id2type (hdr ,BCF_HL_FMT ,tag_id )!= type ) return -2 ; // expected different type
6097
5951
@@ -6151,6 +6005,39 @@ int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, v
6151
6005
default : hts_log_error ("Unexpected type %d at %s:%" PRIhts_pos , fmt -> type , bcf_seqname_safe (hdr ,line ), line -> pos + 1 ); exit (1 );
6152
6006
}
6153
6007
#undef BRANCH
6008
+
6009
+ if (gt && bcf_get_version (hdr , NULL ) < VCF44 ) {
6010
+ //convert to v44 phasing if required
6011
+ int32_t * v = * dst , * ptr1 , val1 , anyunphased = 0 , end = 0 ;
6012
+ for (i = 0 ; i < nsmpl ; i ++ ) {
6013
+ anyunphased = 0 ; val1 = 0 , end = 0 ;
6014
+ for (j = 0 ; j < fmt -> n ; j ++ ) {
6015
+ if (* v != bcf_int32_vector_end ) {
6016
+ if (!j ) {
6017
+ val1 = * v ;
6018
+ ptr1 = v ;
6019
+ } else {
6020
+ if (!(* v & 1 )) { //unphased or unkonwn
6021
+ anyunphased = 1 ;
6022
+ }
6023
+ }
6024
+ } else {
6025
+ end = 1 ;
6026
+ }
6027
+
6028
+ if (val1 & 1 || anyunphased || end ) {
6029
+ //phased || an unphased found || end of data, skip sample
6030
+ v += (fmt -> n - j );
6031
+ break ;
6032
+ }
6033
+ ++ v ;
6034
+ }
6035
+ if (val1 && !(val1 & 1 ) && !anyunphased ) {
6036
+ //valid unphased one w/o any other unphased, make phased
6037
+ * ptr1 |= 1 ;
6038
+ }
6039
+ }
6040
+ }
6154
6041
return nsmpl * fmt -> n ;
6155
6042
}
6156
6043
0 commit comments