@@ -38,6 +38,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3838
3939// Hash table for removing multiple lines from the header
4040KHASH_SET_INIT_STR (rm )
41+ // Maps reference name to 64-bit length; used for long refs in SAM files whose length does not fit in the 32-bit target_len
42+ KHASH_DECLARE (s2i , kh_cstr_t , int64_t )
43+
4144typedef khash_t (rm ) rmhash_t ;
4245
4346static int sam_hdr_link_pg (sam_hdr_t * bh );
@@ -91,7 +94,8 @@ static int sam_hrecs_update_hashes(sam_hrecs_t *hrecs,
9194 sam_hrec_tag_t * tag = h_type -> tag ;
9295 int nref = hrecs -> nref ;
9396 const char * name = NULL ;
94- int len = -1 , r ;
97+ hts_pos_t len = -1 ;
98+ int r ;
9599 khint_t k ;
96100
97101 while (tag ) {
@@ -100,7 +104,7 @@ static int sam_hrecs_update_hashes(sam_hrecs_t *hrecs,
100104 name = tag -> str + 3 ;
101105 } else if (tag -> str [0 ] == 'L' && tag -> str [1 ] == 'N' ) {
102106 assert (tag -> len >= 3 );
103- len = atoi (tag -> str + 3 );
107+ len = strtoll (tag -> str + 3 , NULL , 10 );
104108 }
105109 tag = tag -> next ;
106110 }
@@ -134,7 +138,8 @@ static int sam_hrecs_update_hashes(sam_hrecs_t *hrecs,
134138 // Check lengths match; correct if not.
135139 if (len != hrecs -> ref [nref ].len ) {
136140 char tmp [32 ];
137- snprintf (tmp , sizeof (tmp ), "%u" , hrecs -> ref [nref ].len );
141+ snprintf (tmp , sizeof (tmp ), "%" PRIhts_pos ,
142+ hrecs -> ref [nref ].len );
138143 if (sam_hrecs_update (hrecs , h_type , "LN" , tmp , NULL ) < 0 )
139144 return -1 ;
140145 }
@@ -831,7 +836,11 @@ int sam_hdr_update_target_arrays(sam_hdr_t *bh, const sam_hrecs_t *hrecs,
831836 if (!bh -> target_name [i ])
832837 return -1 ;
833838 }
834- bh -> target_len [i ] = hrecs -> ref [i ].len ;
839+ if (hrecs -> ref [i ].len < UINT32_MAX ) {
840+ bh -> target_len [i ] = hrecs -> ref [i ].len ;
841+ } else {
842+ bh -> target_len [i ] = UINT32_MAX ;
843+ }
835844 }
836845
837846 // Free up any names that have been removed
@@ -901,7 +910,17 @@ static int sam_hrecs_refs_from_targets_array(sam_hrecs_t *hrecs,
901910 int r ;
902911 hrecs -> ref [tid ].name = string_dup (hrecs -> str_pool , bh -> target_name [tid ]);
903912 if (!hrecs -> ref [tid ].name ) goto fail ;
904- hrecs -> ref [tid ].len = bh -> target_len [tid ];
913+ if (bh -> target_len [tid ] < UINT32_MAX || !bh -> sdict ) {
914+ hrecs -> ref [tid ].len = bh -> target_len [tid ];
915+ } else {
916+ khash_t (s2i ) * long_refs = (khash_t (s2i ) * ) bh -> sdict ;
917+ k = kh_get (s2i , long_refs , hrecs -> ref [tid ].name );
918+ if (k < kh_end (long_refs )) {
919+ hrecs -> ref [tid ].len = kh_val (long_refs , k );
920+ } else {
921+ hrecs -> ref [tid ].len = UINT32_MAX ;
922+ }
923+ }
905924 hrecs -> ref [tid ].ty = NULL ;
906925 k = kh_put (m_s2i , hrecs -> ref_hash , hrecs -> ref [tid ].name , & r );
907926 if (r < 0 ) goto fail ;
@@ -948,7 +967,7 @@ static int add_stub_ref_sq_lines(sam_hrecs_t *hrecs) {
948967
949968 for (tid = 0 ; tid < hrecs -> nref ; tid ++ ) {
950969 if (hrecs -> ref [tid ].ty == NULL ) {
951- snprintf (len , sizeof (len ), "%d" , hrecs -> ref [tid ].len );
970+ snprintf (len , sizeof (len ), "%" PRIhts_pos , hrecs -> ref [tid ].len );
952971 if (sam_hrecs_add (hrecs , "SQ" ,
953972 "SN" , hrecs -> ref [tid ].name ,
954973 "LN" , len , NULL ) != 0 )
@@ -1841,7 +1860,7 @@ const char *sam_hdr_tid2name(const sam_hdr_t *h, int tid) {
18411860 return NULL ;
18421861}
18431862
1844- uint32_t sam_hdr_tid2len (const sam_hdr_t * h , int tid ) {
1863+ hts_pos_t sam_hdr_tid2len (const sam_hdr_t * h , int tid ) {
18451864 sam_hrecs_t * hrecs ;
18461865
18471866 if (!h )
@@ -1850,8 +1869,19 @@ uint32_t sam_hdr_tid2len(const sam_hdr_t *h, int tid) {
18501869 if ((hrecs = h -> hrecs ) != NULL && tid < hrecs -> nref ) {
18511870 return hrecs -> ref [tid ].len ;
18521871 } else {
1853- if (tid < h -> n_targets )
1854- return h -> target_len [tid ];
1872+ if (tid < h -> n_targets ) {
1873+ if (h -> target_len [tid ] < UINT32_MAX || !h -> sdict ) {
1874+ return h -> target_len [tid ];
1875+ } else {
1876+ khash_t (s2i ) * long_refs = (khash_t (s2i ) * ) h -> sdict ;
1877+ khint_t k = kh_get (s2i , long_refs , h -> target_name [tid ]);
1878+ if (k < kh_end (long_refs )) {
1879+ return kh_val (long_refs , k );
1880+ } else {
1881+ return UINT32_MAX ;
1882+ }
1883+ }
1884+ }
18551885 }
18561886
18571887 return 0 ;
0 commit comments