@@ -8,6 +8,13 @@ use std::ptr;
88#[ cfg( test) ]  
99mod  tests; 
1010
11+ /// When hashing something that ends up affecting properties like symbol names, 
12+ /// we want these symbol names to be calculated independently of other factors 
13+ /// like what architecture you're compiling *from*. 
14+ /// 
15+ /// To that end, we always convert integers to little-endian format or handle 
16+ /// them in an endian-independent way, and extend the architecture-dependent 
17+ /// `isize` and `usize` types to 64 bits if needed before hashing. 
1118#[ derive( Debug ,  Clone ) ]  
1219pub  struct  SipHasher128  { 
1320    k0 :  u64 , 
@@ -125,15 +132,17 @@ impl SipHasher128 {
125132
126133    // A specialized write function for values with size <= 8. 
127134    // 
128-     // The hashing of multi-byte integers depends on endianness. E.g.: 
129-     // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])` 
130-     // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])` 
135+     // The input must be zero-extended to 64 bits by the caller. The extension 
136+     // isn't hashed, but the implementation requires it for correctness. 
137+     // 
138+     // This function, given the same integer type and value, has the same effect 
139+     // on both little- and big-endian hardware. It operates on values without 
140+     // depending on their byte order in memory, so it is independent of endianness. 
131141    // 
132-     // This function does the right thing for little-endian hardware. On 
133-     // big-endian hardware `x` must be byte-swapped first to give the right 
134-     // behaviour. After any byte-swapping, the input must be zero-extended to 
135-     // 64-bits. The caller is responsible for the byte-swapping and 
136-     // zero-extension. 
142+     // The equivalent write() call *does* need the value's bytes converted to 
143+     // little-endian (without zero-extension) for equivalent behavior on little- 
144+     // and big-endian hardware, as write() *does* operate on byte sequences. 
145+     // I.e. write_u32(0xDDCCBBAA) == write(&0xDDCCBBAA_u32.to_le_bytes()). 
137146    #[ inline]  
138147    fn  short_write < T > ( & mut  self ,  _x :  T ,  x :  u64 )  { 
139148        let  size = mem:: size_of :: < T > ( ) ; 
@@ -167,12 +176,9 @@ impl SipHasher128 {
167176        //   left-shift it five bytes, giving 0xHHGG_FF00_0000_0000. We then 
168177        //   bitwise-OR that value into `self.tail`, resulting in 
169178        //   0xHHGG_FFEE_DDCC_BBAA. `self.tail` is now full, and we can use it 
170-         //   to update `self.state`. (As mentioned above, this assumes a 
171-         //   little-endian machine; on a big-endian machine we would have 
172-         //   byte-swapped 0xIIHH_GGFF in the caller, giving 0xFFGG_HHII, and we 
173-         //   would then end up bitwise-ORing 0xGGHH_II00_0000_0000 into 
174-         //   `self.tail`). 
175-         // 
179+         //   to update `self.state`. The analysis is the same whether we are on 
180+         //   a little-endian or big-endian machine, as the bitwise operations 
181+         //   are endian-independent. 
176182        self . tail  |= x << ( 8  *  self . ntail ) ; 
177183        if  size < needed { 
178184            self . ntail  += size; 
@@ -186,8 +192,7 @@ impl SipHasher128 {
186192
187193        // Continuing scenario 2: we have one byte left over from the input. We 
188194        // set `self.ntail` to 1 and `self.tail` to `0x0000_0000_IIHH_GGFF >> 
189-         // 8*3`, which is 0x0000_0000_0000_00II. (Or on a big-endian machine 
190-         // the prior byte-swapping would leave us with 0x0000_0000_0000_00FF.) 
195+         // 8*3`, which is 0x0000_0000_0000_00II. 
191196        // 
192197        // The `if` is needed to avoid shifting by 64 bits, which Rust 
193198        // complains about. 
@@ -222,22 +227,30 @@ impl Hasher for SipHasher128 {
222227
223228    #[ inline]  
224229    fn  write_u16 ( & mut  self ,  i :  u16 )  { 
225-         self . short_write ( i,  i. to_le ( )  as  u64 ) ; 
230+         self . short_write ( i,  i as  u64 ) ; 
226231    } 
227232
228233    #[ inline]  
229234    fn  write_u32 ( & mut  self ,  i :  u32 )  { 
230-         self . short_write ( i,  i. to_le ( )  as  u64 ) ; 
235+         self . short_write ( i,  i as  u64 ) ; 
231236    } 
232237
233238    #[ inline]  
234239    fn  write_u64 ( & mut  self ,  i :  u64 )  { 
235-         self . short_write ( i,  i. to_le ( )  as  u64 ) ; 
240+         self . short_write ( i,  i as  u64 ) ; 
241+     } 
242+ 
243+     #[ inline]  
244+     fn  write_u128 ( & mut  self ,  i :  u128 )  { 
245+         self . write ( & i. to_le_bytes ( ) ) ; 
236246    } 
237247
238248    #[ inline]  
239249    fn  write_usize ( & mut  self ,  i :  usize )  { 
240-         self . short_write ( i,  i. to_le ( )  as  u64 ) ; 
250+         // Always treat usize as u64 so we get the same results on 32- and 64-bit 
251+         // platforms. This is important for symbol hashes when cross-compiling, 
252+         // for example. 
253+         self . write_u64 ( i as  u64 ) ; 
241254    } 
242255
243256    #[ inline]  
@@ -247,22 +260,31 @@ impl Hasher for SipHasher128 {
247260
248261    #[ inline]  
249262    fn  write_i16 ( & mut  self ,  i :  i16 )  { 
250-         self . short_write ( i,  ( i as  u16 ) . to_le ( )  as  u64 ) ; 
263+         self . short_write ( i,  i as  u16  as  u64 ) ; 
251264    } 
252265
253266    #[ inline]  
254267    fn  write_i32 ( & mut  self ,  i :  i32 )  { 
255-         self . short_write ( i,  ( i as  u32 ) . to_le ( )  as  u64 ) ; 
268+         self . short_write ( i,  i as  u32  as  u64 ) ; 
256269    } 
257270
258271    #[ inline]  
259272    fn  write_i64 ( & mut  self ,  i :  i64 )  { 
260-         self . short_write ( i,  ( i as  u64 ) . to_le ( )  as  u64 ) ; 
273+         self . short_write ( i,  i as  u64 ) ; 
274+     } 
275+ 
276+     #[ inline]  
277+     fn  write_i128 ( & mut  self ,  i :  i128 )  { 
278+         self . write ( & i. to_le_bytes ( ) ) ; 
261279    } 
262280
263281    #[ inline]  
264282    fn  write_isize ( & mut  self ,  i :  isize )  { 
265-         self . short_write ( i,  ( i as  usize ) . to_le ( )  as  u64 ) ; 
283+         // Always treat isize as i64 so we get the same results on 32- and 64-bit 
284+         // platforms. This is important for symbol hashes when cross-compiling, 
285+         // for example. Sign extending here is preferable as it means that the 
286+         // same negative number hashes the same on both 32- and 64-bit platforms. 
287+         self . write_i64 ( i as  i64 ) ; 
266288    } 
267289
268290    #[ inline]  
0 commit comments