@@ -115,17 +115,27 @@ _Py_popcount32(uint32_t x)
115115    const  uint32_t  M2  =  0x33333333 ;
116116    // Binary: 0000 1111 0000 1111 ... 
117117    const  uint32_t  M4  =  0x0F0F0F0F ;
118-     // 256**4 + 256**3 + 256**2 + 256**1 
119-     const  uint32_t  SUM  =  0x01010101 ;
120118
121119    // Put count of each 2 bits into those 2 bits 
122120    x  =  x  -  ((x  >> 1 ) &  M1 );
123121    // Put count of each 4 bits into those 4 bits 
124122    x  =  (x  &  M2 ) +  ((x  >> 2 ) &  M2 );
125123    // Put count of each 8 bits into those 8 bits 
126124    x  =  (x  +  (x  >> 4 )) &  M4 ;
127-     // Sum of the 4 byte counts 
128-     return  (x  *  SUM ) >> 24 ;
125+     // Sum of the 4 byte counts. 
126+     // Take care when considering changes to the next line. Portability and 
127+     // correctness are delicate here, thanks to C's "integer promotions" (C99 
128+     // §6.3.1.1p2). On machines where the `int` type has width greater than 32 
129+     // bits, `x` will be promoted to an `int`, and following C's "usual 
130+     // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be 
131+     // performed as a multiplication of two `unsigned int` operands. In this 
132+     // case it's critical that we cast back to `uint32_t` in order to keep only 
133+     // the least significant 32 bits. On machines where the `int` type has 
134+     // width no greater than 32, the multiplication is of two 32-bit unsigned 
135+     // integer types, and the (uint32_t) cast is a no-op. In both cases, we 
136+     // avoid the risk of undefined behaviour due to overflow of a 
137+     // multiplication of signed integer types. 
138+     return  (uint32_t )(x  *  0x01010101U ) >> 24 ;
129139#endif 
130140}
131141
0 commit comments