@@ -580,6 +580,39 @@ pub unsafe fn _mm_round_ss(a: f32x4, b: f32x4, rounding: i32) -> f32x4 {
580580 constify_imm4 ! ( rounding, call)
581581}
582582
583+ /// Finds the minimum unsigned 16-bit element in the vector `a`.
584+ /// The minimum is placed in the first element of the resulting vector and its
585+ /// index in the second element (formally bits [16..18] inclusive).
586+ /// All other elements of the result are set to zero.
587+ #[ inline( always) ]
588+ #[ target_feature = "+sse4.1" ]
589+ #[ cfg_attr( test, assert_instr( phminposuw) ) ]
590+ pub unsafe fn _mm_minpos_epu16 ( a : u16x8 ) -> u16x8 {
591+ phminposuw ( a)
592+ }
593+
594+ /// Multiplies the low 32-bit integer of each packed 64-bit element in `a` and `b`
595+ /// (lanes 0 and 2 of each `i32x4`) and returns the two signed 64-bit products.
596+ #[ inline( always) ]
597+ #[ target_feature = "+sse4.1" ]
598+ #[ cfg_attr( test, assert_instr( pmuldq) ) ]
599+ pub unsafe fn _mm_mul_epi32 ( a : i32x4 , b : i32x4 ) -> i64x2 {
600+ pmuldq ( a, b)
601+ }
602+
603+ /// Multiplies the packed 32-bit integers in `a` and `b` lane by lane, producing
604+ /// intermediate 64-bit integers, and returns the lowest 32 bits of each product
605+ /// reinterpreted as a signed integer.
606+ /// For example, `i32x4::splat(2)` times `i32x4::splat(2)` gives the obvious
607+ /// `i32x4::splat(4)`, whereas `i32x4::splat(i32::MAX)` times `i32x4::splat(2)`
608+ /// gives a negative number because only the low 32 bits are kept.
609+ #[ inline( always) ]
610+ #[ target_feature = "+sse4.1" ]
611+ #[ cfg_attr( test, assert_instr( pmulld) ) ]
612+ pub unsafe fn _mm_mullo_epi32 ( a : i32x4 , b : i32x4 ) -> i32x4 {
613+ a * b
614+ }
615+
583616
584617#[ allow( improper_ctypes) ]
585618extern "C" {
@@ -627,6 +660,10 @@ extern "C" {
627660 fn roundsd ( a : f64x2 , b : f64x2 , rounding : i32 ) -> f64x2 ;
628661 #[ link_name = "llvm.x86.sse41.round.ss" ]
629662 fn roundss ( a : f32x4 , b : f32x4 , rounding : i32 ) -> f32x4 ;
663+ #[ link_name = "llvm.x86.sse41.phminposuw" ]
664+ fn phminposuw ( a : u16x8 ) -> u16x8 ;
665+ #[ link_name = "llvm.x86.sse41.pmuldq" ]
666+ fn pmuldq ( a : i32x4 , b : i32x4 ) -> i64x2 ;
630667}
631668
632669#[ cfg( test) ]
@@ -1109,4 +1146,46 @@ mod tests {
11091146 let e = f32x4:: new ( -2.0 , 3.5 , 7.5 , 15.5 ) ;
11101147 assert_eq ! ( r, e) ;
11111148 }
1149+
1150+ #[ simd_test = "sse4.1" ]
1151+ unsafe fn _mm_minpos_epu16_1 ( ) {
1152+ let a = u16x8:: new ( 23 , 18 , 44 , 97 , 50 , 13 , 67 , 66 ) ;
1153+ let r = sse41:: _mm_minpos_epu16 ( a) ;
1154+ let e = u16x8:: new ( 13 , 5 , 0 , 0 , 0 , 0 , 0 , 0 ) ;
1155+ assert_eq ! ( r, e) ;
1156+ }
1157+
1158+ #[ simd_test = "sse4.1" ]
1159+ unsafe fn _mm_minpos_epu16_2 ( ) {
1160+ let a = u16x8:: new ( 0 , 18 , 44 , 97 , 50 , 13 , 67 , 66 ) ;
1161+ let r = sse41:: _mm_minpos_epu16 ( a) ;
1162+ let e = u16x8:: new ( 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) ;
1163+ assert_eq ! ( r, e) ;
1164+ }
1165+
1166+ #[ simd_test = "sse4.1" ]
1167+ unsafe fn _mm_mul_epi32 ( ) {
1168+ let a =
1169+ i32x4:: new ( 15 , 2 /* ignored */ , 1234567 , 4 /* ignored */ ) ;
1170+ let b = i32x4:: new (
1171+ -20 ,
1172+ -256 , /* ignored */
1173+ 666666 ,
1174+ 666666 , /* ignored */
1175+ ) ;
1176+ let r = sse41:: _mm_mul_epi32 ( a, b) ;
1177+ let e = i64x2:: new ( -300 , 823043843622 ) ;
1178+ assert_eq ! ( r, e) ;
1179+ }
1180+
1181+ #[ simd_test = "sse4.1" ]
1182+ unsafe fn _mm_mullo_epi32 ( ) {
1183+ let a = i32x4:: new ( 15 , -2 , 1234567 , 99999 ) ;
1184+ let b = i32x4:: new ( -20 , -256 , 666666 , -99999 ) ;
1185+ let r = sse41:: _mm_mullo_epi32 ( a, b) ;
1186+ // Attention, most significant bit in r[2] is treated as a sign bit!
1187+ // 1234567 * 666666 = -1589877210
1188+ let e = i32x4:: new ( -300 , 512 , -1589877210 , -1409865409 ) ;
1189+ assert_eq ! ( r, e) ;
1190+ }
11121191}
0 commit comments