@@ -49,10 +49,10 @@ define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
4949define <4 x i16 > @sabd_4h_promoted_ops (<4 x i8 > %a , <4 x i8 > %b ) #0 {
5050; CHECK-LABEL: sabd_4h_promoted_ops:
5151; CHECK: // %bb.0:
52- ; CHECK-NEXT: shl v0.4h, v0.4h, #8
5352; CHECK-NEXT: shl v1.4h, v1.4h, #8
54- ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
53+ ; CHECK-NEXT: shl v0.4h, v0.4h, #8
5554; CHECK-NEXT: sshr v1.4h, v1.4h, #8
55+ ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
5656; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h
5757; CHECK-NEXT: ret
5858 %a.sext = sext <4 x i8 > %a to <4 x i16 >
@@ -103,10 +103,10 @@ define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
103103define <2 x i32 > @sabd_2s_promoted_ops (<2 x i16 > %a , <2 x i16 > %b ) #0 {
104104; CHECK-LABEL: sabd_2s_promoted_ops:
105105; CHECK: // %bb.0:
106- ; CHECK-NEXT: shl v0.2s, v0.2s, #16
107106; CHECK-NEXT: shl v1.2s, v1.2s, #16
108- ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
107+ ; CHECK-NEXT: shl v0.2s, v0.2s, #16
109108; CHECK-NEXT: sshr v1.2s, v1.2s, #16
109+ ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
110110; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
111111; CHECK-NEXT: ret
112112 %a.sext = sext <2 x i16 > %a to <2 x i32 >
@@ -144,27 +144,10 @@ define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
144144define <2 x i64 > @sabd_2d (<2 x i64 > %a , <2 x i64 > %b ) #0 {
145145; CHECK-LABEL: sabd_2d:
146146; CHECK: // %bb.0:
147- ; CHECK-NEXT: mov x8, v0.d[1]
148- ; CHECK-NEXT: mov x9, v1.d[1]
149- ; CHECK-NEXT: fmov x10, d0
150- ; CHECK-NEXT: fmov x12, d1
151- ; CHECK-NEXT: asr x14, x10, #63
152- ; CHECK-NEXT: asr x11, x8, #63
153- ; CHECK-NEXT: asr x13, x9, #63
154- ; CHECK-NEXT: asr x15, x12, #63
155- ; CHECK-NEXT: subs x8, x8, x9
156- ; CHECK-NEXT: sbc x9, x11, x13
157- ; CHECK-NEXT: subs x10, x10, x12
158- ; CHECK-NEXT: sbc x11, x14, x15
159- ; CHECK-NEXT: asr x9, x9, #63
160- ; CHECK-NEXT: asr x11, x11, #63
161- ; CHECK-NEXT: eor x8, x8, x9
162- ; CHECK-NEXT: eor x10, x10, x11
163- ; CHECK-NEXT: sub x8, x8, x9
164- ; CHECK-NEXT: sub x10, x10, x11
165- ; CHECK-NEXT: fmov d1, x8
166- ; CHECK-NEXT: fmov d0, x10
167- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
147+ ; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
148+ ; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
149+ ; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
150+ ; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
168151; CHECK-NEXT: ret
169152 %a.sext = sext <2 x i64 > %a to <2 x i128 >
170153 %b.sext = sext <2 x i64 > %b to <2 x i128 >
@@ -232,8 +215,8 @@ define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
232215define <4 x i16 > @uabd_4h_promoted_ops (<4 x i8 > %a , <4 x i8 > %b ) #0 {
233216; CHECK-LABEL: uabd_4h_promoted_ops:
234217; CHECK: // %bb.0:
235- ; CHECK-NEXT: bic v0.4h, #255, lsl #8
236218; CHECK-NEXT: bic v1.4h, #255, lsl #8
219+ ; CHECK-NEXT: bic v0.4h, #255, lsl #8
237220; CHECK-NEXT: uabd v0.4h, v0.4h, v1.4h
238221; CHECK-NEXT: ret
239222 %a.zext = zext <4 x i8 > %a to <4 x i16 >
@@ -285,8 +268,8 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
285268; CHECK-LABEL: uabd_2s_promoted_ops:
286269; CHECK: // %bb.0:
287270; CHECK-NEXT: movi d2, #0x00ffff0000ffff
288- ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
289271; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
272+ ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
290273; CHECK-NEXT: uabd v0.2s, v0.2s, v1.2s
291274; CHECK-NEXT: ret
292275 %a.zext = zext <2 x i16 > %a to <2 x i32 >
@@ -324,23 +307,9 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
324307define <2 x i64 > @uabd_2d (<2 x i64 > %a , <2 x i64 > %b ) #0 {
325308; CHECK-LABEL: uabd_2d:
326309; CHECK: // %bb.0:
327- ; CHECK-NEXT: mov x8, v0.d[1]
328- ; CHECK-NEXT: mov x9, v1.d[1]
329- ; CHECK-NEXT: fmov x10, d0
330- ; CHECK-NEXT: fmov x11, d1
331- ; CHECK-NEXT: subs x8, x8, x9
332- ; CHECK-NEXT: ngc x9, xzr
333- ; CHECK-NEXT: subs x10, x10, x11
334- ; CHECK-NEXT: ngc x11, xzr
335- ; CHECK-NEXT: asr x9, x9, #63
336- ; CHECK-NEXT: asr x11, x11, #63
337- ; CHECK-NEXT: eor x8, x8, x9
338- ; CHECK-NEXT: eor x10, x10, x11
339- ; CHECK-NEXT: sub x8, x8, x9
340- ; CHECK-NEXT: sub x10, x10, x11
341- ; CHECK-NEXT: fmov d1, x8
342- ; CHECK-NEXT: fmov d0, x10
343- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
310+ ; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
311+ ; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
312+ ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
344313; CHECK-NEXT: ret
345314 %a.zext = zext <2 x i64 > %a to <2 x i128 >
346315 %b.zext = zext <2 x i64 > %b to <2 x i128 >
@@ -484,9 +453,8 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
484453; CHECK-LABEL: smaxmin_v2i64:
485454; CHECK: // %bb.0:
486455; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
487- ; CHECK-NEXT: cmgt v3.2d, v1.2d, v0.2d
488- ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
489- ; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
456+ ; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
457+ ; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
490458; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
491459; CHECK-NEXT: ret
492460 %a = tail call <2 x i64 > @llvm.smax.v2i64 (<2 x i64 > %0 , <2 x i64 > %1 )
@@ -531,11 +499,9 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
531499define <2 x i64 > @umaxmin_v2i64 (<2 x i64 > %0 , <2 x i64 > %1 ) {
532500; CHECK-LABEL: umaxmin_v2i64:
533501; CHECK: // %bb.0:
534- ; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
535- ; CHECK-NEXT: cmhi v3.2d, v1.2d, v0.2d
536- ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
537- ; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
538- ; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
502+ ; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
503+ ; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
504+ ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
539505; CHECK-NEXT: ret
540506 %a = tail call <2 x i64 > @llvm.umax.v2i64 (<2 x i64 > %0 , <2 x i64 > %1 )
541507 %b = tail call <2 x i64 > @llvm.umin.v2i64 (<2 x i64 > %0 , <2 x i64 > %1 )
0 commit comments