@@ -49,11 +49,12 @@ define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
4949define <4 x i16 > @sabd_4h_promoted_ops (<4 x i8 > %a , <4 x i8 > %b ) #0 {
5050; CHECK-LABEL: sabd_4h_promoted_ops:
5151; CHECK: // %bb.0:
52- ; CHECK-NEXT: shl v0.4h, v0.4h, #8
5352; CHECK-NEXT: shl v1.4h, v1.4h, #8
54- ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
53+ ; CHECK-NEXT: shl v0.4h, v0.4h, #8
5554; CHECK-NEXT: sshr v1.4h, v1.4h, #8
55+ ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
5656; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h
57+ ; CHECK-NEXT: bic v0.4h, #255, lsl #8
5758; CHECK-NEXT: ret
5859 %a.sext = sext <4 x i8 > %a to <4 x i16 >
5960 %b.sext = sext <4 x i8 > %b to <4 x i16 >
@@ -103,11 +104,13 @@ define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
103104define <2 x i32 > @sabd_2s_promoted_ops (<2 x i16 > %a , <2 x i16 > %b ) #0 {
104105; CHECK-LABEL: sabd_2s_promoted_ops:
105106; CHECK: // %bb.0:
106- ; CHECK-NEXT: shl v0.2s, v0.2s, #16
107107; CHECK-NEXT: shl v1.2s, v1.2s, #16
108- ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
108+ ; CHECK-NEXT: shl v0.2s, v0.2s, #16
109+ ; CHECK-NEXT: movi d2, #0x00ffff0000ffff
109110; CHECK-NEXT: sshr v1.2s, v1.2s, #16
111+ ; CHECK-NEXT: sshr v0.2s, v0.2s, #16
110112; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
113+ ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
111114; CHECK-NEXT: ret
112115 %a.sext = sext <2 x i16 > %a to <2 x i32 >
113116 %b.sext = sext <2 x i16 > %b to <2 x i32 >
@@ -146,25 +149,16 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
146149; CHECK: // %bb.0:
147150; CHECK-NEXT: mov x8, v0.d[1]
148151; CHECK-NEXT: mov x9, v1.d[1]
149- ; CHECK-NEXT: fmov x10, d0
150- ; CHECK-NEXT: fmov x12, d1
151- ; CHECK-NEXT: asr x14, x10, #63
152- ; CHECK-NEXT: asr x11, x8, #63
153- ; CHECK-NEXT: asr x13, x9, #63
154- ; CHECK-NEXT: asr x15, x12, #63
152+ ; CHECK-NEXT: fmov x11, d1
153+ ; CHECK-NEXT: sub x10, x9, x8
155154; CHECK-NEXT: subs x8, x8, x9
156- ; CHECK-NEXT: sbc x9, x11, x13
157- ; CHECK-NEXT: subs x10, x10, x12
158- ; CHECK-NEXT: sbc x11, x14, x15
159- ; CHECK-NEXT: asr x9, x9, #63
160- ; CHECK-NEXT: asr x11, x11, #63
161- ; CHECK-NEXT: eor x8, x8, x9
162- ; CHECK-NEXT: eor x10, x10, x11
163- ; CHECK-NEXT: sub x8, x8, x9
164- ; CHECK-NEXT: sub x10, x10, x11
165- ; CHECK-NEXT: fmov d1, x8
166- ; CHECK-NEXT: fmov d0, x10
167- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
155+ ; CHECK-NEXT: fmov x9, d0
156+ ; CHECK-NEXT: csel x8, x8, x10, gt
157+ ; CHECK-NEXT: sub x10, x11, x9
158+ ; CHECK-NEXT: subs x9, x9, x11
159+ ; CHECK-NEXT: csel x9, x9, x10, gt
160+ ; CHECK-NEXT: fmov d0, x9
161+ ; CHECK-NEXT: mov v0.d[1], x8
168162; CHECK-NEXT: ret
169163 %a.sext = sext <2 x i64 > %a to <2 x i128 >
170164 %b.sext = sext <2 x i64 > %b to <2 x i128 >
@@ -232,8 +226,8 @@ define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
232226define <4 x i16 > @uabd_4h_promoted_ops (<4 x i8 > %a , <4 x i8 > %b ) #0 {
233227; CHECK-LABEL: uabd_4h_promoted_ops:
234228; CHECK: // %bb.0:
235- ; CHECK-NEXT: bic v0.4h, #255, lsl #8
236229; CHECK-NEXT: bic v1.4h, #255, lsl #8
230+ ; CHECK-NEXT: bic v0.4h, #255, lsl #8
237231; CHECK-NEXT: uabd v0.4h, v0.4h, v1.4h
238232; CHECK-NEXT: ret
239233 %a.zext = zext <4 x i8 > %a to <4 x i16 >
@@ -285,8 +279,8 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
285279; CHECK-LABEL: uabd_2s_promoted_ops:
286280; CHECK: // %bb.0:
287281; CHECK-NEXT: movi d2, #0x00ffff0000ffff
288- ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
289282; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
283+ ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
290284; CHECK-NEXT: uabd v0.2s, v0.2s, v1.2s
291285; CHECK-NEXT: ret
292286 %a.zext = zext <2 x i16 > %a to <2 x i32 >
@@ -324,23 +318,9 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
324318define <2 x i64 > @uabd_2d (<2 x i64 > %a , <2 x i64 > %b ) #0 {
325319; CHECK-LABEL: uabd_2d:
326320; CHECK: // %bb.0:
327- ; CHECK-NEXT: mov x8, v0.d[1]
328- ; CHECK-NEXT: mov x9, v1.d[1]
329- ; CHECK-NEXT: fmov x10, d0
330- ; CHECK-NEXT: fmov x11, d1
331- ; CHECK-NEXT: subs x8, x8, x9
332- ; CHECK-NEXT: ngc x9, xzr
333- ; CHECK-NEXT: subs x10, x10, x11
334- ; CHECK-NEXT: ngc x11, xzr
335- ; CHECK-NEXT: asr x9, x9, #63
336- ; CHECK-NEXT: asr x11, x11, #63
337- ; CHECK-NEXT: eor x8, x8, x9
338- ; CHECK-NEXT: eor x10, x10, x11
339- ; CHECK-NEXT: sub x8, x8, x9
340- ; CHECK-NEXT: sub x10, x10, x11
341- ; CHECK-NEXT: fmov d1, x8
342- ; CHECK-NEXT: fmov d0, x10
343- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
321+ ; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
322+ ; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
323+ ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
344324; CHECK-NEXT: ret
345325 %a.zext = zext <2 x i64 > %a to <2 x i128 >
346326 %b.zext = zext <2 x i64 > %b to <2 x i128 >
@@ -439,8 +419,18 @@ define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) #0 {
439419define <2 x i64 > @sabd_v2i64_nsw (<2 x i64 > %a , <2 x i64 > %b ) #0 {
440420; CHECK-LABEL: sabd_v2i64_nsw:
441421; CHECK: // %bb.0:
442- ; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
443- ; CHECK-NEXT: abs v0.2d, v0.2d
422+ ; CHECK-NEXT: mov x8, v0.d[1]
423+ ; CHECK-NEXT: mov x9, v1.d[1]
424+ ; CHECK-NEXT: fmov x11, d1
425+ ; CHECK-NEXT: sub x10, x9, x8
426+ ; CHECK-NEXT: subs x8, x8, x9
427+ ; CHECK-NEXT: fmov x9, d0
428+ ; CHECK-NEXT: csel x8, x8, x10, gt
429+ ; CHECK-NEXT: sub x10, x11, x9
430+ ; CHECK-NEXT: subs x9, x9, x11
431+ ; CHECK-NEXT: csel x9, x9, x10, gt
432+ ; CHECK-NEXT: fmov d0, x9
433+ ; CHECK-NEXT: mov v0.d[1], x8
444434; CHECK-NEXT: ret
445435 %sub = sub nsw <2 x i64 > %a , %b
446436 %abs = call <2 x i64 > @llvm.abs.v2i64 (<2 x i64 > %sub , i1 true )
@@ -483,11 +473,18 @@ define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
483473define <2 x i64 > @smaxmin_v2i64 (<2 x i64 > %0 , <2 x i64 > %1 ) {
484474; CHECK-LABEL: smaxmin_v2i64:
485475; CHECK: // %bb.0:
486- ; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
487- ; CHECK-NEXT: cmgt v3.2d, v1.2d, v0.2d
488- ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
489- ; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
490- ; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
476+ ; CHECK-NEXT: mov x8, v0.d[1]
477+ ; CHECK-NEXT: mov x9, v1.d[1]
478+ ; CHECK-NEXT: fmov x11, d1
479+ ; CHECK-NEXT: sub x10, x9, x8
480+ ; CHECK-NEXT: subs x8, x8, x9
481+ ; CHECK-NEXT: fmov x9, d0
482+ ; CHECK-NEXT: csel x8, x8, x10, gt
483+ ; CHECK-NEXT: sub x10, x11, x9
484+ ; CHECK-NEXT: subs x9, x9, x11
485+ ; CHECK-NEXT: csel x9, x9, x10, gt
486+ ; CHECK-NEXT: fmov d0, x9
487+ ; CHECK-NEXT: mov v0.d[1], x8
491488; CHECK-NEXT: ret
492489 %a = tail call <2 x i64 > @llvm.smax.v2i64 (<2 x i64 > %0 , <2 x i64 > %1 )
493490 %b = tail call <2 x i64 > @llvm.smin.v2i64 (<2 x i64 > %0 , <2 x i64 > %1 )
@@ -531,11 +528,9 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
531528define <2 x i64 > @umaxmin_v2i64 (<2 x i64 > %0 , <2 x i64 > %1 ) {
532529; CHECK-LABEL: umaxmin_v2i64:
533530; CHECK: // %bb.0:
534- ; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
535- ; CHECK-NEXT: cmhi v3.2d, v1.2d, v0.2d
536- ; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
537- ; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
538- ; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
531+ ; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
532+ ; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
533+ ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
539534; CHECK-NEXT: ret
540535 %a = tail call <2 x i64 > @llvm.umax.v2i64 (<2 x i64 > %0 , <2 x i64 > %1 )
541536 %b = tail call <2 x i64 > @llvm.umin.v2i64 (<2 x i64 > %0 , <2 x i64 > %1 )
0 commit comments