55define <8 x i16 > @concat_add (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %c , <4 x i16 > %d ) {
66; CHECK-LABEL: concat_add:
77; CHECK: // %bb.0:
8- ; CHECK-NEXT: add v2.4h, v2.4h, v3.4h
9- ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
8+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
9+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
10+ ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
11+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
12+ ; CHECK-NEXT: mov v1.d[1], v3.d[0]
1013; CHECK-NEXT: mov v0.d[1], v2.d[0]
14+ ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
1115; CHECK-NEXT: ret
1216 %x = add <4 x i16 > %a , %b
1317 %y = add <4 x i16 > %c , %d
@@ -33,13 +37,9 @@ define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
3337define <8 x i16 > @concat_addtunc2 (<4 x i32 > %a , <4 x i32 > %b , <4 x i32 > %c , <4 x i32 > %d ) {
3438; CHECK-LABEL: concat_addtunc2:
3539; CHECK: // %bb.0:
36- ; CHECK-NEXT: xtn v1.4h, v1.4s
37- ; CHECK-NEXT: xtn v0.4h, v0.4s
38- ; CHECK-NEXT: xtn v2.4h, v2.4s
39- ; CHECK-NEXT: xtn v3.4h, v3.4s
40- ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
41- ; CHECK-NEXT: add v1.4h, v2.4h, v3.4h
42- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
40+ ; CHECK-NEXT: uzp1 v1.8h, v1.8h, v3.8h
41+ ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
42+ ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
4343; CHECK-NEXT: ret
4444 %at = trunc <4 x i32 > %a to <4 x i16 >
4545 %bt = trunc <4 x i32 > %b to <4 x i16 >
@@ -54,9 +54,13 @@ define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
5454define <8 x i16 > @concat_sub (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %c , <4 x i16 > %d ) {
5555; CHECK-LABEL: concat_sub:
5656; CHECK: // %bb.0:
57- ; CHECK-NEXT: sub v2.4h, v2.4h, v3.4h
58- ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
57+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
58+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
59+ ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
60+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
61+ ; CHECK-NEXT: mov v1.d[1], v3.d[0]
5962; CHECK-NEXT: mov v0.d[1], v2.d[0]
63+ ; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h
6064; CHECK-NEXT: ret
6165 %x = sub <4 x i16 > %a , %b
6266 %y = sub <4 x i16 > %c , %d
@@ -67,9 +71,13 @@ define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
6771define <8 x i16 > @concat_mul (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %c , <4 x i16 > %d ) {
6872; CHECK-LABEL: concat_mul:
6973; CHECK: // %bb.0:
70- ; CHECK-NEXT: mul v2.4h, v2.4h, v3.4h
71- ; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h
74+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
75+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
76+ ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
77+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
78+ ; CHECK-NEXT: mov v1.d[1], v3.d[0]
7279; CHECK-NEXT: mov v0.d[1], v2.d[0]
80+ ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
7381; CHECK-NEXT: ret
7482 %x = mul <4 x i16 > %a , %b
7583 %y = mul <4 x i16 > %c , %d
@@ -80,9 +88,13 @@ define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
8088define <8 x i16 > @concat_xor (<4 x i16 > %a , <4 x i16 > %b , <4 x i16 > %c , <4 x i16 > %d ) {
8189; CHECK-LABEL: concat_xor:
8290; CHECK: // %bb.0:
83- ; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b
84- ; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
91+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
92+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
93+ ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
94+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
95+ ; CHECK-NEXT: mov v1.d[1], v3.d[0]
8596; CHECK-NEXT: mov v0.d[1], v2.d[0]
97+ ; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
8698; CHECK-NEXT: ret
8799 %x = xor <4 x i16 > %a , %b
88100 %y = xor <4 x i16 > %c , %d
@@ -93,9 +105,13 @@ define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
93105define <8 x half > @concat_fadd (<4 x half > %a , <4 x half > %b , <4 x half > %c , <4 x half > %d ) {
94106; CHECK-LABEL: concat_fadd:
95107; CHECK: // %bb.0:
96- ; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h
97- ; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
108+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
109+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
110+ ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
111+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
112+ ; CHECK-NEXT: mov v1.d[1], v3.d[0]
98113; CHECK-NEXT: mov v0.d[1], v2.d[0]
114+ ; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
99115; CHECK-NEXT: ret
100116 %x = fadd <4 x half > %a , %b
101117 %y = fadd <4 x half > %c , %d
@@ -106,9 +122,13 @@ define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
106122define <8 x half > @concat_fmul (<4 x half > %a , <4 x half > %b , <4 x half > %c , <4 x half > %d ) {
107123; CHECK-LABEL: concat_fmul:
108124; CHECK: // %bb.0:
109- ; CHECK-NEXT: fmul v2.4h, v2.4h, v3.4h
110- ; CHECK-NEXT: fmul v0.4h, v0.4h, v1.4h
125+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
126+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
127+ ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
128+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
129+ ; CHECK-NEXT: mov v1.d[1], v3.d[0]
111130; CHECK-NEXT: mov v0.d[1], v2.d[0]
131+ ; CHECK-NEXT: fmul v0.8h, v0.8h, v1.8h
112132; CHECK-NEXT: ret
113133 %x = fmul <4 x half > %a , %b
114134 %y = fmul <4 x half > %c , %d
@@ -119,9 +139,13 @@ define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
119139define <8 x half > @concat_min (<4 x half > %a , <4 x half > %b , <4 x half > %c , <4 x half > %d ) {
120140; CHECK-LABEL: concat_min:
121141; CHECK: // %bb.0:
122- ; CHECK-NEXT: fminnm v2.4h, v2.4h, v3.4h
123- ; CHECK-NEXT: fminnm v0.4h, v0.4h, v1.4h
142+ ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
143+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
144+ ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
145+ ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
146+ ; CHECK-NEXT: mov v1.d[1], v3.d[0]
124147; CHECK-NEXT: mov v0.d[1], v2.d[0]
148+ ; CHECK-NEXT: fminnm v0.8h, v0.8h, v1.8h
125149; CHECK-NEXT: ret
126150 %x = call <4 x half > @llvm.minnum.v4f16 (<4 x half > %a , <4 x half > %b )
127151 %y = call <4 x half > @llvm.minnum.v4f16 (<4 x half > %c , <4 x half > %d )
@@ -146,21 +170,16 @@ define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture n
146170; CHECK-LABEL: signOf_neon:
147171; CHECK: // %bb.0: // %entry
148172; CHECK-NEXT: ldp q1, q2, [x0]
149- ; CHECK-NEXT: movi v0.8b , #1
173+ ; CHECK-NEXT: movi v0.16b , #1
150174; CHECK-NEXT: ldp q3, q4, [x1]
151175; CHECK-NEXT: cmhi v5.8h, v1.8h, v3.8h
152176; CHECK-NEXT: cmhi v6.8h, v2.8h, v4.8h
153177; CHECK-NEXT: cmhi v1.8h, v3.8h, v1.8h
154178; CHECK-NEXT: cmhi v2.8h, v4.8h, v2.8h
155- ; CHECK-NEXT: xtn v3.8b, v5.8h
156- ; CHECK-NEXT: xtn v4.8b, v6.8h
157- ; CHECK-NEXT: xtn v1.8b, v1.8h
158- ; CHECK-NEXT: xtn v2.8b, v2.8h
159- ; CHECK-NEXT: and v3.8b, v3.8b, v0.8b
160- ; CHECK-NEXT: and v4.8b, v4.8b, v0.8b
161- ; CHECK-NEXT: orr v0.8b, v3.8b, v1.8b
162- ; CHECK-NEXT: orr v1.8b, v4.8b, v2.8b
163- ; CHECK-NEXT: mov v0.d[1], v1.d[0]
179+ ; CHECK-NEXT: uzp1 v3.16b, v5.16b, v6.16b
180+ ; CHECK-NEXT: uzp1 v1.16b, v1.16b, v2.16b
181+ ; CHECK-NEXT: and v0.16b, v3.16b, v0.16b
182+ ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
164183; CHECK-NEXT: ret
165184entry:
166185 %0 = load <8 x i16 >, ptr %a , align 2
0 commit comments