@@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
7070; NOFP16-NEXT: .cfi_offset w22, -32
7171; NOFP16-NEXT: .cfi_offset w30, -48
7272; NOFP16-NEXT: mov w21, w0
73- ; NOFP16-NEXT: and w0, w2 , #0xffff
73+ ; NOFP16-NEXT: and w0, w1 , #0xffff
7474; NOFP16-NEXT: mov x19, x3
75- ; NOFP16-NEXT: mov w20, w1
75+ ; NOFP16-NEXT: mov w20, w2
7676; NOFP16-NEXT: bl __gnu_h2f_ieee
7777; NOFP16-NEXT: mov w22, w0
7878; NOFP16-NEXT: and w0, w21, #0xffff
7979; NOFP16-NEXT: bl __gnu_h2f_ieee
80- ; NOFP16-NEXT: mov w21 , w0
80+ ; NOFP16-NEXT: mov w8 , w0
8181; NOFP16-NEXT: and w0, w20, #0xffff
82+ ; NOFP16-NEXT: orr x21, x8, x22, lsl #32
8283; NOFP16-NEXT: bl __gnu_h2f_ieee
83- ; NOFP16-NEXT: mov w8, w21
84- ; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0
85- ; NOFP16-NEXT: str w22, [x19, #8]
86- ; NOFP16-NEXT: orr x8, x8, x0, lsl #32
84+ ; NOFP16-NEXT: str x21, [x19]
8785; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
88- ; NOFP16-NEXT: str x8 , [x19]
86+ ; NOFP16-NEXT: str w0 , [x19, #8 ]
8987; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
9088; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
9189; NOFP16-NEXT: ret
@@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
182180define void @outgoing_v4f16_return (ptr %ptr ) #0 {
183181; NOFP16-LABEL: outgoing_v4f16_return:
184182; NOFP16: // %bb.0:
185- ; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
186- ; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
187- ; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
188- ; NOFP16-NEXT: .cfi_def_cfa_offset 48
183+ ; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
184+ ; NOFP16-NEXT: .cfi_def_cfa_offset 16
189185; NOFP16-NEXT: .cfi_offset w19, -8
190- ; NOFP16-NEXT: .cfi_offset w20, -16
191- ; NOFP16-NEXT: .cfi_offset w21, -24
192- ; NOFP16-NEXT: .cfi_offset w22, -32
193- ; NOFP16-NEXT: .cfi_offset w23, -40
194- ; NOFP16-NEXT: .cfi_offset w30, -48
186+ ; NOFP16-NEXT: .cfi_offset w30, -16
195187; NOFP16-NEXT: mov x19, x0
196188; NOFP16-NEXT: bl v4f16_result
197- ; NOFP16-NEXT: and w0, w0, #0xffff
198- ; NOFP16-NEXT: mov w20, w1
199- ; NOFP16-NEXT: mov w21, w2
200- ; NOFP16-NEXT: mov w22, w3
201- ; NOFP16-NEXT: bl __gnu_h2f_ieee
202- ; NOFP16-NEXT: mov w23, w0
203- ; NOFP16-NEXT: and w0, w20, #0xffff
204- ; NOFP16-NEXT: bl __gnu_h2f_ieee
205- ; NOFP16-NEXT: mov w20, w0
206- ; NOFP16-NEXT: and w0, w21, #0xffff
207- ; NOFP16-NEXT: bl __gnu_h2f_ieee
208- ; NOFP16-NEXT: mov w21, w0
209- ; NOFP16-NEXT: and w0, w22, #0xffff
210- ; NOFP16-NEXT: bl __gnu_h2f_ieee
211- ; NOFP16-NEXT: bl __gnu_f2h_ieee
212- ; NOFP16-NEXT: strh w0, [x19, #6]
213- ; NOFP16-NEXT: mov w0, w21
214- ; NOFP16-NEXT: bl __gnu_f2h_ieee
215- ; NOFP16-NEXT: strh w0, [x19, #4]
216- ; NOFP16-NEXT: mov w0, w20
217- ; NOFP16-NEXT: bl __gnu_f2h_ieee
218- ; NOFP16-NEXT: strh w0, [x19, #2]
219- ; NOFP16-NEXT: mov w0, w23
220- ; NOFP16-NEXT: bl __gnu_f2h_ieee
189+ ; NOFP16-NEXT: strh w2, [x19, #4]
190+ ; NOFP16-NEXT: strh w3, [x19, #6]
191+ ; NOFP16-NEXT: strh w1, [x19, #2]
221192; NOFP16-NEXT: strh w0, [x19]
222- ; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
223- ; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
224- ; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
193+ ; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
225194; NOFP16-NEXT: ret
226195 %val = call <4 x half > @v4f16_result ()
227196 store <4 x half > %val , ptr %ptr
@@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
231200define void @outgoing_v8f16_return (ptr %ptr ) #0 {
232201; NOFP16-LABEL: outgoing_v8f16_return:
233202; NOFP16: // %bb.0:
234- ; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill
235- ; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
236- ; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
237- ; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
238- ; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
239- ; NOFP16-NEXT: .cfi_def_cfa_offset 80
203+ ; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
204+ ; NOFP16-NEXT: .cfi_def_cfa_offset 16
240205; NOFP16-NEXT: .cfi_offset w19, -8
241- ; NOFP16-NEXT: .cfi_offset w20, -16
242- ; NOFP16-NEXT: .cfi_offset w21, -24
243- ; NOFP16-NEXT: .cfi_offset w22, -32
244- ; NOFP16-NEXT: .cfi_offset w23, -40
245- ; NOFP16-NEXT: .cfi_offset w24, -48
246- ; NOFP16-NEXT: .cfi_offset w25, -56
247- ; NOFP16-NEXT: .cfi_offset w26, -64
248- ; NOFP16-NEXT: .cfi_offset w27, -72
249- ; NOFP16-NEXT: .cfi_offset w30, -80
206+ ; NOFP16-NEXT: .cfi_offset w30, -16
250207; NOFP16-NEXT: mov x19, x0
251208; NOFP16-NEXT: bl v8f16_result
252- ; NOFP16-NEXT: and w0, w0, #0xffff
253- ; NOFP16-NEXT: mov w21, w1
254- ; NOFP16-NEXT: mov w22, w2
255- ; NOFP16-NEXT: mov w23, w3
256- ; NOFP16-NEXT: mov w24, w4
257- ; NOFP16-NEXT: mov w25, w5
258- ; NOFP16-NEXT: mov w26, w6
259- ; NOFP16-NEXT: mov w27, w7
260- ; NOFP16-NEXT: bl __gnu_h2f_ieee
261- ; NOFP16-NEXT: mov w20, w0
262- ; NOFP16-NEXT: and w0, w21, #0xffff
263- ; NOFP16-NEXT: bl __gnu_h2f_ieee
264- ; NOFP16-NEXT: mov w21, w0
265- ; NOFP16-NEXT: and w0, w22, #0xffff
266- ; NOFP16-NEXT: bl __gnu_h2f_ieee
267- ; NOFP16-NEXT: mov w22, w0
268- ; NOFP16-NEXT: and w0, w23, #0xffff
269- ; NOFP16-NEXT: bl __gnu_h2f_ieee
270- ; NOFP16-NEXT: mov w23, w0
271- ; NOFP16-NEXT: and w0, w24, #0xffff
272- ; NOFP16-NEXT: bl __gnu_h2f_ieee
273- ; NOFP16-NEXT: mov w24, w0
274- ; NOFP16-NEXT: and w0, w25, #0xffff
275- ; NOFP16-NEXT: bl __gnu_h2f_ieee
276- ; NOFP16-NEXT: mov w25, w0
277- ; NOFP16-NEXT: and w0, w26, #0xffff
278- ; NOFP16-NEXT: bl __gnu_h2f_ieee
279- ; NOFP16-NEXT: mov w26, w0
280- ; NOFP16-NEXT: and w0, w27, #0xffff
281- ; NOFP16-NEXT: bl __gnu_h2f_ieee
282- ; NOFP16-NEXT: bl __gnu_f2h_ieee
283- ; NOFP16-NEXT: strh w0, [x19, #14]
284- ; NOFP16-NEXT: mov w0, w26
285- ; NOFP16-NEXT: bl __gnu_f2h_ieee
286- ; NOFP16-NEXT: strh w0, [x19, #12]
287- ; NOFP16-NEXT: mov w0, w25
288- ; NOFP16-NEXT: bl __gnu_f2h_ieee
289- ; NOFP16-NEXT: strh w0, [x19, #10]
290- ; NOFP16-NEXT: mov w0, w24
291- ; NOFP16-NEXT: bl __gnu_f2h_ieee
292- ; NOFP16-NEXT: strh w0, [x19, #8]
293- ; NOFP16-NEXT: mov w0, w23
294- ; NOFP16-NEXT: bl __gnu_f2h_ieee
295- ; NOFP16-NEXT: strh w0, [x19, #6]
296- ; NOFP16-NEXT: mov w0, w22
297- ; NOFP16-NEXT: bl __gnu_f2h_ieee
298- ; NOFP16-NEXT: strh w0, [x19, #4]
299- ; NOFP16-NEXT: mov w0, w21
300- ; NOFP16-NEXT: bl __gnu_f2h_ieee
301- ; NOFP16-NEXT: strh w0, [x19, #2]
302- ; NOFP16-NEXT: mov w0, w20
303- ; NOFP16-NEXT: bl __gnu_f2h_ieee
209+ ; NOFP16-NEXT: strh w5, [x19, #10]
210+ ; NOFP16-NEXT: strh w7, [x19, #14]
211+ ; NOFP16-NEXT: strh w6, [x19, #12]
212+ ; NOFP16-NEXT: strh w4, [x19, #8]
213+ ; NOFP16-NEXT: strh w3, [x19, #6]
214+ ; NOFP16-NEXT: strh w2, [x19, #4]
215+ ; NOFP16-NEXT: strh w1, [x19, #2]
304216; NOFP16-NEXT: strh w0, [x19]
305- ; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
306- ; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
307- ; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
308- ; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
309- ; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload
217+ ; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
310218; NOFP16-NEXT: ret
311219 %val = call <8 x half > @v8f16_result ()
312220 store <8 x half > %val , ptr %ptr
0 commit comments