@@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
7070; NOFP16-NEXT:    .cfi_offset w22, -32 
7171; NOFP16-NEXT:    .cfi_offset w30, -48 
7272; NOFP16-NEXT:    mov w21, w0 
73- ; NOFP16-NEXT:    and w0, w2 , #0xffff 
73+ ; NOFP16-NEXT:    and w0, w1 , #0xffff 
7474; NOFP16-NEXT:    mov x19, x3 
75- ; NOFP16-NEXT:    mov w20, w1  
75+ ; NOFP16-NEXT:    mov w20, w2  
7676; NOFP16-NEXT:    bl __gnu_h2f_ieee 
7777; NOFP16-NEXT:    mov w22, w0 
7878; NOFP16-NEXT:    and w0, w21, #0xffff 
7979; NOFP16-NEXT:    bl __gnu_h2f_ieee 
80- ; NOFP16-NEXT:    mov w21 , w0 
80+ ; NOFP16-NEXT:    mov w8 , w0 
8181; NOFP16-NEXT:    and w0, w20, #0xffff 
82+ ; NOFP16-NEXT:    orr x21, x8, x22, lsl #32 
8283; NOFP16-NEXT:    bl __gnu_h2f_ieee 
83- ; NOFP16-NEXT:    mov w8, w21 
84- ; NOFP16-NEXT:    // kill: def $w0 killed $w0 def $x0 
85- ; NOFP16-NEXT:    str w22, [x19, #8] 
86- ; NOFP16-NEXT:    orr x8, x8, x0, lsl #32 
84+ ; NOFP16-NEXT:    str x21, [x19] 
8785; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload 
88- ; NOFP16-NEXT:    str x8 , [x19] 
86+ ; NOFP16-NEXT:    str w0 , [x19, #8 ] 
8987; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload 
9088; NOFP16-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload 
9189; NOFP16-NEXT:    ret 
@@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
182180define  void  @outgoing_v4f16_return (ptr  %ptr ) #0  {
183181; NOFP16-LABEL: outgoing_v4f16_return: 
184182; NOFP16:       // %bb.0: 
185- ; NOFP16-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill 
186- ; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill 
187- ; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill 
188- ; NOFP16-NEXT:    .cfi_def_cfa_offset 48 
183+ ; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill 
184+ ; NOFP16-NEXT:    .cfi_def_cfa_offset 16 
189185; NOFP16-NEXT:    .cfi_offset w19, -8 
190- ; NOFP16-NEXT:    .cfi_offset w20, -16 
191- ; NOFP16-NEXT:    .cfi_offset w21, -24 
192- ; NOFP16-NEXT:    .cfi_offset w22, -32 
193- ; NOFP16-NEXT:    .cfi_offset w23, -40 
194- ; NOFP16-NEXT:    .cfi_offset w30, -48 
186+ ; NOFP16-NEXT:    .cfi_offset w30, -16 
195187; NOFP16-NEXT:    mov x19, x0 
196188; NOFP16-NEXT:    bl v4f16_result 
197- ; NOFP16-NEXT:    and w0, w0, #0xffff 
198- ; NOFP16-NEXT:    mov w20, w1 
199- ; NOFP16-NEXT:    mov w21, w2 
200- ; NOFP16-NEXT:    mov w22, w3 
201- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
202- ; NOFP16-NEXT:    mov w23, w0 
203- ; NOFP16-NEXT:    and w0, w20, #0xffff 
204- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
205- ; NOFP16-NEXT:    mov w20, w0 
206- ; NOFP16-NEXT:    and w0, w21, #0xffff 
207- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
208- ; NOFP16-NEXT:    mov w21, w0 
209- ; NOFP16-NEXT:    and w0, w22, #0xffff 
210- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
211- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
212- ; NOFP16-NEXT:    strh w0, [x19, #6] 
213- ; NOFP16-NEXT:    mov w0, w21 
214- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
215- ; NOFP16-NEXT:    strh w0, [x19, #4] 
216- ; NOFP16-NEXT:    mov w0, w20 
217- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
218- ; NOFP16-NEXT:    strh w0, [x19, #2] 
219- ; NOFP16-NEXT:    mov w0, w23 
220- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
189+ ; NOFP16-NEXT:    strh w2, [x19, #4] 
190+ ; NOFP16-NEXT:    strh w3, [x19, #6] 
191+ ; NOFP16-NEXT:    strh w1, [x19, #2] 
221192; NOFP16-NEXT:    strh w0, [x19] 
222- ; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload 
223- ; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload 
224- ; NOFP16-NEXT:    ldp x30, x23, [sp], #48 // 16-byte Folded Reload 
193+ ; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload 
225194; NOFP16-NEXT:    ret 
226195  %val  = call  <4  x half > @v4f16_result ()
227196  store  <4  x half > %val , ptr  %ptr 
@@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
231200define  void  @outgoing_v8f16_return (ptr  %ptr ) #0  {
232201; NOFP16-LABEL: outgoing_v8f16_return: 
233202; NOFP16:       // %bb.0: 
234- ; NOFP16-NEXT:    stp x30, x27, [sp, #-80]! // 16-byte Folded Spill 
235- ; NOFP16-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill 
236- ; NOFP16-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill 
237- ; NOFP16-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill 
238- ; NOFP16-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill 
239- ; NOFP16-NEXT:    .cfi_def_cfa_offset 80 
203+ ; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill 
204+ ; NOFP16-NEXT:    .cfi_def_cfa_offset 16 
240205; NOFP16-NEXT:    .cfi_offset w19, -8 
241- ; NOFP16-NEXT:    .cfi_offset w20, -16 
242- ; NOFP16-NEXT:    .cfi_offset w21, -24 
243- ; NOFP16-NEXT:    .cfi_offset w22, -32 
244- ; NOFP16-NEXT:    .cfi_offset w23, -40 
245- ; NOFP16-NEXT:    .cfi_offset w24, -48 
246- ; NOFP16-NEXT:    .cfi_offset w25, -56 
247- ; NOFP16-NEXT:    .cfi_offset w26, -64 
248- ; NOFP16-NEXT:    .cfi_offset w27, -72 
249- ; NOFP16-NEXT:    .cfi_offset w30, -80 
206+ ; NOFP16-NEXT:    .cfi_offset w30, -16 
250207; NOFP16-NEXT:    mov x19, x0 
251208; NOFP16-NEXT:    bl v8f16_result 
252- ; NOFP16-NEXT:    and w0, w0, #0xffff 
253- ; NOFP16-NEXT:    mov w21, w1 
254- ; NOFP16-NEXT:    mov w22, w2 
255- ; NOFP16-NEXT:    mov w23, w3 
256- ; NOFP16-NEXT:    mov w24, w4 
257- ; NOFP16-NEXT:    mov w25, w5 
258- ; NOFP16-NEXT:    mov w26, w6 
259- ; NOFP16-NEXT:    mov w27, w7 
260- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
261- ; NOFP16-NEXT:    mov w20, w0 
262- ; NOFP16-NEXT:    and w0, w21, #0xffff 
263- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
264- ; NOFP16-NEXT:    mov w21, w0 
265- ; NOFP16-NEXT:    and w0, w22, #0xffff 
266- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
267- ; NOFP16-NEXT:    mov w22, w0 
268- ; NOFP16-NEXT:    and w0, w23, #0xffff 
269- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
270- ; NOFP16-NEXT:    mov w23, w0 
271- ; NOFP16-NEXT:    and w0, w24, #0xffff 
272- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
273- ; NOFP16-NEXT:    mov w24, w0 
274- ; NOFP16-NEXT:    and w0, w25, #0xffff 
275- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
276- ; NOFP16-NEXT:    mov w25, w0 
277- ; NOFP16-NEXT:    and w0, w26, #0xffff 
278- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
279- ; NOFP16-NEXT:    mov w26, w0 
280- ; NOFP16-NEXT:    and w0, w27, #0xffff 
281- ; NOFP16-NEXT:    bl __gnu_h2f_ieee 
282- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
283- ; NOFP16-NEXT:    strh w0, [x19, #14] 
284- ; NOFP16-NEXT:    mov w0, w26 
285- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
286- ; NOFP16-NEXT:    strh w0, [x19, #12] 
287- ; NOFP16-NEXT:    mov w0, w25 
288- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
289- ; NOFP16-NEXT:    strh w0, [x19, #10] 
290- ; NOFP16-NEXT:    mov w0, w24 
291- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
292- ; NOFP16-NEXT:    strh w0, [x19, #8] 
293- ; NOFP16-NEXT:    mov w0, w23 
294- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
295- ; NOFP16-NEXT:    strh w0, [x19, #6] 
296- ; NOFP16-NEXT:    mov w0, w22 
297- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
298- ; NOFP16-NEXT:    strh w0, [x19, #4] 
299- ; NOFP16-NEXT:    mov w0, w21 
300- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
301- ; NOFP16-NEXT:    strh w0, [x19, #2] 
302- ; NOFP16-NEXT:    mov w0, w20 
303- ; NOFP16-NEXT:    bl __gnu_f2h_ieee 
209+ ; NOFP16-NEXT:    strh w5, [x19, #10] 
210+ ; NOFP16-NEXT:    strh w7, [x19, #14] 
211+ ; NOFP16-NEXT:    strh w6, [x19, #12] 
212+ ; NOFP16-NEXT:    strh w4, [x19, #8] 
213+ ; NOFP16-NEXT:    strh w3, [x19, #6] 
214+ ; NOFP16-NEXT:    strh w2, [x19, #4] 
215+ ; NOFP16-NEXT:    strh w1, [x19, #2] 
304216; NOFP16-NEXT:    strh w0, [x19] 
305- ; NOFP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload 
306- ; NOFP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload 
307- ; NOFP16-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload 
308- ; NOFP16-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload 
309- ; NOFP16-NEXT:    ldp x30, x27, [sp], #80 // 16-byte Folded Reload 
217+ ; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload 
310218; NOFP16-NEXT:    ret 
311219  %val  = call  <8  x half > @v8f16_result ()
312220  store  <8  x half > %val , ptr  %ptr 
0 commit comments