diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 83bb1dfe86c6a..b5f8a61fa98f6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3740,7 +3740,11 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+  case ISD::FP_TO_UINT:
+  case ISD::LRINT:
+  case ISD::LLRINT:
+    Res = SoftPromoteHalfOp_Op0WithStrict(N);
+    break;
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
     Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3819,7 +3823,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
   return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
 }
 
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_Op0WithStrict(SDNode *N) {
   EVT RVT = N->getValueType(0);
   bool IsStrict = N->isStrictFPOpcode();
   SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 586c3411791f9..d580ce0026e69 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -843,7 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
-  SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+  SDValue SoftPromoteHalfOp_Op0WithStrict(SDNode *N);
   SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
   SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
   SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f4ac6bb76b3fe..2a40fb9b476f8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FLOG, MVT::f16, Promote);
     setOperationAction(ISD::FLOG10, MVT::f16, Promote);
     setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+    setOperationAction(ISD::LRINT, MVT::f16, Expand);
 
     setOperationAction(ISD::FROUND, MVT::f16, Legal);
     setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index a1a04db8622c7..7274a8b0ce34e 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
 
 define i64 @testmsxh_builtin(half %x) {
 ; CHECK-SOFT-LABEL: testmsxh_builtin:
@@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) {
 ; CHECK-NOFP16-NEXT: bl llrintf
 ; CHECK-NOFP16-NEXT: pop {r11, pc}
 ;
+; CHECK-FPv8-LABEL: testmsxh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: .save {r11, lr}
+; CHECK-FPv8-NEXT: push {r11, lr}
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: bl llrintf
+; CHECK-FPv8-NEXT: pop {r11, pc}
+;
 ; CHECK-FP16-LABEL: testmsxh_builtin:
 ; CHECK-FP16: @ %bb.0: @ %entry
 ; CHECK-FP16-NEXT: .save {r11, lr}
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 23a2685aa1122..2de234919a148 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,14 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
 
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-;   ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: pop {r11, lr}
+; CHECK-SOFT-NEXT: b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: pop {r11, lr}
+; CHECK-NOFP16-NEXT: b lrintf
+;
+; CHECK-FPv8-LABEL: testmswh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vrintx.f16 s0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: bx lr
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+  ret i32 %0
+}
 
 define i32 @testmsws_builtin(float %x) {
 ; CHECK-LABEL: testmsws_builtin:
@@ -39,8 +68,3 @@ entry:
   %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
   ret i32 %0
 }
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
-; CHECK-NOFP16: {{.*}}
-; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index c1159da77707c..c3c88840b1a6a 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,31 +9,1290 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
 
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { -; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) -; ret <1 x iXLen> %a -; } - -; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { -; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) -; ret <2 x iXLen> %a -; } +define <1 x iXLen> @lrint_v1f16(<1 x half> %x) { +; LE-I32-LABEL: lrint_v1f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: bl __aeabi_f2h +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v1f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r11, lr} +; LE-I64-NEXT: push {r11, lr} +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_f2h +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d0[0], r0 +; LE-I64-NEXT: vmov.32 d0[1], r1 +; LE-I64-NEXT: pop {r11, pc} +; +; BE-I32-LABEL: lrint_v1f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_f2h +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v1f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r11, lr} +; BE-I64-NEXT: push {r11, lr} +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: bl __aeabi_f2h +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: pop {r11, pc} + %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x) + ret <1 x iXLen> %a +} -; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { -; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) -; ret <4 x iXLen> %a -; } +define <2 x iXLen> @lrint_v2f16(<2 x half> %x) { +; LE-I32-LABEL: lrint_v2f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r11, lr} +; LE-I32-NEXT: push {r11, lr} +; LE-I32-NEXT: .vsave {d8} +; LE-I32-NEXT: vpush {d8} +; LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: vmov.f32 s16, s1 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov r1, s16 +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: mov r0, r1 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: vorr d0, d8, d8 +; LE-I32-NEXT: vpop {d8} +; LE-I32-NEXT: pop {r11, pc} +; +; LE-I64-LABEL: lrint_v2f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r11, lr} +; LE-I64-NEXT: push {r4, r5, r11, lr} +; LE-I64-NEXT: .vsave {d8, d9} +; LE-I64-NEXT: vpush {d8, d9} +; LE-I64-NEXT: vmov r0, s1 +; LE-I64-NEXT: vmov.f32 s16, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d9[0], r4 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d9[1], r5 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q4, q4 +; LE-I64-NEXT: vpop {d8, d9} +; LE-I64-NEXT: pop {r4, r5, r11, pc} +; +; BE-I32-LABEL: lrint_v2f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r11, lr} +; BE-I32-NEXT: push {r11, lr} +; BE-I32-NEXT: .vsave {d8} +; BE-I32-NEXT: vpush {d8} +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: vmov.f32 s16, s1 +; 
BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov r1, s16 +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: mov r0, r1 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 d0, d8 +; BE-I32-NEXT: vpop {d8} +; BE-I32-NEXT: pop {r11, pc} +; +; BE-I64-LABEL: lrint_v2f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r11, lr} +; BE-I64-NEXT: push {r4, r5, r11, lr} +; BE-I64-NEXT: .vsave {d8} +; BE-I64-NEXT: vpush {d8} +; BE-I64-NEXT: vmov r0, s1 +; BE-I64-NEXT: vmov.f32 s16, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d8[0], r4 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d8[1], r5 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d8 +; BE-I64-NEXT: vrev64.32 d0, d16 +; BE-I64-NEXT: vpop {d8} +; BE-I64-NEXT: pop {r4, r5, r11, pc} + %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x) + ret <2 x iXLen> %a +} -; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { -; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) -; ret <8 x iXLen> %a -; } +define <4 x iXLen> @lrint_v4f16(<4 x half> %x) { +; LE-I32-LABEL: lrint_v4f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r11, lr} +; LE-I32-NEXT: push {r4, r5, r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11} +; LE-I32-NEXT: vpush {d8, d9, d10, d11} +; LE-I32-NEXT: vmov r0, s3 +; LE-I32-NEXT: vmov.f32 s16, s2 +; LE-I32-NEXT: vmov.f32 s18, s1 +; LE-I32-NEXT: vmov.f32 s20, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: vmov r0, s16 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: vmov r0, s20 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r5 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: vmov r0, s18 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: vmov.32 d11[1], r4 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vorr q0, q5, q5 +; LE-I32-NEXT: vpop {d8, d9, d10, d11} +; LE-I32-NEXT: pop {r4, r5, r11, pc} +; +; LE-I64-LABEL: lrint_v4f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr} +; LE-I64-NEXT: .vsave {d12, d13} +; LE-I64-NEXT: vpush {d12, d13} +; LE-I64-NEXT: .vsave {d8, d9, d10} +; LE-I64-NEXT: vpush {d8, d9, d10} +; LE-I64-NEXT: vmov r0, s1 +; LE-I64-NEXT: vmov.f32 s16, s3 +; LE-I64-NEXT: vmov.f32 s20, s2 +; LE-I64-NEXT: vmov.f32 s18, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s18 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r7 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: vmov r0, s20 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 
d13[0], r5 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov.32 d13[1], r4 +; LE-I64-NEXT: vmov.32 d9[1], r6 +; LE-I64-NEXT: vmov.32 d12[1], r7 +; LE-I64-NEXT: vmov.32 d8[1], r1 +; LE-I64-NEXT: vorr q0, q6, q6 +; LE-I64-NEXT: vorr q1, q4, q4 +; LE-I64-NEXT: vpop {d8, d9, d10} +; LE-I64-NEXT: vpop {d12, d13} +; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc} +; +; BE-I32-LABEL: lrint_v4f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r11, lr} +; BE-I32-NEXT: push {r4, r5, r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11} +; BE-I32-NEXT: vpush {d8, d9, d10, d11} +; BE-I32-NEXT: vmov r0, s3 +; BE-I32-NEXT: vmov.f32 s16, s2 +; BE-I32-NEXT: vmov.f32 s18, s1 +; BE-I32-NEXT: vmov.f32 s20, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vmov r0, s16 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: vmov r0, s20 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r5 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: vmov r0, s18 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: vmov.32 d11[1], r4 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vpop {d8, d9, d10, d11} +; BE-I32-NEXT: pop {r4, r5, r11, pc} +; +; BE-I64-LABEL: lrint_v4f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr} +; BE-I64-NEXT: .vsave {d8, d9, d10} +; BE-I64-NEXT: vpush {d8, d9, d10} +; BE-I64-NEXT: vmov r0, s1 +; BE-I64-NEXT: vmov.f32 s16, s3 +; BE-I64-NEXT: vmov.f32 s18, s2 +; BE-I64-NEXT: vmov.f32 s20, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: vmov r0, s20 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r7, r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r7 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d8[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vmov r0, s18 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d9[0], r5 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov.32 d9[1], r4 +; BE-I64-NEXT: vmov.32 d8[1], r6 +; BE-I64-NEXT: vmov.32 d10[1], r7 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d9 +; BE-I64-NEXT: vrev64.32 d3, d8 +; BE-I64-NEXT: vrev64.32 d0, d10 +; BE-I64-NEXT: vrev64.32 d2, d16 +; BE-I64-NEXT: vpop {d8, d9, d10} +; BE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc} + %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x) + ret <4 x iXLen> %a +} -; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { -; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) -; ret <16 x iXLen> %a -; } +define <8 x iXLen> @lrint_v8f16(<8 x half> %x) { +; LE-I32-LABEL: lrint_v8f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; LE-I32-NEXT: vmov r0, s7 +; LE-I32-NEXT: vmov.f32 s18, s6 +; LE-I32-NEXT: vmov.f32 s16, s5 +; LE-I32-NEXT: vmov.f32 s20, s4 
+; LE-I32-NEXT: vmov.f32 s22, s3 +; LE-I32-NEXT: vmov.f32 s24, s2 +; LE-I32-NEXT: vmov.f32 s26, s1 +; LE-I32-NEXT: vmov.f32 s28, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: mov r8, r0 +; LE-I32-NEXT: vmov r0, s26 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r9, r0 +; LE-I32-NEXT: vmov r0, s22 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r6, r0 +; LE-I32-NEXT: vmov r0, s28 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r7, r0 +; LE-I32-NEXT: vmov r0, s24 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: vmov r0, s18 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: vmov r0, s20 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r5 +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r4 +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r7 +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r6 +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r9 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: vmov r0, s16 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: vmov.32 d11[1], r8 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vorr q0, q6, q6 +; LE-I32-NEXT: vorr q1, q5, q5 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; +; LE-I64-LABEL: lrint_v8f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #8 +; LE-I64-NEXT: sub sp, sp, #8 +; LE-I64-NEXT: vmov r0, s1 +; LE-I64-NEXT: vstr s6, [sp, #4] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s16, s7 +; LE-I64-NEXT: vmov.f32 s18, s5 +; LE-I64-NEXT: vmov.f32 s20, s4 +; LE-I64-NEXT: vmov.f32 s22, s3 +; LE-I64-NEXT: vmov.f32 s24, s2 +; LE-I64-NEXT: vmov.f32 s26, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r9, r0 +; LE-I64-NEXT: vmov r0, s26 +; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r10, r0 +; LE-I64-NEXT: vmov r0, s22 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s24 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov r0, s18 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r6, r0 +; LE-I64-NEXT: vmov r0, s20 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r4, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r4 +; LE-I64-NEXT: mov r11, r1 +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r6 +; LE-I64-NEXT: mov r8, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r7 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: vmov.32 d13[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r5 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov s0, r10 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: 
vmov.32 d15[0], r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d9[0], r9 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d15[1], r5 +; LE-I64-NEXT: vmov.32 d9[1], r0 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vmov.32 d11[1], r11 +; LE-I64-NEXT: vmov.32 d8[1], r4 +; LE-I64-NEXT: vmov.32 d14[1], r7 +; LE-I64-NEXT: vorr q0, q4, q4 +; LE-I64-NEXT: vmov.32 d12[1], r8 +; LE-I64-NEXT: vorr q1, q7, q7 +; LE-I64-NEXT: vmov.32 d10[1], r1 +; LE-I64-NEXT: vorr q2, q6, q6 +; LE-I64-NEXT: vorr q3, q5, q5 +; LE-I64-NEXT: add sp, sp, #8 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v8f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-I32-NEXT: vmov r0, s1 +; BE-I32-NEXT: vmov.f32 s18, s7 +; BE-I32-NEXT: vmov.f32 s20, s6 +; BE-I32-NEXT: vmov.f32 s16, s5 +; BE-I32-NEXT: vmov.f32 s22, s4 +; BE-I32-NEXT: vmov.f32 s24, s3 +; BE-I32-NEXT: vmov.f32 s26, s2 +; BE-I32-NEXT: vmov.f32 s28, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: mov r8, r0 +; BE-I32-NEXT: vmov r0, s24 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r9, r0 +; BE-I32-NEXT: vmov r0, s18 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r6, r0 +; BE-I32-NEXT: vmov r0, s26 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: vmov r0, s20 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vmov r0, s28 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: vmov r0, s22 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r5 +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r4 +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r7 +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r6 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r9 +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: vmov r0, s16 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: vmov.32 d12[1], r8 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q6 +; BE-I32-NEXT: vrev64.32 q1, q5 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; +; BE-I64-LABEL: lrint_v8f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: .pad #8 +; BE-I64-NEXT: sub sp, sp, #8 +; BE-I64-NEXT: vmov r0, s1 +; BE-I64-NEXT: vmov.f32 s18, s7 +; BE-I64-NEXT: vmov.f32 s16, s6 +; BE-I64-NEXT: vmov.f32 s20, s5 +; BE-I64-NEXT: 
vmov.f32 s22, s4 +; BE-I64-NEXT: vmov.f32 s24, s3 +; BE-I64-NEXT: vmov.f32 s26, s2 +; BE-I64-NEXT: vmov.f32 s28, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: vmov r0, s28 +; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r10, r0 +; BE-I64-NEXT: vmov r0, s24 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: vmov r0, s26 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r7, r0 +; BE-I64-NEXT: vmov r0, s20 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r6, r0 +; BE-I64-NEXT: vmov r0, s22 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vmov r0, s18 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r4 +; BE-I64-NEXT: mov r11, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r6 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r7 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r5 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r10 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d8[0], r9 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; BE-I64-NEXT: vmov.32 d13[1], r5 +; BE-I64-NEXT: vmov.32 d8[1], r0 +; BE-I64-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEXT: vmov.32 d9[1], r11 +; BE-I64-NEXT: vmov.32 d14[1], r4 +; BE-I64-NEXT: vmov.32 d12[1], r7 +; BE-I64-NEXT: vmov.32 d10[1], r8 +; BE-I64-NEXT: vmov.32 d16[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d8 +; BE-I64-NEXT: vrev64.32 d3, d13 +; BE-I64-NEXT: vrev64.32 d5, d11 +; BE-I64-NEXT: vrev64.32 d7, d9 +; BE-I64-NEXT: vrev64.32 d0, d14 +; BE-I64-NEXT: vrev64.32 d2, d12 +; BE-I64-NEXT: vrev64.32 d4, d10 +; BE-I64-NEXT: vrev64.32 d6, d16 +; BE-I64-NEXT: add sp, sp, #8 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x) + ret <8 x iXLen> %a +} + +define <16 x iXLen> @lrint_v16f16(<16 x half> %x) { +; LE-I32-LABEL: lrint_v16f16: +; LE-I32: @ %bb.0: +; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: .pad #8 +; LE-I32-NEXT: sub sp, sp, #8 +; LE-I32-NEXT: vmov r0, s15 +; LE-I32-NEXT: vstr s13, [sp, #4] @ 4-byte Spill +; LE-I32-NEXT: vmov.f32 s26, s14 +; LE-I32-NEXT: vstr s0, [sp] @ 4-byte Spill +; LE-I32-NEXT: vmov.f32 s20, s12 +; LE-I32-NEXT: vmov.f32 s22, s11 +; LE-I32-NEXT: vmov.f32 s18, s10 +; LE-I32-NEXT: vmov.f32 s17, s9 +; LE-I32-NEXT: vmov.f32 s24, s8 +; LE-I32-NEXT: vmov.f32 s19, s7 +; LE-I32-NEXT: vmov.f32 s30, s6 +; LE-I32-NEXT: vmov.f32 s21, s5 +; LE-I32-NEXT: vmov.f32 s16, s4 +; LE-I32-NEXT: vmov.f32 s23, s3 +; LE-I32-NEXT: vmov.f32 s28, s2 +; LE-I32-NEXT: vmov.f32 s25, s1 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; 
LE-I32-NEXT: mov r8, r0 +; LE-I32-NEXT: vmov r0, s17 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r9, r0 +; LE-I32-NEXT: vmov r0, s22 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r10, r0 +; LE-I32-NEXT: vmov r0, s21 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r7, r0 +; LE-I32-NEXT: vmov r0, s19 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r4, r0 +; LE-I32-NEXT: vmov r0, s25 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r5, r0 +; LE-I32-NEXT: vmov r0, s23 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: mov r6, r0 +; LE-I32-NEXT: vmov r0, s20 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[0], r0 +; LE-I32-NEXT: vmov r0, s26 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d11[0], r0 +; LE-I32-NEXT: vmov r0, s24 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d12[0], r0 +; LE-I32-NEXT: vmov r0, s18 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d13[0], r0 +; LE-I32-NEXT: vmov r0, s16 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d8[0], r0 +; LE-I32-NEXT: vmov r0, s30 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d9[0], r0 +; LE-I32-NEXT: vmov r0, s28 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload +; LE-I32-NEXT: vmov.32 d15[0], r0 +; LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r6 +; LE-I32-NEXT: vmov.32 d14[0], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r5 +; LE-I32-NEXT: vmov.32 d15[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r4 +; LE-I32-NEXT: vmov.32 d14[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r7 +; LE-I32-NEXT: vmov.32 d9[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r10 +; LE-I32-NEXT: vmov.32 d8[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov s0, r9 +; LE-I32-NEXT: vmov.32 d13[1], r0 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; LE-I32-NEXT: vmov.32 d12[1], r0 +; LE-I32-NEXT: vmov r0, s0 +; LE-I32-NEXT: bl __aeabi_h2f +; LE-I32-NEXT: vmov s0, r0 +; LE-I32-NEXT: vmov.32 d11[1], r8 +; LE-I32-NEXT: bl lrintf +; LE-I32-NEXT: vmov.32 d10[1], r0 +; LE-I32-NEXT: vorr q0, q7, q7 +; LE-I32-NEXT: vorr q1, q4, q4 +; LE-I32-NEXT: vorr q2, q6, q6 +; LE-I32-NEXT: vorr q3, q5, q5 +; LE-I32-NEXT: add sp, sp, #8 +; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; LE-I64-LABEL: lrint_v16f16: +; LE-I64: @ %bb.0: +; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; LE-I64-NEXT: .pad #4 +; LE-I64-NEXT: sub sp, sp, #4 +; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: .pad #120 +; LE-I64-NEXT: sub sp, sp, #120 +; LE-I64-NEXT: mov r11, r0 +; LE-I64-NEXT: vmov r0, s7 +; LE-I64-NEXT: vstr s15, [sp, #24] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s23, s13 +; LE-I64-NEXT: vstr s14, [sp, #100] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s25, s12 +; LE-I64-NEXT: vmov.f32 s27, s11 +; LE-I64-NEXT: vstr s10, [sp, #104] @ 4-byte Spill +; 
LE-I64-NEXT: vstr s9, [sp, #108] @ 4-byte Spill +; LE-I64-NEXT: vmov.f32 s24, s8 +; LE-I64-NEXT: vmov.f32 s19, s6 +; LE-I64-NEXT: vmov.f32 s29, s5 +; LE-I64-NEXT: vmov.f32 s17, s4 +; LE-I64-NEXT: vmov.f32 s16, s3 +; LE-I64-NEXT: vmov.f32 s21, s2 +; LE-I64-NEXT: vmov.f32 s26, s1 +; LE-I64-NEXT: vmov.f32 s18, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov r0, s25 +; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s27 +; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r6, r0 +; LE-I64-NEXT: vmov r0, s29 +; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: vmov r0, s23 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vmov.32 d17[0], r6 +; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: mov r6, r0 +; LE-I64-NEXT: vmov r0, s17 +; LE-I64-NEXT: vmov r8, s21 +; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill +; LE-I64-NEXT: vmov r10, s19 +; LE-I64-NEXT: vmov.32 d10[0], r5 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vmov.32 d11[0], r6 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: mov r0, r10 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d11[0], r7 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: mov r7, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r6, r0 +; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d11[1], r0 +; LE-I64-NEXT: vmov r0, s18 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: mov r5, r0 +; LE-I64-NEXT: vmov r0, s16 +; LE-I64-NEXT: vmov.32 d10[1], r7 +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov s0, r0 +; LE-I64-NEXT: vmov.32 d15[1], r4 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d9[0], r0 +; LE-I64-NEXT: vmov r0, s26 +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov r8, s24 +; LE-I64-NEXT: vmov.32 d14[1], r9 +; LE-I64-NEXT: mov r10, r1 +; LE-I64-NEXT: vmov s24, r5 +; LE-I64-NEXT: vldr s0, [sp, #24] @ 4-byte Reload +; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill +; LE-I64-NEXT: vmov r7, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: vmov s22, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d14[0], r0 +; LE-I64-NEXT: vmov s24, r6 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d15[0], r0 +; LE-I64-NEXT: mov r0, r7 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s24 +; LE-I64-NEXT: vmov s22, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s22 +; LE-I64-NEXT: vmov.32 d8[0], r0 +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: mov r9, r1 +; LE-I64-NEXT: vmov.32 d15[1], r6 +; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: 
vmov.32 d13[0], r0 +; LE-I64-NEXT: mov r0, r8 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload +; LE-I64-NEXT: mov r7, r0 +; LE-I64-NEXT: vmov.32 d14[1], r5 +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload +; LE-I64-NEXT: vmov s20, r0 +; LE-I64-NEXT: vmov.32 d13[1], r6 +; LE-I64-NEXT: vmov r4, s0 +; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload +; LE-I64-NEXT: vmov r0, s0 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s20 +; LE-I64-NEXT: vmov s16, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s16 +; LE-I64-NEXT: mov r5, r1 +; LE-I64-NEXT: vmov.32 d12[0], r0 +; LE-I64-NEXT: vmov s18, r7 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.32 d11[0], r0 +; LE-I64-NEXT: mov r0, r4 +; LE-I64-NEXT: mov r6, r1 +; LE-I64-NEXT: bl __aeabi_h2f +; LE-I64-NEXT: vmov.f32 s0, s18 +; LE-I64-NEXT: vmov s16, r0 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: vmov.f32 s0, s16 +; LE-I64-NEXT: vmov.32 d10[0], r0 +; LE-I64-NEXT: mov r4, r1 +; LE-I64-NEXT: vmov.32 d11[1], r6 +; LE-I64-NEXT: bl lrintf +; LE-I64-NEXT: add lr, sp, #80 +; LE-I64-NEXT: vmov.32 d10[1], r4 +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #40 +; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #8 +; LE-I64-NEXT: vmov.32 d16[0], r0 +; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #24 +; LE-I64-NEXT: vmov.32 d19[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d21[1], r10 +; LE-I64-NEXT: vmov.32 d18[1], r0 +; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload +; LE-I64-NEXT: vmov.32 d12[1], r5 +; LE-I64-NEXT: vmov.32 d17[1], r0 +; LE-I64-NEXT: add r0, r11, #64 +; LE-I64-NEXT: vmov.32 d16[1], r1 +; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]! +; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]! +; LE-I64-NEXT: vmov.32 d20[1], r9 +; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128] +; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]! +; LE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]! +; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: add lr, sp, #56 +; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]! 
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload +; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128] +; LE-I64-NEXT: add sp, sp, #120 +; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; LE-I64-NEXT: add sp, sp, #4 +; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; BE-I32-LABEL: lrint_v16f16: +; BE-I32: @ %bb.0: +; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: .pad #16 +; BE-I32-NEXT: sub sp, sp, #16 +; BE-I32-NEXT: vmov r0, s1 +; BE-I32-NEXT: vstr s14, [sp, #4] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s30, s15 +; BE-I32-NEXT: vstr s13, [sp, #12] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s17, s12 +; BE-I32-NEXT: vstr s10, [sp, #8] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s19, s11 +; BE-I32-NEXT: vstr s8, [sp] @ 4-byte Spill +; BE-I32-NEXT: vmov.f32 s21, s9 +; BE-I32-NEXT: vmov.f32 s23, s7 +; BE-I32-NEXT: vmov.f32 s24, s6 +; BE-I32-NEXT: vmov.f32 s25, s5 +; BE-I32-NEXT: vmov.f32 s26, s4 +; BE-I32-NEXT: vmov.f32 s27, s3 +; BE-I32-NEXT: vmov.f32 s28, s2 +; BE-I32-NEXT: vmov.f32 s29, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: mov r8, r0 +; BE-I32-NEXT: vmov r0, s27 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r9, r0 +; BE-I32-NEXT: vmov r0, s25 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r10, r0 +; BE-I32-NEXT: vmov r0, s23 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r7, r0 +; BE-I32-NEXT: vmov r0, s21 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r4, r0 +; BE-I32-NEXT: vmov r0, s19 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r5, r0 +; BE-I32-NEXT: vmov r0, s30 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: mov r6, r0 +; BE-I32-NEXT: vmov r0, s17 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d8[0], r0 +; BE-I32-NEXT: vmov r0, s29 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d10[0], r0 +; BE-I32-NEXT: vmov r0, s28 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d11[0], r0 +; BE-I32-NEXT: vmov r0, s26 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d14[0], r0 +; BE-I32-NEXT: vmov r0, s24 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d15[0], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d12[0], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp, #8] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d9[0], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r6 +; BE-I32-NEXT: vmov.32 d13[0], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r5 +; BE-I32-NEXT: vmov.32 d9[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r4 +; BE-I32-NEXT: vmov.32 d13[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r7 +; BE-I32-NEXT: vmov.32 d12[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r10 +; BE-I32-NEXT: 
vmov.32 d15[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov s0, r9 +; BE-I32-NEXT: vmov.32 d14[1], r0 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vldr s0, [sp, #12] @ 4-byte Reload +; BE-I32-NEXT: vmov.32 d11[1], r0 +; BE-I32-NEXT: vmov r0, s0 +; BE-I32-NEXT: bl __aeabi_h2f +; BE-I32-NEXT: vmov s0, r0 +; BE-I32-NEXT: vmov.32 d10[1], r8 +; BE-I32-NEXT: bl lrintf +; BE-I32-NEXT: vmov.32 d8[1], r0 +; BE-I32-NEXT: vrev64.32 q0, q5 +; BE-I32-NEXT: vrev64.32 q1, q7 +; BE-I32-NEXT: vrev64.32 q2, q6 +; BE-I32-NEXT: vrev64.32 q3, q4 +; BE-I32-NEXT: add sp, sp, #16 +; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; +; BE-I64-LABEL: lrint_v16f16: +; BE-I64: @ %bb.0: +; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; BE-I64-NEXT: .pad #4 +; BE-I64-NEXT: sub sp, sp, #4 +; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: .pad #112 +; BE-I64-NEXT: sub sp, sp, #112 +; BE-I64-NEXT: mov r11, r0 +; BE-I64-NEXT: vmov r0, s14 +; BE-I64-NEXT: vmov.f32 s17, s15 +; BE-I64-NEXT: vstr s13, [sp, #52] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s21, s12 +; BE-I64-NEXT: vstr s10, [sp, #68] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s23, s11 +; BE-I64-NEXT: vstr s7, [sp, #72] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s19, s9 +; BE-I64-NEXT: vstr s4, [sp, #28] @ 4-byte Spill +; BE-I64-NEXT: vmov.f32 s26, s8 +; BE-I64-NEXT: vmov.f32 s24, s6 +; BE-I64-NEXT: vmov.f32 s18, s5 +; BE-I64-NEXT: vmov.f32 s25, s3 +; BE-I64-NEXT: vmov.f32 s16, s2 +; BE-I64-NEXT: vmov.f32 s27, s1 +; BE-I64-NEXT: vmov.f32 s29, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: mov r8, r0 +; BE-I64-NEXT: vmov r0, s29 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r9, r0 +; BE-I64-NEXT: vmov r0, s27 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r7, r0 +; BE-I64-NEXT: vmov r0, s21 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r6, r0 +; BE-I64-NEXT: vmov r0, s25 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r5, r0 +; BE-I64-NEXT: vmov r0, s23 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s0, r5 +; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill +; BE-I64-NEXT: vstr d16, [sp, #96] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s0, r6 +; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill +; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s0, r7 +; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill +; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov s0, r9 +; BE-I64-NEXT: mov r10, r1 +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: vmov r0, s17 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vmov.32 d10[0], r8 +; BE-I64-NEXT: vmov r6, s19 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r0, r6 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: mov r6, r0 +; BE-I64-NEXT: vmov r0, s18 +; BE-I64-NEXT: vmov.32 d10[1], r4 +; BE-I64-NEXT: vstr d10, [sp, #40] @ 8-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: 
mov r4, r0 +; BE-I64-NEXT: vmov r0, s16 +; BE-I64-NEXT: vmov.32 d11[1], r7 +; BE-I64-NEXT: vstr d11, [sp, #32] @ 8-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.32 d15[1], r5 +; BE-I64-NEXT: vmov s0, r0 +; BE-I64-NEXT: vstr d15, [sp, #16] @ 8-byte Spill +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vldr s0, [sp, #28] @ 4-byte Reload +; BE-I64-NEXT: vmov r5, s26 +; BE-I64-NEXT: vmov.32 d16[0], r0 +; BE-I64-NEXT: vmov s26, r4 +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: mov r8, r1 +; BE-I64-NEXT: vmov.32 d14[1], r10 +; BE-I64-NEXT: vmov r4, s24 +; BE-I64-NEXT: vstr d16, [sp] @ 8-byte Spill +; BE-I64-NEXT: vstr d14, [sp, #8] @ 8-byte Spill +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s26 +; BE-I64-NEXT: vmov s22, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s22 +; BE-I64-NEXT: mov r7, r1 +; BE-I64-NEXT: vmov.32 d13[0], r0 +; BE-I64-NEXT: vmov s24, r6 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d14[0], r0 +; BE-I64-NEXT: mov r0, r4 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s24 +; BE-I64-NEXT: vmov s22, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s22 +; BE-I64-NEXT: mov r9, r1 +; BE-I64-NEXT: vmov.32 d12[0], r0 +; BE-I64-NEXT: vmov.32 d14[1], r6 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d11[0], r0 +; BE-I64-NEXT: mov r0, r5 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vldr s0, [sp, #52] @ 4-byte Reload +; BE-I64-NEXT: mov r4, r0 +; BE-I64-NEXT: vmov.32 d13[1], r7 +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vldr s0, [sp, #68] @ 4-byte Reload +; BE-I64-NEXT: vmov s20, r0 +; BE-I64-NEXT: vmov.32 d11[1], r6 +; BE-I64-NEXT: vmov r7, s0 +; BE-I64-NEXT: vldr s0, [sp, #72] @ 4-byte Reload +; BE-I64-NEXT: vmov r0, s0 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s20 +; BE-I64-NEXT: vmov s16, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r5, r1 +; BE-I64-NEXT: vmov.32 d10[0], r0 +; BE-I64-NEXT: vmov s18, r4 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d15[0], r0 +; BE-I64-NEXT: mov r0, r7 +; BE-I64-NEXT: mov r4, r1 +; BE-I64-NEXT: bl __aeabi_h2f +; BE-I64-NEXT: vmov.f32 s0, s18 +; BE-I64-NEXT: vmov s16, r0 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.f32 s0, s16 +; BE-I64-NEXT: mov r6, r1 +; BE-I64-NEXT: vmov.32 d9[0], r0 +; BE-I64-NEXT: vmov.32 d15[1], r4 +; BE-I64-NEXT: bl lrintf +; BE-I64-NEXT: vmov.32 d24[0], r0 +; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload +; BE-I64-NEXT: vldr d23, [sp, #56] @ 8-byte Reload +; BE-I64-NEXT: vldr d20, [sp, #8] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d23[1], r0 +; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload +; BE-I64-NEXT: vldr d22, [sp, #80] @ 8-byte Reload +; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d21, d20 +; BE-I64-NEXT: vmov.32 d22[1], r0 +; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload +; BE-I64-NEXT: vldr d30, [sp] @ 8-byte Reload +; BE-I64-NEXT: vldr d25, [sp, #96] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d20, d26 +; BE-I64-NEXT: vldr d26, [sp, #32] @ 8-byte Reload +; BE-I64-NEXT: vmov.32 d10[1], r5 +; BE-I64-NEXT: vmov.32 d12[1], r9 +; BE-I64-NEXT: vldr d28, [sp, #40] @ 8-byte Reload +; BE-I64-NEXT: vrev64.32 d27, d26 +; BE-I64-NEXT: vmov.32 d25[1], r0 +; BE-I64-NEXT: add r0, r11, #64 +; BE-I64-NEXT: vmov.32 d30[1], r8 +; BE-I64-NEXT: vmov.32 d9[1], r6 +; BE-I64-NEXT: vrev64.32 d26, d28 +; BE-I64-NEXT: vrev64.32 d29, d10 +; BE-I64-NEXT: vmov.32 d24[1], r1 +; BE-I64-NEXT: vrev64.32 d1, d12 
+; BE-I64-NEXT: vrev64.32 d28, d23 +; BE-I64-NEXT: vrev64.32 d23, d22 +; BE-I64-NEXT: vrev64.32 d22, d30 +; BE-I64-NEXT: vrev64.32 d31, d25 +; BE-I64-NEXT: vrev64.32 d0, d9 +; BE-I64-NEXT: vrev64.32 d30, d24 +; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]! +; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]! +; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]! +; BE-I64-NEXT: vrev64.32 d19, d13 +; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128] +; BE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]! +; BE-I64-NEXT: vrev64.32 d18, d14 +; BE-I64-NEXT: vst1.64 {d22, d23}, [r11:128]! +; BE-I64-NEXT: vrev64.32 d17, d15 +; BE-I64-NEXT: vrev64.32 d16, d11 +; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]! +; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128] +; BE-I64-NEXT: add sp, sp, #112 +; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; BE-I64-NEXT: add sp, sp, #4 +; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x) + ret <16 x iXLen> %a +} define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; LE-I32-LABEL: lrint_v1f32: