diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index aa663556deb76..8c82161bd15c6 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -7123,19 +7123,6 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, ImmMask <<= 1; } - if (DAG.getDataLayout().isBigEndian()) { - // Reverse the order of elements within the vector. - unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8; - unsigned Mask = (1 << BytesPerElem) - 1; - unsigned NumElems = 8 / BytesPerElem; - unsigned NewImm = 0; - for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) { - unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask); - NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem; - } - Imm = NewImm; - } - // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; @@ -7968,7 +7955,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov); } // Try an immediate VMVN. @@ -7978,7 +7965,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov); } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. @@ -18606,7 +18593,9 @@ static SDValue PerformBITCASTCombine(SDNode *N, // We may have a bitcast of something that has already had this bitcast // combine performed on it, so skip past any VECTOR_REG_CASTs. 
- while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST) + if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST && + Src.getOperand(0).getValueType().getScalarSizeInBits() <= + Src.getValueType().getScalarSizeInBits()) Src = Src.getOperand(0); // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll index 4026495a0f2b4..a4f5d1c61eae7 100644 --- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll +++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll @@ -101,9 +101,8 @@ define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) { ; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff ; CHECK-NEXT: vldr d17, [r0] ; CHECK-NEXT: vrev64.16 d18, d0 -; CHECK-NEXT: vrev64.16 d17, d17 -; CHECK-NEXT: vrev64.16 d16, d16 ; CHECK-NEXT: vadd.i16 d16, d18, d16 +; CHECK-NEXT: vrev64.16 d17, d17 ; CHECK-NEXT: vadd.f16 d16, d16, d17 ; CHECK-NEXT: vrev64.16 d16, d16 ; CHECK-NEXT: vstr d16, [r0] diff --git a/llvm/test/CodeGen/ARM/big-endian-vmov.ll b/llvm/test/CodeGen/ARM/big-endian-vmov.ll index 1cb7a030d58c2..7e0947ccfd58e 100644 --- a/llvm/test/CodeGen/ARM/big-endian-vmov.ll +++ b/llvm/test/CodeGen/ARM/big-endian-vmov.ll @@ -10,7 +10,8 @@ define arm_aapcs_vfpcc <8 x i8> @vmov_i8() { ; ; CHECK-BE-LABEL: vmov_i8: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xff +; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000 +; CHECK-BE-NEXT: vrev64.8 d0, d16 ; CHECK-BE-NEXT: bx lr ret <8 x i8> } @@ -23,7 +24,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_a() { ; ; CHECK-BE-LABEL: vmov_i16_a: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xffff +; CHECK-BE-NEXT: vmov.i64 d16, #0xffff000000000000 +; CHECK-BE-NEXT: vrev64.16 d0, d16 ; CHECK-BE-NEXT: bx lr ret <4 x i16> } @@ -36,7 +38,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_b() { ; ; CHECK-BE-LABEL: vmov_i16_b: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xff +; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000 +; CHECK-BE-NEXT: vrev64.16 d0, d16 ; CHECK-BE-NEXT: bx lr ret <4 x i16> } @@ -49,7 +52,8 @@ define arm_aapcs_vfpcc <4 x i16> @vmov_i16_c() { ; ; CHECK-BE-LABEL: vmov_i16_c: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xff00 +; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000 +; CHECK-BE-NEXT: vrev64.16 d0, d16 ; CHECK-BE-NEXT: bx lr ret <4 x i16> } @@ -62,7 +66,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_a() { ; ; CHECK-BE-LABEL: vmov_i32_a: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xffffffff +; CHECK-BE-NEXT: vmov.i64 d16, #0xffffffff00000000 +; CHECK-BE-NEXT: vrev64.32 d0, d16 ; CHECK-BE-NEXT: bx lr ret <2 x i32> } @@ -75,7 +80,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_b() { ; ; CHECK-BE-LABEL: vmov_i32_b: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xff +; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000 +; CHECK-BE-NEXT: vrev64.32 d0, d16 ; CHECK-BE-NEXT: bx lr ret <2 x i32> } @@ -88,7 +94,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_c() { ; ; CHECK-BE-LABEL: vmov_i32_c: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xff00 +; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000000000 +; CHECK-BE-NEXT: vrev64.32 d0, d16 ; CHECK-BE-NEXT: bx lr ret <2 x i32> } @@ -101,7 +108,8 @@ define arm_aapcs_vfpcc <2 x i32> @vmov_i32_d() { ; ; CHECK-BE-LABEL: vmov_i32_d: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xff0000 +; CHECK-BE-NEXT: vmov.i64 d16, #0xff000000000000 +; CHECK-BE-NEXT: vrev64.32 d0, d16 ; CHECK-BE-NEXT: bx lr ret <2 x i32> } @@ -114,7 +122,8 @@ define 
arm_aapcs_vfpcc <2 x i32> @vmov_i32_e() { ; ; CHECK-BE-LABEL: vmov_i32_e: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 d0, #0xff000000 +; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000000000 +; CHECK-BE-NEXT: vrev64.32 d0, d16 ; CHECK-BE-NEXT: bx lr ret <2 x i32> } @@ -128,10 +137,16 @@ define arm_aapcs_vfpcc <1 x i64> @vmov_i64_a() { } define arm_aapcs_vfpcc <1 x i64> @vmov_i64_b() { -; CHECK-LABEL: vmov_i64_b: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i64 d0, #0xffff00ff0000ff -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: vmov_i64_b: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i64 d0, #0xffff00ff0000ff +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: vmov_i64_b: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov.i64 d16, #0xff0000ff00ffff00 +; CHECK-BE-NEXT: vrev64.32 d0, d16 +; CHECK-BE-NEXT: bx lr ret <1 x i64> } @@ -157,11 +172,18 @@ define arm_aapcs_vfpcc <4 x i32> @vmov_v4i32_b() { } define arm_aapcs_vfpcc <2 x i64> @and_v2i64_b(<2 x i64> %a) { -; CHECK-LABEL: and_v2i64_b: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i64 q8, #0xffff00ff0000ff -; CHECK-NEXT: vand q0, q0, q8 -; CHECK-NEXT: bx lr +; CHECK-LE-LABEL: and_v2i64_b: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i64 q8, #0xffff00ff0000ff +; CHECK-LE-NEXT: vand q0, q0, q8 +; CHECK-LE-NEXT: bx lr +; +; CHECK-BE-LABEL: and_v2i64_b: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vand q0, q0, q8 +; CHECK-BE-NEXT: bx lr %b = and <2 x i64> %a, ret <2 x i64> %b } @@ -175,9 +197,8 @@ define arm_aapcs_vfpcc <4 x i32> @and_v4i32_b(<4 x i32> %a) { ; ; CHECK-BE-LABEL: and_v4i32_b: ; CHECK-BE: @ %bb.0: -; CHECK-BE-NEXT: vmov.i64 q8, #0xffff00ff0000ff +; CHECK-BE-NEXT: vmov.i64 q8, #0xff0000ff00ffff00 ; CHECK-BE-NEXT: vrev64.32 q9, q0 -; CHECK-BE-NEXT: vrev64.32 q8, q8 ; CHECK-BE-NEXT: vand q8, q9, q8 ; CHECK-BE-NEXT: vrev64.32 q0, q8 ; CHECK-BE-NEXT: bx lr @@ -198,7 +219,6 @@ define arm_aapcs_vfpcc <8 x i16> @vmvn_v16i8_m1() { ret <8 x i16> } -; FIXME: This is incorrect for BE define arm_aapcs_vfpcc <8 x i16> @and_v8i16_m1(<8 x i16> %a) { ; CHECK-LE-LABEL: and_v8i16_m1: ; CHECK-LE: @ %bb.0: @@ -227,7 +247,6 @@ define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_m1(<8 x i16> %a) { ; CHECK-BE: @ %bb.0: ; CHECK-BE-NEXT: vmvn.i32 q8, #0x10000 ; CHECK-BE-NEXT: vrev64.16 q9, q0 -; CHECK-BE-NEXT: vrev32.16 q8, q8 ; CHECK-BE-NEXT: veor q8, q9, q8 ; CHECK-BE-NEXT: vrev64.16 q0, q8 ; CHECK-BE-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll index 8835497669b32..ef0592b29ab37 100644 --- a/llvm/test/CodeGen/ARM/vmov.ll +++ b/llvm/test/CodeGen/ARM/vmov.ll @@ -139,10 +139,16 @@ define arm_aapcs_vfpcc <2 x i32> @v_mvni32f() nounwind { } define arm_aapcs_vfpcc <1 x i64> @v_movi64() nounwind { -; CHECK-LABEL: v_movi64: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i64 d0, #0xff0000ff0000ffff -; CHECK-NEXT: mov pc, lr +; CHECK-LE-LABEL: v_movi64: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i64 d0, #0xff0000ff0000ffff +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: v_movi64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov.i64 d16, #0xffffff0000ff +; CHECK-BE-NEXT: vrev64.32 d0, d16 +; CHECK-BE-NEXT: mov pc, lr ret <1 x i64> < i64 18374687574888349695 > } @@ -889,11 +895,18 @@ define arm_aapcs_vfpcc void @v_movf32_sti64(ptr %p) { } define arm_aapcs_vfpcc void @v_movi64_sti64(ptr %p) { -; CHECK-LABEL: v_movi64_sti64: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i64 d16, #0xff -; CHECK-NEXT: vst1.64 {d16}, [r0:64] -; CHECK-NEXT: mov pc, lr +; CHECK-LE-LABEL: v_movi64_sti64: +; CHECK-LE: @ %bb.0: +; 
CHECK-LE-NEXT: vmov.i64 d16, #0xff +; CHECK-LE-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: v_movi64_sti64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov.i64 d16, #0xff00000000 +; CHECK-BE-NEXT: vrev64.32 d16, d16 +; CHECK-BE-NEXT: vst1.64 {d16}, [r0:64] +; CHECK-BE-NEXT: mov pc, lr call void @llvm.arm.neon.vst1.p0.v1i64(ptr %p, <1 x i64> , i32 8) ret void } @@ -1094,11 +1107,18 @@ define arm_aapcs_vfpcc void @v_movQf32_sti64(ptr %p) { } define arm_aapcs_vfpcc void @v_movQi64_sti64(ptr %p) { -; CHECK-LABEL: v_movQi64_sti64: -; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i64 q8, #0xff -; CHECK-NEXT: vst1.64 {d16, d17}, [r0:64] -; CHECK-NEXT: mov pc, lr +; CHECK-LE-LABEL: v_movQi64_sti64: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vmov.i64 q8, #0xff +; CHECK-LE-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: v_movQi64_sti64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vmov.i64 q8, #0xff00000000 +; CHECK-BE-NEXT: vrev64.32 q8, q8 +; CHECK-BE-NEXT: vst1.64 {d16, d17}, [r0:64] +; CHECK-BE-NEXT: mov pc, lr call void @llvm.arm.neon.vst1.p0.v2i64(ptr %p, <2 x i64> , i32 8) ret void } diff --git a/llvm/test/CodeGen/Thumb2/mve-be.ll b/llvm/test/CodeGen/Thumb2/mve-be.ll index 2f2ecc7647237..e1db733b13b41 100644 --- a/llvm/test/CodeGen/Thumb2/mve-be.ll +++ b/llvm/test/CodeGen/Thumb2/mve-be.ll @@ -232,7 +232,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_le(<4 x i32> %src) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x1 -; CHECK-BE-NEXT: vrev32.8 q0, q0 ; CHECK-BE-NEXT: vand q1, q1, q0 ; CHECK-BE-NEXT: vrev64.8 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -254,7 +253,6 @@ define arm_aapcs_vfpcc <16 x i8> @and_v16i8_be(<4 x i32> %src) { ; CHECK-BE: @ %bb.0: @ %entry ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x1000000 -; CHECK-BE-NEXT: vrev32.8 q0, q0 ; CHECK-BE-NEXT: vand q1, q1, q0 ; CHECK-BE-NEXT: vrev64.8 q0, q1 ; CHECK-BE-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll index c7c579f9d6536..4934d22320903 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll @@ -421,13 +421,14 @@ define void @foo_zext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) { ; CHECK-BE-NEXT: vmov.32 q1[3], r1 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: .LBB7_4: @ %else2 -; CHECK-BE-NEXT: vrev64.32 q3, q2 +; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vmov r2, s15 -; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff -; CHECK-BE-NEXT: vand q0, q0, q1 +; CHECK-BE-NEXT: vrev64.32 q3, q1 +; CHECK-BE-NEXT: vrev64.32 q1, q2 +; CHECK-BE-NEXT: vmov r2, s7 +; CHECK-BE-NEXT: vand q0, q0, q3 ; CHECK-BE-NEXT: rsbs r3, r2, #0 -; CHECK-BE-NEXT: vmov r3, s13 +; CHECK-BE-NEXT: vmov r3, s5 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31 ; CHECK-BE-NEXT: csetm r12, lt ; CHECK-BE-NEXT: rsbs r2, r3, #0 @@ -537,13 +538,14 @@ define void @foo_zext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) { ; CHECK-BE-NEXT: vmov.32 q1[3], r1 ; CHECK-BE-NEXT: vrev64.32 q0, q1 ; CHECK-BE-NEXT: .LBB8_4: @ %else2 -; CHECK-BE-NEXT: vrev64.32 q3, q2 +; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff00000000 ; CHECK-BE-NEXT: movs r1, #0 -; CHECK-BE-NEXT: vmov r2, s15 -; CHECK-BE-NEXT: vmov.i64 q1, #0xffffffff -; CHECK-BE-NEXT: vand q0, q0, q1 +; CHECK-BE-NEXT: vrev64.32 q3, q1 +; CHECK-BE-NEXT: vrev64.32 q1, q2 +; CHECK-BE-NEXT: vmov r2, s7 +; CHECK-BE-NEXT: vand q0, q0, q3 ; CHECK-BE-NEXT: 
rsbs r3, r2, #0 -; CHECK-BE-NEXT: vmov r3, s13 +; CHECK-BE-NEXT: vmov r3, s5 ; CHECK-BE-NEXT: sbcs.w r2, r1, r2, asr #31 ; CHECK-BE-NEXT: csetm r12, lt ; CHECK-BE-NEXT: rsbs r2, r3, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll index 470007878ec84..0d0e45956080d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-bitcast.ll @@ -115,7 +115,6 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_to_v8i1(i8 %b, <8 x i16> %a) { ; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vrev32.16 q0, q0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.16 q0, q1 ; CHECK-BE-NEXT: add sp, #4 @@ -145,7 +144,6 @@ define arm_aapcs_vfpcc <16 x i8> @bitcast_to_v16i1(i16 %b, <16 x i8> %a) { ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: rbit r0, r0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vrev32.8 q0, q0 ; CHECK-BE-NEXT: lsrs r0, r0, #16 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll index a92adf6f1a067..ba3d5c22fc671 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-loadstore.ll @@ -105,7 +105,6 @@ define arm_aapcs_vfpcc <8 x i16> @load_v8i1(ptr %src, <8 x i16> %a) { ; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vrev32.16 q0, q0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.16 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -130,7 +129,6 @@ define arm_aapcs_vfpcc <16 x i8> @load_v16i1(ptr %src, <16 x i8> %a) { ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 ; CHECK-BE-NEXT: rbit r0, r0 -; CHECK-BE-NEXT: vrev32.8 q0, q0 ; CHECK-BE-NEXT: lsrs r0, r0, #16 ; CHECK-BE-NEXT: vmsr p0, r0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 @@ -416,10 +414,9 @@ define arm_aapcs_vfpcc <8 x i16> @load_predcast8(ptr %i, <8 x i16> %a) { ; ; CHECK-BE-LABEL: load_predcast8: ; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vrev64.16 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vldr p0, [r0] -; CHECK-BE-NEXT: vrev32.16 q0, q0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.16 q0, q1 ; CHECK-BE-NEXT: bx lr @@ -439,10 +436,9 @@ define arm_aapcs_vfpcc <16 x i8> @load_predcast16(ptr %i, <16 x i8> %a) { ; ; CHECK-BE-LABEL: load_predcast16: ; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr p0, [r0] ; CHECK-BE-NEXT: vrev64.8 q1, q0 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0 -; CHECK-BE-NEXT: vldr p0, [r0] -; CHECK-BE-NEXT: vrev32.8 q0, q0 ; CHECK-BE-NEXT: vpsel q1, q1, q0 ; CHECK-BE-NEXT: vrev64.8 q0, q1 ; CHECK-BE-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll index 3bc129d0fd92e..c17066126083a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll @@ -156,11 +156,10 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) { ; CHECK-BE-NEXT: .pad #8 ; CHECK-BE-NEXT: sub sp, #8 ; CHECK-BE-NEXT: vrev64.16 q4, q1 -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: vrev64.16 q2, q0 -; CHECK-BE-NEXT: vrev32.16 q1, q1 -; CHECK-BE-NEXT: vcmp.i16 eq, q2, zr -; CHECK-BE-NEXT: vpsel q1, q4, q1 +; CHECK-BE-NEXT: vrev64.16 q1, q0 +; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q4, 
q0 ; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-BE-NEXT: vrev64.16 q0, q1 ; CHECK-BE-NEXT: bl ext_i16 @@ -209,11 +208,10 @@ define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) { ; CHECK-BE-NEXT: .pad #8 ; CHECK-BE-NEXT: sub sp, #8 ; CHECK-BE-NEXT: vrev64.8 q4, q1 -; CHECK-BE-NEXT: vmov.i32 q1, #0x0 -; CHECK-BE-NEXT: vrev64.8 q2, q0 -; CHECK-BE-NEXT: vrev32.8 q1, q1 -; CHECK-BE-NEXT: vcmp.i8 eq, q2, zr -; CHECK-BE-NEXT: vpsel q1, q4, q1 +; CHECK-BE-NEXT: vrev64.8 q1, q0 +; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr +; CHECK-BE-NEXT: vmov.i32 q0, #0x0 +; CHECK-BE-NEXT: vpsel q1, q4, q0 ; CHECK-BE-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-BE-NEXT: vrev64.8 q0, q1 ; CHECK-BE-NEXT: bl ext_i8 diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll index 729e4c5e89c75..fe63034c7acd3 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -115,7 +115,6 @@ entry: ret <16 x i8> } -; FIXME: This is incorrect for BE define arm_aapcs_vfpcc <16 x i8> @xor_int8_32(<16 x i8> %a) { ; CHECKLE-LABEL: xor_int8_32: ; CHECKLE: @ %bb.0: @ %entry @@ -127,7 +126,6 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int8_32(<16 x i8> %a) { ; CHECKBE: @ %bb.0: @ %entry ; CHECKBE-NEXT: vmov.i32 q1, #0x1 ; CHECKBE-NEXT: vrev64.8 q2, q0 -; CHECKBE-NEXT: vrev32.8 q1, q1 ; CHECKBE-NEXT: veor q1, q2, q1 ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr @@ -159,10 +157,9 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int8_64(<16 x i8> %a) { ; ; CHECKBE-LABEL: xor_int8_64: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i64 q1, #0xff0000ffff00ffff -; CHECKBE-NEXT: vrev64.8 q2, q1 -; CHECKBE-NEXT: vrev64.8 q1, q0 -; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vmov.i64 q1, #0xffff00ffff0000ff +; CHECKBE-NEXT: vrev64.8 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -372,9 +369,8 @@ define arm_aapcs_vfpcc <8 x i16> @xor_int16_64(<8 x i16> %a) { ; CHECKBE-LABEL: xor_int16_64: ; CHECKBE: @ %bb.0: @ %entry ; CHECKBE-NEXT: vmov.i64 q1, #0xff0000000000ff -; CHECKBE-NEXT: vrev64.16 q2, q1 -; CHECKBE-NEXT: vrev64.16 q1, q0 -; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 ; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -755,10 +751,9 @@ define arm_aapcs_vfpcc <4 x i32> @xor_int32_64(<4 x i32> %a) { ; ; CHECKBE-LABEL: xor_int32_64: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff0000ff00ff -; CHECKBE-NEXT: vrev64.32 q2, q1 -; CHECKBE-NEXT: vrev64.32 q1, q0 -; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vmov.i64 q1, #0xff00ffff00ff00 +; CHECKBE-NEXT: vrev64.32 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 ; CHECKBE-NEXT: vrev64.32 q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -841,11 +836,18 @@ entry: } define arm_aapcs_vfpcc <2 x i64> @xor_int64_ff(<2 x i64> %a) { -; CHECK-LABEL: xor_int64_ff: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i64 q1, #0xff -; CHECK-NEXT: veor q0, q0, q1 -; CHECK-NEXT: bx lr +; CHECKLE-LABEL: xor_int64_ff: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q1, #0xff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int64_ff: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q1, #0xff00000000 +; CHECKBE-NEXT: vrev64.32 q2, q1 +; CHECKBE-NEXT: veor q0, q0, q2 +; CHECKBE-NEXT: bx lr entry: %b = xor <2 x i64> %a, ret <2 x i64> %b @@ -886,11 +888,18 @@ entry: } define arm_aapcs_vfpcc <2 x i64> @xor_int64_ff0000ff0000ffff(<2 x 
i64> %a) { -; CHECK-LABEL: xor_int64_ff0000ff0000ffff: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i64 q1, #0xff0000ff0000ffff -; CHECK-NEXT: veor q0, q0, q1 -; CHECK-NEXT: bx lr +; CHECKLE-LABEL: xor_int64_ff0000ff0000ffff: +; CHECKLE: @ %bb.0: @ %entry +; CHECKLE-NEXT: vmov.i64 q1, #0xff0000ff0000ffff +; CHECKLE-NEXT: veor q0, q0, q1 +; CHECKLE-NEXT: bx lr +; +; CHECKBE-LABEL: xor_int64_ff0000ff0000ffff: +; CHECKBE: @ %bb.0: @ %entry +; CHECKBE-NEXT: vmov.i64 q1, #0xffffff0000ff +; CHECKBE-NEXT: vrev64.32 q2, q1 +; CHECKBE-NEXT: veor q0, q0, q2 +; CHECKBE-NEXT: bx lr entry: %b = xor <2 x i64> %a, ret <2 x i64> %b @@ -984,10 +993,9 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int64_0f000f0f(<16 x i8> %a) { ; ; CHECKBE-LABEL: xor_int64_0f000f0f: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00 -; CHECKBE-NEXT: vrev64.8 q2, q1 -; CHECKBE-NEXT: vrev64.8 q1, q0 -; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vmov.i64 q1, #0xff000000ff00ff +; CHECKBE-NEXT: vrev64.8 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -1018,10 +1026,9 @@ define arm_aapcs_vfpcc <8 x i16> @xor_int64_ff00ffff(<8 x i16> %a) { ; ; CHECKBE-LABEL: xor_int64_ff00ffff: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff -; CHECKBE-NEXT: vrev64.16 q2, q1 -; CHECKBE-NEXT: vrev64.16 q1, q0 -; CHECKBE-NEXT: veor q1, q1, q2 +; CHECKBE-NEXT: vmov.i64 q1, #0xffffffff0000ffff +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: veor q1, q2, q1 ; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -1055,7 +1062,6 @@ define arm_aapcs_vfpcc <16 x i8> @xor_int64_0f0f0f0f0f0f0f0f(<16 x i8> %a) { ; CHECKBE: @ %bb.0: @ %entry ; CHECKBE-NEXT: vmov.i16 q1, #0xff ; CHECKBE-NEXT: vrev64.8 q2, q0 -; CHECKBE-NEXT: vrev16.8 q1, q1 ; CHECKBE-NEXT: veor q1, q2, q1 ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr @@ -1195,10 +1201,9 @@ define arm_aapcs_vfpcc <16 x i8> @test(<16 x i8> %i) { ; ; CHECKBE-LABEL: test: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i64 q1, #0xff00ff000000ff00 -; CHECKBE-NEXT: vrev64.8 q2, q1 -; CHECKBE-NEXT: vrev64.8 q1, q0 -; CHECKBE-NEXT: vorr q1, q1, q2 +; CHECKBE-NEXT: vmov.i64 q1, #0xff000000ff00ff +; CHECKBE-NEXT: vrev64.8 q2, q0 +; CHECKBE-NEXT: vorr q1, q2, q1 ; CHECKBE-NEXT: vrev64.8 q0, q1 ; CHECKBE-NEXT: bx lr entry: @@ -1215,10 +1220,9 @@ define arm_aapcs_vfpcc <8 x i16> @test2(<8 x i16> %i) { ; ; CHECKBE-LABEL: test2: ; CHECKBE: @ %bb.0: @ %entry -; CHECKBE-NEXT: vmov.i64 q1, #0xffff0000ffffffff -; CHECKBE-NEXT: vrev64.16 q2, q1 -; CHECKBE-NEXT: vrev64.16 q1, q0 -; CHECKBE-NEXT: vorr q1, q1, q2 +; CHECKBE-NEXT: vmov.i64 q1, #0xffffffff0000ffff +; CHECKBE-NEXT: vrev64.16 q2, q0 +; CHECKBE-NEXT: vorr q1, q2, q1 ; CHECKBE-NEXT: vrev64.16 q0, q1 ; CHECKBE-NEXT: bx lr entry: