@@ -15,6 +15,15 @@ def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
1515 SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
// Type profile with one result and one operand: the result (index 0) is an
// integer and the operand (index 1) is a vector.
def SDT_LoongArchVecCond
    : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
1717
// Type profile for the VSHUF node: one result and three operands.
//   - index 0 (the result) is a vector;
//   - index 1 (first operand) is an integer vector;
//   - index 2 (second operand) has the same type as the result;
//   - index 3 (third operand) has the same type as index 2.
// NOTE(review): operand 1 is presumably the element selector -- confirm
// against the lowering code that builds this node.
def SDT_LoongArchVShuf
    : SDTypeProfile<1, 3, [SDTCisVec<0>,
                           SDTCisInt<1>, SDTCisVec<1>,
                           SDTCisSameAs<0, 2>,
                           SDTCisSameAs<2, 3>]>;
// Type profile for two-register vector nodes: one vector result and two
// operands, all constrained to the same type.
def SDT_LoongArchV2R
    : SDTypeProfile<1, 2, [SDTCisVec<0>,
                           SDTCisSameAs<0, 1>,
                           SDTCisSameAs<1, 2>]>;
// Type profile for nodes taking one vector and one scalar: the result is a
// vector, operand 1 has the result's type, and operand 2 is an i64.
// NOTE(review): the name starts with a lowercase 'l', unlike the sibling
// SDT_LoongArch* profiles; renaming it requires updating every user.
def SDT_loongArchV1RUimm
    : SDTypeProfile<1, 2, [SDTCisVec<0>,
                           SDTCisSameAs<0, 1>,
                           SDTCisVT<2, i64>]>;
26+
1827// Target nodes.
// Selection DAG node for LoongArchISD::VREPLVE, typed by SDT_LoongArchVreplve.
def loongarch_vreplve
    : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
2029def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
@@ -31,6 +40,23 @@ def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
// Selection DAG node for LoongArchISD::VPICK_ZEXT_ELT: one result and three
// operands, with index 2 constrained to the pointer type.
def loongarch_vpick_zext_elt
    : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
             SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
3342
// Selection DAG nodes for the vector shuffle/permute operations.
// VSHUF uses the three-operand SDT_LoongArchVShuf profile.
def loongarch_vshuf
    : SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>;

// The pick/pack/interleave nodes all share the SDT_LoongArchV2R profile.
def loongarch_vpickev
    : SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>;
def loongarch_vpickod
    : SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>;
def loongarch_vpackev
    : SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
def loongarch_vpackod
    : SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
def loongarch_vilvl
    : SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
def loongarch_vilvh
    : SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
50+
// Selection DAG nodes that share the SDT_loongArchV1RUimm profile.
def loongarch_vshuf4i
    : SDNode<"LoongArchISD::VSHUF4I", SDT_loongArchV1RUimm>;
def loongarch_vreplvei
    : SDNode<"LoongArchISD::VREPLVEI", SDT_loongArchV1RUimm>;
53+
// i64 immediate leaves that match only when the value passes isUInt<N>,
// for N = 1, 2, 3, 4 and 8.
def immZExt1
    : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
def immZExt2
    : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
def immZExt3
    : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>;
def immZExt4
    : ImmLeaf<i64, [{return isUInt<4>(Imm);}]>;
def immZExt8
    : ImmLeaf<i64, [{return isUInt<8>(Imm);}]>;
59+
3460class VecCond<SDPatternOperator OpNode, ValueType TyNode,
3561 RegisterClass RC = LSX128>
3662 : Pseudo<(outs GPR:$rd), (ins RC:$vj),
@@ -1682,6 +1708,128 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
// loongarch_vreplve on v2i64 with a GRLenVT second operand selects VREPLVE_D.
def : Pat<
  (loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
  (VREPLVE_D v2i64:$vj, GRLenVT:$rk)
>;
16841710
// VSHUF_{B/H/W/D}
//
// Byte form: the node's first operand ($va) is passed as the LAST operand of
// VSHUF_B, after $vj and $vk.
def : Pat<
  (loongarch_vshuf v16i8:$va, v16i8:$vj, v16i8:$vk),
  (VSHUF_B v16i8:$vj, v16i8:$vk, v16i8:$va)
>;

// Halfword/word/doubleword forms: operands are forwarded in node order, with
// the node's first operand bound to $vd.
// NOTE(review): $vd is presumably tied to the destination register -- confirm
// against the VSHUF_{H/W/D} instruction definitions.
def : Pat<
  (loongarch_vshuf v8i16:$vd, v8i16:$vj, v8i16:$vk),
  (VSHUF_H v8i16:$vd, v8i16:$vj, v8i16:$vk)
>;
def : Pat<
  (loongarch_vshuf v4i32:$vd, v4i32:$vj, v4i32:$vk),
  (VSHUF_W v4i32:$vd, v4i32:$vj, v4i32:$vk)
>;
def : Pat<
  (loongarch_vshuf v2i64:$vd, v2i64:$vj, v2i64:$vk),
  (VSHUF_D v2i64:$vd, v2i64:$vj, v2i64:$vk)
>;

// Floating-point data vectors keep an integer first operand of the same
// element width and select the W/D forms.
def : Pat<
  (loongarch_vshuf v4i32:$vd, v4f32:$vj, v4f32:$vk),
  (VSHUF_W v4i32:$vd, v4f32:$vj, v4f32:$vk)
>;
def : Pat<
  (loongarch_vshuf v2i64:$vd, v2f64:$vj, v2f64:$vk),
  (VSHUF_D v2i64:$vd, v2f64:$vj, v2f64:$vk)
>;
1724+
// Selection patterns shared by the two-register LSX permute nodes
// (VPICKEV, VPICKOD, VPACKEV, VPACKOD, VILVL, VILVH).
//
// OpNode is the selection DAG node to match and Inst is the instruction base
// name. Each integer vector type selects the Inst form with the matching
// element suffix (_B/_H/_W/_D); the floating-point types v4f32 and v2f64
// select the _W and _D forms respectively. Both operands are forwarded
// unchanged and in the same order ($vj, $vk).
multiclass LSXPermute2RPat<SDPatternOperator OpNode, string Inst> {
  def : Pat<(OpNode v16i8:$vj, v16i8:$vk),
            (!cast<Instruction>(Inst # "_B") v16i8:$vj, v16i8:$vk)>;
  def : Pat<(OpNode v8i16:$vj, v8i16:$vk),
            (!cast<Instruction>(Inst # "_H") v8i16:$vj, v8i16:$vk)>;
  def : Pat<(OpNode v4i32:$vj, v4i32:$vk),
            (!cast<Instruction>(Inst # "_W") v4i32:$vj, v4i32:$vk)>;
  def : Pat<(OpNode v2i64:$vj, v2i64:$vk),
            (!cast<Instruction>(Inst # "_D") v2i64:$vj, v2i64:$vk)>;
  def : Pat<(OpNode v4f32:$vj, v4f32:$vk),
            (!cast<Instruction>(Inst # "_W") v4f32:$vj, v4f32:$vk)>;
  def : Pat<(OpNode v2f64:$vj, v2f64:$vk),
            (!cast<Instruction>(Inst # "_D") v2f64:$vj, v2f64:$vk)>;
}

// VPICKEV_{B/H/W/D}
defm : LSXPermute2RPat<loongarch_vpickev, "VPICKEV">;

// VPICKOD_{B/H/W/D}
defm : LSXPermute2RPat<loongarch_vpickod, "VPICKOD">;

// VPACKEV_{B/H/W/D}
defm : LSXPermute2RPat<loongarch_vpackev, "VPACKEV">;

// VPACKOD_{B/H/W/D}
defm : LSXPermute2RPat<loongarch_vpackod, "VPACKOD">;

// VILVL_{B/H/W/D}
defm : LSXPermute2RPat<loongarch_vilvl, "VILVL">;

// VILVH_{B/H/W/D}
defm : LSXPermute2RPat<loongarch_vilvh, "VILVH">;
1808+
// VSHUF4I_{B/H/W}
//
// The second operand must satisfy immZExt8. v4f32 selects the same W form as
// v4i32. No pattern is defined here for the 64-bit element types.
def : Pat<
  (loongarch_vshuf4i v16i8:$vj, immZExt8:$ui8),
  (VSHUF4I_B v16i8:$vj, immZExt8:$ui8)
>;
def : Pat<
  (loongarch_vshuf4i v8i16:$vj, immZExt8:$ui8),
  (VSHUF4I_H v8i16:$vj, immZExt8:$ui8)
>;
def : Pat<
  (loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
  (VSHUF4I_W v4i32:$vj, immZExt8:$ui8)
>;
def : Pat<
  (loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
  (VSHUF4I_W v4f32:$vj, immZExt8:$ui8)
>;
1818+
// VREPLVEI_{B/H/W/D}
//
// The immediate predicate narrows as the element count drops: immZExt4 for
// v16i8, immZExt3 for v8i16, immZExt2 for the four-element types and immZExt1
// for the two-element types.
def : Pat<
  (loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
  (VREPLVEI_B v16i8:$vj, immZExt4:$ui4)
>;
def : Pat<
  (loongarch_vreplvei v8i16:$vj, immZExt3:$ui3),
  (VREPLVEI_H v8i16:$vj, immZExt3:$ui3)
>;
def : Pat<
  (loongarch_vreplvei v4i32:$vj, immZExt2:$ui2),
  (VREPLVEI_W v4i32:$vj, immZExt2:$ui2)
>;
def : Pat<
  (loongarch_vreplvei v2i64:$vj, immZExt1:$ui1),
  (VREPLVEI_D v2i64:$vj, immZExt1:$ui1)
>;

// Floating-point vectors select the W/D forms of the same element width.
def : Pat<
  (loongarch_vreplvei v4f32:$vj, immZExt2:$ui2),
  (VREPLVEI_W v4f32:$vj, immZExt2:$ui2)
>;
def : Pat<
  (loongarch_vreplvei v2f64:$vj, immZExt1:$ui1),
  (VREPLVEI_D v2f64:$vj, immZExt1:$ui1)
>;
1832+
16851833// VREPLVEI_{W/D}
// lsxsplatf32 of an FPR32 value: wrap the scalar with SUBREG_TO_REG at the
// sub_32 subregister index, then select VREPLVEI_W with immediate 0.
def : Pat<
  (lsxsplatf32 FPR32:$fj),
  (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)
>;
0 commit comments