diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -159,6 +159,8 @@ DUPLANE32, DUPLANE64, + DUPQLANE, + // Vector immedate moves MOVI, MOVIshift, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2049,6 +2049,7 @@ MAKE_CASE(AArch64ISD::DUPLANE16) MAKE_CASE(AArch64ISD::DUPLANE32) MAKE_CASE(AArch64ISD::DUPLANE64) + MAKE_CASE(AArch64ISD::DUPQLANE) MAKE_CASE(AArch64ISD::MOVI) MAKE_CASE(AArch64ISD::MOVIshift) MAKE_CASE(AArch64ISD::MOVIedit) @@ -10558,18 +10559,17 @@ return SDValue(); // The DUPQ operation is indepedent of element type so normalise to i64s. - SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1)); SDValue Idx128 = Op.getOperand(2); // DUPQ can be used when idx is in range. 
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128); if (CIdx && (CIdx->getZExtValue() <= 3)) { SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64); - SDNode *DUPQ = - DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI); - return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0)); + return DAG.getNode(AArch64ISD::DUPQLANE, DL, VT, Op.getOperand(1), CI); } + SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1)); + // The ACLE says this must produce the same result as: // svtbl(data, svadd_x(svptrue_b64(), // svand_x(svptrue_b64(), svindex_u64(0, 1), 1), diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -111,6 +111,12 @@ let hasSideEffects = 0; } +def G_DUPQLANE : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src, type1:$quadword); + let hasSideEffects = 0; +} + // Represents a trn1 instruction. Produced post-legalization from // G_SHUFFLE_VECTORs with appropriate masks. 
def G_TRN1 : AArch64GenericInstruction { @@ -221,6 +227,7 @@ def : GINodeEquiv<G_DUPLANE16, AArch64duplane16>; def : GINodeEquiv<G_DUPLANE32, AArch64duplane32>; def : GINodeEquiv<G_DUPLANE64, AArch64duplane64>; +def : GINodeEquiv<G_DUPQLANE, AArch64dupqlane>; def : GINodeEquiv<G_TRN1, AArch64trn1>; def : GINodeEquiv<G_TRN2, AArch64trn2>; def : GINodeEquiv<G_EXT, AArch64ext>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -280,6 +280,7 @@ SDTCisSameAs<0, 1>]>; def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64DupQLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>; def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -584,6 +585,8 @@ def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; +def AArch64dupqlane : SDNode<"AArch64ISD::DUPQLANE", SDT_AArch64DupQLane>; + def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -672,6 +672,23 @@ def : Pat<(nxv2bf16 (splat_vector (bf16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; + def : Pat<(nxv16i8 (AArch64dupqlane nxv16i8:$Op1, i64:$imm)), + (nxv16i8 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv8i16 (AArch64dupqlane nxv8i16:$Op1, i64:$imm)), + (nxv8i16 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv4i32 (AArch64dupqlane nxv4i32:$Op1, i64:$imm)), + (nxv4i32 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv2i64 (AArch64dupqlane nxv2i64:$Op1, i64:$imm)), + (nxv2i64 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv8f16 (AArch64dupqlane nxv8f16:$Op1, i64:$imm)), + (nxv8f16 
(DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv4f32 (AArch64dupqlane nxv4f32:$Op1, i64:$imm)), + (nxv4f32 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv2f64 (AArch64dupqlane nxv2f64:$Op1, i64:$imm)), + (nxv2f64 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv8bf16 (AArch64dupqlane nxv8bf16:$Op1, i64:$imm)), + (nxv8bf16 (DUP_ZZI_Q $Op1, $imm))>; + // Duplicate +0.0 into all vector elements def : Pat<(nxv8f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; def : Pat<(nxv4f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>;