diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -158,6 +158,7 @@ DUPLANE16, DUPLANE32, DUPLANE64, + DUPLANE128, // Vector immedate moves MOVI, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2049,6 +2049,7 @@ MAKE_CASE(AArch64ISD::DUPLANE16) MAKE_CASE(AArch64ISD::DUPLANE32) MAKE_CASE(AArch64ISD::DUPLANE64) + MAKE_CASE(AArch64ISD::DUPLANE128) MAKE_CASE(AArch64ISD::MOVI) MAKE_CASE(AArch64ISD::MOVIshift) MAKE_CASE(AArch64ISD::MOVIedit) @@ -10558,18 +10559,17 @@ return SDValue(); // The DUPQ operation is indepedent of element type so normalise to i64s. - SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1)); SDValue Idx128 = Op.getOperand(2); // DUPQ can be used when idx is in range. 
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128); if (CIdx && (CIdx->getZExtValue() <= 3)) { SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64); - SDNode *DUPQ = - DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI); - return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0)); + return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI); } + SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1)); + // The ACLE says this must produce the same result as: // svtbl(data, svadd_x(svptrue_b64(), // svand_x(svptrue_b64(), svindex_u64(0, 1), 1), diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -583,6 +583,7 @@ def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; +def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -672,6 +672,23 @@ def : Pat<(nxv2bf16 (splat_vector (bf16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; + def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)), + (nxv16i8 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv8i16 (AArch64duplane128 nxv8i16:$Op1, i64:$imm)), + (nxv8i16 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv4i32 (AArch64duplane128 nxv4i32:$Op1, i64:$imm)), + (nxv4i32 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv2i64 (AArch64duplane128 nxv2i64:$Op1, i64:$imm)), + (nxv2i64 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv8f16 (AArch64duplane128 
nxv8f16:$Op1, i64:$imm)), + (nxv8f16 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv4f32 (AArch64duplane128 nxv4f32:$Op1, i64:$imm)), + (nxv4f32 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv2f64 (AArch64duplane128 nxv2f64:$Op1, i64:$imm)), + (nxv2f64 (DUP_ZZI_Q $Op1, $imm))>; + def : Pat<(nxv8bf16 (AArch64duplane128 nxv8bf16:$Op1, i64:$imm)), + (nxv8bf16 (DUP_ZZI_Q $Op1, $imm))>; + // Duplicate +0.0 into all vector elements def : Pat<(nxv8f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; def : Pat<(nxv4f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>;