diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -158,6 +158,7 @@ DUPLANE16, DUPLANE32, DUPLANE64, + DUPLANE128, // Vector immedate moves MOVI, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2049,6 +2049,7 @@ MAKE_CASE(AArch64ISD::DUPLANE16) MAKE_CASE(AArch64ISD::DUPLANE32) MAKE_CASE(AArch64ISD::DUPLANE64) + MAKE_CASE(AArch64ISD::DUPLANE128) MAKE_CASE(AArch64ISD::MOVI) MAKE_CASE(AArch64ISD::MOVIshift) MAKE_CASE(AArch64ISD::MOVIedit) @@ -10558,18 +10559,17 @@ return SDValue(); // The DUPQ operation is indepedent of element type so normalise to i64s. - SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1)); SDValue Idx128 = Op.getOperand(2); // DUPQ can be used when idx is in range. 
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128); if (CIdx && (CIdx->getZExtValue() <= 3)) { SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64); - SDNode *DUPQ = - DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI); - return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0)); + return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI); } + SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1)); + // The ACLE says this must produce the same result as: // svtbl(data, svadd_x(svptrue_b64(), // svand_x(svptrue_b64(), svindex_u64(0, 1), 1), diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -583,6 +583,7 @@ def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; +def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1225,6 +1225,23 @@ (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast<Instruction>(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; + + def : Pat<(nxv16i8 (AArch64duplane128 nxv16i8:$Op1, i64:$imm)), + (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv8i16 (AArch64duplane128 nxv8i16:$Op1, i64:$imm)), + (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv4i32 (AArch64duplane128 nxv4i32:$Op1, i64:$imm)), + (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv2i64 (AArch64duplane128 nxv2i64:$Op1, i64:$imm)), 
+ (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv8f16 (AArch64duplane128 nxv8f16:$Op1, i64:$imm)), + (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv4f32 (AArch64duplane128 nxv4f32:$Op1, i64:$imm)), + (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv2f64 (AArch64duplane128 nxv2f64:$Op1, i64:$imm)), + (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; + def : Pat<(nxv8bf16 (AArch64duplane128 nxv8bf16:$Op1, i64:$imm)), + (!cast<Instruction>(NAME # _Q) $Op1, $imm)>; } class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,