diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1098,7 +1098,7 @@
       setOperationAction(ISD::MUL, VT, Custom);
       setOperationAction(ISD::MULHS, VT, Custom);
       setOperationAction(ISD::MULHU, VT, Custom);
-      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SETCC, VT, Custom);
@@ -1228,7 +1228,7 @@
       setOperationAction(ISD::MGATHER, VT, Custom);
       setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
-      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::FADD, VT, Custom);
       setOperationAction(ISD::FCOPYSIGN, VT, Custom);
@@ -1290,7 +1290,7 @@
       setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
-      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
     }
 
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
@@ -10451,54 +10451,28 @@
 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
-  SDLoc dl(Op);
   EVT VT = Op.getValueType();
-  EVT ElemVT = VT.getScalarType();
-  SDValue SplatVal = Op.getOperand(0);
 
   if (useSVEForFixedLengthVectorVT(VT))
     return LowerToScalableOp(Op, DAG);
 
-  // Extend input splat value where needed to fit into a GPR (32b or 64b only)
-  // FPRs don't have this restriction.
-  switch (ElemVT.getSimpleVT().SimpleTy) {
-  case MVT::i1: {
-    // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
-    // lowering code.
-
-    // We can handle the constant cases during isel.
-    if (isa<ConstantSDNode>(SplatVal))
-      return Op;
+  assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 &&
+         "Unexpected vector type!");
 
-    // The general case of i1. There isn't any natural way to do this,
-    // so we use some trickery with whilelo.
-    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
-    SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
-                           DAG.getValueType(MVT::i1));
-    SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
-                                       MVT::i64);
-    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
-                       DAG.getConstant(0, dl, MVT::i64), SplatVal);
-  }
-  case MVT::i8:
-  case MVT::i16:
-  case MVT::i32:
-    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
-    break;
-  case MVT::i64:
-    SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
-    break;
-  case MVT::f16:
-  case MVT::bf16:
-  case MVT::f32:
-  case MVT::f64:
-    // Fine as is
-    break;
-  default:
-    report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
-  }
+  // We can handle the constant cases during isel.
+  if (isa<ConstantSDNode>(Op.getOperand(0)))
+    return Op;
 
-  return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
+  // There isn't a natural way to handle the general i1 case, so we use some
+  // trickery with whilelo.
+  SDLoc DL(Op);
+  SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);
+  SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,
+                         DAG.getValueType(MVT::i1));
+  SDValue ID =
+      DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID,
+                     DAG.getConstant(0, DL, MVT::i64), SplatVal);
 }
 
 SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
@@ -14625,7 +14599,7 @@
   SDValue UnpkOp = Src->getOperand(0);
   SDValue Dup = N->getOperand(1);
 
-  if (Dup.getOpcode() != AArch64ISD::DUP)
+  if (Dup.getOpcode() != ISD::SPLAT_VECTOR)
     return SDValue();
 
   SDLoc DL(N);
@@ -14648,8 +14622,7 @@
   // Otherwise, make sure we propagate the AND to the operand
   // of the unpack
-  Dup = DAG.getNode(AArch64ISD::DUP, DL,
-                    UnpkOp->getValueType(0),
+  Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
                     DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
 
   SDValue And = DAG.getNode(ISD::AND, DL,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -313,8 +313,8 @@
 def AArch64bic_node : SDNode<"AArch64ISD::BIC", SDT_AArch64Arith_Unpred>;
 
 def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
-                          [(and node:$op1, (xor node:$op2, (AArch64dup (i32 -1)))),
-                           (and node:$op1, (xor node:$op2, (AArch64dup (i64 -1)))),
+                          [(and node:$op1, (xor node:$op2, (splat_vector (i32 -1)))),
+                           (and node:$op1, (xor node:$op2, (splat_vector (i64 -1)))),
                            (and node:$op1, (xor node:$op2, (SVEAllActive))),
                            (AArch64bic_node node:$op1, node:$op2)]>;
@@ -636,7 +636,7 @@
   defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;
 
   // Splat scalar register (unpredicated, GPR or vector + element index)
-  defm DUP_ZR  : sve_int_perm_dup_r<"dup", AArch64dup>;
+  defm DUP_ZR  : sve_int_perm_dup_r<"dup", splat_vector>;
   defm DUP_ZZI : sve_int_perm_dup_i<"dup">;
 
   // Splat scalar register (predicated)
@@ -644,67 +644,67 @@
   defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;
 
   // Duplicate FP scalar into all vector elements
-  def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
+  def : Pat<(nxv8f16 (splat_vector (f16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
-  def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))),
+  def : Pat<(nxv4f16 (splat_vector (f16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
-  def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))),
+  def : Pat<(nxv2f16 (splat_vector (f16 FPR16:$src))),
            (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
-  def : Pat<(nxv4f32 (AArch64dup (f32 FPR32:$src))),
+  def : Pat<(nxv4f32 (splat_vector (f32 FPR32:$src))),
            (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
-  def : Pat<(nxv2f32 (AArch64dup (f32 FPR32:$src))),
+  def : Pat<(nxv2f32 (splat_vector (f32 FPR32:$src))),
            (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
-  def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
+  def : Pat<(nxv2f64 (splat_vector (f64 FPR64:$src))),
            (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
-  def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+  def : Pat<(nxv8bf16 (splat_vector (bf16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
-  def : Pat<(nxv4bf16 (AArch64dup (bf16 FPR16:$src))),
+  def : Pat<(nxv4bf16 (splat_vector (bf16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
-  def :
Pat<(nxv2bf16 (AArch64dup (bf16 FPR16:$src))), + def : Pat<(nxv2bf16 (splat_vector (bf16 FPR16:$src))), (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>; // Duplicate +0.0 into all vector elements - def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; - def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>; - def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv4bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; - def : Pat<(nxv2bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv8f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv4f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv2f16 (splat_vector (f16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv4f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>; + def : Pat<(nxv2f32 (splat_vector (f32 fpimm0))), (DUP_ZI_S 0, 0)>; + def : Pat<(nxv2f64 (splat_vector (f64 fpimm0))), (DUP_ZI_D 0, 0)>; + def : Pat<(nxv8bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv4bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; + def : Pat<(nxv2bf16 (splat_vector (bf16 fpimm0))), (DUP_ZI_H 0, 0)>; // Duplicate Int immediate into all vector elements - def : Pat<(nxv16i8 (AArch64dup (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))), + def : Pat<(nxv16i8 (splat_vector (i32 (SVECpyDupImm8Pat i32:$a, i32:$b)))), (DUP_ZI_B $a, $b)>; - def : Pat<(nxv8i16 (AArch64dup (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)))), + def : Pat<(nxv8i16 (splat_vector (i32 (SVECpyDupImm16Pat i32:$a, i32:$b)))), (DUP_ZI_H $a, $b)>; - def : Pat<(nxv4i32 (AArch64dup (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)))), + def : Pat<(nxv4i32 (splat_vector (i32 (SVECpyDupImm32Pat i32:$a, i32:$b)))), (DUP_ZI_S $a, $b)>; - def : Pat<(nxv2i64 (AArch64dup (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)))), + def : Pat<(nxv2i64 (splat_vector (i64 (SVECpyDupImm64Pat i32:$a, i32:$b)))), (DUP_ZI_D $a, $b)>; // Duplicate immediate FP into all vector elements. 
- def : Pat<(nxv2f32 (AArch64dup (f32 fpimm:$val))), + def : Pat<(nxv2f32 (splat_vector (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv4f32 (AArch64dup (f32 fpimm:$val))), + def : Pat<(nxv4f32 (splat_vector (f32 fpimm:$val))), (DUP_ZR_S (MOVi32imm (bitcast_fpimm_to_i32 f32:$val)))>; - def : Pat<(nxv2f64 (AArch64dup (f64 fpimm:$val))), + def : Pat<(nxv2f64 (splat_vector (f64 fpimm:$val))), (DUP_ZR_D (MOVi64imm (bitcast_fpimm_to_i64 f64:$val)))>; // Duplicate FP immediate into all vector elements let AddedComplexity = 2 in { - def : Pat<(nxv8f16 (AArch64dup fpimm16:$imm8)), + def : Pat<(nxv8f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv4f16 (AArch64dup fpimm16:$imm8)), + def : Pat<(nxv4f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv2f16 (AArch64dup fpimm16:$imm8)), + def : Pat<(nxv2f16 (splat_vector fpimm16:$imm8)), (FDUP_ZI_H fpimm16:$imm8)>; - def : Pat<(nxv4f32 (AArch64dup fpimm32:$imm8)), + def : Pat<(nxv4f32 (splat_vector fpimm32:$imm8)), (FDUP_ZI_S fpimm32:$imm8)>; - def : Pat<(nxv2f32 (AArch64dup fpimm32:$imm8)), + def : Pat<(nxv2f32 (splat_vector fpimm32:$imm8)), (FDUP_ZI_S fpimm32:$imm8)>; - def : Pat<(nxv2f64 (AArch64dup fpimm64:$imm8)), + def : Pat<(nxv2f64 (splat_vector fpimm64:$imm8)), (FDUP_ZI_D fpimm64:$imm8)>; } @@ -1051,7 +1051,7 @@ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32))), (!cast(Inst # _SXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; // base + vector of unsigned 32bit scaled offsets - def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))), + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), (!cast(Inst # _UXTW_SCALED) PPR:$gp, GPR64:$base, ZPR:$offs)>; } @@ -1066,7 +1066,7 @@ def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32))), (!cast(Inst # _SXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>; // base + vector of unsigned 32bit offsets - def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))), + def : Pat<(Ty (Load (SVEDup0Undef), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), (!cast(Inst # _UXTW) PPR:$gp, GPR64:$base, ZPR:$offs)>; } @@ -1227,7 +1227,7 @@ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32)), (!cast(Inst # _SXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; // base + vector of unsigned 32bit scaled offsets - def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (AArch64dup (i64 0xFFFFFFFF))))), + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))), (!cast(Inst # _UXTW_SCALED) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; } @@ -1242,7 +1242,7 @@ def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (sext_inreg (nxv2i64 ZPR:$offs), nxv2i32)), (!cast(Inst # _SXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; // base + vector of unsigned 32bit offsets - def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (AArch64dup (i64 0xFFFFFFFF))))), + def : Pat<(Store (Ty ZPR:$data), (nxv2i1 PPR:$gp), 
GPR64:$base, (and (nxv2i64 ZPR:$offs), (nxv2i64 (splat_vector (i64 0xFFFFFFFF))))), (!cast(Inst # _UXTW) ZPR:$data, PPR:$gp, GPR64:$base, ZPR:$offs)>; } @@ -1439,7 +1439,7 @@ // Patterns to generate adr instruction. // adr z0.d, [z0.d, z0.d, uxtw] def : Pat<(add nxv2i64:$Op1, - (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))), + (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))))), (ADR_UXTW_ZZZ_D_0 $Op1, $Op2)>; // adr z0.d, [z0.d, z0.d, sxtw] def : Pat<(add nxv2i64:$Op1, @@ -1452,7 +1452,7 @@ def : Pat<(add Ty:$Op1, (Ty (AArch64lsl_p (PredTy (SVEAllActive)), Ty:$Op2, - (Ty (AArch64dup (ShiftTy ShiftAmt)))))), + (Ty (splat_vector (ShiftTy ShiftAmt)))))), (DestAdrIns $Op1, $Op2)>; } defm : adrShiftPat; @@ -1467,14 +1467,14 @@ multiclass adrXtwShiftPat { def : Pat<(add Ty:$Op1, (Ty (AArch64lsl_p (PredTy (SVEAllActive)), - (Ty (and Ty:$Op2, (Ty (AArch64dup (i64 0xFFFFFFFF))))), - (Ty (AArch64dup (i64 ShiftAmt)))))), + (Ty (and Ty:$Op2, (Ty (splat_vector (i64 0xFFFFFFFF))))), + (Ty (splat_vector (i64 ShiftAmt)))))), (!cast("ADR_UXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; def : Pat<(add Ty:$Op1, (Ty (AArch64lsl_p (PredTy (SVEAllActive)), (Ty (sext_inreg Ty:$Op2, nxv2i32)), - (Ty (AArch64dup (i64 ShiftAmt)))))), + (Ty (splat_vector (i64 ShiftAmt)))))), (!cast("ADR_SXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>; } defm : adrXtwShiftPat; @@ -1902,27 +1902,27 @@ // Unsigned integer -> Floating-point def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), + (nxv2i64 (splat_vector (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))), (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), + (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))), (UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg), (and (nxv4i32 ZPR:$Zs), - (nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), + (nxv4i32 (splat_vector (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))), (UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), + (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))), (UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg), (and (nxv2i64 ZPR:$Zs), - (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), + (nxv2i64 (splat_vector (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))), (UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>; defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>; @@ -2066,7 +2066,7 @@ let AddedComplexity = 1 in { class LD1RPat : - Pat<(vt (AArch64dup (index_vt (operator (CP GPR64:$base, immtype:$offset))))), + Pat<(vt (splat_vector (index_vt (operator (CP GPR64:$base, immtype:$offset))))), (load (ptrue 31), GPR64:$base, $offset)>; } @@ -2154,18 +2154,18 @@ GPR32:$op, sub_32), $imm), sub_32))>; - def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), + def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), (INCH_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm 
i32:$imm)))))))), + def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), (INCW_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))), + def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))), (INCD_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), + def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (splat_vector (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))), (DECH_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), + def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (splat_vector (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))), (DECW_ZPiI ZPR:$op, 31, $imm)>; - def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))), + def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (splat_vector (i64 (vscale (sve_cntd_imm i32:$imm))))))), (DECD_ZPiI ZPR:$op, 31, $imm)>; } diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -382,17 +382,17 @@ class SVE_1_Op_Imm_OptLsl_Pat - : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))), + : Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm, i32:$shift)))))), (inst $Op1, i32:$imm, i32:$shift)>; class SVE_1_Op_Imm_Arith_All_Active - : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))), + : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm)))))), (inst $Op1, i32:$imm)>; class SVE_1_Op_Imm_Log_Pat - : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))), + : Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i64:$imm)))))), (inst $Op1, i64:$imm)>; class SVE_2_Op_Pat -: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), +: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))), (inst $Pg, $Rn, i32:$imm)>; class SVE_Shift_DupImm_All_Active_Pat -: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), +: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))), (inst $Rn, i32:$imm)>; class SVE_2_Op_Fp_Imm_Pat -: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))), +: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (splat_vector (it immL))))), (inst $Pg, $Zs1, imm)>; class SVE_2_Op_Fp_Imm_Pat_Zero : Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)), - (vt (AArch64dup (it immL))))), + (vt (splat_vector (it immL))))), (inst $Pg, $Zs1, imm)>; // Used to re-order the operands of BSP when lowering to BSL. 
BSP has the order: @@ -1195,27 +1195,27 @@ (!cast(NAME # _Q) ZPR128:$Zd, FPR128asZPR:$Qn, 0), 2>; // Duplicate extracted element of vector into all vector elements - def : Pat<(nxv16i8 (AArch64dup (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))), + def : Pat<(nxv16i8 (splat_vector (i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)))), (!cast(NAME # _B) ZPR:$vec, sve_elm_idx_extdup_b:$index)>; - def : Pat<(nxv8i16 (AArch64dup (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), + def : Pat<(nxv8i16 (splat_vector (i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv4i32 (AArch64dup (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), + def : Pat<(nxv4i32 (splat_vector (i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2i64 (AArch64dup (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2i64 (splat_vector (i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv8f16 (AArch64dup (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), + def : Pat<(nxv8f16 (splat_vector (f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv8bf16 (AArch64dup (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), + def : Pat<(nxv8bf16 (splat_vector (bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)))), (!cast(NAME # _H) ZPR:$vec, sve_elm_idx_extdup_h:$index)>; - def : Pat<(nxv4f16 (AArch64dup (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), + def : Pat<(nxv4f16 (splat_vector (f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2f16 (AArch64dup (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2f16 (splat_vector (f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv4f32 (AArch64dup (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), + def : Pat<(nxv4f32 (splat_vector (f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)))), (!cast(NAME # _S) ZPR:$vec, sve_elm_idx_extdup_s:$index)>; - def : Pat<(nxv2f32 (AArch64dup (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2f32 (splat_vector (f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; - def : Pat<(nxv2f64 (AArch64dup (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), + def : Pat<(nxv2f64 (splat_vector (f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)))), (!cast(NAME # _D) ZPR:$vec, sve_elm_idx_extdup_d:$index)>; } @@ -1768,7 +1768,7 @@ def : InstAlias<"mov $Zd, $imm", (!cast(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>; - def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))), + def : Pat<(nxv2i64 (splat_vector (i64 logical_imm64:$imm))), (!cast(NAME) logical_imm64:$imm)>; } @@ -4625,7 +4625,7 @@ def : InstAlias<"mov $Zd, $Pg/m, $imm", (!cast(NAME) 
zprty:$Zd, PPRAny:$Pg, cpyimm:$imm), 1>; def : Pat<(vselect predty:$Pg, - (intty (AArch64dup (scalarty (cpx i32:$imm, i32:$shift)))), + (intty (splat_vector (scalarty (cpx i32:$imm, i32:$shift)))), ZPR:$Zd), (!cast(NAME) $Zd, $Pg, $imm, $shift)>; } @@ -4662,8 +4662,8 @@ def : Pat<(intty (anyext (predty PPRAny:$Ps1))), (!cast(NAME) PPRAny:$Ps1, 1, 0)>; def : Pat<(vselect predty:$Pg, - (intty (AArch64dup (scalarty (cpx i32:$imm, i32:$shift)))), - (intty (AArch64dup (scalarty 0)))), + (intty (splat_vector (scalarty (cpx i32:$imm, i32:$shift)))), + (intty (splat_vector (scalarty 0)))), (!cast(NAME) $Pg, $imm, $shift)>; } @@ -4799,11 +4799,11 @@ Operand immtype, Instruction cmp> { def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), (intvt ZPR:$Zs1), - (intvt (AArch64dup (immtype:$imm))), + (intvt (splat_vector (immtype:$imm))), cc)), (cmp $Pg, $Zs1, immtype:$imm)>; def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), - (intvt (AArch64dup (immtype:$imm))), + (intvt (splat_vector (immtype:$imm))), (intvt ZPR:$Zs1), commuted_cc)), (cmp $Pg, $Zs1, immtype:$imm)>; @@ -5205,13 +5205,13 @@ (!cast(NAME # "_D") (i64 0), simm5_64b:$imm5b)>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5b)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))), (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("trunc_imm") $imm5b))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5b)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))), (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("trunc_imm") $imm5b))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5b)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))), (!cast(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>; - def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5b)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>; } @@ -5250,35 +5250,35 @@ (!cast(NAME # "_D") (i64 0), (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // add(step_vector(step), dup(X)) -> index(X, step). 
- def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (splat_vector(simm5_8b:$imm5)))), (!cast(NAME # "_B") simm5_8b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (splat_vector(simm5_16b:$imm5)))), (!cast(NAME # "_H") simm5_16b:$imm5, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (splat_vector(simm5_32b:$imm5)))), (!cast(NAME # "_S") simm5_32b:$imm5, (!cast("MOVi32imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, (!cast("MOVi64imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // mul(step_vector(1), dup(Y)) -> index(0, Y). - def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_B") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_H") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_S") (i32 0), GPR32:$Rm)>; - def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (!cast(NAME # "_D") (i64 0), GPR64:$Rm)>; // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). 
- def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))), + def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(simm5_8b:$imm5)))), (!cast(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))), + def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (nxv8i16 (splat_vector(simm5_16b:$imm5)))), (!cast(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))), + def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (nxv4i32 (splat_vector(simm5_32b:$imm5)))), (!cast(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>; - def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))), + def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (nxv2i64 (splat_vector(simm5_64b:$imm5)))), (!cast(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>; } @@ -5306,13 +5306,13 @@ def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>; // add(step_vector(step), dup(X)) -> index(X, step). - def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse simm5_8b_tgt:$imm5)), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_B") GPR32:$Rm, (!cast("trunc_imm") $imm5))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse simm5_16b_tgt:$imm5)), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_H") GPR32:$Rm, (!cast("trunc_imm") $imm5))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse simm5_32b_tgt:$imm5)), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))), (!cast(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>; - def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse simm5_64b_tgt:$imm5)), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))), (!cast(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>; } @@ -5340,25 +5340,25 @@ def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>; // add(step_vector(step), dup(X)) -> index(X, step). 
- def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (nxv16i8 (step_vector_oneuse i8:$imm)), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_B") GPR32:$Rn, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (nxv8i16 (step_vector_oneuse i16:$imm)), (nxv8i16 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_H") GPR32:$Rn, (!cast("MOVi32imm") (!cast("trunc_imm") $imm)))>; - def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (nxv4i32 (step_vector_oneuse i32:$imm)), (nxv4i32 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_S") GPR32:$Rn, (!cast("MOVi32imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64:$imm)), (nxv2i64 (splat_vector(i64 GPR64:$Rn)))), (!cast(NAME # "_D") GPR64:$Rn, (!cast("MOVi64imm") $imm))>; - def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + def : Pat<(add (nxv2i64 (step_vector_oneuse i64imm_32bit_tgt:$imm)), (nxv2i64 (splat_vector(i64 GPR64:$Rn)))), (!cast(NAME # "_D") GPR64:$Rn, (SUBREG_TO_REG (i64 0), (!cast("MOVi32imm") (!cast("trunc_imm") $imm)), sub_32))>; // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y). - def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (step_vector_oneuse (i8 1))), (nxv16i8 (splat_vector(i32 GPR32:$Rm)))), (nxv16i8 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),(nxv8i16 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (step_vector_oneuse (i16 1))), (nxv8i16 (splat_vector(i32 GPR32:$Rm)))),(nxv8i16 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),(nxv4i32 (AArch64dup(i32 GPR32:$Rn)))), + def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (step_vector_oneuse (i32 1))), (nxv4i32 (splat_vector(i32 GPR32:$Rm)))),(nxv4i32 (splat_vector(i32 GPR32:$Rn)))), (!cast(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>; - def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),(nxv2i64 (AArch64dup(i64 GPR64:$Rn)))), + def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (step_vector_oneuse (i64 1))), (nxv2i64 (splat_vector(i64 GPR64:$Rm)))),(nxv2i64 (splat_vector(i64 GPR64:$Rn)))), (!cast(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>; }
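Usage note: with ISD::SPLAT_VECTOR marked Legal for the SVE vector types above, lowering code can build the generic node directly and let the splat_vector patterns (DUP_ZR, DUP_ZI, FDUP_ZI, DUP_ZZI) select it; only the non-constant i1 case still takes the whilelo path in LowerSPLAT_VECTOR. A minimal C++ sketch of that caller-side contract, using a hypothetical helper that is not part of this patch:

// Hypothetical helper (illustration only): emit an unpredicated SVE splat as a
// generic ISD::SPLAT_VECTOR node and rely on pattern selection.
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

static SDValue buildSVESplat(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                             SDValue Scalar) {
  assert(VT.isScalableVector() && "expected a scalable (SVE) vector type");
  // getSplatVector wraps Scalar in ISD::SPLAT_VECTOR; for non-i1 element types
  // this is Legal after the change, so no further custom lowering runs.
  return DAG.getSplatVector(VT, DL, Scalar);
}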