diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15448,19 +15448,6 @@
                      DAG.getConstant(MinOffset, DL, MVT::i64));
 }
 
-static SDValue performStepVectorCombine(SDNode *N,
-                                        TargetLowering::DAGCombinerInfo &DCI,
-                                        SelectionDAG &DAG) {
-  if (!DCI.isAfterLegalizeDAG())
-    return SDValue();
-
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
-  SDValue StepVal = N->getOperand(0);
-  SDValue Zero = DAG.getConstant(0, DL, StepVal.getValueType());
-  return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, VT, Zero, StepVal);
-}
-
 // Turns the vector of indices into a vector of byte offsets by scaling Offset
 // by (BitWidth / 8).
 static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
@@ -15975,8 +15962,6 @@
     return performExtractVectorEltCombine(N, DAG);
   case ISD::VECREDUCE_ADD:
     return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
-  case ISD::STEP_VECTOR:
-    return performStepVectorCombine(N, DCI, DAG);
   case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN:
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -259,10 +259,8 @@
 def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>;
 def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>;
 
-def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>;
-def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>;
-def index_vector_oneuse : PatFrag<(ops node:$base, node:$idx),
-                                  (index_vector node:$base, node:$idx), [{
+def step_vector_oneuse : PatFrag<(ops node:$idx),
+                                 (step_vector node:$idx), [{
   return N->hasOneUse();
 }]>;
 
@@ -1370,10 +1368,10 @@
   defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
   defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
 
-  defm INDEX_RR : sve_int_index_rr<"index", index_vector, index_vector_oneuse, AArch64mul_p_oneuse>;
-  defm INDEX_IR : sve_int_index_ir<"index", index_vector, index_vector_oneuse, AArch64mul_p, AArch64mul_p_oneuse>;
-  defm INDEX_RI : sve_int_index_ri<"index", index_vector, index_vector_oneuse>;
-  defm INDEX_II : sve_int_index_ii<"index", index_vector, index_vector_oneuse>;
+  defm INDEX_RR : sve_int_index_rr<"index", step_vector, step_vector_oneuse, AArch64mul_p_oneuse>;
+  defm INDEX_IR : sve_int_index_ir<"index", step_vector, step_vector_oneuse, AArch64mul_p, AArch64mul_p_oneuse>;
+  defm INDEX_RI : sve_int_index_ri<"index", step_vector, step_vector_oneuse>;
+  defm INDEX_II : sve_int_index_ii<"index", step_vector, step_vector_oneuse>;
 
   // Unpredicated shifts
   defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
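For reference, the net effect of dropping the custom combine: a generic step-vector node now reaches instruction selection directly and is matched by the INDEX multiclasses below. A minimal IR sketch, assuming the llvm.experimental.stepvector intrinsic; the function name and the expected-selection comment are illustrative, not part of the patch:

; A step vector with unit step should select to a single INDEX instruction,
; with no DAG combine to AArch64ISD::INDEX_VECTOR in between.
define <vscale x 8 x i16> @step_h() {
  %v = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
  ; expected: index z0.h, #0, #1
  ret <vscale x 8 x i16> %v
}

declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()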
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -4800,23 +4800,23 @@
   def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
   def _D : sve_int_index_ii<0b11, asm, ZPR64, simm5_64b>;
 
-  def : Pat<(nxv16i8 (op simm5_8b:$imm5, simm5_8b:$imm5b)),
+  def : Pat<(nxv16i8 (op simm5_8b:$imm5b)),
+            (!cast<Instruction>(NAME # "_B") (i32 0), simm5_8b:$imm5b)>;
+  def : Pat<(nxv8i16 (op simm5_16b:$imm5b)),
+            (!cast<Instruction>(NAME # "_H") (i32 0), simm5_16b:$imm5b)>;
+  def : Pat<(nxv4i32 (op simm5_32b:$imm5b)),
+            (!cast<Instruction>(NAME # "_S") (i32 0), simm5_32b:$imm5b)>;
+  def : Pat<(nxv2i64 (op simm5_64b:$imm5b)),
+            (!cast<Instruction>(NAME # "_D") (i64 0), simm5_64b:$imm5b)>;
+
+  // add(step_vector(step), dup(X)) -> index(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop simm5_8b:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
             (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>;
-  def : Pat<(nxv8i16 (op simm5_16b:$imm5, simm5_16b:$imm5b)),
+  def : Pat<(add (nxv8i16 (oneuseop simm5_16b:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
             (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>;
-  def : Pat<(nxv4i32 (op simm5_32b:$imm5, simm5_32b:$imm5b)),
+  def : Pat<(add (nxv4i32 (oneuseop simm5_32b:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
-  def : Pat<(nxv2i64 (op simm5_64b:$imm5, simm5_64b:$imm5b)),
-            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
-
-  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop (i32 0), simm5_8b:$imm5b)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
-            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, simm5_8b:$imm5b)>;
-  def : Pat<(add (nxv8i16 (oneuseop (i32 0), simm5_16b:$imm5b)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
-            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, simm5_16b:$imm5b)>;
-  def : Pat<(add (nxv4i32 (oneuseop (i32 0), simm5_32b:$imm5b)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
-            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, simm5_32b:$imm5b)>;
-  def : Pat<(add (nxv2i64 (oneuseop (i64 0), simm5_64b:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+  def : Pat<(add (nxv2i64 (oneuseop simm5_64b:$imm5b)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, simm5_64b:$imm5b)>;
 }
@@ -4843,43 +4843,43 @@
   def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
   def _D : sve_int_index_ir<0b11, asm, ZPR64, GPR64, simm5_64b>;
 
-  def : Pat<(nxv16i8 (op simm5_8b:$imm5, GPR32:$Rm)),
-            (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
-  def : Pat<(nxv8i16 (op simm5_16b:$imm5, GPR32:$Rm)),
-            (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
-  def : Pat<(nxv4i32 (op simm5_32b:$imm5, GPR32:$Rm)),
-            (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
-  def : Pat<(nxv2i64 (op simm5_64b:$imm5, GPR64:$Rm)),
-            (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
+  def : Pat<(nxv16i8 (op GPR32:$Rm)),
+            (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
+  def : Pat<(nxv8i16 (op GPR32:$Rm)),
+            (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
+  def : Pat<(nxv4i32 (op GPR32:$Rm)),
+            (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
+  def : Pat<(nxv2i64 (op GPR64:$Rm)),
+            (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
 
-  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop (i32 0), GPR32:$Rm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+  // add(step_vector(step), dup(X)) -> index(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop GPR32:$Rm)), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
             (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (nxv8i16 (oneuseop (i32 0), GPR32:$Rm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+  def : Pat<(add (nxv8i16 (oneuseop GPR32:$Rm)), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
             (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (nxv4i32 (oneuseop (i32 0), GPR32:$Rm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+  def : Pat<(add (nxv4i32 (oneuseop GPR32:$Rm)), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (nxv2i64 (oneuseop (i64 0), GPR64:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+  def : Pat<(add (nxv2i64 (oneuseop GPR64:$Rm)), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
 
-  // mul(index_vector(0, 1), dup(Y)) -> index_vector(0, Y).
-  def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 0), (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
+  // mul(step_vector(1), dup(Y)) -> index(0, Y).
+  def : Pat<(mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_B") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 0), (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
+  def : Pat<(mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_H") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 0), (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
+  def : Pat<(mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_S") (i32 0), GPR32:$Rm)>;
-  def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 0), (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
+  def : Pat<(mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
             (!cast<Instruction>(NAME # "_D") (i64 0), GPR64:$Rm)>;
 
-  // add(mul(index_vector(0, 1), dup(Y)), dup(X)) -> index_vector(X, Y).
-  def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 0), (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
+  // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
+  def : Pat<(add (muloneuseop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(simm5_8b:$imm5)))),
             (!cast<Instruction>(NAME # "_B") simm5_8b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 0), (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(simm5_16b:$imm5)))),
             (!cast<Instruction>(NAME # "_H") simm5_16b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 0), (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(simm5_32b:$imm5)))),
             (!cast<Instruction>(NAME # "_S") simm5_32b:$imm5, GPR32:$Rm)>;
-  def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 0), (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
+  def : Pat<(add (muloneuseop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(simm5_64b:$imm5)))),
             (!cast<Instruction>(NAME # "_D") simm5_64b:$imm5, GPR64:$Rm)>;
 }
@@ -4906,23 +4906,14 @@
   def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
   def _D : sve_int_index_ri<0b11, asm, ZPR64, GPR64, simm5_64b>;
 
-  def : Pat<(nxv16i8 (op GPR32:$Rm, simm5_8b:$imm5)),
+  // add(step_vector(step), dup(X)) -> index(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop simm5_8b:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>;
-  def : Pat<(nxv8i16 (op GPR32:$Rm, simm5_16b:$imm5)),
+  def : Pat<(add (nxv8i16 (oneuseop simm5_16b:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>;
-  def : Pat<(nxv4i32 (op GPR32:$Rm, simm5_32b:$imm5)),
+  def : Pat<(add (nxv4i32 (oneuseop simm5_32b:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
             (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
-  def : Pat<(nxv2i64 (op GPR64:$Rm, simm5_64b:$imm5)),
-            (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
-
-  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop (i32 0), simm5_8b:$imm5)), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_B") GPR32:$Rm, simm5_8b:$imm5)>;
-  def : Pat<(add (nxv8i16 (oneuseop (i32 0), simm5_16b:$imm5)), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_H") GPR32:$Rm, simm5_16b:$imm5)>;
-  def : Pat<(add (nxv4i32 (oneuseop (i32 0), simm5_32b:$imm5)), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))),
-            (!cast<Instruction>(NAME # "_S") GPR32:$Rm, simm5_32b:$imm5)>;
-  def : Pat<(add (nxv2i64 (oneuseop (i64 0), simm5_64b:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
+  def : Pat<(add (nxv2i64 (oneuseop simm5_64b:$imm5)), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))),
             (!cast<Instruction>(NAME # "_D") GPR64:$Rm, simm5_64b:$imm5)>;
 }
@@ -4949,29 +4940,24 @@
   def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
   def _D : sve_int_index_rr<0b11, asm, ZPR64, GPR64>;
 
-  def : SVE_2_Op_Pat<nxv16i8, op, i32, i32, !cast<Instruction>(NAME # _B)>;
-  def : SVE_2_Op_Pat<nxv8i16, op, i32, i32, !cast<Instruction>(NAME # _H)>;
-  def : SVE_2_Op_Pat<nxv4i32, op, i32, i32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_2_Op_Pat<nxv2i64, op, i64, i64, !cast<Instruction>(NAME # _D)>;
-
-  // add(index_vector(zero, step), dup(X)) -> index_vector(X, step).
-  def : Pat<(add (nxv16i8 (oneuseop (i32 0), GPR32:$Rm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+  // add(step_vector(step), dup(X)) -> index(X, step).
+  def : Pat<(add (nxv16i8 (oneuseop GPR32:$Rm)), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (nxv8i16 (oneuseop (i32 0), GPR32:$Rm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+  def : Pat<(add (nxv8i16 (oneuseop GPR32:$Rm)), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (nxv4i32 (oneuseop (i32 0), GPR32:$Rm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
+  def : Pat<(add (nxv4i32 (oneuseop GPR32:$Rm)), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (nxv2i64 (oneuseop (i64 0), GPR64:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+  def : Pat<(add (nxv2i64 (oneuseop GPR64:$Rm)), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
             (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
 
-  // add(mul(index_vector(0, 1), dup(Y)), dup(X)) -> index_vector(X, Y).
-  def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 0), (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
+  // add(mul(step_vector(1), dup(Y)), dup(X)) -> index(X, Y).
+  def : Pat<(add (mulop (nxv16i1 (AArch64ptrue 31)), (nxv16i8 (oneuseop (i32 1))), (nxv16i8 (AArch64dup(i32 GPR32:$Rm)))), (nxv16i8 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_B") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 0), (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
+  def : Pat<(add (mulop (nxv8i1 (AArch64ptrue 31)), (nxv8i16 (oneuseop (i32 1))), (nxv8i16 (AArch64dup(i32 GPR32:$Rm)))), (nxv8i16 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_H") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 0), (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
+  def : Pat<(add (mulop (nxv4i1 (AArch64ptrue 31)), (nxv4i32 (oneuseop (i32 1))), (nxv4i32 (AArch64dup(i32 GPR32:$Rm)))), (nxv4i32 (AArch64dup(i32 GPR32:$Rn)))),
             (!cast<Instruction>(NAME # "_S") GPR32:$Rn, GPR32:$Rm)>;
-  def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 0), (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
+  def : Pat<(add (mulop (nxv2i1 (AArch64ptrue 31)), (nxv2i64 (oneuseop (i64 1))), (nxv2i64 (AArch64dup(i64 GPR64:$Rm)))), (nxv2i64 (AArch64dup(i64 GPR64:$Rn)))),
             (!cast<Instruction>(NAME # "_D") GPR64:$Rn, GPR64:$Rm)>;
 }
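The register-register patterns above cover the common strided-index shape. A minimal IR sketch of the fold they implement, assuming the predicated mul comes from ordinary scalable-vector mul lowering; the function name and the expected-selection comment are assumptions for illustration:

; add(mul(step_vector(1), splat(%step)), splat(%base)) -> index base, step
define <vscale x 4 x i32> @index_rr(i32 %base, i32 %step) {
  %sv = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
  %s.ins = insertelement <vscale x 4 x i32> undef, i32 %step, i64 0
  %s.splat = shufflevector <vscale x 4 x i32> %s.ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %scaled = mul <vscale x 4 x i32> %sv, %s.splat
  %b.ins = insertelement <vscale x 4 x i32> undef, i32 %base, i64 0
  %b.splat = shufflevector <vscale x 4 x i32> %b.ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %r = add <vscale x 4 x i32> %scaled, %b.splat
  ; expected: index z0.s, w0, w1
  ret <vscale x 4 x i32> %r
}

declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()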