diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4636,7 +4636,7 @@ } break; } - case AArch64ISD::SVE_LD2: { + case AArch64ISD::SVE_LD2_MERGE_ZERO: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 2, AArch64::LD2B_IMM); return; @@ -4653,7 +4653,7 @@ } break; } - case AArch64ISD::SVE_LD3: { + case AArch64ISD::SVE_LD3_MERGE_ZERO: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 3, AArch64::LD3B_IMM); return; @@ -4670,7 +4670,7 @@ } break; } - case AArch64ISD::SVE_LD4: { + case AArch64ISD::SVE_LD4_MERGE_ZERO: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 4, AArch64::LD4B_IMM); return; @@ -4732,12 +4732,12 @@ // For custom ISD nodes, we have to look at them individually to extract the // type of the data moved to/from memory. switch (Opcode) { - case AArch64ISD::LD1: - case AArch64ISD::LD1S: - case AArch64ISD::LDNF1: - case AArch64ISD::LDNF1S: + case AArch64ISD::LD1_MERGE_ZERO: + case AArch64ISD::LD1S_MERGE_ZERO: + case AArch64ISD::LDNF1_MERGE_ZERO: + case AArch64ISD::LDNF1S_MERGE_ZERO: return cast<VTSDNode>(Root->getOperand(3))->getVT(); - case AArch64ISD::ST1: + case AArch64ISD::ST1_PRED: return cast<VTSDNode>(Root->getOperand(4))->getVT(); default: break; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -25,6 +25,26 @@ namespace AArch64ISD { +// For predicated nodes where the result is a vector, the operation is +// controlled by a governing predicate and the inactive lanes are explicitly +// defined with a value, please stick to the following naming convention: +// +// _MERGE_OP<n> The result value is a vector with inactive lanes equal +// to source operand OP<n>. +// +// _MERGE_ZERO The result value is a vector with inactive lanes +// actively zeroed. +// +// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal +// to the last source operand, whose only purpose is to +// provide a passthru value. +// +// For other cases where no explicit action is needed to set the inactive lanes, +// or where the result is not a vector and it is needed or helpful to +// distinguish a node from similar unpredicated nodes, use: +// +// _PRED +// enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses. @@ -53,18 +73,19 @@ SBC, // adc, sbc instructions // Arithmetic instructions - ADD_PRED, - FADD_PRED, - SDIV_PRED, - UDIV_PRED, - SMIN_PRED, - UMIN_PRED, - SMAX_PRED, - UMAX_PRED, - SHL_PRED, - SRL_PRED, - SRA_PRED, - SETCC_PRED, + ADD_MERGE_OP1, + FADD_MERGE_OP1, + SDIV_MERGE_OP1, + UDIV_MERGE_OP1, + SMIN_MERGE_OP1, + UMIN_MERGE_OP1, + SMAX_MERGE_OP1, + UMAX_MERGE_OP1, + SHL_MERGE_OP1, + SRL_MERGE_OP1, + SRA_MERGE_OP1, + + SETCC_MERGE_ZERO, // Arithmetic instructions which write flags. ADDS, @@ -243,80 +264,81 @@ PTEST, PTRUE, - DUP_PRED, + DUP_MERGE_PASSTHRU, INDEX_VECTOR, REINTERPRET_CAST, - LD1, - LD1S, - LDNF1, - LDNF1S, - LDFF1, - LDFF1S, - LD1RQ, - LD1RO, + LD1_MERGE_ZERO, + LD1S_MERGE_ZERO, + LDNF1_MERGE_ZERO, + LDNF1S_MERGE_ZERO, + LDFF1_MERGE_ZERO, + LDFF1S_MERGE_ZERO, + LD1RQ_MERGE_ZERO, + LD1RO_MERGE_ZERO, // Structured loads. - SVE_LD2, - SVE_LD3, - SVE_LD4, + SVE_LD2_MERGE_ZERO, + SVE_LD3_MERGE_ZERO, + SVE_LD4_MERGE_ZERO, // Unsigned gather loads.
- GLD1, - GLD1_SCALED, - GLD1_UXTW, - GLD1_SXTW, - GLD1_UXTW_SCALED, - GLD1_SXTW_SCALED, - GLD1_IMM, + GLD1_MERGE_ZERO, + GLD1_SCALED_MERGE_ZERO, + GLD1_UXTW_MERGE_ZERO, + GLD1_SXTW_MERGE_ZERO, + GLD1_UXTW_SCALED_MERGE_ZERO, + GLD1_SXTW_SCALED_MERGE_ZERO, + GLD1_IMM_MERGE_ZERO, // Signed gather loads - GLD1S, - GLD1S_SCALED, - GLD1S_UXTW, - GLD1S_SXTW, - GLD1S_UXTW_SCALED, - GLD1S_SXTW_SCALED, - GLD1S_IMM, + GLD1S_MERGE_ZERO, + GLD1S_SCALED_MERGE_ZERO, + GLD1S_UXTW_MERGE_ZERO, + GLD1S_SXTW_MERGE_ZERO, + GLD1S_UXTW_SCALED_MERGE_ZERO, + GLD1S_SXTW_SCALED_MERGE_ZERO, + GLD1S_IMM_MERGE_ZERO, // Unsigned gather loads. - GLDFF1, - GLDFF1_SCALED, - GLDFF1_UXTW, - GLDFF1_SXTW, - GLDFF1_UXTW_SCALED, - GLDFF1_SXTW_SCALED, - GLDFF1_IMM, + GLDFF1_MERGE_ZERO, + GLDFF1_SCALED_MERGE_ZERO, + GLDFF1_UXTW_MERGE_ZERO, + GLDFF1_SXTW_MERGE_ZERO, + GLDFF1_UXTW_SCALED_MERGE_ZERO, + GLDFF1_SXTW_SCALED_MERGE_ZERO, + GLDFF1_IMM_MERGE_ZERO, // Signed gather loads. - GLDFF1S, - GLDFF1S_SCALED, - GLDFF1S_UXTW, - GLDFF1S_SXTW, - GLDFF1S_UXTW_SCALED, - GLDFF1S_SXTW_SCALED, - GLDFF1S_IMM, + GLDFF1S_MERGE_ZERO, + GLDFF1S_SCALED_MERGE_ZERO, + GLDFF1S_UXTW_MERGE_ZERO, + GLDFF1S_SXTW_MERGE_ZERO, + GLDFF1S_UXTW_SCALED_MERGE_ZERO, + GLDFF1S_SXTW_SCALED_MERGE_ZERO, + GLDFF1S_IMM_MERGE_ZERO, // Non-temporal gather loads - GLDNT1, - GLDNT1_INDEX, - GLDNT1S, + GLDNT1_MERGE_ZERO, + GLDNT1_INDEX_MERGE_ZERO, + GLDNT1S_MERGE_ZERO, - ST1, + // Contiguous masked store. + ST1_PRED, // Scatter store - SST1, - SST1_SCALED, - SST1_UXTW, - SST1_SXTW, - SST1_UXTW_SCALED, - SST1_SXTW_SCALED, - SST1_IMM, + SST1_PRED, + SST1_SCALED_PRED, + SST1_UXTW_PRED, + SST1_SXTW_PRED, + SST1_UXTW_SCALED_PRED, + SST1_SXTW_SCALED_PRED, + SST1_IMM_PRED, // Non-temporal scatter store - SSTNT1, - SSTNT1_INDEX, + SSTNT1_PRED, + SSTNT1_INDEX_PRED, // Strict (exception-raising) floating point comparison STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1336,245 +1336,243 @@ } const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { +#define MAKE_CASE(V) \ + case V: \ + return #V; switch ((AArch64ISD::NodeType)Opcode) { - case AArch64ISD::FIRST_NUMBER: break; - case AArch64ISD::CALL: return "AArch64ISD::CALL"; - case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; - case AArch64ISD::ADR: return "AArch64ISD::ADR"; - case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; - case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot"; - case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG"; - case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND"; - case AArch64ISD::CSEL: return "AArch64ISD::CSEL"; - case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL"; - case AArch64ISD::CSINV: return "AArch64ISD::CSINV"; - case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; - case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; - case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; - case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ"; - case AArch64ISD::ADD_PRED: return "AArch64ISD::ADD_PRED"; - case AArch64ISD::SDIV_PRED: return "AArch64ISD::SDIV_PRED"; - case AArch64ISD::UDIV_PRED: return "AArch64ISD::UDIV_PRED"; - case AArch64ISD::SMIN_PRED: return "AArch64ISD::SMIN_PRED"; - case AArch64ISD::UMIN_PRED: return "AArch64ISD::UMIN_PRED"; - case AArch64ISD::SMAX_PRED: return "AArch64ISD::SMAX_PRED"; - case 
AArch64ISD::UMAX_PRED: return "AArch64ISD::UMAX_PRED"; - case AArch64ISD::SHL_PRED: return "AArch64ISD::SHL_PRED"; - case AArch64ISD::SRL_PRED: return "AArch64ISD::SRL_PRED"; - case AArch64ISD::SRA_PRED: return "AArch64ISD::SRA_PRED"; - case AArch64ISD::SETCC_PRED: return "AArch64ISD::SETCC_PRED"; - case AArch64ISD::ADC: return "AArch64ISD::ADC"; - case AArch64ISD::SBC: return "AArch64ISD::SBC"; - case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; - case AArch64ISD::SUBS: return "AArch64ISD::SUBS"; - case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; - case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; - case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; - case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; - case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; - case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; - case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; - case AArch64ISD::STRICT_FCMP: return "AArch64ISD::STRICT_FCMP"; - case AArch64ISD::STRICT_FCMPE: return "AArch64ISD::STRICT_FCMPE"; - case AArch64ISD::DUP: return "AArch64ISD::DUP"; - case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; - case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; - case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32"; - case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64"; - case AArch64ISD::MOVI: return "AArch64ISD::MOVI"; - case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift"; - case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit"; - case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl"; - case AArch64ISD::FMOV: return "AArch64ISD::FMOV"; - case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift"; - case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl"; - case AArch64ISD::BICi: return "AArch64ISD::BICi"; - case AArch64ISD::ORRi: return "AArch64ISD::ORRi"; - case AArch64ISD::BSP: return "AArch64ISD::BSP"; - case AArch64ISD::NEG: return "AArch64ISD::NEG"; - case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; - case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1"; - case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2"; - case AArch64ISD::UZP1: return "AArch64ISD::UZP1"; - case AArch64ISD::UZP2: return "AArch64ISD::UZP2"; - case AArch64ISD::TRN1: return "AArch64ISD::TRN1"; - case AArch64ISD::TRN2: return "AArch64ISD::TRN2"; - case AArch64ISD::REV16: return "AArch64ISD::REV16"; - case AArch64ISD::REV32: return "AArch64ISD::REV32"; - case AArch64ISD::REV64: return "AArch64ISD::REV64"; - case AArch64ISD::EXT: return "AArch64ISD::EXT"; - case AArch64ISD::VSHL: return "AArch64ISD::VSHL"; - case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR"; - case AArch64ISD::VASHR: return "AArch64ISD::VASHR"; - case AArch64ISD::VSLI: return "AArch64ISD::VSLI"; - case AArch64ISD::VSRI: return "AArch64ISD::VSRI"; - case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ"; - case AArch64ISD::CMGE: return "AArch64ISD::CMGE"; - case AArch64ISD::CMGT: return "AArch64ISD::CMGT"; - case AArch64ISD::CMHI: return "AArch64ISD::CMHI"; - case AArch64ISD::CMHS: return "AArch64ISD::CMHS"; - case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ"; - case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE"; - case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT"; - case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz"; - case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz"; - case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz"; - case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz"; - case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz"; - case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz"; - case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz"; - 
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; - case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; - case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; - case AArch64ISD::SADDV: return "AArch64ISD::SADDV"; - case AArch64ISD::UADDV: return "AArch64ISD::UADDV"; - case AArch64ISD::SMINV: return "AArch64ISD::SMINV"; - case AArch64ISD::UMINV: return "AArch64ISD::UMINV"; - case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV"; - case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV"; - case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED"; - case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED"; - case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED"; - case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED"; - case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED"; - case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED"; - case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED"; - case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N"; - case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N"; - case AArch64ISD::LASTA: return "AArch64ISD::LASTA"; - case AArch64ISD::LASTB: return "AArch64ISD::LASTB"; - case AArch64ISD::REV: return "AArch64ISD::REV"; - case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST"; - case AArch64ISD::TBL: return "AArch64ISD::TBL"; - case AArch64ISD::FADD_PRED: return "AArch64ISD::FADD_PRED"; - case AArch64ISD::FADDA_PRED: return "AArch64ISD::FADDA_PRED"; - case AArch64ISD::FADDV_PRED: return "AArch64ISD::FADDV_PRED"; - case AArch64ISD::FMAXV_PRED: return "AArch64ISD::FMAXV_PRED"; - case AArch64ISD::FMAXNMV_PRED: return "AArch64ISD::FMAXNMV_PRED"; - case AArch64ISD::FMINV_PRED: return "AArch64ISD::FMINV_PRED"; - case AArch64ISD::FMINNMV_PRED: return "AArch64ISD::FMINNMV_PRED"; - case AArch64ISD::NOT: return "AArch64ISD::NOT"; - case AArch64ISD::BIT: return "AArch64ISD::BIT"; - case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; - case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ"; - case AArch64ISD::TBZ: return "AArch64ISD::TBZ"; - case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ"; - case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; - case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH"; - case AArch64ISD::SITOF: return "AArch64ISD::SITOF"; - case AArch64ISD::UITOF: return "AArch64ISD::UITOF"; - case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST"; - case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I"; - case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I"; - case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I"; - case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I"; - case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I"; - case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; - case AArch64ISD::LD2post: return "AArch64ISD::LD2post"; - case AArch64ISD::LD3post: return "AArch64ISD::LD3post"; - case AArch64ISD::LD4post: return "AArch64ISD::LD4post"; - case AArch64ISD::ST2post: return "AArch64ISD::ST2post"; - case AArch64ISD::ST3post: return "AArch64ISD::ST3post"; - case AArch64ISD::ST4post: return "AArch64ISD::ST4post"; - case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post"; - case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post"; - case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post"; - case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post"; - case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post"; - case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post"; - case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost"; - case AArch64ISD::LD2DUPpost: 
return "AArch64ISD::LD2DUPpost"; - case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost"; - case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost"; - case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost"; - case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost"; - case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost"; - case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost"; - case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost"; - case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost"; - case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost"; - case AArch64ISD::SMULL: return "AArch64ISD::SMULL"; - case AArch64ISD::UMULL: return "AArch64ISD::UMULL"; - case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE"; - case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS"; - case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE"; - case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS"; - case AArch64ISD::STG: return "AArch64ISD::STG"; - case AArch64ISD::STZG: return "AArch64ISD::STZG"; - case AArch64ISD::ST2G: return "AArch64ISD::ST2G"; - case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G"; - case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI"; - case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO"; - case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI"; - case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO"; - case AArch64ISD::INSR: return "AArch64ISD::INSR"; - case AArch64ISD::PTEST: return "AArch64ISD::PTEST"; - case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE"; - case AArch64ISD::LD1: return "AArch64ISD::LD1"; - case AArch64ISD::LD1S: return "AArch64ISD::LD1S"; - case AArch64ISD::LDNF1: return "AArch64ISD::LDNF1"; - case AArch64ISD::LDNF1S: return "AArch64ISD::LDNF1S"; - case AArch64ISD::LDFF1: return "AArch64ISD::LDFF1"; - case AArch64ISD::LDFF1S: return "AArch64ISD::LDFF1S"; - case AArch64ISD::LD1RQ: return "AArch64ISD::LD1RQ"; - case AArch64ISD::LD1RO: return "AArch64ISD::LD1RO"; - case AArch64ISD::SVE_LD2: return "AArch64ISD::SVE_LD2"; - case AArch64ISD::SVE_LD3: return "AArch64ISD::SVE_LD3"; - case AArch64ISD::SVE_LD4: return "AArch64ISD::SVE_LD4"; - case AArch64ISD::GLD1: return "AArch64ISD::GLD1"; - case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED"; - case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW"; - case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW"; - case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED"; - case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED"; - case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM"; - case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S"; - case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED"; - case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW"; - case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW"; - case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED"; - case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED"; - case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM"; - case AArch64ISD::GLDFF1: return "AArch64ISD::GLDFF1"; - case AArch64ISD::GLDFF1_SCALED: return "AArch64ISD::GLDFF1_SCALED"; - case AArch64ISD::GLDFF1_SXTW: return "AArch64ISD::GLDFF1_SXTW"; - case AArch64ISD::GLDFF1_UXTW: return "AArch64ISD::GLDFF1_UXTW"; - case AArch64ISD::GLDFF1_SXTW_SCALED:return "AArch64ISD::GLDFF1_SXTW_SCALED"; - case AArch64ISD::GLDFF1_UXTW_SCALED:return "AArch64ISD::GLDFF1_UXTW_SCALED"; - case AArch64ISD::GLDFF1_IMM: return 
"AArch64ISD::GLDFF1_IMM"; - case AArch64ISD::GLDFF1S: return "AArch64ISD::GLDFF1S"; - case AArch64ISD::GLDFF1S_SCALED: return "AArch64ISD::GLDFF1S_SCALED"; - case AArch64ISD::GLDFF1S_SXTW: return "AArch64ISD::GLDFF1S_SXTW"; - case AArch64ISD::GLDFF1S_UXTW: return "AArch64ISD::GLDFF1S_UXTW"; - case AArch64ISD::GLDFF1S_SXTW_SCALED: - return "AArch64ISD::GLDFF1S_SXTW_SCALED"; - case AArch64ISD::GLDFF1S_UXTW_SCALED: - return "AArch64ISD::GLDFF1S_UXTW_SCALED"; - case AArch64ISD::GLDFF1S_IMM: return "AArch64ISD::GLDFF1S_IMM"; - - case AArch64ISD::GLDNT1: return "AArch64ISD::GLDNT1"; - case AArch64ISD::GLDNT1_INDEX: return "AArch64ISD::GLDNT1_INDEX"; - case AArch64ISD::GLDNT1S: return "AArch64ISD::GLDNT1S"; - - case AArch64ISD::ST1: return "AArch64ISD::ST1"; - - case AArch64ISD::SST1: return "AArch64ISD::SST1"; - case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED"; - case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW"; - case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW"; - case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED"; - case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED"; - case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM"; - - case AArch64ISD::SSTNT1: return "AArch64ISD::SSTNT1"; - case AArch64ISD::SSTNT1_INDEX: return "AArch64ISD::SSTNT1_INDEX"; - - case AArch64ISD::LDP: return "AArch64ISD::LDP"; - case AArch64ISD::STP: return "AArch64ISD::STP"; - case AArch64ISD::STNP: return "AArch64ISD::STNP"; - case AArch64ISD::DUP_PRED: return "AArch64ISD::DUP_PRED"; - case AArch64ISD::INDEX_VECTOR: return "AArch64ISD::INDEX_VECTOR"; - } + case AArch64ISD::FIRST_NUMBER: + break; + MAKE_CASE(AArch64ISD::CALL) + MAKE_CASE(AArch64ISD::ADRP) + MAKE_CASE(AArch64ISD::ADR) + MAKE_CASE(AArch64ISD::ADDlow) + MAKE_CASE(AArch64ISD::LOADgot) + MAKE_CASE(AArch64ISD::RET_FLAG) + MAKE_CASE(AArch64ISD::BRCOND) + MAKE_CASE(AArch64ISD::CSEL) + MAKE_CASE(AArch64ISD::FCSEL) + MAKE_CASE(AArch64ISD::CSINV) + MAKE_CASE(AArch64ISD::CSNEG) + MAKE_CASE(AArch64ISD::CSINC) + MAKE_CASE(AArch64ISD::THREAD_POINTER) + MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ) + MAKE_CASE(AArch64ISD::ADD_MERGE_OP1) + MAKE_CASE(AArch64ISD::SDIV_MERGE_OP1) + MAKE_CASE(AArch64ISD::UDIV_MERGE_OP1) + MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1) + MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1) + MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1) + MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1) + MAKE_CASE(AArch64ISD::SHL_MERGE_OP1) + MAKE_CASE(AArch64ISD::SRL_MERGE_OP1) + MAKE_CASE(AArch64ISD::SRA_MERGE_OP1) + MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO) + MAKE_CASE(AArch64ISD::ADC) + MAKE_CASE(AArch64ISD::SBC) + MAKE_CASE(AArch64ISD::ADDS) + MAKE_CASE(AArch64ISD::SUBS) + MAKE_CASE(AArch64ISD::ADCS) + MAKE_CASE(AArch64ISD::SBCS) + MAKE_CASE(AArch64ISD::ANDS) + MAKE_CASE(AArch64ISD::CCMP) + MAKE_CASE(AArch64ISD::CCMN) + MAKE_CASE(AArch64ISD::FCCMP) + MAKE_CASE(AArch64ISD::FCMP) + MAKE_CASE(AArch64ISD::STRICT_FCMP) + MAKE_CASE(AArch64ISD::STRICT_FCMPE) + MAKE_CASE(AArch64ISD::DUP) + MAKE_CASE(AArch64ISD::DUPLANE8) + MAKE_CASE(AArch64ISD::DUPLANE16) + MAKE_CASE(AArch64ISD::DUPLANE32) + MAKE_CASE(AArch64ISD::DUPLANE64) + MAKE_CASE(AArch64ISD::MOVI) + MAKE_CASE(AArch64ISD::MOVIshift) + MAKE_CASE(AArch64ISD::MOVIedit) + MAKE_CASE(AArch64ISD::MOVImsl) + MAKE_CASE(AArch64ISD::FMOV) + MAKE_CASE(AArch64ISD::MVNIshift) + MAKE_CASE(AArch64ISD::MVNImsl) + MAKE_CASE(AArch64ISD::BICi) + MAKE_CASE(AArch64ISD::ORRi) + MAKE_CASE(AArch64ISD::BSP) + MAKE_CASE(AArch64ISD::NEG) + MAKE_CASE(AArch64ISD::EXTR) + MAKE_CASE(AArch64ISD::ZIP1) + 
MAKE_CASE(AArch64ISD::ZIP2) + MAKE_CASE(AArch64ISD::UZP1) + MAKE_CASE(AArch64ISD::UZP2) + MAKE_CASE(AArch64ISD::TRN1) + MAKE_CASE(AArch64ISD::TRN2) + MAKE_CASE(AArch64ISD::REV16) + MAKE_CASE(AArch64ISD::REV32) + MAKE_CASE(AArch64ISD::REV64) + MAKE_CASE(AArch64ISD::EXT) + MAKE_CASE(AArch64ISD::VSHL) + MAKE_CASE(AArch64ISD::VLSHR) + MAKE_CASE(AArch64ISD::VASHR) + MAKE_CASE(AArch64ISD::VSLI) + MAKE_CASE(AArch64ISD::VSRI) + MAKE_CASE(AArch64ISD::CMEQ) + MAKE_CASE(AArch64ISD::CMGE) + MAKE_CASE(AArch64ISD::CMGT) + MAKE_CASE(AArch64ISD::CMHI) + MAKE_CASE(AArch64ISD::CMHS) + MAKE_CASE(AArch64ISD::FCMEQ) + MAKE_CASE(AArch64ISD::FCMGE) + MAKE_CASE(AArch64ISD::FCMGT) + MAKE_CASE(AArch64ISD::CMEQz) + MAKE_CASE(AArch64ISD::CMGEz) + MAKE_CASE(AArch64ISD::CMGTz) + MAKE_CASE(AArch64ISD::CMLEz) + MAKE_CASE(AArch64ISD::CMLTz) + MAKE_CASE(AArch64ISD::FCMEQz) + MAKE_CASE(AArch64ISD::FCMGEz) + MAKE_CASE(AArch64ISD::FCMGTz) + MAKE_CASE(AArch64ISD::FCMLEz) + MAKE_CASE(AArch64ISD::FCMLTz) + MAKE_CASE(AArch64ISD::SADDV) + MAKE_CASE(AArch64ISD::UADDV) + MAKE_CASE(AArch64ISD::SMINV) + MAKE_CASE(AArch64ISD::UMINV) + MAKE_CASE(AArch64ISD::SMAXV) + MAKE_CASE(AArch64ISD::UMAXV) + MAKE_CASE(AArch64ISD::SMAXV_PRED) + MAKE_CASE(AArch64ISD::UMAXV_PRED) + MAKE_CASE(AArch64ISD::SMINV_PRED) + MAKE_CASE(AArch64ISD::UMINV_PRED) + MAKE_CASE(AArch64ISD::ORV_PRED) + MAKE_CASE(AArch64ISD::EORV_PRED) + MAKE_CASE(AArch64ISD::ANDV_PRED) + MAKE_CASE(AArch64ISD::CLASTA_N) + MAKE_CASE(AArch64ISD::CLASTB_N) + MAKE_CASE(AArch64ISD::LASTA) + MAKE_CASE(AArch64ISD::LASTB) + MAKE_CASE(AArch64ISD::REV) + MAKE_CASE(AArch64ISD::REINTERPRET_CAST) + MAKE_CASE(AArch64ISD::TBL) + MAKE_CASE(AArch64ISD::FADD_MERGE_OP1) + MAKE_CASE(AArch64ISD::FADDA_PRED) + MAKE_CASE(AArch64ISD::FADDV_PRED) + MAKE_CASE(AArch64ISD::FMAXV_PRED) + MAKE_CASE(AArch64ISD::FMAXNMV_PRED) + MAKE_CASE(AArch64ISD::FMINV_PRED) + MAKE_CASE(AArch64ISD::FMINNMV_PRED) + MAKE_CASE(AArch64ISD::NOT) + MAKE_CASE(AArch64ISD::BIT) + MAKE_CASE(AArch64ISD::CBZ) + MAKE_CASE(AArch64ISD::CBNZ) + MAKE_CASE(AArch64ISD::TBZ) + MAKE_CASE(AArch64ISD::TBNZ) + MAKE_CASE(AArch64ISD::TC_RETURN) + MAKE_CASE(AArch64ISD::PREFETCH) + MAKE_CASE(AArch64ISD::SITOF) + MAKE_CASE(AArch64ISD::UITOF) + MAKE_CASE(AArch64ISD::NVCAST) + MAKE_CASE(AArch64ISD::SQSHL_I) + MAKE_CASE(AArch64ISD::UQSHL_I) + MAKE_CASE(AArch64ISD::SRSHR_I) + MAKE_CASE(AArch64ISD::URSHR_I) + MAKE_CASE(AArch64ISD::SQSHLU_I) + MAKE_CASE(AArch64ISD::WrapperLarge) + MAKE_CASE(AArch64ISD::LD2post) + MAKE_CASE(AArch64ISD::LD3post) + MAKE_CASE(AArch64ISD::LD4post) + MAKE_CASE(AArch64ISD::ST2post) + MAKE_CASE(AArch64ISD::ST3post) + MAKE_CASE(AArch64ISD::ST4post) + MAKE_CASE(AArch64ISD::LD1x2post) + MAKE_CASE(AArch64ISD::LD1x3post) + MAKE_CASE(AArch64ISD::LD1x4post) + MAKE_CASE(AArch64ISD::ST1x2post) + MAKE_CASE(AArch64ISD::ST1x3post) + MAKE_CASE(AArch64ISD::ST1x4post) + MAKE_CASE(AArch64ISD::LD1DUPpost) + MAKE_CASE(AArch64ISD::LD2DUPpost) + MAKE_CASE(AArch64ISD::LD3DUPpost) + MAKE_CASE(AArch64ISD::LD4DUPpost) + MAKE_CASE(AArch64ISD::LD1LANEpost) + MAKE_CASE(AArch64ISD::LD2LANEpost) + MAKE_CASE(AArch64ISD::LD3LANEpost) + MAKE_CASE(AArch64ISD::LD4LANEpost) + MAKE_CASE(AArch64ISD::ST2LANEpost) + MAKE_CASE(AArch64ISD::ST3LANEpost) + MAKE_CASE(AArch64ISD::ST4LANEpost) + MAKE_CASE(AArch64ISD::SMULL) + MAKE_CASE(AArch64ISD::UMULL) + MAKE_CASE(AArch64ISD::FRECPE) + MAKE_CASE(AArch64ISD::FRECPS) + MAKE_CASE(AArch64ISD::FRSQRTE) + MAKE_CASE(AArch64ISD::FRSQRTS) + MAKE_CASE(AArch64ISD::STG) + MAKE_CASE(AArch64ISD::STZG) + MAKE_CASE(AArch64ISD::ST2G) + 
MAKE_CASE(AArch64ISD::STZ2G) + MAKE_CASE(AArch64ISD::SUNPKHI) + MAKE_CASE(AArch64ISD::SUNPKLO) + MAKE_CASE(AArch64ISD::UUNPKHI) + MAKE_CASE(AArch64ISD::UUNPKLO) + MAKE_CASE(AArch64ISD::INSR) + MAKE_CASE(AArch64ISD::PTEST) + MAKE_CASE(AArch64ISD::PTRUE) + MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO) + MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO) + MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO) + MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO) + MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO) + MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO) + MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO) + MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO) + MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO) + MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO) + MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) + MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO) + MAKE_CASE(AArch64ISD::ST1_PRED) + MAKE_CASE(AArch64ISD::SST1_PRED) + MAKE_CASE(AArch64ISD::SST1_SCALED_PRED) + MAKE_CASE(AArch64ISD::SST1_SXTW_PRED) + MAKE_CASE(AArch64ISD::SST1_UXTW_PRED) + MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED) + MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED) + MAKE_CASE(AArch64ISD::SST1_IMM_PRED) + MAKE_CASE(AArch64ISD::SSTNT1_PRED) + MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED) + MAKE_CASE(AArch64ISD::LDP) + MAKE_CASE(AArch64ISD::STP) + MAKE_CASE(AArch64ISD::STNP) + MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU) + MAKE_CASE(AArch64ISD::INDEX_VECTOR) + } +#undef MAKE_CASE return nullptr; } @@ -3426,7 +3424,7 @@ return LowerXALUO(Op, DAG); case ISD::FADD: if (useSVEForFixedLengthVectorVT(Op.getValueType())) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_MERGE_OP1); return LowerF128Call(Op, DAG, RTLIB::ADD_F128); case ISD::FSUB: return LowerF128Call(Op, DAG, RTLIB::SUB_F128); @@ -3460,17 +3458,17 @@ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::SDIV: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_MERGE_OP1); case ISD::UDIV: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED); + return LowerToPredicatedOp(Op, DAG, 
AArch64ISD::UDIV_MERGE_OP1); case ISD::SMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1); case ISD::UMIN: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1); case ISD::SMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1); case ISD::UMAX: - return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1); case ISD::SRA: case ISD::SRL: case ISD::SHL: @@ -3532,7 +3530,7 @@ llvm_unreachable("Unexpected request to lower ISD::LOAD"); case ISD::ADD: if (useSVEForFixedLengthVectorVT(Op.getValueType())) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_MERGE_OP1); llvm_unreachable("Unexpected request to lower ISD::ADD"); } } @@ -8792,7 +8790,7 @@ case ISD::SHL: if (VT.isScalableVector()) - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_MERGE_OP1); if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0), @@ -8804,8 +8802,8 @@ case ISD::SRA: case ISD::SRL: if (VT.isScalableVector()) { - unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED - : AArch64ISD::SRL_PRED; + unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_MERGE_OP1 + : AArch64ISD::SRL_MERGE_OP1; return LowerToPredicatedOp(Op, DAG, Opc); } @@ -8934,7 +8932,7 @@ if (Op.getValueType().isScalableVector()) { if (Op.getOperand(0).getValueType().isFloatingPoint()) return Op; - return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_PRED); + return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO); } ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); @@ -9929,9 +9927,9 @@ unsigned N, Opcode; static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = { - {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2}}, - {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3}}, - {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4}}}; + {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}}, + {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}}, + {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; std::tie(N, Opcode) = IntrinsicMap[Intrinsic]; assert(VT.getVectorElementCount().Min % N == 0 && @@ -10872,26 +10870,26 @@ // SVE load instructions perform an implicit zero-extend, which makes them // perfect candidates for combining.
switch (Opc) { - case AArch64ISD::LD1: - case AArch64ISD::LDNF1: - case AArch64ISD::LDFF1: + case AArch64ISD::LD1_MERGE_ZERO: + case AArch64ISD::LDNF1_MERGE_ZERO: + case AArch64ISD::LDFF1_MERGE_ZERO: MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT(); break; - case AArch64ISD::GLD1: - case AArch64ISD::GLD1_SCALED: - case AArch64ISD::GLD1_SXTW: - case AArch64ISD::GLD1_SXTW_SCALED: - case AArch64ISD::GLD1_UXTW: - case AArch64ISD::GLD1_UXTW_SCALED: - case AArch64ISD::GLD1_IMM: - case AArch64ISD::GLDFF1: - case AArch64ISD::GLDFF1_SCALED: - case AArch64ISD::GLDFF1_SXTW: - case AArch64ISD::GLDFF1_SXTW_SCALED: - case AArch64ISD::GLDFF1_UXTW: - case AArch64ISD::GLDFF1_UXTW_SCALED: - case AArch64ISD::GLDFF1_IMM: - case AArch64ISD::GLDNT1: + case AArch64ISD::GLD1_MERGE_ZERO: + case AArch64ISD::GLD1_SCALED_MERGE_ZERO: + case AArch64ISD::GLD1_SXTW_MERGE_ZERO: + case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: + case AArch64ISD::GLD1_UXTW_MERGE_ZERO: + case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: + case AArch64ISD::GLD1_IMM_MERGE_ZERO: + case AArch64ISD::GLDFF1_MERGE_ZERO: + case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO: + case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO: + case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO: + case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO: + case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO: + case AArch64ISD::GLDFF1_IMM_MERGE_ZERO: + case AArch64ISD::GLDNT1_MERGE_ZERO: MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT(); break; default: @@ -11525,8 +11523,10 @@ if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar); - return DAG.getNode(AArch64ISD::DUP_PRED, dl, N->getValueType(0), - N->getOperand(1), N->getOperand(2), Scalar); + SDValue Passthru = N->getOperand(1); + SDValue Pred = N->getOperand(2); + return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0), + Pred, Scalar, Passthru); } static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) { @@ -11611,8 +11611,8 @@ return SDValue(); SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm); - return DAG.getNode(AArch64ISD::SETCC_PRED, DL, VT, Pred, N->getOperand(2), - Splat, DAG.getCondCode(CC)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred, + N->getOperand(2), Splat, DAG.getCondCode(CC)); } return SDValue(); @@ -11760,67 +11760,67 @@ case Intrinsic::aarch64_sve_ext: return LowerSVEIntrinsicEXT(N, DAG); case Intrinsic::aarch64_sve_sdiv: - return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::SDIV_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_udiv: - return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::UDIV_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_smin: - return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_umin: - return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::UMIN_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_smax: - return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::SMAX_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_umax: - return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::UMAX_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_lsl: - return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::SHL_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_lsr: - return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::SRL_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_asr: - return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0), + return DAG.getNode(AArch64ISD::SRA_MERGE_OP1, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), N->getOperand(3)); case Intrinsic::aarch64_sve_cmphs: if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3), - DAG.getCondCode(ISD::SETUGE)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETUGE)); break; case Intrinsic::aarch64_sve_cmphi: if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3), - DAG.getCondCode(ISD::SETUGT)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETUGT)); break; case Intrinsic::aarch64_sve_cmpge: if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3), - DAG.getCondCode(ISD::SETGE)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETGE)); break; case Intrinsic::aarch64_sve_cmpgt: if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3), - DAG.getCondCode(ISD::SETGT)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETGT)); break; case Intrinsic::aarch64_sve_cmpeq: if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3), - DAG.getCondCode(ISD::SETEQ)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), DAG.getCondCode(ISD::SETEQ)); break; case Intrinsic::aarch64_sve_cmpne: if (!N->getOperand(2).getValueType().isFloatingPoint()) - return DAG.getNode(AArch64ISD::SETCC_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3), - DAG.getCondCode(ISD::SETNE)); + return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N), + N->getValueType(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), 
DAG.getCondCode(ISD::SETNE)); break; case Intrinsic::aarch64_sve_fadda: return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG); @@ -12092,7 +12092,8 @@ template <unsigned Opcode> static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) { - static_assert(Opcode == AArch64ISD::LD1RQ || Opcode == AArch64ISD::LD1RO, + static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO || + Opcode == AArch64ISD::LD1RO_MERGE_ZERO, "Unsupported opcode."); SDLoc DL(N); EVT VT = N->getValueType(0); @@ -12138,7 +12139,7 @@ InputVT }; - return DAG.getNode(AArch64ISD::ST1, DL, N->getValueType(0), Ops); + return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops); } static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) { @@ -13299,10 +13300,10 @@ // For "scalar + vector of indices", just scale the indices. This only // applies to non-temporal scatters because there's no instruction that takes // indicies. - if (Opcode == AArch64ISD::SSTNT1_INDEX) { + if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) { Offset = getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits()); - Opcode = AArch64ISD::SSTNT1; + Opcode = AArch64ISD::SSTNT1_PRED; } // In the case of non-temporal gather loads there's only one SVE instruction @@ -13310,8 +13311,8 @@ // * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0] // Since we do have intrinsics that allow the arguments to be in a different // order, we may need to swap them to match the spec. - if (Opcode == AArch64ISD::SSTNT1 && Offset.getValueType().isVector()) - std::swap(Base, Offset); + if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector()) + std::swap(Base, Offset); // SST1_IMM requires that the offset is an immediate that is: // * a multiple of #SizeInBytes, @@ -13319,13 +13320,13 @@ // where #SizeInBytes is the size in bytes of the stored items. For // immediates outside that range and non-immediate scalar offsets use SST1 or // SST1_UXTW instead. - if (Opcode == AArch64ISD::SST1_IMM) { + if (Opcode == AArch64ISD::SST1_IMM_PRED) { if (!isValidImmForSVEVecImmAddrMode(Offset, SrcVT.getScalarSizeInBits() / 8)) { if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy) - Opcode = AArch64ISD::SST1_UXTW; + Opcode = AArch64ISD::SST1_UXTW_PRED; else - Opcode = AArch64ISD::SST1; + Opcode = AArch64ISD::SST1_PRED; std::swap(Base, Offset); } @@ -13396,10 +13397,10 @@ // For "scalar + vector of indices", just scale the indices. This only // applies to non-temporal gathers because there's no instruction that takes // indicies. - if (Opcode == AArch64ISD::GLDNT1_INDEX) { + if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) { Offset = getScaledOffsetForBitWidth(DAG, Offset, DL, RetVT.getScalarSizeInBits()); - Opcode = AArch64ISD::GLDNT1; + Opcode = AArch64ISD::GLDNT1_MERGE_ZERO; } // In the case of non-temporal gather loads there's only one SVE instruction @@ -13407,24 +13408,28 @@ // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0] // Since we do have intrinsics that allow the arguments to be in a different // order, we may need to swap them to match the spec. - if (Opcode == AArch64ISD::GLDNT1 && Offset.getValueType().isVector()) - std::swap(Base, Offset); + if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO && + Offset.getValueType().isVector()) + std::swap(Base, Offset); // GLD{FF}1_IMM requires that the offset is an immediate that is: // * a multiple of #SizeInBytes, // * in the range [0, 31 x #SizeInBytes], // where #SizeInBytes is the size in bytes of the loaded items.
For - // immediates outside that range and non-immediate scalar offsets use GLD1 or - // GLD1_UXTW instead. - if (Opcode == AArch64ISD::GLD1_IMM || Opcode == AArch64ISD::GLDFF1_IMM) { + // immediates outside that range and non-immediate scalar offsets use + // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead. + if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO || + Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) { if (!isValidImmForSVEVecImmAddrMode(Offset, RetVT.getScalarSizeInBits() / 8)) { if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy) - Opcode = (Opcode == AArch64ISD::GLD1_IMM) ? AArch64ISD::GLD1_UXTW - : AArch64ISD::GLDFF1_UXTW; + Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO) + ? AArch64ISD::GLD1_UXTW_MERGE_ZERO + : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO; else - Opcode = (Opcode == AArch64ISD::GLD1_IMM) ? AArch64ISD::GLD1 - : AArch64ISD::GLDFF1; + Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO) + ? AArch64ISD::GLD1_MERGE_ZERO + : AArch64ISD::GLDFF1_MERGE_ZERO; std::swap(Base, Offset); } @@ -13517,62 +13522,62 @@ unsigned NewOpc; unsigned MemVTOpNum = 4; switch (Opc) { - case AArch64ISD::LD1: - NewOpc = AArch64ISD::LD1S; + case AArch64ISD::LD1_MERGE_ZERO: + NewOpc = AArch64ISD::LD1S_MERGE_ZERO; MemVTOpNum = 3; break; - case AArch64ISD::LDNF1: - NewOpc = AArch64ISD::LDNF1S; + case AArch64ISD::LDNF1_MERGE_ZERO: + NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO; MemVTOpNum = 3; break; - case AArch64ISD::LDFF1: - NewOpc = AArch64ISD::LDFF1S; + case AArch64ISD::LDFF1_MERGE_ZERO: + NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO; MemVTOpNum = 3; break; - case AArch64ISD::GLD1: - NewOpc = AArch64ISD::GLD1S; + case AArch64ISD::GLD1_MERGE_ZERO: + NewOpc = AArch64ISD::GLD1S_MERGE_ZERO; break; - case AArch64ISD::GLD1_SCALED: - NewOpc = AArch64ISD::GLD1S_SCALED; + case AArch64ISD::GLD1_SCALED_MERGE_ZERO: + NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO; break; - case AArch64ISD::GLD1_SXTW: - NewOpc = AArch64ISD::GLD1S_SXTW; + case AArch64ISD::GLD1_SXTW_MERGE_ZERO: + NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO; break; - case AArch64ISD::GLD1_SXTW_SCALED: - NewOpc = AArch64ISD::GLD1S_SXTW_SCALED; + case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO: + NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO; break; - case AArch64ISD::GLD1_UXTW: - NewOpc = AArch64ISD::GLD1S_UXTW; + case AArch64ISD::GLD1_UXTW_MERGE_ZERO: + NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO; break; - case AArch64ISD::GLD1_UXTW_SCALED: - NewOpc = AArch64ISD::GLD1S_UXTW_SCALED; + case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO: + NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO; break; - case AArch64ISD::GLD1_IMM: - NewOpc = AArch64ISD::GLD1S_IMM; + case AArch64ISD::GLD1_IMM_MERGE_ZERO: + NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO; break; - case AArch64ISD::GLDFF1: - NewOpc = AArch64ISD::GLDFF1S; + case AArch64ISD::GLDFF1_MERGE_ZERO: + NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO; break; - case AArch64ISD::GLDFF1_SCALED: - NewOpc = AArch64ISD::GLDFF1S_SCALED; + case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO: + NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO; break; - case AArch64ISD::GLDFF1_SXTW: - NewOpc = AArch64ISD::GLDFF1S_SXTW; + case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO: + NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO; break; - case AArch64ISD::GLDFF1_SXTW_SCALED: - NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED; + case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO: + NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO; break; - case AArch64ISD::GLDFF1_UXTW: - NewOpc = AArch64ISD::GLDFF1S_UXTW; + case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO: + NewOpc = 
AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO; break; - case AArch64ISD::GLDFF1_UXTW_SCALED: - NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED; + case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO: + NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO; break; - case AArch64ISD::GLDFF1_IMM: - NewOpc = AArch64ISD::GLDFF1S_IMM; + case AArch64ISD::GLDFF1_IMM_MERGE_ZERO: + NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO; break; - case AArch64ISD::GLDNT1: - NewOpc = AArch64ISD::GLDNT1S; + case AArch64ISD::GLDNT1_MERGE_ZERO: + NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO; break; default: return SDValue(); @@ -13750,89 +13755,101 @@ case Intrinsic::aarch64_sve_ldnt1: return performLDNT1Combine(N, DAG); case Intrinsic::aarch64_sve_ld1rq: - return performLD1ReplicateCombine<AArch64ISD::LD1RQ>(N, DAG); + return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG); case Intrinsic::aarch64_sve_ld1ro: - return performLD1ReplicateCombine<AArch64ISD::LD1RO>(N, DAG); + return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG); case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1); + return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO); case Intrinsic::aarch64_sve_ldnt1_gather: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1); + return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO); case Intrinsic::aarch64_sve_ldnt1_gather_index: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_INDEX); + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLDNT1_INDEX_MERGE_ZERO); case Intrinsic::aarch64_sve_ldnt1_gather_uxtw: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1); + return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO); case Intrinsic::aarch64_sve_ld1: - return performLD1Combine(N, DAG, AArch64ISD::LD1); + return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO); case Intrinsic::aarch64_sve_ldnf1: - return performLD1Combine(N, DAG, AArch64ISD::LDNF1); + return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO); case Intrinsic::aarch64_sve_ldff1: - return performLD1Combine(N, DAG, AArch64ISD::LDFF1); + return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO); case Intrinsic::aarch64_sve_st1: return performST1Combine(N, DAG); case Intrinsic::aarch64_sve_stnt1: return performSTNT1Combine(N, DAG); case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset: - return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1); + return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED); case Intrinsic::aarch64_sve_stnt1_scatter_uxtw: - return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1); + return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED); case Intrinsic::aarch64_sve_stnt1_scatter: - return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1); + return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED); case Intrinsic::aarch64_sve_stnt1_scatter_index: - return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX); + return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED); case Intrinsic::aarch64_sve_ld1_gather: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1); + return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO); case Intrinsic::aarch64_sve_ld1_gather_index: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SCALED); + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLD1_SCALED_MERGE_ZERO); case Intrinsic::aarch64_sve_ld1_gather_sxtw: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW, + return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SXTW_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ld1_gather_uxtw: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW, + return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ld1_gather_sxtw_index: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_SCALED, + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ld1_gather_uxtw_index: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_SCALED, + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ld1_gather_scalar_offset: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM); + return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO); case Intrinsic::aarch64_sve_ldff1_gather: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1); + return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO); case Intrinsic::aarch64_sve_ldff1_gather_index: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SCALED); + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLDFF1_SCALED_MERGE_ZERO); case Intrinsic::aarch64_sve_ldff1_gather_sxtw: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SXTW, + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLDFF1_SXTW_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ldff1_gather_uxtw: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_UXTW, + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLDFF1_UXTW_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_SXTW_SCALED, + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_UXTW_SCALED, + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset: - return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_IMM); + return performGatherLoadCombine(N, DAG, + AArch64ISD::GLDFF1_IMM_MERGE_ZERO); case Intrinsic::aarch64_sve_st1_scatter: - return performScatterStoreCombine(N, DAG, AArch64ISD::SST1); + return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED); case Intrinsic::aarch64_sve_st1_scatter_index: - return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED); + return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED); case Intrinsic::aarch64_sve_st1_scatter_sxtw: - return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW, + return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_st1_scatter_uxtw: - return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW, + return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_st1_scatter_sxtw_index: - return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_SCALED, + return performScatterStoreCombine(N, DAG, + AArch64ISD::SST1_SXTW_SCALED_PRED, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_st1_scatter_uxtw_index: - 
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_SCALED, + return performScatterStoreCombine(N, DAG, + AArch64ISD::SST1_UXTW_SCALED_PRED, /*OnlyPackedOffsets=*/false); case Intrinsic::aarch64_sve_st1_scatter_scalar_offset: - return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM); + return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED); case Intrinsic::aarch64_sve_tuple_get: { SDLoc DL(N); SDValue Chain = N->getOperand(0); diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -10,6 +10,21 @@ // //===----------------------------------------------------------------------===// +// For predicated nodes where the entire operation is controlled by a governing +// predicate, please stick to a similar naming convention as used for the +// ISD nodes: +// +// SDNode <=> AArch64ISD +// ------------------------------- +// _m <=> _MERGE_OP +// _mt <=> _MERGE_PASSTHRU +// _z <=> _MERGE_ZERO +// _p <=> _PRED +// +// Given the context of this file, it is not strictly necessary to use _p to +// distinguish predicated from unpredicated nodes given that most SVE +// instructions are predicated. + // Contiguous loads - node definitions // def SDT_AArch64_LD1 : SDTypeProfile<1, 3, [ @@ -17,16 +32,16 @@ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> ]>; -def AArch64ld1 : SDNode<"AArch64ISD::LD1", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; -def AArch64ld1s : SDNode<"AArch64ISD::LD1S", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1_z : SDNode<"AArch64ISD::LD1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def AArch64ld1s_z : SDNode<"AArch64ISD::LD1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; // Non-faulting & first-faulting loads - node definitions // -def AArch64ldnf1 : SDNode<"AArch64ISD::LDNF1", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1 : SDNode<"AArch64ISD::LDFF1", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldnf1_z : SDNode<"AArch64ISD::LDNF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1_z : SDNode<"AArch64ISD::LDFF1_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldnf1s : SDNode<"AArch64ISD::LDNF1S", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s : SDNode<"AArch64ISD::LDFF1S", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldnf1s_z : SDNode<"AArch64ISD::LDNF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1s_z : SDNode<"AArch64ISD::LDFF1S_MERGE_ZERO", SDT_AArch64_LD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; // Contiguous load and replicate - node definitions // @@ -36,8 +51,8 @@ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> ]>; -def AArch64ld1rq : SDNode<"AArch64ISD::LD1RQ", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1ro : SDNode<"AArch64ISD::LD1RO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1rq_z : SDNode<"AArch64ISD::LD1RQ_MERGE_ZERO", SDT_AArch64_LD1Replicate, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1ro_z : SDNode<"AArch64ISD::LD1RO_MERGE_ZERO", SDT_AArch64_LD1Replicate, 
[SDNPHasChain, SDNPMayLoad]>; // Gather loads - node definitions // @@ -51,40 +66,40 @@ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> ]>; -def AArch64ld1_gather : SDNode<"AArch64ISD::GLD1", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_scaled : SDNode<"AArch64ISD::GLD1_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_uxtw : SDNode<"AArch64ISD::GLD1_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_sxtw : SDNode<"AArch64ISD::GLD1_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; - -def AArch64ld1s_gather : SDNode<"AArch64ISD::GLD1S", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_scaled : SDNode<"AArch64ISD::GLD1S_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_uxtw : SDNode<"AArch64ISD::GLD1S_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_sxtw : SDNode<"AArch64ISD::GLD1S_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ld1s_gather_imm : SDNode<"AArch64ISD::GLD1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; - -def AArch64ldff1_gather : SDNode<"AArch64ISD::GLDFF1", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_scaled : SDNode<"AArch64ISD::GLDFF1_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_uxtw : SDNode<"AArch64ISD::GLDFF1_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_sxtw : SDNode<"AArch64ISD::GLDFF1_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1_gather_imm : SDNode<"AArch64ISD::GLDFF1_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; - -def AArch64ldff1s_gather : SDNode<"AArch64ISD::GLDFF1S", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_scaled : SDNode<"AArch64ISD::GLDFF1S_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_uxtw : SDNode<"AArch64ISD::GLDFF1S_UXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_sxtw : SDNode<"AArch64ISD::GLDFF1S_SXTW", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_uxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED", SDT_AArch64_GATHER_SV, 
[SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_sxtw_scaled : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; -def AArch64ldff1s_gather_imm : SDNode<"AArch64ISD::GLDFF1S_IMM", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; - -def AArch64ldnt1_gather : SDNode<"AArch64ISD::GLDNT1", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; -def AArch64ldnt1s_gather : SDNode<"AArch64ISD::GLDNT1S", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1_gather_z : SDNode<"AArch64ISD::GLD1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1_gather_scaled_z : SDNode<"AArch64ISD::GLD1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1_gather_uxtw_z : SDNode<"AArch64ISD::GLD1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1_gather_sxtw_z : SDNode<"AArch64ISD::GLD1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1_gather_imm_z : SDNode<"AArch64ISD::GLD1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; + +def AArch64ld1s_gather_z : SDNode<"AArch64ISD::GLD1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1s_gather_scaled_z : SDNode<"AArch64ISD::GLD1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1s_gather_uxtw_z : SDNode<"AArch64ISD::GLD1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1s_gather_sxtw_z : SDNode<"AArch64ISD::GLD1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ld1s_gather_imm_z : SDNode<"AArch64ISD::GLD1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; + +def AArch64ldff1_gather_z : SDNode<"AArch64ISD::GLDFF1_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1_gather_imm_z : SDNode<"AArch64ISD::GLDFF1_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, 
SDNPOptInGlue, SDNPOutGlue]>; + +def AArch64ldff1s_gather_z : SDNode<"AArch64ISD::GLDFF1S_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1s_gather_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1s_gather_uxtw_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1s_gather_sxtw_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1s_gather_uxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1s_gather_sxtw_scaled_z : SDNode<"AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO", SDT_AArch64_GATHER_SV, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64ldff1s_gather_imm_z : SDNode<"AArch64ISD::GLDFF1S_IMM_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue, SDNPOutGlue]>; + +def AArch64ldnt1_gather_z : SDNode<"AArch64ISD::GLDNT1_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; +def AArch64ldnt1s_gather_z : SDNode<"AArch64ISD::GLDNT1S_MERGE_ZERO", SDT_AArch64_GATHER_VS, [SDNPHasChain, SDNPMayLoad]>; // Contiguous stores - node definitions // @@ -93,7 +108,7 @@ SDTCVecEltisVT<2,i1>, SDTCisSameNumEltsAs<0,2> ]>; -def AArch64st1 : SDNode<"AArch64ISD::ST1", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1 : SDNode<"AArch64ISD::ST1_PRED", SDT_AArch64_ST1, [SDNPHasChain, SDNPMayStore]>; // Scatter stores - node definitions // @@ -107,15 +122,15 @@ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1> ]>; -def AArch64st1_scatter : SDNode<"AArch64ISD::SST1", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; -def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1_scatter : SDNode<"AArch64ISD::SST1_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1_scatter_scaled : SDNode<"AArch64ISD::SST1_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1_scatter_uxtw : SDNode<"AArch64ISD::SST1_UXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1_scatter_sxtw : SDNode<"AArch64ISD::SST1_SXTW_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1_scatter_uxtw_scaled : SDNode<"AArch64ISD::SST1_UXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1_scatter_sxtw_scaled : SDNode<"AArch64ISD::SST1_SXTW_SCALED_PRED", SDT_AArch64_SCATTER_SV, [SDNPHasChain, SDNPMayStore]>; +def AArch64st1_scatter_imm : SDNode<"AArch64ISD::SST1_IMM_PRED", SDT_AArch64_SCATTER_VS, 
[SDNPHasChain, SDNPMayStore]>; -def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; +def AArch64stnt1_scatter : SDNode<"AArch64ISD::SSTNT1_PRED", SDT_AArch64_SCATTER_VS, [SDNPHasChain, SDNPMayStore]>; // AArch64 SVE/SVE2 - the remaining node definitions // @@ -132,42 +147,43 @@ def sve_cntd_imm_neg : ComplexPattern">; def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>; -def AArch64faddv_pred : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>; -def AArch64fmaxv_pred : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>; -def AArch64fmaxnmv_pred : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>; -def AArch64fminv_pred : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>; -def AArch64fminnmv_pred : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>; -def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; -def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; -def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; -def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>; -def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; -def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; -def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; -def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; -def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; +def AArch64faddv_p : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>; +def AArch64fmaxv_p : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>; +def AArch64fmaxnmv_p : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>; +def AArch64fminv_p : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>; +def AArch64fminnmv_p : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>; +def AArch64smaxv_p : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; +def AArch64umaxv_p : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; +def AArch64sminv_p : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; +def AArch64uminv_p : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>; +def AArch64orv_p : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; +def AArch64eorv_p : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; +def AArch64andv_p : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; +def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; +def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; def SDT_AArch64Arith : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3> ]>; -def AArch64add_pred : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>; -def AArch64fadd_pred : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>; -def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>; -def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>; -def AArch64smin_pred : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>; -def AArch64umin_pred : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>; -def AArch64smax_pred : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>; -def AArch64umax_pred : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>; -def AArch64lsl_pred : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>; -def AArch64lsr_pred : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>; -def AArch64asr_pred : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>; +// Merging op1 into the 
inactive lanes. +def AArch64add_m1 : SDNode<"AArch64ISD::ADD_MERGE_OP1", SDT_AArch64Arith>; +def AArch64fadd_m1 : SDNode<"AArch64ISD::FADD_MERGE_OP1", SDT_AArch64Arith>; +def AArch64sdiv_m1 : SDNode<"AArch64ISD::SDIV_MERGE_OP1", SDT_AArch64Arith>; +def AArch64udiv_m1 : SDNode<"AArch64ISD::UDIV_MERGE_OP1", SDT_AArch64Arith>; +def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>; +def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>; +def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>; +def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>; +def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>; +def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>; +def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>; def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; -def AArch64fadda_pred : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; +def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>; def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>; @@ -175,8 +191,8 @@ def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>; -def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>; -def AArch64dup_pred : SDNode<"AArch64ISD::DUP_PRED", SDT_AArch64DUP_PRED>; +def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 3>, SDTCisVec<1>, SDTCVecEltisVT<1,i1>]>; +def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED>; def SDT_IndexVector : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<2>]>; def index_vector : SDNode<"AArch64ISD::INDEX_VECTOR", SDT_IndexVector, []>; @@ -206,7 +222,7 @@ defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ", 1>; defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", 0>; - defm ADD_ZPZZ : sve_int_bin_pred_zx; + defm ADD_ZPZZ : sve_int_bin_pred_zx; defm SUB_ZPZZ : sve_int_bin_pred_zx; defm SUBR_ZPZZ : sve_int_bin_pred_zx; @@ -231,22 +247,22 @@ // SVE predicated integer reductions. 
defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>; defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>; - defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_pred>; - defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_pred>; - defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_pred>; - defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_pred>; - defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_pred>; - defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_pred>; - defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_pred>; + defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_p>; + defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_p>; + defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_p>; + defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_p>; + defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_p>; + defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_p>; + defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_p>; defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn", or>; defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>; defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>; - defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_pred>; - defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_pred>; - defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_pred>; - defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_pred>; + defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_m1>; + defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_m1>; + defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_m1>; + defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_m1>; defm MUL_ZI : sve_int_arith_imm2<"mul", mul>; defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>; @@ -263,8 +279,8 @@ def : Pat<(mul nxv2i64:$Op1, nxv2i64:$Op2), (MUL_ZPmZ_D (PTRUE_D 31), $Op1, $Op2)>; - defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_pred>; - defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_pred>; + defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", AArch64sdiv_m1>; + defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", AArch64udiv_m1>; defm SDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b110, "sdivr", int_aarch64_sve_sdivr>; defm UDIVR_ZPmZ : sve_int_bin_pred_arit_2_div<0b111, "udivr", int_aarch64_sve_udivr>; @@ -296,10 +312,10 @@ defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>; defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>; - defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_pred>; - defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_pred>; - defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_pred>; - defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_pred>; + defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_m1>; + defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_m1>; + defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_m1>; + defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_m1>; defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>; defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", 
int_aarch64_sve_uabd>; @@ -329,7 +345,7 @@ defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", "FDIVR_ZPZZ", int_aarch64_sve_fdivr, DestructiveBinaryCommWithRev, "FDIV_ZPmZ", 0>; defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", "FDIV_ZPZZ", int_aarch64_sve_fdiv, DestructiveBinaryCommWithRev, "FDIVR_ZPmZ", 1>; - defm FADD_ZPZZ : sve_fp_2op_p_zds_zx; + defm FADD_ZPZZ : sve_fp_2op_p_zds_zx; defm FSUB_ZPZZ : sve_fp_2op_p_zds_zx; defm FMUL_ZPZZ : sve_fp_2op_p_zds_zx; defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zx; @@ -373,12 +389,12 @@ defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; // SVE floating point reductions. - defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_pred>; - defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_pred>; - defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_pred>; - defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_pred>; - defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_pred>; - defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_pred>; + defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>; + defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>; + defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>; + defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>; + defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_p>; + defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_p>; // Use more efficient NEON instructions to extract elements within the NEON // part (first 128bits) of an SVE register. @@ -404,8 +420,8 @@ defm DUP_ZZI : sve_int_perm_dup_i<"dup">; // Splat scalar register (predicated) - defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_pred>; - defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_pred>; + defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>; + defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>; // Duplicate FP scalar into all vector elements def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))), @@ -673,115 +689,115 @@ // Gathers using unscaled 32-bit offsets, e.g. 
// ld1h z0.s, p0/z, [x0, z0.s, uxtw] - defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; - defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; - defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; - defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; + defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLDFF1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLD1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLDFF1B_S : sve_mem_32b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>; + defm GLD1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLDFF1SH_S : sve_mem_32b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLD1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLDFF1H_S : sve_mem_32b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i16>; + defm GLD1W : sve_mem_32b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; + defm GLDFF1W : sve_mem_32b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR32ExtSXTW8, ZPR32ExtUXTW8, nxv4i32>; // Gathers using scaled 32-bit offsets, e.g. 
// ld1h z0.s, p0/z, [x0, z0.s, uxtw #1] - defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; - defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; - defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; + defm GLD1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLDFF1SH_S : sve_mem_32b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLD1H_S : sve_mem_32b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLDFF1H_S : sve_mem_32b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW16, ZPR32ExtUXTW16, nxv4i16>; + defm GLD1W : sve_mem_32b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; + defm GLDFF1W : sve_mem_32b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR32ExtSXTW32, ZPR32ExtUXTW32, nxv4i32>; // Gathers using 32-bit pointers with scaled offset, e.g. 
// ld1h z0.s, p0/z, [z0.s, #16] - defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv4i8>; - defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm, nxv4i8>; - defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv4i8>; - defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm, nxv4i8>; - defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv4i16>; - defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm, nxv4i16>; - defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv4i16>; - defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm, nxv4i16>; - defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv4i32>; - defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm, nxv4i32>; + defm GLD1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv4i8>; + defm GLDFF1SB_S : sve_mem_32b_gld_vi_32_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv4i8>; + defm GLD1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv4i8>; + defm GLDFF1B_S : sve_mem_32b_gld_vi_32_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv4i8>; + defm GLD1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv4i16>; + defm GLDFF1SH_S : sve_mem_32b_gld_vi_32_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv4i16>; + defm GLD1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv4i16>; + defm GLDFF1H_S : sve_mem_32b_gld_vi_32_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv4i16>; + defm GLD1W : sve_mem_32b_gld_vi_32_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv4i32>; + defm GLDFF1W : sve_mem_32b_gld_vi_32_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv4i32>; // Gathers using 64-bit pointers with scaled offset, e.g. 
// ld1h z0.d, p0/z, [z0.d, #16] - defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm, nxv2i8>; - defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm, nxv2i8>; - defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm, nxv2i8>; - defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm, nxv2i8>; - defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm, nxv2i64>; + defm GLD1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0000, "ld1sb", imm0_31, AArch64ld1s_gather_imm_z, nxv2i8>; + defm GLDFF1SB_D : sve_mem_64b_gld_vi_64_ptrs<0b0001, "ldff1sb", imm0_31, AArch64ldff1s_gather_imm_z, nxv2i8>; + defm GLD1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0010, "ld1b", imm0_31, AArch64ld1_gather_imm_z, nxv2i8>; + defm GLDFF1B_D : sve_mem_64b_gld_vi_64_ptrs<0b0011, "ldff1b", imm0_31, AArch64ldff1_gather_imm_z, nxv2i8>; + defm GLD1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0100, "ld1sh", uimm5s2, AArch64ld1s_gather_imm_z, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_vi_64_ptrs<0b0101, "ldff1sh", uimm5s2, AArch64ldff1s_gather_imm_z, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0110, "ld1h", uimm5s2, AArch64ld1_gather_imm_z, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_vi_64_ptrs<0b0111, "ldff1h", uimm5s2, AArch64ldff1_gather_imm_z, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1000, "ld1sw", uimm5s4, AArch64ld1s_gather_imm_z, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_vi_64_ptrs<0b1001, "ldff1sw", uimm5s4, AArch64ldff1s_gather_imm_z, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1010, "ld1w", uimm5s4, AArch64ld1_gather_imm_z, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_vi_64_ptrs<0b1011, "ldff1w", uimm5s4, AArch64ldff1_gather_imm_z, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_vi_64_ptrs<0b1110, "ld1d", uimm5s8, AArch64ld1_gather_imm_z, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_vi_64_ptrs<0b1111, "ldff1d", uimm5s8, AArch64ldff1_gather_imm_z, nxv2i64>; // Gathers using unscaled 64-bit offsets, e.g. 
// ld1h z0.d, p0/z, [x0, z0.d] - defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather, nxv2i8>; - defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather, nxv2i8>; - defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather, nxv2i8>; - defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather, nxv2i8>; - defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather, nxv2i64>; + defm GLD1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_z, nxv2i8>; + defm GLDFF1SB_D : sve_mem_64b_gld_vs2_64_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_z, nxv2i8>; + defm GLD1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0010, "ld1b", AArch64ld1_gather_z, nxv2i8>; + defm GLDFF1B_D : sve_mem_64b_gld_vs2_64_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_z, nxv2i8>; + defm GLD1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_z, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_vs2_64_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_z, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0110, "ld1h", AArch64ld1_gather_z, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_vs2_64_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_z, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_z, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_vs2_64_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_z, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1010, "ld1w", AArch64ld1_gather_z, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_vs2_64_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_z, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_vs2_64_unscaled<0b1110, "ld1d", AArch64ld1_gather_z, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_vs2_64_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_z, nxv2i64>; // Gathers using scaled 64-bit offsets, e.g. 
// ld1h z0.d, p0/z, [x0, z0.d, lsl #1] - defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled, ZPR64ExtLSL16, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled, ZPR64ExtLSL16, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled, ZPR64ExtLSL16, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled, ZPR64ExtLSL32, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled, ZPR64ExtLSL32, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled, ZPR64ExtLSL32, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled, ZPR64ExtLSL64, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled, ZPR64ExtLSL64, nxv2i64>; + defm GLD1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0100, "ld1sh", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv2_64_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0110, "ld1h", AArch64ld1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv2_64_scaled<0b0111, "ldff1h", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL16, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1000, "ld1sw", AArch64ld1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv2_64_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1010, "ld1w", AArch64ld1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv2_64_scaled<0b1011, "ldff1w", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL32, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_sv2_64_scaled<0b1110, "ld1d", AArch64ld1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_sv2_64_scaled<0b1111, "ldff1d", AArch64ldff1_gather_scaled_z, ZPR64ExtLSL64, nxv2i64>; // Gathers using unscaled 32-bit offsets unpacked in 64-bits elements, e.g. 
// ld1h z0.d, p0/z, [x0, z0.d, uxtw] - defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; - defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw, AArch64ld1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw, AArch64ldff1s_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw, AArch64ld1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw, AArch64ldff1_gather_uxtw, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; + defm GLD1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLDFF1SB_D : sve_mem_64b_gld_vs_32_unscaled<0b0001, "ldff1sb", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLD1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0010, "ld1b", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLDFF1B_D : sve_mem_64b_gld_vs_32_unscaled<0b0011, "ldff1b", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>; + defm GLD1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_vs_32_unscaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_vs_32_unscaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, 
ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i16>; + defm GLD1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_vs_32_unscaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_z, AArch64ldff1s_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_vs_32_unscaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_vs_32_unscaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_z, AArch64ld1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_vs_32_unscaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_z, AArch64ldff1_gather_uxtw_z, ZPR64ExtSXTW8, ZPR64ExtUXTW8, nxv2i64>; // Gathers using scaled 32-bit offsets unpacked in 64-bits elements, e.g. // ld1h z0.d, p0/z, [x0, z0.d, uxtw #1] - defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; - defm GLD1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled, AArch64ld1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled, AArch64ldff1s_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; - defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled, AArch64ld1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; - defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled, AArch64ldff1_gather_uxtw_scaled, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; + defm GLD1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0100, "ld1sh", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLDFF1SH_D : sve_mem_64b_gld_sv_32_scaled<0b0101, "ldff1sh", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLD1H_D : sve_mem_64b_gld_sv_32_scaled<0b0110, "ld1h", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLDFF1H_D : sve_mem_64b_gld_sv_32_scaled<0b0111, "ldff1h", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW16, ZPR64ExtUXTW16, nxv2i16>; + defm GLD1SW_D : 
sve_mem_64b_gld_sv_32_scaled<0b1000, "ld1sw", AArch64ld1s_gather_sxtw_scaled_z, AArch64ld1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLDFF1SW_D : sve_mem_64b_gld_sv_32_scaled<0b1001, "ldff1sw", AArch64ldff1s_gather_sxtw_scaled_z, AArch64ldff1s_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLD1W_D : sve_mem_64b_gld_sv_32_scaled<0b1010, "ld1w", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>; + defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; + defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>; // Non-temporal contiguous loads (register + immediate) defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>; @@ -1193,9 +1209,9 @@ defm INDEX_II : sve_int_index_ii<"index", index_vector>; // Unpredicated shifts - defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_pred>; - defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_pred>; - defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_pred>; + defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_m1>; + defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_m1>; + defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_m1>; defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">; defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">; @@ -1207,14 +1223,14 @@ defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">; defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; - defm ASR_ZPZZ : sve_int_bin_pred_zx; - defm LSR_ZPZZ : sve_int_bin_pred_zx; - defm LSL_ZPZZ : sve_int_bin_pred_zx; + defm ASR_ZPZZ : sve_int_bin_pred_zx; + defm LSR_ZPZZ : sve_int_bin_pred_zx; + defm LSL_ZPZZ : sve_int_bin_pred_zx; defm ASRD_ZPZI : sve_int_bin_pred_shift_0_right_zx; - defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_pred, "ASRR_ZPmZ", 1>; - defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_pred, "LSRR_ZPmZ", 1>; - defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_pred, "LSLR_ZPmZ", 1>; + defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ", 1>; + defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ", 1>; + defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_m1, "LSLR_ZPmZ", 1>; defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", 0>; defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", 0>; defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", 0>; @@ -1389,22 +1405,22 @@ (PTEST_PP PPR:$pg, PPR:$src)>; // LD1R of 128-bit masked data - def : Pat<(nxv16i8 (AArch64ld1rq PPR:$gp, GPR64:$base)), + def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), (LD1RQ_B_IMM $gp, $base, (i64 0))>; - def : Pat<(nxv8i16 (AArch64ld1rq PPR:$gp, GPR64:$base)), + def : Pat<(nxv8i16 (AArch64ld1rq_z 
PPR:$gp, GPR64:$base)), (LD1RQ_H_IMM $gp, $base, (i64 0))>; - def : Pat<(nxv4i32 (AArch64ld1rq PPR:$gp, GPR64:$base)), + def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), (LD1RQ_W_IMM $gp, $base, (i64 0))>; - def : Pat<(nxv2i64 (AArch64ld1rq PPR:$gp, GPR64:$base)), + def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), (LD1RQ_D_IMM $gp, $base, (i64 0))>; - def : Pat<(nxv16i8 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), + def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), (LD1RQ_B_IMM $gp, $base, simm4s16:$imm)>; - def : Pat<(nxv8i16 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), + def : Pat<(nxv8i16 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), (LD1RQ_H_IMM $gp, $base, simm4s16:$imm)>; - def : Pat<(nxv4i32 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), + def : Pat<(nxv4i32 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), (LD1RQ_W_IMM $gp, $base, simm4s16:$imm)>; - def : Pat<(nxv2i64 (AArch64ld1rq PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), + def : Pat<(nxv2i64 (AArch64ld1rq_z PPR:$gp, (add GPR64:$base, (i64 simm4s16:$imm)))), (LD1RQ_D_IMM $gp, $base, simm4s16:$imm)>; def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>; @@ -1741,35 +1757,35 @@ } // 2-element contiguous loads - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; // 4-element contiguous loads - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; // 8-element contiguous loads - defm : ld1; - defm : ld1; - defm : ld1; - defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; + defm : ld1; let Predicates = [HasBF16, HasSVE] in { - defm : ld1; + defm : ld1; } // 16-element contiguous loads - defm : ld1; + defm : ld1; multiclass ldnf1 { // scalar + immediate (mul vl) @@ -1784,35 +1800,35 @@ } // 2-element contiguous non-faulting loads - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; // 4-element contiguous non-faulting loads - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; // 8-element contiguous non-faulting loads - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; - defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; + defm : ldnf1; let Predicates = [HasBF16, HasSVE] in { - defm : ldnf1; + defm : ldnf1; } // 16-element contiguous non-faulting loads - defm : ldnf1; + defm : ldnf1; multiclass ldff1 { // reg + reg @@ -1827,36 +1843,36 @@ } // 2-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; // 4-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; + 
defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; // 8-element contiguous first faulting loads - defm : ldff1; - defm : ldff1; - defm : ldff1; - defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; + defm : ldff1; let Predicates = [HasBF16, HasSVE] in { - defm : ldff1; + defm : ldff1; } // 16-element contiguous first faulting loads - defm : ldff1; + defm : ldff1; multiclass st1 { @@ -2023,14 +2039,14 @@ let Predicates = [HasSVE, HasMatMulFP64] in { defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>; - defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro>; - defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro>; - defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro>; - defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro>; - defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8, nxv16i8, nxv16i1, AArch64ld1ro, am_sve_regreg_lsl0>; - defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro, am_sve_regreg_lsl1>; - defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro, am_sve_regreg_lsl2>; - defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro, am_sve_regreg_lsl3>; + defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>; + defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>; + defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>; + defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro_z>; + defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8, GPR64NoXZRshifted8, nxv16i8, nxv16i1, AArch64ld1ro_z, am_sve_regreg_lsl0>; + defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro_z, am_sve_regreg_lsl1>; + defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro_z, am_sve_regreg_lsl2>; + defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>; defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>; defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>; defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>; @@ -2388,19 +2404,19 @@ def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; // SVE2 non-temporal gather loads - defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather, nxv4i8>; - defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather, nxv4i8>; - defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather, nxv4i16>; - defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather, nxv4i16>; - defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather, nxv4i32>; - - defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather, nxv2i8>; - defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather, nxv2i8>; - defm LDNT1SH_ZZR_D : 
sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather, nxv2i16>; - defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather, nxv2i16>; - defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather, nxv2i32>; - defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather, nxv2i32>; - defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather, nxv2i64>; + defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv4i8>; + defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather_z, nxv4i8>; + defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv4i16>; + defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00101, "ldnt1h", AArch64ldnt1_gather_z, nxv4i16>; + defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b01001, "ldnt1w", AArch64ldnt1_gather_z, nxv4i32>; + + defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv2i8>; + defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10010, "ldnt1b", AArch64ldnt1_gather_z, nxv2i8>; + defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10100, "ldnt1sh", AArch64ldnt1s_gather_z, nxv2i16>; + defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b10110, "ldnt1h", AArch64ldnt1_gather_z, nxv2i16>; + defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather_z, nxv2i32>; + defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather_z, nxv2i32>; + defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>; // SVE2 vector splice (constructive) defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -16,7 +16,7 @@ SDTCisVT<4, OtherVT> ]>; -def AArch64setcc_pred : SDNode<"AArch64ISD::SETCC_PRED", SDT_AArch64Setcc>; +def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>; def SVEPatternOperand : AsmOperandClass { let Name = "SVEPattern"; @@ -4166,9 +4166,9 @@ multiclass SVE_SETCC_Pat { - def : Pat<(predvt (AArch64setcc_pred predvt:$Op1, intvt:$Op2, intvt:$Op3, cc)), + def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, cc)), (cmp $Op1, $Op2, $Op3)>; - def : Pat<(predvt (AArch64setcc_pred predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)), + def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)), (cmp $Op1, $Op3, $Op2)>; } @@ -4239,12 +4239,12 @@ multiclass SVE_SETCC_Imm_Pat { - def : Pat<(predvt (AArch64setcc_pred (predvt PPR_3b:$Pg), + def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), (intvt ZPR:$Zs1), (intvt (AArch64dup (immtype:$imm))), cc)), (cmp $Pg, $Zs1, immtype:$imm)>; - def : Pat<(predvt (AArch64setcc_pred (predvt PPR_3b:$Pg), + def : Pat<(predvt (AArch64setcc_z (predvt PPR_3b:$Pg), (intvt (AArch64dup (immtype:$imm))), (intvt ZPR:$Zs1), commuted_cc)), @@ -5956,10 +5956,14 @@ def : InstAlias<"mov $Zd, $Pg/m, $Rn", (!cast(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, GPR64sp:$Rn), 1>; - def : SVE_3_Op_Pat(NAME # _B)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : Pat<(nxv16i8 (op nxv16i1:$pg, i32:$splat, nxv16i8:$passthru)), + (!cast(NAME # _B) $passthru, $pg, $splat)>; + def : Pat<(nxv8i16 (op nxv8i1:$pg, i32:$splat, 
nxv8i16:$passthru)), + (!cast(NAME # _H) $passthru, $pg, $splat)>; + def : Pat<(nxv4i32 (op nxv4i1:$pg, i32:$splat, nxv4i32:$passthru)), + (!cast(NAME # _S) $passthru, $pg, $splat)>; + def : Pat<(nxv2i64 (op nxv2i1:$pg, i64:$splat, nxv2i64:$passthru)), + (!cast(NAME # _D) $passthru, $pg, $splat)>; } class sve_int_perm_cpy_v sz8_64, string asm, ZPRRegOp zprty, @@ -5998,10 +6002,15 @@ def : InstAlias<"mov $Zd, $Pg/m, $Vn", (!cast(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + + def : Pat<(nxv8f16 (op nxv8i1:$pg, f16:$splat, nxv8f16:$passthru)), + (!cast(NAME # _H) $passthru, $pg, $splat)>; + def : Pat<(nxv2f32 (op nxv2i1:$pg, f32:$splat, nxv2f32:$passthru)), + (!cast(NAME # _S) $passthru, $pg, $splat)>; + def : Pat<(nxv4f32 (op nxv4i1:$pg, f32:$splat, nxv4f32:$passthru)), + (!cast(NAME # _S) $passthru, $pg, $splat)>; + def : Pat<(nxv2f64 (op nxv2i1:$pg, f64:$splat, nxv2f64:$passthru)), + (!cast(NAME # _D) $passthru, $pg, $splat)>; } class sve_int_perm_compact