Diff 261180

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 209 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {

CLASTA_N,		CLASTA_N,
CLASTB_N,		CLASTB_N,
LASTA,		LASTA,
LASTB,		LASTB,
REV,		REV,
TBL,		TBL,

		// Floating-point reductions.
		FADDA_PRED,
		FADDV_PRED,
		FMAXV_PRED,
		FMAXNMV_PRED,
		FMINV_PRED,
		FMINNMV_PRED,

INSR,		INSR,
PTEST,		PTEST,
PTRUE,		PTRUE,

DUP_PRED,		DUP_PRED,
INDEX_VECTOR,		INDEX_VECTOR,

REINTERPRET_CAST,		REINTERPRET_CAST,
▲ Show 20 Lines • Show All 660 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 887 Lines • ▼ Show 20 Lines	for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
setOperationAction(ISD::SDIV, VT, Custom);		setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);		setOperationAction(ISD::UDIV, VT, Custom);
}		}
}		}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);		setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);		setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);

for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {		for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
if (isTypeLegal(VT)) {		if (isTypeLegal(VT)) {
		efriedmaUnsubmitted Not Done Reply Inline Actions Is there some reason to mark EXTRACT_VECTOR_ELT "Custom" when the type isn't legal? efriedma: Is there some reason to mark EXTRACT_VECTOR_ELT "Custom" when the type isn't legal?
		c-rhodesAuthorUnsubmitted Not Done Reply Inline Actions In the context of this patch no, but I guess it will make sense when legalizing `vector_extract_elt`. I've changed this to be legal only for legal FP types now that ISEL patterns are being used as you suggested. c-rhodes: In the context of this patch no, but I guess it will make sense when legalizing…
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
}		}
		efriedmaUnsubmitted Not Done Reply Inline Actions I think "Legal" is the default, so this line doesn't do anything. efriedma: I think "Legal" is the default, so this line doesn't do anything.
}		}
}		}

PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();		PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}		}

void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {		void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
assert(VT.isVector() && "VT should be a vector type");		assert(VT.isVector() && "VT should be a vector type");
▲ Show 20 Lines • Show All 454 Lines • ▼ Show 20 Lines	const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";		case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";		case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";		case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
case AArch64ISD::LASTA: return "AArch64ISD::LASTA";		case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
case AArch64ISD::LASTB: return "AArch64ISD::LASTB";		case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
case AArch64ISD::REV: return "AArch64ISD::REV";		case AArch64ISD::REV: return "AArch64ISD::REV";
case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";		case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";
case AArch64ISD::TBL: return "AArch64ISD::TBL";		case AArch64ISD::TBL: return "AArch64ISD::TBL";
		case AArch64ISD::FADDA_PRED: return "AArch64ISD::FADDA_PRED";
		case AArch64ISD::FADDV_PRED: return "AArch64ISD::FADDV_PRED";
		case AArch64ISD::FMAXV_PRED: return "AArch64ISD::FMAXV_PRED";
		case AArch64ISD::FMAXNMV_PRED: return "AArch64ISD::FMAXNMV_PRED";
		case AArch64ISD::FMINV_PRED: return "AArch64ISD::FMINV_PRED";
		case AArch64ISD::FMINNMV_PRED: return "AArch64ISD::FMINNMV_PRED";
case AArch64ISD::NOT: return "AArch64ISD::NOT";		case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";		case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";		case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";		case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
case AArch64ISD::TBZ: return "AArch64ISD::TBZ";		case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";		case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";		case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";		case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
▲ Show 20 Lines • Show All 7,055 Lines • ▼ Show 20 Lines	if (!CI \|\| CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();		return SDValue();


// Insertion/extraction are legal for V128 types.		// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 \|\| VT == MVT::v8i16 \|\| VT == MVT::v4i32 \|\|		if (VT == MVT::v16i8 \|\| VT == MVT::v8i16 \|\| VT == MVT::v4i32 \|\|
VT == MVT::v2i64 \|\| VT == MVT::v4f32 \|\| VT == MVT::v2f64 \|\|		VT == MVT::v2i64 \|\| VT == MVT::v4f32 \|\| VT == MVT::v2f64 \|\|
VT == MVT::v8f16)		VT == MVT::v8f16)
return Op;		return Op;

		sdesmalenUnsubmitted Done Reply Inline Actions `getSizeInBits().getKnownMinSize()` sdesmalen: `getSizeInBits().getKnownMinSize()`
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&		if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
		sdesmalenUnsubmitted Not Done Reply Inline Actions Should this return `SDValue()` because VT is not yet legal? sdesmalen: Should this return `SDValue()` because VT is not yet legal?
		c-rhodesAuthorUnsubmitted Not Done Reply Inline Actions If `VT` weren't legal, it should be caught by the check above and `SDValue()` returned. c-rhodes: If `VT` weren't legal, it should be caught by the check above and `SDValue()` returned.
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)		VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
return SDValue();		return SDValue();

		sdesmalenUnsubmitted Done Reply Inline Actions `getVectorNumElements` will issue a warning for scalable vectors; use `getElementCount().Min` instead. sdesmalen: `getVectorNumElements` will issue a warning for scalable vectors; use `getElementCount().Min` instead.
// For V64 types, we perform extraction by expanding the value		// For V64 types, we perform extraction by expanding the value
// to a V128 type and perform the extraction on that.		// to a V128 type and perform the extraction on that.
SDLoc DL(Op);		SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);		SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();		EVT WideTy = WideVec.getValueType();

EVT ExtrTy = WideTy.getVectorElementType();		EVT ExtrTy = WideTy.getVectorElementType();
if (ExtrTy == MVT::i16 \|\| ExtrTy == MVT::i8)		if (ExtrTy == MVT::i16 \|\| ExtrTy == MVT::i8)
ExtrTy = MVT::i32;		ExtrTy = MVT::i32;
		efriedmaUnsubmitted Not Done Reply Inline Actions Is there some reason we should do this transform as part of legalization, as opposed to just writing a few isel patterns? efriedma: Is there some reason we should do this transform as part of legalization, as opposed to just…
		c-rhodesAuthorUnsubmitted Not Done Reply Inline Actions No good reason that I'm aware of; I've replaced this with the following patterns: def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), (f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>; def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), (f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>; def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), (f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>; c-rhodes: No good reason that I'm aware of; I've replaced this with the following patterns: ``` def…

// For extractions, we just return the result directly.		// For extractions, we just return the result directly.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
Op.getOperand(1));		Op.getOperand(1));
}		}

SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,		SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
▲ Show 20 Lines • Show All 2,841 Lines • ▼ Show 20 Lines	static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,

// Convert CC to integer based on requested condition.		// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.		// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);		SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);		SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);		return DAG.getZExtOrTrunc(Res, DL, VT);
}		}

		static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
		SelectionDAG &DAG) {
		SDLoc DL(N);

		sdesmalenUnsubmitted Done Reply Inline Actions nit: this is only used once on line 11309, you can inline the variable. sdesmalen: nit: this is only used once on line 11309, you can inline the variable.
		SDValue Pred = N->getOperand(1);
		SDValue VecToReduce = N->getOperand(2);

		EVT ReduceVT = VecToReduce.getValueType();
		SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);

		// SVE reductions set the whole vector register with the first element
		// containing the reduction result, which we'll now extract.
		SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
		Zero);
		}

		static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
		SelectionDAG &DAG) {
		SDLoc DL(N);

		sdesmalenUnsubmitted Done Reply Inline Actions same here. sdesmalen: same here.
		SDValue Pred = N->getOperand(1);
		SDValue InitVal = N->getOperand(2);
		SDValue VecToReduce = N->getOperand(3);
		EVT ReduceVT = VecToReduce.getValueType();

		// Ordered reductions use the first lane of the result vector as the
		// reduction's initial value.
		SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
		InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
		DAG.getUNDEF(ReduceVT), InitVal, Zero);

		SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);

		// SVE reductions set the whole vector register with the first element
		// containing the reduction result, which we'll now extract.
		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
		Zero);
		}

static SDValue performIntrinsicCombine(SDNode *N,		static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {		const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;
unsigned IID = getIntrinsicID(N);		unsigned IID = getIntrinsicID(N);
switch (IID) {		switch (IID) {
default:		default:
break;		break;
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_ext:		case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);		return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_sdiv:		case Intrinsic::aarch64_sve_sdiv:
return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),		return DAG.getNode(AArch64ISD::SDIV_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));		N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_udiv:		case Intrinsic::aarch64_sve_udiv:
return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),		return DAG.getNode(AArch64ISD::UDIV_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));		N->getOperand(1), N->getOperand(2), N->getOperand(3));
		case Intrinsic::aarch64_sve_fadda:
		return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
		case Intrinsic::aarch64_sve_faddv:
		return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
		case Intrinsic::aarch64_sve_fmaxnmv:
		return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
		case Intrinsic::aarch64_sve_fmaxv:
		return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
		case Intrinsic::aarch64_sve_fminnmv:
		return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
		case Intrinsic::aarch64_sve_fminv:
		return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
case Intrinsic::aarch64_sve_sel:		case Intrinsic::aarch64_sve_sel:
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),		return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));		N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:		case Intrinsic::aarch64_sve_cmpeq_wide:
return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,		return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,
false, DCI, DAG);		false, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:		case Intrinsic::aarch64_sve_cmpne_wide:
return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,		return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,
▲ Show 20 Lines • Show All 2,676 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 128 Lines • ▼ Show 20 Lines
def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;		def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;

// SVE DEC		// SVE DEC
def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -8>">;		def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -8>">;
def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -4>">;		def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -4>">;
def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -2>">;		def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -2>">;

def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;		def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
		def AArch64faddv_pred : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>;
		def AArch64fmaxv_pred : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
		def AArch64fmaxnmv_pred : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
		def AArch64fminv_pred : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
		def AArch64fminnmv_pred : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;		def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;		def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;		def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;		def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;		def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;		def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;		def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;		def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;		def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;

def SDT_AArch64DIV : SDTypeProfile<1, 3, [		def SDT_AArch64DIV : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,		SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>		SDTCVecEltisVT<1,i1>, SDTCisSameAs<2,3>
]>;		]>;

def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64DIV>;		def AArch64sdiv_pred : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64DIV>;
def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64DIV>;		def AArch64udiv_pred : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64DIV>;

def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;		def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;		def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;		def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
		def AArch64fadda_pred : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;

def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;		def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;		def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;

def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;		def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;		def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;

def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>;		def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>;
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE] in {

defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;		defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;		defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;

defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;		defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;		defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;

// SVE floating point reductions.		// SVE floating point reductions.
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", int_aarch64_sve_fadda>;		defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_pred>;
defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", int_aarch64_sve_faddv>;		defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_pred>;
defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", int_aarch64_sve_fmaxnmv>;		defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_pred>;
defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", int_aarch64_sve_fminnmv>;		defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_pred>;
defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", int_aarch64_sve_fmaxv>;		defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_pred>;
defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", int_aarch64_sve_fminv>;		defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_pred>;

		// Use more efficient NEON instructions to extract elements within the NEON
		// part (first 128bits) of an SVE register.
		efriedmaUnsubmitted Done Reply Inline Actions Maybe worth explaining why you need two EXTRACT_SUBREG, as opposed to just one. It would be nice to handle non-zero indexes, but I guess that can wait for a followup. efriedma: Maybe worth explaining why you need two EXTRACT_SUBREG, as opposed to just one. It would be…
		def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)),
		(f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>;
		def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
		(f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>;
		def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
		(f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>;

// Splat immediate (unpredicated)		// Splat immediate (unpredicated)
defm DUP_ZI : sve_int_dup_imm<"dup">;		defm DUP_ZI : sve_int_dup_imm<"dup">;
defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;		defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">;		defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">;

// Splat immediate (predicated)		// Splat immediate (predicated)
defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">;		defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">;
▲ Show 20 Lines • Show All 1,926 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 4,438 Lines • ▼ Show 20 Lines

	}			}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// SVE Floating Point Fast Reduction Group			// SVE Floating Point Fast Reduction Group
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,			class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
	ZPRRegOp zprty, RegisterClass dstRegClass>			ZPRRegOp zprty, FPRasZPROperand dstOpType>
	: I<(outs dstRegClass:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),			: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
	asm, "\t$Vd, $Pg, $Zn",			asm, "\t$Vd, $Pg, $Zn",
	"",			"",
	[]>, Sched<[]> {			[]>, Sched<[]> {
	bits<5> Zn;			bits<5> Zn;
	bits<5> Vd;			bits<5> Vd;
	bits<3> Pg;			bits<3> Pg;
	let Inst{31-24} = 0b01100101;			let Inst{31-24} = 0b01100101;
	let Inst{23-22} = sz;			let Inst{23-22} = sz;
	let Inst{21-19} = 0b000;			let Inst{21-19} = 0b000;
	let Inst{18-16} = opc;			let Inst{18-16} = opc;
	let Inst{15-13} = 0b001;			let Inst{15-13} = 0b001;
	let Inst{12-10} = Pg;			let Inst{12-10} = Pg;
	let Inst{9-5} = Zn;			let Inst{9-5} = Zn;
	let Inst{4-0} = Vd;			let Inst{4-0} = Vd;
	}			}

	multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {			multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
	def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>;			def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16asZPR>;
	def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>;			def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>;
	def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>;			def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>;

	def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;			def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
	def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;			def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
	def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;			def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
	}			}


	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// SVE Floating Point Accumulating Reduction Group			// SVE Floating Point Accumulating Reduction Group
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,			class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
	ZPRRegOp zprty, RegisterClass dstRegClass>			ZPRRegOp zprty, FPRasZPROperand dstOpType>
	: I<(outs dstRegClass:$Vdn), (ins PPR3bAny:$Pg, dstRegClass:$_Vdn, zprty:$Zm),			: I<(outs dstOpType:$Vdn), (ins PPR3bAny:$Pg, dstOpType:$_Vdn, zprty:$Zm),
	asm, "\t$Vdn, $Pg, $_Vdn, $Zm",			asm, "\t$Vdn, $Pg, $_Vdn, $Zm",
	"",			"",
	[]>,			[]>,
	Sched<[]> {			Sched<[]> {
	bits<3> Pg;			bits<3> Pg;
	bits<5> Vdn;			bits<5> Vdn;
	bits<5> Zm;			bits<5> Zm;
	let Inst{31-24} = 0b01100101;			let Inst{31-24} = 0b01100101;
	let Inst{23-22} = sz;			let Inst{23-22} = sz;
	let Inst{21-19} = 0b011;			let Inst{21-19} = 0b011;
	let Inst{18-16} = opc;			let Inst{18-16} = opc;
	let Inst{15-13} = 0b001;			let Inst{15-13} = 0b001;
	let Inst{12-10} = Pg;			let Inst{12-10} = Pg;
	let Inst{9-5} = Zm;			let Inst{9-5} = Zm;
	let Inst{4-0} = Vdn;			let Inst{4-0} = Vdn;

	let Constraints = "$Vdn = $_Vdn";			let Constraints = "$Vdn = $_Vdn";
	}			}

	multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {			multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
	def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>;			def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16asZPR>;
	def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>;			def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>;
	def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>;			def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>;

	def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;			def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
	def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;			def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
	def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;			def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
	}			}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// SVE Floating Point Compare - Vectors Group			// SVE Floating Point Compare - Vectors Group
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,			class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
	ZPRRegOp zprty>			ZPRRegOp zprty>
	▲ Show 20 Lines • Show All 3,216 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

	; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s \| FileCheck %s
				sdesmalenUnsubmitted Not Done Reply Inline Actions Why is this change required? sdesmalen: Why is this change required?
				c-rhodesAuthorUnsubmitted Done Reply Inline Actions To remove `// kill: def $d0 killed $d0 killed $z0` from the output. c-rhodes: To remove `// kill: def $d0 killed $d0 killed $z0` from the output.

	;			;
	; FADDA			; FADDA
	;			;

	define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {			define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {
	; CHECK-LABEL: fadda_f16:			; CHECK-LABEL: fadda_f16:
	; CHECK: fadda h0, p0, h0, z1.h			; CHECK: fadda h0, p0, h0, z1.h
	▲ Show 20 Lines • Show All 205 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Custom lowering of floating-point reductions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 261180

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Custom lowering of floating-point reductions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 261180

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

[AArch64][SVE] Custom lowering of floating-point reductions
ClosedPublic