Diff 259845

llvm/include/llvm/IR/IntrinsicsAArch64.td

Show First 20 Lines • Show All 895 Lines • ▼ Show 20 Lines	: Intrinsic<[T],
[IntrNoMem, ImmArg<1>, ImmArg<2>]>;		[IntrNoMem, ImmArg<1>, ImmArg<2>]>;

class AdvSIMD_SVE_CNT_Intrinsic		class AdvSIMD_SVE_CNT_Intrinsic
: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],		: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
[LLVMVectorOfBitcastsToInt<0>,		[LLVMVectorOfBitcastsToInt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,		LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyvector_ty],		llvm_anyvector_ty],
[IntrNoMem]>;		[IntrNoMem]>;

// Signature shared by the predicated SVE floating-point reductions.
//   Result:    an overloaded floating-point type (overloaded type 0).
//   Operand 0: an i1 predicate shaped like overloaded type 1, i.e. with the
//              same number of elements as the vector being reduced.
//   Operand 1: the overloaded vector to reduce (overloaded type 1).
// The intrinsic is declared IntrNoMem.
class AdvSIMD_SVE_FP_Reduce_Intrinsic
: Intrinsic<[llvm_anyfloat_ty],
[LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
llvm_anyvector_ty],
[IntrNoMem]>;

class AdvSIMD_SVE_ReduceWithInit_Intrinsic		class AdvSIMD_SVE_ReduceWithInit_Intrinsic
		efriedmaUnsubmitted Not Done Reply Inline Actions Please commit this separately. efriedma: Please commit this separately.
		c-rhodesAuthorUnsubmitted Done Reply Inline Actions Moved to D79010. c-rhodes: Moved to D79010.
: Intrinsic<[LLVMVectorElementType<0>],		: Intrinsic<[LLVMVectorElementType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,		[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMVectorElementType<0>,		LLVMVectorElementType<0>,
llvm_anyvector_ty],		llvm_anyvector_ty],
[IntrNoMem]>;		[IntrNoMem]>;

// Signature for a predicated SVE floating-point reduction that also takes a
// scalar starting value.
//   Result:    an overloaded floating-point type (overloaded type 0).
//   Operand 0: an i1 predicate shaped like overloaded type 1, i.e. with the
//              same number of elements as the vector being reduced.
//   Operand 1: the initial value; LLVMMatchType<0> forces it to have exactly
//              the result type.
//   Operand 2: the overloaded vector to reduce (overloaded type 1).
// The intrinsic is declared IntrNoMem.
class AdvSIMD_SVE_FP_ReduceWithInit_Intrinsic
: Intrinsic<[llvm_anyfloat_ty],
[LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
LLVMMatchType<0>,
llvm_anyvector_ty],
[IntrNoMem]>;

class AdvSIMD_SVE_ShiftByImm_Intrinsic		class AdvSIMD_SVE_ShiftByImm_Intrinsic
: Intrinsic<[llvm_anyvector_ty],		: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,		[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,		LLVMMatchType<0>,
llvm_i32_ty],		llvm_i32_ty],
[IntrNoMem, ImmArg<2>]>;		[IntrNoMem, ImmArg<2>]>;

class AdvSIMD_SVE_ShiftWide_Intrinsic		class AdvSIMD_SVE_ShiftWide_Intrinsic
▲ Show 20 Lines • Show All 749 Lines • ▼ Show 20 Lines
def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;		def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;		def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic;		def int_aarch64_sve_ftssel_x : AdvSIMD_SVE_TSMUL_Intrinsic;

//		//
// Floating-point reductions		// Floating-point reductions
//		//

def int_aarch64_sve_fadda : AdvSIMD_SVE_FP_ReduceWithInit_Intrinsic;		def int_aarch64_sve_fadda : AdvSIMD_SVE_ReduceWithInit_Intrinsic;
def int_aarch64_sve_faddv : AdvSIMD_SVE_FP_Reduce_Intrinsic;		def int_aarch64_sve_faddv : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_fmaxv : AdvSIMD_SVE_FP_Reduce_Intrinsic;		def int_aarch64_sve_fmaxv : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_FP_Reduce_Intrinsic;		def int_aarch64_sve_fmaxnmv : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_fminv : AdvSIMD_SVE_FP_Reduce_Intrinsic;		def int_aarch64_sve_fminv : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_fminnmv : AdvSIMD_SVE_FP_Reduce_Intrinsic;		def int_aarch64_sve_fminnmv : AdvSIMD_SVE_Reduce_Intrinsic;

//		//
// Floating-point conversions		// Floating-point conversions
//		//

def int_aarch64_sve_fcvt : AdvSIMD_SVE_FCVT_Intrinsic;		def int_aarch64_sve_fcvt : AdvSIMD_SVE_FCVT_Intrinsic;
def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic;		def int_aarch64_sve_fcvtzs : AdvSIMD_SVE_FCVTZS_Intrinsic;
def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic;		def int_aarch64_sve_fcvtzu : AdvSIMD_SVE_FCVTZS_Intrinsic;
▲ Show 20 Lines • Show All 568 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Show First 20 Lines • Show All 205 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {

CLASTA_N,		CLASTA_N,
CLASTB_N,		CLASTB_N,
LASTA,		LASTA,
LASTB,		LASTB,
REV,		REV,
TBL,		TBL,

		// Floating-point reductions.
		FADDA_PRED,
		FADDV_PRED,
		FMAXV_PRED,
		FMAXNMV_PRED,
		FMINV_PRED,
		FMINNMV_PRED,

INSR,		INSR,
PTEST,		PTEST,
PTRUE,		PTRUE,

DUP_PRED,		DUP_PRED,
INDEX_VECTOR,		INDEX_VECTOR,

REINTERPRET_CAST,		REINTERPRET_CAST,
▲ Show 20 Lines • Show All 653 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 884 Lines • ▼ Show 20 Lines	if (Subtarget->hasSVE()) {
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {		for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
if (isTypeLegal(VT))		if (isTypeLegal(VT))
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
}		}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);		setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);		setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);

for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {		for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
		setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
		efriedmaUnsubmitted Not Done Reply Inline Actions Is there some reason to mark EXTRACT_VECTOR_ELT "Custom" when the type isn't legal? efriedma: Is there some reason to mark EXTRACT_VECTOR_ELT "Custom" when the type isn't legal?
		c-rhodesAuthorUnsubmitted Not Done Reply Inline Actions In the context of this patch no, but I guess it will make sense when legalizing `vector_extract_elt`. I've changed this to be legal only for legal FP types now that ISEL patterns are being used as you suggested. c-rhodes: In the context of this patch no, but I guess it will make sense when legalizing…

if (isTypeLegal(VT)) {		if (isTypeLegal(VT)) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
}		}
		efriedmaUnsubmitted Not Done Reply Inline Actions I think "Legal" is the default, so this line doesn't do anything. efriedma: I think "Legal" is the default, so this line doesn't do anything.
}		}
}		}

PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();		PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}		}

void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {		void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
assert(VT.isVector() && "VT should be a vector type");		assert(VT.isVector() && "VT should be a vector type");
▲ Show 20 Lines • Show All 452 Lines • ▼ Show 20 Lines	const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";		case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";		case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";		case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
case AArch64ISD::LASTA: return "AArch64ISD::LASTA";		case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
case AArch64ISD::LASTB: return "AArch64ISD::LASTB";		case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
case AArch64ISD::REV: return "AArch64ISD::REV";		case AArch64ISD::REV: return "AArch64ISD::REV";
case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";		case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";
case AArch64ISD::TBL: return "AArch64ISD::TBL";		case AArch64ISD::TBL: return "AArch64ISD::TBL";
		case AArch64ISD::FADDA_PRED: return "AArch64ISD::FADDA_PRED";
		case AArch64ISD::FADDV_PRED: return "AArch64ISD::FADDV_PRED";
		case AArch64ISD::FMAXV_PRED: return "AArch64ISD::FMAXV_PRED";
		case AArch64ISD::FMAXNMV_PRED: return "AArch64ISD::FMAXNMV_PRED";
		case AArch64ISD::FMINV_PRED: return "AArch64ISD::FMINV_PRED";
		case AArch64ISD::FMINNMV_PRED: return "AArch64ISD::FMINNMV_PRED";
case AArch64ISD::NOT: return "AArch64ISD::NOT";		case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";		case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";		case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";		case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
case AArch64ISD::TBZ: return "AArch64ISD::TBZ";		case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";		case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";		case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";		case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
▲ Show 20 Lines • Show All 7,016 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Re-narrow the resultant vector.		// Re-narrow the resultant vector.
return NarrowVector(Node, DAG);		return NarrowVector(Node, DAG);
}		}

SDValue		SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,		AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");		assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
		SDLoc DL(Op);

// Check for non-constant or out of range lane.		// Check for out of range lane.
EVT VT = Op.getOperand(0).getValueType();		EVT VT = Op.getOperand(0).getValueType();
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));		auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!CI \|\| CI->getZExtValue() >= VT.getVectorNumElements())
		if (VT.isScalableVector()) {
		if (CIdx) {
		// Leave type legalisation to common code.
		if (!isTypeLegal(VT))
		return SDValue();

		// Ignore extracts from unpacked vectors.
		if (VT.getSizeInBits() != AArch64::SVEBitsPerBlock)
		sdesmalenUnsubmitted Done Reply Inline Actions `getSizeInBits().getKnownMinSize()` sdesmalen: `getSizeInBits().getKnownMinSize()`
		return Op;
		sdesmalenUnsubmitted Not Done Reply Inline Actions Should this return `SDValue()` because VT is not yet legal? sdesmalen: Should this return `SDValue()` because VT is not yet legal?
		c-rhodesAuthorUnsubmitted Not Done Reply Inline Actions if `VT` weren't legal it should be caught by the check above and `SDValue()` returned. c-rhodes: if `VT` weren't legal it should be caught by the check above and `SDValue()` returned.

		// Ignore extracts whose index is beyond the range of NEON.
		if (CIdx->getZExtValue() >= VT.getVectorNumElements())
		sdesmalenUnsubmitted Done Reply Inline Actions getVectorNumElements will issue a warning for scalable vectors, use `getElementCount().Min` sdesmalen: getVectorNumElements will issue a warning for scalable vectors, use `getElementCount().Min`
		return Op;

		// ValueType for NEON part of the SVE input.
		EVT SubVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
		VT.getVectorNumElements());
		assert(isTypeLegal(SubVT) && "Unexpected Subtype during extract!");

		// The requested element is within the NEON part of the SVE register so
		// we can use more efficient NEON instructions to do the work.
		efriedmaUnsubmitted Not Done Reply Inline Actions Is there some reason we should do this transform as part of legalization, as opposed to just writing a few isel patterns? efriedma: Is there some reason we should do this transform as part of legalization, as opposed to just…
		c-rhodesAuthorUnsubmitted Not Done Reply Inline Actions No good reason that I'm aware of, I've replaced this with the following patterns: def : Pat<(vector_extract (nxv8f16 ZPR:$Zs), (i64 0)), (f16 (EXTRACT_SUBREG (v8f16 (EXTRACT_SUBREG ZPR:$Zs, zsub)), hsub))>; def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)), (f32 (EXTRACT_SUBREG (v4f32 (EXTRACT_SUBREG ZPR:$Zs, zsub)), ssub))>; def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)), (f64 (EXTRACT_SUBREG (v2f64 (EXTRACT_SUBREG ZPR:$Zs, zsub)), dsub))>; c-rhodes: No good reason that I'm aware of, I've replaced this with the following patterns: ``` def…
		auto Bottom128 =
		DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Op.getOperand(0),
		DAG.getConstant(0, DL, MVT::i64));
		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
		Bottom128, Op.getOperand(1));
		}

		// We only care about type legalisation from this point onwards.
		if (isTypeLegal(VT))
		return Op;

return SDValue();		return SDValue();
		}

		// Only SVE supports non constant extracts
		if (!CIdx \|\| (CIdx && CIdx->getZExtValue() >= VT.getVectorNumElements()))
		return SDValue();

// Insertion/extraction are legal for V128 types.		// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 \|\| VT == MVT::v8i16 \|\| VT == MVT::v4i32 \|\|		if (VT == MVT::v16i8 \|\| VT == MVT::v8i16 \|\| VT == MVT::v4i32 \|\|
VT == MVT::v2i64 \|\| VT == MVT::v4f32 \|\| VT == MVT::v2f64 \|\|		VT == MVT::v2i64 \|\| VT == MVT::v4f32 \|\| VT == MVT::v2f64 \|\|
VT == MVT::v8f16)		VT == MVT::v8f16)
return Op;		return Op;

if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&		if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)		VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
return SDValue();		return SDValue();

// For V64 types, we perform extraction by expanding the value		// For V64 types, we perform extraction by expanding the value
// to a V128 type and perform the extraction on that.		// to a V128 type and perform the extraction on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);		SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();		EVT WideTy = WideVec.getValueType();

EVT ExtrTy = WideTy.getVectorElementType();		EVT ExtrTy = WideTy.getVectorElementType();
if (ExtrTy == MVT::i16 \|\| ExtrTy == MVT::i8)		if (ExtrTy == MVT::i16 \|\| ExtrTy == MVT::i8)
ExtrTy = MVT::i32;		ExtrTy = MVT::i32;

// For extractions, we just return the result directly.		// For extractions, we just return the result directly.
▲ Show 20 Lines • Show All 2,818 Lines • ▼ Show 20 Lines	static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,

// Convert CC to integer based on requested condition.		// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.		// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);		SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);		SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);		return DAG.getZExtOrTrunc(Res, DL, VT);
}		}

		// Rewrites an SVE floating-point reduction intrinsic node as the target
		// node Opc followed by an extract of lane 0.
		//   N   - the intrinsic node; operand 1 is read as the predicate and
		//         operand 2 as the vector to reduce.
		//   Opc - opcode of the reduction node to create (the callers in
		//         performIntrinsicCombine pass AArch64ISD::*_PRED opcodes).
		//   DAG - the SelectionDAG in which the new nodes are created.
		// Returns an ISD::EXTRACT_VECTOR_ELT of element 0 of the new reduction
		// node, typed as N's first result.
		static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
		SelectionDAG &DAG) {
		SDLoc DL(N);
		EVT VT = N->getValueType(0);
		sdesmalenUnsubmitted Done Reply Inline Actions nit: this is only used once on line 11309, you can inline the variable. sdesmalen: nit: this is only used once on line 11309, you can inline the variable.

		SDValue Pred = N->getOperand(1);
		SDValue VecToReduce = N->getOperand(2);

		// The reduction node is created with the input vector's type, not the
		// scalar result type; the scalar is recovered by the extract below.
		EVT ReduceVT = VecToReduce.getValueType();
		SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);

		// SVE reductions set the whole vector register with the first element
		// containing the reduction result, which we'll now extract.
		SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Reduce, Zero);
		}

		// Rewrites an ordered SVE floating-point reduction intrinsic node (one that
		// carries an initial value) as the target node Opc followed by an extract
		// of lane 0.
		//   N   - the intrinsic node; operand 1 is read as the predicate, operand 2
		//         as the scalar initial value and operand 3 as the vector to reduce.
		//   Opc - opcode of the reduction node to create (the caller in
		//         performIntrinsicCombine passes AArch64ISD::FADDA_PRED).
		//   DAG - the SelectionDAG in which the new nodes are created.
		// Returns an ISD::EXTRACT_VECTOR_ELT of element 0 of the new reduction
		// node, typed as N's first result.
		static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
		SelectionDAG &DAG) {
		SDLoc DL(N);
		EVT VT = N->getValueType(0);
		sdesmalenUnsubmitted Done Reply Inline Actions same here. sdesmalen: same here.

		SDValue Pred = N->getOperand(1);
		SDValue InitVal = N->getOperand(2);
		SDValue VecToReduce = N->getOperand(3);
		EVT ReduceVT = VecToReduce.getValueType();

		// Ordered reductions use the first lane of the result vector as the
		// reduction's initial value.
		// The scalar is therefore placed in lane 0 of an otherwise undef vector of
		// the same type as the input; the same index constant is reused for the
		// final extract.
		SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
		InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
		DAG.getUNDEF(ReduceVT), InitVal, Zero);

		// Operand order of the new node: predicate, initial-value vector, input.
		SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);

		// SVE reductions set the whole vector register with the first element
		// containing the reduction result, which we'll now extract.
		return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Reduce, Zero);
		}

static SDValue performIntrinsicCombine(SDNode *N,		static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {		const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;		SelectionDAG &DAG = DCI.DAG;
unsigned IID = getIntrinsicID(N);		unsigned IID = getIntrinsicID(N);
switch (IID) {		switch (IID) {
default:		default:
break;		break;
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines	case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);		return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:		case Intrinsic::aarch64_sve_dup:
return LowerSVEIntrinsicDUP(N, DAG);		return LowerSVEIntrinsicDUP(N, DAG);
case Intrinsic::aarch64_sve_dup_x:		case Intrinsic::aarch64_sve_dup_x:
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),		return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
N->getOperand(1));		N->getOperand(1));
case Intrinsic::aarch64_sve_ext:		case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);		return LowerSVEIntrinsicEXT(N, DAG);
		case Intrinsic::aarch64_sve_fadda:
		return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
		case Intrinsic::aarch64_sve_faddv:
		return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
		case Intrinsic::aarch64_sve_fmaxnmv:
		return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
		case Intrinsic::aarch64_sve_fmaxv:
		return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
		case Intrinsic::aarch64_sve_fminnmv:
		return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
		case Intrinsic::aarch64_sve_fminv:
		return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
case Intrinsic::aarch64_sve_sel:		case Intrinsic::aarch64_sve_sel:
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),		return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));		N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:		case Intrinsic::aarch64_sve_cmpeq_wide:
return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,		return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,
false, DCI, DAG);		false, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:		case Intrinsic::aarch64_sve_cmpne_wide:
return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,		return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,
▲ Show 20 Lines • Show All 2,632 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,430 Lines • ▼ Show 20 Lines	multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
}		}

def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,		def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
V128, V128,		V128, V128,
V128_lo, VectorIndexH,		V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",		asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),		[(set (v4i32 V128:$Rd),
(OpNode (extract_high_v8i16 V128:$Rn),		(OpNode (extract_high_v8i16 V128:$Rn),
(extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),		(extract_high_v8i16 (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
VectorIndexH:$idx))))]> {		VectorIndexH:$idx)))))]> {
		sdesmalenUnsubmitted Done Reply Inline Actions Given the change to TableGen is no longer needed for this patch, I guess the changes to these patterns are no longer needed either? We'll still need the TableGen changes and the`extract_subvector` patterns at a later point, but I don't think there's currently a way to test that yet (unless we allow using shufflevector to extract a fixed-width vector from a scalable vector). sdesmalen: Given the change to TableGen is no longer needed for this patch, I guess the changes to these…

bits<3> idx;		bits<3> idx;
let Inst{11} = idx{2};		let Inst{11} = idx{2};
let Inst{21} = idx{1};		let Inst{21} = idx{1};
let Inst{20} = idx{0};		let Inst{20} = idx{0};
}		}

def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,		def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
Show All 9 Lines	multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
}		}

def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,		def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
V128, V128,		V128, V128,
V128, VectorIndexS,		V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",		asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),		[(set (v2i64 V128:$Rd),
(OpNode (extract_high_v4i32 V128:$Rn),		(OpNode (extract_high_v4i32 V128:$Rn),
(extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),		(extract_high_v4i32 (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
VectorIndexS:$idx))))]> {		VectorIndexS:$idx)))))]> {
bits<2> idx;		bits<2> idx;
let Inst{11} = idx{1};		let Inst{11} = idx{1};
let Inst{21} = idx{0};		let Inst{21} = idx{0};
}		}

def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,		def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,
FPR32Op, FPR16Op, V128_lo, VectorIndexH,		FPR32Op, FPR16Op, V128_lo, VectorIndexH,
asm, ".h", "", "", ".h", []> {		asm, ".h", "", "", ".h", []> {
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
V128, V128,		V128, V128,
V128_lo, VectorIndexH,		V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",		asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$dst),		[(set (v4i32 V128:$dst),
(Accum (v4i32 V128:$Rd),		(Accum (v4i32 V128:$Rd),
(v4i32 (int_aarch64_neon_sqdmull		(v4i32 (int_aarch64_neon_sqdmull
(extract_high_v8i16 V128:$Rn),		(extract_high_v8i16 V128:$Rn),
(extract_high_v8i16		(extract_high_v8i16
(AArch64duplane16 (v8i16 V128_lo:$Rm),		(v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
VectorIndexH:$idx))))))]> {		VectorIndexH:$idx)))))))]> {
bits<3> idx;		bits<3> idx;
let Inst{11} = idx{2};		let Inst{11} = idx{2};
let Inst{21} = idx{1};		let Inst{21} = idx{1};
let Inst{20} = idx{0};		let Inst{20} = idx{0};
}		}

def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,		def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
V128, V64,		V128, V64,
Show All 14 Lines	def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
V128, V128,		V128, V128,
V128, VectorIndexS,		V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",		asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$dst),		[(set (v2i64 V128:$dst),
(Accum (v2i64 V128:$Rd),		(Accum (v2i64 V128:$Rd),
(v2i64 (int_aarch64_neon_sqdmull		(v2i64 (int_aarch64_neon_sqdmull
(extract_high_v4i32 V128:$Rn),		(extract_high_v4i32 V128:$Rn),
(extract_high_v4i32		(extract_high_v4i32
(AArch64duplane32 (v4i32 V128:$Rm),		(v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
VectorIndexS:$idx))))))]> {		VectorIndexS:$idx)))))))]> {
bits<2> idx;		bits<2> idx;
let Inst{11} = idx{1};		let Inst{11} = idx{1};
let Inst{21} = idx{0};		let Inst{21} = idx{0};
}		}

def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,		def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
FPR32Op, FPR16Op, V128_lo, VectorIndexH,		FPR32Op, FPR16Op, V128_lo, VectorIndexH,
asm, ".h", "", "", ".h", []> {		asm, ".h", "", "", ".h", []> {
Show All 37 Lines	multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
}		}

def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,		def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
V128, V128,		V128, V128,
V128_lo, VectorIndexH,		V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",		asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$Rd),		[(set (v4i32 V128:$Rd),
(OpNode (extract_high_v8i16 V128:$Rn),		(OpNode (extract_high_v8i16 V128:$Rn),
(extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),		(extract_high_v8i16 (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
VectorIndexH:$idx))))]> {		VectorIndexH:$idx)))))]> {

bits<3> idx;		bits<3> idx;
let Inst{11} = idx{2};		let Inst{11} = idx{2};
let Inst{21} = idx{1};		let Inst{21} = idx{1};
let Inst{20} = idx{0};		let Inst{20} = idx{0};
}		}

def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,		def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
Show All 9 Lines	multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
}		}

def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,		def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
V128, V128,		V128, V128,
V128, VectorIndexS,		V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",		asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$Rd),		[(set (v2i64 V128:$Rd),
(OpNode (extract_high_v4i32 V128:$Rn),		(OpNode (extract_high_v4i32 V128:$Rn),
(extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),		(extract_high_v4i32 (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
VectorIndexS:$idx))))]> {		VectorIndexS:$idx)))))]> {
bits<2> idx;		bits<2> idx;
let Inst{11} = idx{1};		let Inst{11} = idx{1};
let Inst{21} = idx{0};		let Inst{21} = idx{0};
}		}
}		}
}		}

multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,		multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
Show All 14 Lines	multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,

def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,		def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
V128, V128,		V128, V128,
V128_lo, VectorIndexH,		V128_lo, VectorIndexH,
asm#"2", ".4s", ".4s", ".8h", ".h",		asm#"2", ".4s", ".4s", ".8h", ".h",
[(set (v4i32 V128:$dst),		[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd),		(OpNode (v4i32 V128:$Rd),
(extract_high_v8i16 V128:$Rn),		(extract_high_v8i16 V128:$Rn),
(extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),		(extract_high_v8i16 (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
VectorIndexH:$idx))))]> {		VectorIndexH:$idx)))))]> {
bits<3> idx;		bits<3> idx;
let Inst{11} = idx{2};		let Inst{11} = idx{2};
let Inst{21} = idx{1};		let Inst{21} = idx{1};
let Inst{20} = idx{0};		let Inst{20} = idx{0};
}		}

def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,		def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
V128, V64,		V128, V64,
Show All 9 Lines	multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,

def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,		def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
V128, V128,		V128, V128,
V128, VectorIndexS,		V128, VectorIndexS,
asm#"2", ".2d", ".2d", ".4s", ".s",		asm#"2", ".2d", ".2d", ".4s", ".s",
[(set (v2i64 V128:$dst),		[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd),		(OpNode (v2i64 V128:$Rd),
(extract_high_v4i32 V128:$Rn),		(extract_high_v4i32 V128:$Rn),
(extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),		(extract_high_v4i32 (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
VectorIndexS:$idx))))]> {		VectorIndexS:$idx)))))]> {
bits<2> idx;		bits<2> idx;
let Inst{11} = idx{1};		let Inst{11} = idx{1};
let Inst{21} = idx{0};		let Inst{21} = idx{0};
}		}
}		}
}		}

//----------------------------------------------------------------------------		//----------------------------------------------------------------------------
▲ Show 20 Lines • Show All 2,438 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64InstrInfo.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,164 Lines • ▼ Show 20 Lines	def : Pat<(v8i16 (opNode V128:$Rn)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),		(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;		(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),		def : Pat<(v4i32 (opNode V128:$Rn)),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),		(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;		(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;


// If none did, fallback to the explicit patterns, consuming the vector_extract.		// If none did, fallback to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),		def : Pat<(i32 (vector_extract (v16i8 (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
(i32 0)), (i64 0))),		(i32 0))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),		(EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),		(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
bsub), ssub)>;		bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),		def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),		(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
bsub), ssub)>;		bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,		def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
(v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),		(v4i16 (opNode V64:$Rn)), (i32 0))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),		(EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),		(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
hsub), ssub)>;		hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),		def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),		(EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),		(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
hsub), ssub)>;		hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),		def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),		(EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),		(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
ssub), ssub)>;		ssub), ssub)>;

}		}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,		multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
SDPatternOperator opNode>		SDPatternOperator opNode>
: SIMDAcrossLanesIntrinsic<baseOpc, opNode> {		: SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it as smov already		// If there is a sign extension after this intrinsic, consume it as smov already
// performed it		// performed it
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,		def : Pat<(i32 (sext_inreg (i32 (vector_extract (v16i8 (insert_subvector undef,
(opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),		(opNode (v8i8 V64:$Rn)), (i32 0))), (i64 0))), i8)),
(i32 (SMOVvi8to32		(i32 (SMOVvi8to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),		(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
(i64 0)))>;		(i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract		def : Pat<(i32 (sext_inreg (i32 (vector_extract
(opNode (v16i8 V128:$Rn)), (i64 0))), i8)),		(opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
(i32 (SMOVvi8to32		(i32 (SMOVvi8to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),		(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
(i64 0)))>;		(i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,		def : Pat<(i32 (sext_inreg (i32 (vector_extract (v8i16 (insert_subvector undef,
(opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),		(opNode (v4i16 V64:$Rn)), (i32 0))), (i64 0))), i16)),
(i32 (SMOVvi16to32		(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),		(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;		(i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract		def : Pat<(i32 (sext_inreg (i32 (vector_extract
(opNode (v8i16 V128:$Rn)), (i64 0))), i16)),		(opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32		(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),		(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
(i64 0)))>;		(i64 0)))>;
}		}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,		multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
SDPatternOperator opNode>		SDPatternOperator opNode>
: SIMDAcrossLanesIntrinsic<baseOpc, opNode> {		: SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually		// If there is a masking operation keeping only what has been actually
// generated, consume it.		// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,		def : Pat<(i32 (and (i32 (vector_extract (v16i8 (insert_subvector undef,
(opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),		(opNode (v8i8 V64:$Rn)), (i32 0))), (i64 0))), maski8_or_more)),
(i32 (EXTRACT_SUBREG		(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),		(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
ssub))>;		ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),		def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
maski8_or_more)),		maski8_or_more)),
(i32 (EXTRACT_SUBREG		(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),		(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;		ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,		def : Pat<(i32 (and (i32 (vector_extract (v8i16 (insert_subvector undef,
(opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),		(opNode (v4i16 V64:$Rn)), (i32 0))), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG		(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),		(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;		ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),		def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
maski16_or_more)),		maski16_or_more)),
(i32 (EXTRACT_SUBREG		(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),		(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
▲ Show 20 Lines • Show All 2,112 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines
def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;		def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;

// SVE DEC		// SVE DEC
def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -8>">;		def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -8>">;
def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -4>">;		def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -4>">;
def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -2>">;		def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, -2>">;

def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;		def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>;
		def AArch64faddv_pred : SDNode<"AArch64ISD::FADDV_PRED", SDT_AArch64Reduce>;
		def AArch64fmaxv_pred : SDNode<"AArch64ISD::FMAXV_PRED", SDT_AArch64Reduce>;
		def AArch64fmaxnmv_pred : SDNode<"AArch64ISD::FMAXNMV_PRED", SDT_AArch64Reduce>;
		def AArch64fminv_pred : SDNode<"AArch64ISD::FMINV_PRED", SDT_AArch64Reduce>;
		def AArch64fminnmv_pred : SDNode<"AArch64ISD::FMINNMV_PRED", SDT_AArch64Reduce>;
def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;		def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>;
def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;		def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>;
def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;		def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>;
def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;		def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>;
def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;		def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>;
def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;		def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>;
def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;		def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>;
def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;		def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>;
def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;		def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>;

def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;		def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;		def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;		def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
		def AArch64fadda_pred : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;

def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;		def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;		def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;

def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;		def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;		def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;

def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>;		def SDT_AArch64DUP_PRED : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCVecEltisVT<2,i1>]>;
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines	let Predicates = [HasSVE] in {

defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;		defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;		defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;

defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;		defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;		defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;

// SVE floating point reductions.		// SVE floating point reductions.
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", int_aarch64_sve_fadda>;		defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_pred>;
defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", int_aarch64_sve_faddv>;		defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_pred>;
defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", int_aarch64_sve_fmaxnmv>;		defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_pred>;
defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", int_aarch64_sve_fminnmv>;		defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_pred>;
defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", int_aarch64_sve_fmaxv>;		defm FMAXV_VPZ : sve_fp_fast_red<0b110, "fmaxv", AArch64fmaxv_pred>;
defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", int_aarch64_sve_fminv>;		defm FMINV_VPZ : sve_fp_fast_red<0b111, "fminv", AArch64fminv_pred>;

// Splat immediate (unpredicated)		// Splat immediate (unpredicated)
defm DUP_ZI : sve_int_dup_imm<"dup">;		defm DUP_ZI : sve_int_dup_imm<"dup">;
		efriedma (inline comment, done): Maybe worth explaining why you need two EXTRACT_SUBREG, as opposed to just one. It would be nice to handle non-zero indexes, but I guess that can wait for a followup.
defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;		defm FDUP_ZI : sve_int_dup_fpimm<"fdup">;
defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">;		defm DUPM_ZI : sve_int_dup_mask_imm<"dupm">;

// Splat immediate (predicated)		// Splat immediate (predicated)
defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">;		defm CPY_ZPmI : sve_int_dup_imm_pred_merge<"cpy">;
defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">;		defm CPY_ZPzI : sve_int_dup_imm_pred_zero<"cpy">;
defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;		defm FCPY_ZPmI : sve_int_dup_fpimm_pred<"fcpy">;

▲ Show 20 Lines • Show All 1,054 Lines • ▼ Show 20 Lines	def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
(AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;		(AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),		def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
(AND_PPzPP (PTRUE_H 31), PPR:$Ps1, PPR:$Ps2)>;		(AND_PPzPP (PTRUE_H 31), PPR:$Ps1, PPR:$Ps2)>;
def : Pat<(nxv4i1 (and PPR:$Ps1, PPR:$Ps2)),		def : Pat<(nxv4i1 (and PPR:$Ps1, PPR:$Ps2)),
(AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>;		(AND_PPzPP (PTRUE_S 31), PPR:$Ps1, PPR:$Ps2)>;
def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)),		def : Pat<(nxv2i1 (and PPR:$Ps1, PPR:$Ps2)),
(AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;		(AND_PPzPP (PTRUE_D 31), PPR:$Ps1, PPR:$Ps2)>;

		def : Pat<(v8f16 (extract_subvector ZPR:$Zs, (i64 0))),
		(EXTRACT_SUBREG ZPR:$Zs, zsub)>;
		def : Pat<(v4f32 (extract_subvector ZPR:$Zs, (i64 0))),
		(EXTRACT_SUBREG ZPR:$Zs, zsub)>;
		def : Pat<(v2f64 (extract_subvector ZPR:$Zs, (i64 0))),
		(EXTRACT_SUBREG ZPR:$Zs, zsub)>;

// Add more complex addressing modes here as required		// Add more complex addressing modes here as required
multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load,		multiclass pred_load<ValueType Ty, ValueType PredTy, SDPatternOperator Load,
Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {		Instruction RegRegInst, Instruction RegImmInst, ComplexPattern AddrCP> {
// reg + reg		// reg + reg
let AddedComplexity = 1 in {		let AddedComplexity = 1 in {
def _reg_reg_z : Pat<(Ty (Load (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp), (SVEDup0Undef))),		def _reg_reg_z : Pat<(Ty (Load (AddrCP GPR64:$base, GPR64:$offset), (PredTy PPR:$gp), (SVEDup0Undef))),
(RegRegInst PPR:$gp, GPR64:$base, GPR64:$offset)>;		(RegRegInst PPR:$gp, GPR64:$base, GPR64:$offset)>;
}		}
▲ Show 20 Lines • Show All 648 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 4,438 Lines • ▼ Show 20 Lines

	}			}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// SVE Floating Point Fast Reduction Group			// SVE Floating Point Fast Reduction Group
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,			class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
	ZPRRegOp zprty, RegisterClass dstRegClass>			ZPRRegOp zprty, FPRasZPROperand dstOpType>
	: I<(outs dstRegClass:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),			: I<(outs dstOpType:$Vd), (ins PPR3bAny:$Pg, zprty:$Zn),
	asm, "\t$Vd, $Pg, $Zn",			asm, "\t$Vd, $Pg, $Zn",
	"",			"",
	[]>, Sched<[]> {			[]>, Sched<[]> {
	bits<5> Zn;			bits<5> Zn;
	bits<5> Vd;			bits<5> Vd;
	bits<3> Pg;			bits<3> Pg;
	let Inst{31-24} = 0b01100101;			let Inst{31-24} = 0b01100101;
	let Inst{23-22} = sz;			let Inst{23-22} = sz;
	let Inst{21-19} = 0b000;			let Inst{21-19} = 0b000;
	let Inst{18-16} = opc;			let Inst{18-16} = opc;
	let Inst{15-13} = 0b001;			let Inst{15-13} = 0b001;
	let Inst{12-10} = Pg;			let Inst{12-10} = Pg;
	let Inst{9-5} = Zn;			let Inst{9-5} = Zn;
	let Inst{4-0} = Vd;			let Inst{4-0} = Vd;
	}			}

	multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {			multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
	def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16>;			def _H : sve_fp_fast_red<0b01, opc, asm, ZPR16, FPR16asZPR>;
	def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32>;			def _S : sve_fp_fast_red<0b10, opc, asm, ZPR32, FPR32asZPR>;
	def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64>;			def _D : sve_fp_fast_red<0b11, opc, asm, ZPR64, FPR64asZPR>;

	def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;			def : SVE_2_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
	def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;			def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
	def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;			def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
	}			}


	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// SVE Floating Point Accumulating Reduction Group			// SVE Floating Point Accumulating Reduction Group
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,			class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
	ZPRRegOp zprty, RegisterClass dstRegClass>			ZPRRegOp zprty, FPRasZPROperand dstOpType>
	: I<(outs dstRegClass:$Vdn), (ins PPR3bAny:$Pg, dstRegClass:$_Vdn, zprty:$Zm),			: I<(outs dstOpType:$Vdn), (ins PPR3bAny:$Pg, dstOpType:$_Vdn, zprty:$Zm),
	asm, "\t$Vdn, $Pg, $_Vdn, $Zm",			asm, "\t$Vdn, $Pg, $_Vdn, $Zm",
	"",			"",
	[]>,			[]>,
	Sched<[]> {			Sched<[]> {
	bits<3> Pg;			bits<3> Pg;
	bits<5> Vdn;			bits<5> Vdn;
	bits<5> Zm;			bits<5> Zm;
	let Inst{31-24} = 0b01100101;			let Inst{31-24} = 0b01100101;
	let Inst{23-22} = sz;			let Inst{23-22} = sz;
	let Inst{21-19} = 0b011;			let Inst{21-19} = 0b011;
	let Inst{18-16} = opc;			let Inst{18-16} = opc;
	let Inst{15-13} = 0b001;			let Inst{15-13} = 0b001;
	let Inst{12-10} = Pg;			let Inst{12-10} = Pg;
	let Inst{9-5} = Zm;			let Inst{9-5} = Zm;
	let Inst{4-0} = Vdn;			let Inst{4-0} = Vdn;

	let Constraints = "$Vdn = $_Vdn";			let Constraints = "$Vdn = $_Vdn";
	}			}

	multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {			multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
	def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16>;			def _H : sve_fp_2op_p_vd<0b01, opc, asm, ZPR16, FPR16asZPR>;
	def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32>;			def _S : sve_fp_2op_p_vd<0b10, opc, asm, ZPR32, FPR32asZPR>;
	def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64>;			def _D : sve_fp_2op_p_vd<0b11, opc, asm, ZPR64, FPR64asZPR>;

	def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;			def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
	def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;			def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
	def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;			def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
	}			}

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// SVE Floating Point Compare - Vectors Group			// SVE Floating Point Compare - Vectors Group
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,			class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
	ZPRRegOp zprty>			ZPRRegOp zprty>
	▲ Show 20 Lines • Show All 3,021 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

	; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s \| FileCheck %s			; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s \| FileCheck %s
				sdesmalen (inline comment, not done): Why is this change required?
				c-rhodes (author, inline comment, done): To remove `// kill: def $d0 killed $d0 killed $z0` from the output.

	;			;
	; FADDA			; FADDA
	;			;

	define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {			define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {
	; CHECK-LABEL: fadda_f16:			; CHECK-LABEL: fadda_f16:
	; CHECK: fadda h0, p0, h0, z1.h			; CHECK: fadda h0, p0, h0, z1.h
	▲ Show 20 Lines • Show All 205 Lines • Show Last 20 Lines

llvm/utils/TableGen/CodeGenDAGPatterns.cpp

Show First 20 Lines • Show All 617 Lines • ▼ Show 20 Lines	bool TypeInfer::EnforceVectorSubVectorTypeIs(TypeSetByHwMode &Vec,
ValidateOnExit _1(Vec, this), _2(Sub, this);		ValidateOnExit _1(Vec, this), _2(Sub, this);
if (TP.hasError())		if (TP.hasError())
return false;		return false;

/// Return true if B is a suB-vector of P, i.e. P is a suPer-vector of B.		/// Return true if B is a suB-vector of P, i.e. P is a suPer-vector of B.
auto IsSubVec = [](MVT B, MVT P) -> bool {		auto IsSubVec = [](MVT B, MVT P) -> bool {
if (!B.isVector() \|\| !P.isVector())		if (!B.isVector() \|\| !P.isVector())
return false;		return false;
// Logically a <4 x i32> is a valid subvector of <n x 4 x i32>
// but until there are obvious use-cases for this, keep the
// types separate.
if (B.isScalableVector() != P.isScalableVector())
return false;
if (B.getVectorElementType() != P.getVectorElementType())		if (B.getVectorElementType() != P.getVectorElementType())
return false;		return false;
		// Logically <k x i32> is a valid subvector of <n x m x i32> when
		efriedma (inline comment, not done): Do you mean `<vscale x m x i32>`?
		c-rhodes (author, inline comment, not done): This change is no longer needed since removing these patterns: `def : Pat<(v8f16 (extract_subvector ZPR:$Zs, (i64 0))), (EXTRACT_SUBREG ZPR:$Zs, zsub)>;` `def : Pat<(v4f32 (extract_subvector ZPR:$Zs, (i64 0))), (EXTRACT_SUBREG ZPR:$Zs, zsub)>;` `def : Pat<(v2f64 (extract_subvector ZPR:$Zs, (i64 0))), (EXTRACT_SUBREG ZPR:$Zs, zsub)>;` which were no longer needed after replacing the `LowerEXTRACT_VECTOR_ELT` with ISEL patterns as you suggested.
		// k <= m.
		if (!B.isScalableVector() && P.isScalableVector())
		return (B.getVectorNumElements() <= P.getVectorNumElements());
		efriedma (inline comment, not done): Please don't use getVectorNumElements() on scalable vectors.
return B.getVectorNumElements() < P.getVectorNumElements();		return B.getVectorNumElements() < P.getVectorNumElements();
};		};

/// Return true if S has no element (vector type) that T is a sub-vector of,		/// Return true if S has no element (vector type) that T is a sub-vector of,
/// i.e. has the same element type as T and more elements.		/// i.e. has the same element type as T and more elements.
auto NoSubV = [&IsSubVec](const TypeSetByHwMode::SetType &S, MVT T) -> bool {		auto NoSubV = [&IsSubVec](const TypeSetByHwMode::SetType &S, MVT T) -> bool {
for (auto I : S)		for (auto I : S)
if (IsSubVec(T, I))		if (IsSubVec(T, I))
▲ Show 20 Lines • Show All 4,089 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Custom lowering of floating-point reductions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 259845

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64InstrInfo.td

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

llvm/utils/TableGen/CodeGenDAGPatterns.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Custom lowering of floating-point reductions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 259845

llvm/include/llvm/IR/IntrinsicsAArch64.td

llvm/lib/Target/AArch64/AArch64ISelLowering.h

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64InstrFormats.td

llvm/lib/Target/AArch64/AArch64InstrInfo.td

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll

llvm/utils/TableGen/CodeGenDAGPatterns.cpp

[AArch64][SVE] Custom lowering of floating-point reductions
ClosedPublic