Diff 441696

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

	Show First 20 Lines • Show All 6,647 Lines • ▼ Show 20 Lines
	/// FillWithZeroes specifies that the vector should be widened with zeroes.			/// FillWithZeroes specifies that the vector should be widened with zeroes.
	SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,			SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
	bool FillWithZeroes) {			bool FillWithZeroes) {
	// Note that InOp might have been widened so it might already have			// Note that InOp might have been widened so it might already have
	// the right width or it might need be narrowed.			// the right width or it might need be narrowed.
	EVT InVT = InOp.getValueType();			EVT InVT = InOp.getValueType();
	assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&			assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
	"input and widen element type must match");			"input and widen element type must match");
	assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&			assert(InVT.isScalableVector() == NVT.isScalableVector() &&
	"cannot modify scalable vectors in this way");			"cannot modify scalable vectors in this way");
				efriedmaUnsubmitted Done Reply Inline Actions How are these changes related? I would have thought that if you're making v1i1 legal, that would avoid triggering any target-independent legalization infrastructure. efriedma: How are these changes related? I would have thought that if you're making v1i1 legal, that…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions They're needed for the existing `<vscale x 1 x i1>` test-case in `llvm/test/CodeGen/AArch64/sve-select.ll`: select <vscale x 1 x i1> %p, <vscale x 1 x i64> %a, <vscale x 1 x i64> %dst Before, `%p` was assumed to be widened. But now, only `%a` and `%dst` need widening, so it calls `ModifyToType` to widen `<vscale x 1 x i1> %p` as well to `<vscale x 2 x i1>`. This leads to the extra `uzp1`. sdesmalen: They're needed for the existing `<vscale x 1 x i1>` test-case in `llvm/test/CodeGen/AArch64/sve…
				efriedmaUnsubmitted Done Reply Inline Actions Oh, that makes sense. And I guess that explains why the uzp1 shows up in sel_nxv1i64. efriedma: Oh, that makes sense. And I guess that explains why the uzp1 shows up in sel_nxv1i64.
	SDLoc dl(InOp);			SDLoc dl(InOp);

	// Check if InOp already has the right width.			// Check if InOp already has the right width.
	if (InVT == NVT)			if (InVT == NVT)
	return InOp;			return InOp;

	unsigned InNumElts = InVT.getVectorNumElements();			ElementCount InEC = InVT.getVectorElementCount();
	unsigned WidenNumElts = NVT.getVectorNumElements();			ElementCount WidenEC = NVT.getVectorElementCount();
	if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {			if (WidenEC.hasKnownScalarFactor(InEC)) {
	unsigned NumConcat = WidenNumElts / InNumElts;			unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
				paulwalker-armUnsubmitted Done Reply Inline Actions A few weeks back I extended TypeSize.h to include methods that allow us to handle such cases without resorting to `getVectorMinNumElements()`. See `hasKnownScalarFactor` and `getKnownScalarFactor`. The `>` code can use `NVT.bitsGT(InVT)`. paulwalker-arm: A few weeks back I extended TypeSize.h to include methods that allow us to handle such cases…
				sdesmalenAuthorUnsubmitted Done Reply Inline Actions Thanks for pointing me to those interfaces, I missed that patch while I was OoO. For the comparison, I believe that I can remove it entirely because X % Y where Y > X is always X and thus never 0. sdesmalen: Thanks for pointing me to those interfaces, I missed that patch while I was OoO. For the…
	SmallVector<SDValue, 16> Ops(NumConcat);			SmallVector<SDValue, 16> Ops(NumConcat);
	SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :			SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) :
	DAG.getUNDEF(InVT);			DAG.getUNDEF(InVT);
	Ops[0] = InOp;			Ops[0] = InOp;
	for (unsigned i = 1; i != NumConcat; ++i)			for (unsigned i = 1; i != NumConcat; ++i)
	Ops[i] = FillVal;			Ops[i] = FillVal;

	return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);			return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
	}			}

	if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)			if (InEC.hasKnownScalarFactor(WidenEC))
	return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,			return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
	DAG.getVectorIdxConstant(0, dl));			DAG.getVectorIdxConstant(0, dl));

				assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
				"Scalable vectors should have been handled already.");

				unsigned InNumElts = InEC.getFixedValue();
				unsigned WidenNumElts = WidenEC.getFixedValue();

	// Fall back to extract and build.			// Fall back to extract and build.
	SmallVector<SDValue, 16> Ops(WidenNumElts);			SmallVector<SDValue, 16> Ops(WidenNumElts);
	EVT EltVT = NVT.getVectorElementType();			EVT EltVT = NVT.getVectorElementType();
	unsigned MinNumElts = std::min(WidenNumElts, InNumElts);			unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
	unsigned Idx;			unsigned Idx;
	for (Idx = 0; Idx < MinNumElts; ++Idx)			for (Idx = 0; Idx < MinNumElts; ++Idx)
	Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,			Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
	DAG.getVectorIdxConstant(Idx, dl));			DAG.getVectorIdxConstant(Idx, dl));

	SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :			SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
	DAG.getUNDEF(EltVT);			DAG.getUNDEF(EltVT);
	for ( ; Idx < WidenNumElts; ++Idx)			for ( ; Idx < WidenNumElts; ++Idx)
	Ops[Idx] = FillVal;			Ops[Idx] = FillVal;
	return DAG.getBuildVector(NVT, dl, Ops);			return DAG.getBuildVector(NVT, dl, Ops);
	}			}

llvm/lib/Target/AArch64/AArch64CallingConvention.td

Show First 20 Lines • Show All 76 Lines • ▼ Show 20 Lines	def CC_AArch64_AAPCS : CallingConv<[

CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,		CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],		nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,		CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,		CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],		nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCPassIndirect<i64>>,		CCPassIndirect<i64>>,

CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],		CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
CCAssignToReg<[P0, P1, P2, P3]>>,		CCAssignToReg<[P0, P1, P2, P3]>>,
CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],		CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
CCPassIndirect<i64>>,		CCPassIndirect<i64>>,
		paulwalker-armUnsubmitted Not Done Reply Inline Actions This is not relevant to this patch just observational. For my own education do you know what this means? If I was to guess I'd say "we can pass such parameter types through memory"? If correct then I believe we only support nxv16i1 types when going through memory. paulwalker-arm: This is not relevant to this patch just observational. For my own education do you know what…
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Yes, that's what it means. Fortunately it's not something we'll currently hit from user code, because that will always use `<vscale x 16 x i1>` types for svbool_t, but I guess we should clean this up at some point. sdesmalen: Yes, that's what it means. Fortunately it's not something we'll currently hit from user code…

// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,		// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.		// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,		CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,		CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
// i128 is split to two i64s, we can't fit half to register X7.		// i128 is split to two i64s, we can't fit half to register X7.
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],		CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
[X0, X1, X3, X5]>>>,		[X0, X1, X3, X5]>>>,
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines	CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,		CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],		CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,		CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,

CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,		CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],		nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,		CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,

CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],		CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
CCAssignToReg<[P0, P1, P2, P3]>>		CCAssignToReg<[P0, P1, P2, P3]>>
]>;		]>;

// Vararg functions on windows pass floats in integer registers		// Vararg functions on windows pass floats in integer registers
let Entry = 1 in		let Entry = 1 in
def CC_AArch64_Win64_VarArg : CallingConv<[		def CC_AArch64_Win64_VarArg : CallingConv<[
CCIfType<[f16, bf16], CCBitConvertToType<i16>>,		CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
CCIfType<[f32], CCBitConvertToType<i32>>,		CCIfType<[f32], CCBitConvertToType<i32>>,
▲ Show 20 Lines • Show All 340 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 286 Lines • ▼ Show 20 Lines	if (Subtarget->hasNEON()) {
addQRTypeForNEON(MVT::v2i64);		addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);		addQRTypeForNEON(MVT::v8f16);
if (Subtarget->hasBF16())		if (Subtarget->hasBF16())
addQRTypeForNEON(MVT::v8bf16);		addQRTypeForNEON(MVT::v8bf16);
}		}

if (Subtarget->hasSVE() \|\| Subtarget->hasSME()) {		if (Subtarget->hasSVE() \|\| Subtarget->hasSME()) {
// Add legal sve predicate types		// Add legal sve predicate types
		addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);		addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);		addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);		addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);		addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);

// Add legal sve data types		// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);		addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
▲ Show 20 Lines • Show All 848 Lines • ▼ Show 20 Lines	for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})		MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
setOperationAction(ISD::BITCAST, VT, Custom);		setOperationAction(ISD::BITCAST, VT, Custom);

for (auto VT :		for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,		{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })		MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);		setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);

for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {		for (auto VT :
		{MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);		setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);		setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);		setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);		setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);		setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);		setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);		setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);		setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
▲ Show 20 Lines • Show All 3,154 Lines • ▼ Show 20 Lines	return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,		DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),		DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,		DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));		DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}		}

static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,		static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {		int Pattern) {
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,		return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));		DAG.getTargetConstant(Pattern, DL, MVT::i32));
}		}

static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {		static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
		paulwalker-armUnsubmitted Done Reply Inline Actions Strictly speaking this is not correct for all predicate patterns. Do you need a truly generic implementation of PTRUE.Q or do you only care about specific cases? paulwalker-arm: Strictly speaking this is not correct for all predicate patterns. Do you need a truly generic…
SDLoc DL(Op);		SDLoc DL(Op);
EVT OutVT = Op.getValueType();		EVT OutVT = Op.getValueType();
SDValue InOp = Op.getOperand(1);		SDValue InOp = Op.getOperand(1);
EVT InVT = InOp.getValueType();		EVT InVT = InOp.getValueType();

// Return the operand if the cast isn't changing type,		// Return the operand if the cast isn't changing type,
// i.e. <n x 16 x i1> -> <n x 16 x i1>		// i.e. <n x 16 x i1> -> <n x 16 x i1>
if (InVT == OutVT)		if (InVT == OutVT)
▲ Show 20 Lines • Show All 327 Lines • ▼ Show 20 Lines	return DAG.getNode(
Op.getOperand(2), Op.getOperand(3),		Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),		DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));		Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtw:		case Intrinsic::aarch64_sve_uxtw:
return DAG.getNode(		return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),		AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),		Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),		DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));		Op.getOperand(1));

case Intrinsic::localaddress: {		case Intrinsic::localaddress: {
		paulwalker-armUnsubmitted Done Reply Inline Actions Do we really need this or can we fix the cause, which I'm presuming is `LowerSPLAT_VECTOR`. I'd rather not give the impression we're extending the SVE intrinsics for types they're not intended to support. paulwalker-arm: Do we really need this or can we fix the cause, which I'm presuming is `LowerSPLAT_VECTOR`. I'd…
const auto &MF = DAG.getMachineFunction();		const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();		const auto *RegInfo = Subtarget->getRegisterInfo();
unsigned Reg = RegInfo->getLocalAddressRegister(MF);		unsigned Reg = RegInfo->getLocalAddressRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,		return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
Op.getSimpleValueType());		Op.getSimpleValueType());
}		}

case Intrinsic::eh_recoverfp: {		case Intrinsic::eh_recoverfp: {
▲ Show 20 Lines • Show All 5,857 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// There isn't a natural way to handle the general i1 case, so we use some		// There isn't a natural way to handle the general i1 case, so we use some
// trickery with whilelo.		// trickery with whilelo.
SDLoc DL(Op);		SDLoc DL(Op);
SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);		SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,		SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));		DAG.getValueType(MVT::i1));
SDValue ID =		SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);		DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID,		SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
DAG.getConstant(0, DL, MVT::i64), SplatVal);		if (VT == MVT::nxv1i1)
		return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::nxv1i1,
		paulwalker-armUnsubmitted Done Reply Inline Actions Rather than using `PUNPKLO` directly can you use `ISD::EXTRACT_SUBVECTOR`? paulwalker-arm: Rather than using `PUNPKLO` directly can you use `ISD::EXTRACT_SUBVECTOR`?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions Good point! sdesmalen: Good point!
		DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv2i1, ID,
		Zero, SplatVal),
		Zero);
		return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID, Zero, SplatVal);
}		}

SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,		SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
SDLoc DL(Op);		SDLoc DL(Op);

EVT VT = Op.getValueType();		EVT VT = Op.getValueType();
if (!isTypeLegal(VT) \|\| !VT.isScalableVector())		if (!isTypeLegal(VT) \|\| !VT.isScalableVector())
▲ Show 20 Lines • Show All 11,004 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

	Show First 20 Lines • Show All 865 Lines • ▼ Show 20 Lines
	class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,			class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
	RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}			RegisterClass RC> : SVERegOp<Suffix, C, Size, RC> {}

	//******************************************************************************			//******************************************************************************

	// SVE predicate register classes.			// SVE predicate register classes.
	class PPRClass<int lastreg> : RegisterClass<			class PPRClass<int lastreg> : RegisterClass<
	"AArch64",			"AArch64",
	[ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16,			[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
	(sequence "P%u", 0, lastreg)> {			(sequence "P%u", 0, lastreg)> {
	let Size = 16;			let Size = 16;
	}			}

	def PPR : PPRClass<15>;			def PPR : PPRClass<15>;
	def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class.			def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class.

	class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass {			class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass {
	▲ Show 20 Lines • Show All 516 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Show First 20 Lines • Show All 742 Lines • ▼ Show 20 Lines	let Predicates = [HasSVEorSME] in {
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;		defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;		defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;
defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>;		defm UUNPKLO_ZZ : sve_int_perm_unpk<0b10, "uunpklo", AArch64uunpklo>;
defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi", AArch64uunpkhi>;		defm UUNPKHI_ZZ : sve_int_perm_unpk<0b11, "uunpkhi", AArch64uunpkhi>;

defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>;		defm PUNPKLO_PP : sve_int_perm_punpk<0b0, "punpklo", int_aarch64_sve_punpklo>;
defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>;		defm PUNPKHI_PP : sve_int_perm_punpk<0b1, "punpkhi", int_aarch64_sve_punpkhi>;

		// Define pattern for `nxv1i1 splat_vector(1)`.
		// We do this here instead of in ISelLowering such that PatFrag's can still
		// recognize a splat.
		def : Pat<(nxv1i1 immAllOnesV), (PUNPKLO_PP (PTRUE_D 31))>;

defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;		defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;		defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;		def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
} // End HasSVEorSME		} // End HasSVEorSME

let Predicates = [HasSVE] in {		let Predicates = [HasSVE] in {
defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;		defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
} // End HasSVE		} // End HasSVE
▲ Show 20 Lines • Show All 745 Lines • ▼ Show 20 Lines	let Predicates = [HasSVEorSME] in {
defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>;		defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>;
defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>;		defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>;
defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>;		defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>;
defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2>;		defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2>;
defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;		defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;		defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;

// Extract lo/hi halves of legal predicate types.		// Extract lo/hi halves of legal predicate types.
		def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 0))),
		(PUNPKLO_PP PPR:$Ps)>;
		def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 1))),
		(PUNPKHI_PP PPR:$Ps)>;
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),		def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
(PUNPKLO_PP PPR:$Ps)>;		(PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),		def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
(PUNPKHI_PP PPR:$Ps)>;		(PUNPKHI_PP PPR:$Ps)>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),		def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
(PUNPKLO_PP PPR:$Ps)>;		(PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),		def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
(PUNPKHI_PP PPR:$Ps)>;		(PUNPKHI_PP PPR:$Ps)>;
▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines	let Predicates = [HasSVEorSME] in {
def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))),		def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))),
(UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;		(UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),		def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
(UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;		(UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),		def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),
(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;		(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;

// Concatenate two predicates.		// Concatenate two predicates.
		def : Pat<(nxv2i1 (concat_vectors nxv1i1:$p1, nxv1i1:$p2)),
		(UZP1_PPP_D $p1, $p2)>;
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),		def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;		(UZP1_PPP_S $p1, $p2)>;
def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)),		def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)),
(UZP1_PPP_H $p1, $p2)>;		(UZP1_PPP_H $p1, $p2)>;
def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),		def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
(UZP1_PPP_B $p1, $p2)>;		(UZP1_PPP_B $p1, $p2)>;

// Concatenate two floating point vectors.		// Concatenate two floating point vectors.
▲ Show 20 Lines • Show All 683 Lines • ▼ Show 20 Lines	let Predicates = [IsLE] in {
def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;		def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
}		}

// These allow casting from/to unpacked predicate types.		// These allow casting from/to unpacked predicate types.
def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv16i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv16i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv16i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv8i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv8i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv8i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv8i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv4i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv4i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv4i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv4i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv2i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;		def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv2i1 (reinterpret_cast (nxv1i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv1i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv1i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv1i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
		def : Pat<(nxv1i1 (reinterpret_cast (nxv2i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;

// These allow casting from/to unpacked floating-point types.		// These allow casting from/to unpacked floating-point types.
def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;		def : Pat<(nxv2f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;		def : Pat<(nxv8f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;		def : Pat<(nxv4f16 (reinterpret_cast (nxv8f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;		def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;		def : Pat<(nxv2f32 (reinterpret_cast (nxv4f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;		def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
▲ Show 20 Lines • Show All 1,077 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SVEInstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 641 Lines • ▼ Show 20 Lines

	multiclass sve_int_pfalse<bits<6> opc, string asm> {			multiclass sve_int_pfalse<bits<6> opc, string asm> {
	def NAME : sve_int_pfalse<opc, asm>;			def NAME : sve_int_pfalse<opc, asm>;

	def : Pat<(nxv16i1 immAllZerosV), (!cast<Instruction>(NAME))>;			def : Pat<(nxv16i1 immAllZerosV), (!cast<Instruction>(NAME))>;
	def : Pat<(nxv8i1 immAllZerosV), (!cast<Instruction>(NAME))>;			def : Pat<(nxv8i1 immAllZerosV), (!cast<Instruction>(NAME))>;
	def : Pat<(nxv4i1 immAllZerosV), (!cast<Instruction>(NAME))>;			def : Pat<(nxv4i1 immAllZerosV), (!cast<Instruction>(NAME))>;
	def : Pat<(nxv2i1 immAllZerosV), (!cast<Instruction>(NAME))>;			def : Pat<(nxv2i1 immAllZerosV), (!cast<Instruction>(NAME))>;
				def : Pat<(nxv1i1 immAllZerosV), (!cast<Instruction>(NAME))>;
	}			}

	class sve_int_ptest<bits<6> opc, string asm>			class sve_int_ptest<bits<6> opc, string asm>
	: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),			: I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
	asm, "\t$Pg, $Pn",			asm, "\t$Pg, $Pn",
	"",			"",
	[]>, Sched<[]> {			[]>, Sched<[]> {
	bits<4> Pg;			bits<4> Pg;
	▲ Show 20 Lines • Show All 1,018 Lines • ▼ Show 20 Lines
	multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,			multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
	SDPatternOperator op_nopred = null_frag> {			SDPatternOperator op_nopred = null_frag> {
	def NAME : sve_int_pred_log<opc, asm>;			def NAME : sve_int_pred_log<opc, asm>;

	def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;			def : SVE_3_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, nxv16i1, !cast<Instruction>(NAME)>;
	def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;			def : SVE_3_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, nxv8i1, !cast<Instruction>(NAME)>;
	def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;			def : SVE_3_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, nxv4i1, !cast<Instruction>(NAME)>;
	def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;			def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, nxv2i1, !cast<Instruction>(NAME)>;
				def : SVE_3_Op_Pat<nxv1i1, op, nxv1i1, nxv1i1, nxv1i1, !cast<Instruction>(NAME)>;
	def : SVE_2_Op_AllActive_Pat<nxv16i1, op_nopred, nxv16i1, nxv16i1,			def : SVE_2_Op_AllActive_Pat<nxv16i1, op_nopred, nxv16i1, nxv16i1,
	!cast<Instruction>(NAME), PTRUE_B>;			!cast<Instruction>(NAME), PTRUE_B>;
	def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8i1, nxv8i1,			def : SVE_2_Op_AllActive_Pat<nxv8i1, op_nopred, nxv8i1, nxv8i1,
	!cast<Instruction>(NAME), PTRUE_H>;			!cast<Instruction>(NAME), PTRUE_H>;
	def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4i1, nxv4i1,			def : SVE_2_Op_AllActive_Pat<nxv4i1, op_nopred, nxv4i1, nxv4i1,
	!cast<Instruction>(NAME), PTRUE_S>;			!cast<Instruction>(NAME), PTRUE_S>;
	def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1,			def : SVE_2_Op_AllActive_Pat<nxv2i1, op_nopred, nxv2i1, nxv2i1,
	!cast<Instruction>(NAME), PTRUE_D>;			!cast<Instruction>(NAME), PTRUE_D>;
	▲ Show 20 Lines • Show All 4,561 Lines • ▼ Show 20 Lines
	}			}

	multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {			multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
	def NAME : sve_int_perm_punpk<opc, asm>;			def NAME : sve_int_perm_punpk<opc, asm>;

	def : SVE_1_Op_Pat<nxv8i1, op, nxv16i1, !cast<Instruction>(NAME)>;			def : SVE_1_Op_Pat<nxv8i1, op, nxv16i1, !cast<Instruction>(NAME)>;
	def : SVE_1_Op_Pat<nxv4i1, op, nxv8i1, !cast<Instruction>(NAME)>;			def : SVE_1_Op_Pat<nxv4i1, op, nxv8i1, !cast<Instruction>(NAME)>;
	def : SVE_1_Op_Pat<nxv2i1, op, nxv4i1, !cast<Instruction>(NAME)>;			def : SVE_1_Op_Pat<nxv2i1, op, nxv4i1, !cast<Instruction>(NAME)>;
	}			}
				paulwalker-arm: Do we still need this change?
				sdesmalen (author): Nope, good catch, I'll remove it.

	class sve_int_rdffr_pred<bit s, string asm>			class sve_int_rdffr_pred<bit s, string asm>
	: I<(outs PPR8:$Pd), (ins PPRAny:$Pg),			: I<(outs PPR8:$Pd), (ins PPRAny:$Pg),
	asm, "\t$Pd, $Pg/z",			asm, "\t$Pd, $Pg/z",
	"",			"",
	[]>, Sched<[]> {			[]>, Sched<[]> {
	bits<4> Pd;			bits<4> Pd;
	bits<4> Pg;			bits<4> Pg;
	▲ Show 20 Lines • Show All 2,331 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll

	Show First 20 Lines • Show All 1,072 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: pfalse p0.b			; CHECK-NEXT: pfalse p0.b
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%ext = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> zeroinitializer, i64 0)			%ext = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> zeroinitializer, i64 0)
	ret <vscale x 2 x i1> %ext			ret <vscale x 2 x i1> %ext
	}			}

	declare <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float>, i64)			declare <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float>, i64)
	declare <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64)			declare <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64)

				;
				; Extract nxv1i1 type from: nxv2i1
				;

				define <vscale x 1 x i1> @extract_nxv1i1_nxv2i1_0(<vscale x 2 x i1> %in) {
				; CHECK-LABEL: extract_nxv1i1_nxv2i1_0:
				; CHECK: // %bb.0:
				; CHECK-NEXT: punpklo p0.h, p0.b
				; CHECK-NEXT: ret
				%res = call <vscale x 1 x i1> @llvm.vector.extract.nxv1i1.nxv2i1(<vscale x 2 x i1> %in, i64 0)
				ret <vscale x 1 x i1> %res
				}

				define <vscale x 1 x i1> @extract_nxv1i1_nxv2i1_1(<vscale x 2 x i1> %in) {
				; CHECK-LABEL: extract_nxv1i1_nxv2i1_1:
				; CHECK: // %bb.0:
				; CHECK-NEXT: punpkhi p0.h, p0.b
				; CHECK-NEXT: ret
				%res = call <vscale x 1 x i1> @llvm.vector.extract.nxv1i1.nxv2i1(<vscale x 2 x i1> %in, i64 1)
				ret <vscale x 1 x i1> %res
				}

				declare <vscale x 1 x i1> @llvm.vector.extract.nxv1i1.nxv2i1(<vscale x 2 x i1>, i64)

llvm/test/CodeGen/AArch64/sve-select.ll

	Show First 20 Lines • Show All 181 Lines • ▼ Show 20 Lines
	}			}

	define <vscale x 1 x i1> @select_nxv1i1(i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {			define <vscale x 1 x i1> @select_nxv1i1(i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {
	; CHECK-LABEL: select_nxv1i1:			; CHECK-LABEL: select_nxv1i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0			; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
	; CHECK-NEXT: sbfx x8, x0, #0, #1			; CHECK-NEXT: sbfx x8, x0, #0, #1
	; CHECK-NEXT: whilelo p2.d, xzr, x8			; CHECK-NEXT: whilelo p2.d, xzr, x8
				; CHECK-NEXT: punpklo p2.h, p2.b
	; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b			; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = select i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b			%res = select i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b
	ret <vscale x 1 x i1> %res			ret <vscale x 1 x i1> %res
	}			}

	; Integer vector select			; Integer vector select

	Show All 22 Lines
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dst			%sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dst
	ret <vscale x 4 x i32> %sel			ret <vscale x 4 x i32> %sel
	}			}

	define <vscale x 1 x i64> @sel_nxv1i64(<vscale x 1 x i1> %p, <vscale x 1 x i64> %dst, <vscale x 1 x i64> %a) {			define <vscale x 1 x i64> @sel_nxv1i64(<vscale x 1 x i1> %p, <vscale x 1 x i64> %dst, <vscale x 1 x i64> %a) {
	; CHECK-LABEL: sel_nxv1i64:			; CHECK-LABEL: sel_nxv1i64:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
				; CHECK-NEXT: uzp1 p0.d, p0.d, p0.d
	; CHECK-NEXT: mov z0.d, p0/m, z1.d			; CHECK-NEXT: mov z0.d, p0/m, z1.d
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%sel = select <vscale x 1 x i1> %p, <vscale x 1 x i64> %a, <vscale x 1 x i64> %dst			%sel = select <vscale x 1 x i1> %p, <vscale x 1 x i64> %a, <vscale x 1 x i64> %dst
	ret <vscale x 1 x i64> %sel			ret <vscale x 1 x i64> %sel
	}			}

	define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p, <vscale x 2 x i64> %dst, <vscale x 2 x i64> %a) {			define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p, <vscale x 2 x i64> %dst, <vscale x 2 x i64> %a) {
	; CHECK-LABEL: sel_nxv2i64:			; CHECK-LABEL: sel_nxv2i64:
	▲ Show 20 Lines • Show All 242 Lines • ▼ Show 20 Lines

	define <vscale x 1 x i1> @icmp_select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, i64 %x0) {			define <vscale x 1 x i1> @icmp_select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, i64 %x0) {
	; CHECK-LABEL: icmp_select_nxv1i1:			; CHECK-LABEL: icmp_select_nxv1i1:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: cmp x0, #0			; CHECK-NEXT: cmp x0, #0
	; CHECK-NEXT: cset w8, eq			; CHECK-NEXT: cset w8, eq
	; CHECK-NEXT: sbfx x8, x8, #0, #1			; CHECK-NEXT: sbfx x8, x8, #0, #1
	; CHECK-NEXT: whilelo p2.d, xzr, x8			; CHECK-NEXT: whilelo p2.d, xzr, x8
				; CHECK-NEXT: punpklo p2.h, p2.b
	; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b			; CHECK-NEXT: sel p0.b, p2, p0.b, p1.b
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%mask = icmp eq i64 %x0, 0			%mask = icmp eq i64 %x0, 0
	%sel = select i1 %mask, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b			%sel = select i1 %mask, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b
	ret <vscale x 1 x i1> %sel			ret <vscale x 1 x i1> %sel
	}			}

	define <vscale x 2 x i1> @icmp_select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i64 %x0) {			define <vscale x 2 x i1> @icmp_select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i64 %x0) {
	▲ Show 20 Lines • Show All 159 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-zeroinit.ll

	Show First 20 Lines • Show All 46 Lines • ▼ Show 20 Lines

	define <vscale x 8 x half> @test_zeroinit_8xf16() {			define <vscale x 8 x half> @test_zeroinit_8xf16() {
	; CHECK-LABEL: test_zeroinit_8xf16			; CHECK-LABEL: test_zeroinit_8xf16
	; CHECK: mov z0.h, #0			; CHECK: mov z0.h, #0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	ret <vscale x 8 x half> zeroinitializer			ret <vscale x 8 x half> zeroinitializer
	}			}

				define <vscale x 1 x i1> @test_zeroinit_1xi1() {
				; CHECK-LABEL: test_zeroinit_1xi1
				; CHECK: pfalse p0.b
				; CHECK-NEXT: ret
				ret <vscale x 1 x i1> zeroinitializer
				}

	define <vscale x 2 x i1> @test_zeroinit_2xi1() {			define <vscale x 2 x i1> @test_zeroinit_2xi1() {
	; CHECK-LABEL: test_zeroinit_2xi1			; CHECK-LABEL: test_zeroinit_2xi1
	; CHECK: pfalse p0.b			; CHECK: pfalse p0.b
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	ret <vscale x 2 x i1> zeroinitializer			ret <vscale x 2 x i1> zeroinitializer
	}			}

	define <vscale x 4 x i1> @test_zeroinit_4xi1() {			define <vscale x 4 x i1> @test_zeroinit_4xi1() {
	Show All 19 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Make nxv1i1 types a legal type for SVE.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 441696

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

llvm/lib/Target/AArch64/AArch64CallingConvention.td

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll

llvm/test/CodeGen/AArch64/sve-select.ll

llvm/test/CodeGen/AArch64/sve-zeroinit.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Make nxv1i1 types a legal type for SVE.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 441696

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

llvm/lib/Target/AArch64/AArch64CallingConvention.td

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

llvm/lib/Target/AArch64/SVEInstrFormats.td

llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll

llvm/test/CodeGen/AArch64/sve-select.ll

llvm/test/CodeGen/AArch64/sve-zeroinit.ll

[AArch64] Make nxv1i1 types a legal type for SVE.
ClosedPublic