Diff 479135

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,318 Lines • ▼ Show 20 Lines	if (OrigTy.getSizeInBits() >= 64)
return N;		return N;

// Must extend size to at least 64 bits to be used as an operand for VMULL.		// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);		EVT NewVT = getExtensionTo64Bits(OrigTy);

return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);		return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}		}

// Returns lane if Op extracts from a two-element vector and lane is constant		// Returns lane if Op extracts from a two-element vector and lane is constant
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: Move `ElementSize` closer to its use. sdesmalen: nit: Move `ElementSize` closer to its use.
// (i.e., extractelt(<2 x Ty> %v, ConstantLane)), and None otherwise.		// (i.e., extractelt(<2 x Ty> %v, ConstantLane)), and None otherwise.
static Optional<uint64_t> getConstantLaneNumOfExtractHalfOperand(SDValue &Op) {		static Optional<uint64_t> getConstantLaneNumOfExtractHalfOperand(SDValue &Op) {
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: Can you move these out into separate variables, e.g. X and Y? sdesmalen: nit: Can you move these out into separate variables, e.g. X and Y?
SDNode *OpNode = Op.getNode();		SDNode *OpNode = Op.getNode();
if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)		if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return None;		return None;

EVT VT = OpNode->getOperand(0).getValueType();		EVT VT = OpNode->getOperand(0).getValueType();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));		ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
if (!VT.isFixedLengthVector() \|\| VT.getVectorNumElements() != 2 \|\| !C)		if (!VT.isFixedLengthVector() \|\| VT.getVectorNumElements() != 2 \|\| !C)
return None;		return None;

return C->getZExtValue();		return C->getZExtValue();
		sdesmalenUnsubmitted Not Done Reply Inline Actions Should this Overflow test be moved below the `if (IsOpEqualOrSame) { ... }` condition? sdesmalen: Should this Overflow test be moved below the `if (IsOpEqualOrSame) { ... }` condition?
}		}

static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,		static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {		bool isSigned) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

if (N->getOpcode() != ISD::BUILD_VECTOR)		if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;		return false;
▲ Show 20 Lines • Show All 246 Lines • ▼ Show 20 Lines	assert(InVT.getVectorElementType() == MVT::i1 &&
VT.getVectorElementType() == MVT::i1 &&		VT.getVectorElementType() == MVT::i1 &&
"Expected a predicate-to-predicate bitcast");		"Expected a predicate-to-predicate bitcast");
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&		assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
InVT.isScalableVector() &&		InVT.isScalableVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT) &&		DAG.getTargetLoweringInfo().isTypeLegal(InVT) &&
"Only expect to cast between legal scalable predicate types!");		"Only expect to cast between legal scalable predicate types!");

// Return the operand if the cast isn't changing type,		// Return the operand if the cast isn't changing type,
// e.g. <n x 16 x i1> -> <n x 16 x i1>		// e.g. <n x 16 x i1> -> <n x 16 x i1>
		sdesmalenUnsubmitted Done Reply Inline Actions nit: add a newline before the if? sdesmalen: nit: add a newline before the if?
if (InVT == VT)		if (InVT == VT)
return Op;		return Op;

		sdesmalenUnsubmitted Done Reply Inline Actions I guess this can still overflow, right? I think you had the right code for that before; it only needed moving the `if (Overflow) return SDValue();` after the increment. It would also be good to add a test for the overflow case (both signed and unsigned, and both 'less' and 'less or equal'). sdesmalen: I guess this can still overflow, right? I think you had the right code for that before; it only…
		dtemirbulatovAuthorUnsubmitted Done Reply Inline Actions With the increment case, I don't think we have to worry about overflow/underflow. It would either be 0 or a number too large/too small to be representable with the PTRUE VL instruction. dtemirbulatov: With the increment case, I don't think we have to worry about overflow/underflow. It would either…
		dtemirbulatovAuthorUnsubmitted Done Reply Inline Actions With the increment case, I don't think we have to worry about overflow/underflow. It would either be 0 or a number too large/too small to be representable with the PTRUE VL instruction. dtemirbulatov: With the increment case, I don't think we have to worry about overflow/underflow. It would either…
		sdesmalenUnsubmitted Done Reply Inline Actions nit: add newline above. sdesmalen: nit: add newline above.
SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);		SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);

// We only have to zero the lanes if new lanes are being defined, e.g. when		// We only have to zero the lanes if new lanes are being defined, e.g. when
// casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the		// casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the
		sdesmalenUnsubmitted Done Reply Inline Actions You didn't add any tests for this case, so it's currently untested. sdesmalen: You didn't add any tests for this case, so it's currently untested.
// case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then		// case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
// we can return here.		// we can return here.
if (InVT.bitsGT(VT))		if (InVT.bitsGT(VT))
return Reinterpret;		return Reinterpret;

// Check if the other lanes are already known to be zeroed by		// Check if the other lanes are already known to be zeroed by
// construction.		// construction.
if (isZeroingInactiveLanes(Op))		if (isZeroingInactiveLanes(Op))
▲ Show 20 Lines • Show All 185 Lines • ▼ Show 20 Lines	if (Op.getValueType() == MVT::i32)
return DAG.getNode(ISD::BITCAST, dl, MVT::i32,		return DAG.getNode(ISD::BITCAST, dl, MVT::i32,
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::f32,		DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::f32,
Op.getOperand(0),		Op.getOperand(0),
DAG.getNode(ISD::BITCAST, dl, MVT::f64,		DAG.getNode(ISD::BITCAST, dl, MVT::f64,
Op.getOperand(1))));		Op.getOperand(1))));
return SDValue();		return SDValue();
}		}
case Intrinsic::aarch64_sve_whilelo: {		case Intrinsic::aarch64_sve_whilelo: {
if (isa<ConstantSDNode>(Op.getOperand(1)) &&		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
isa<ConstantSDNode>(Op.getOperand(2))) {		isa<ConstantSDNode>(Op.getOperand(2))) {
unsigned MinSVEVectorSize =		unsigned MinSVEVectorSize =
std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
unsigned NumActiveElems =		bool Overflow;
Op.getConstantOperandVal(2) - Op.getConstantOperandVal(1);		APInt NumActiveElems = Op.getConstantOperandAPInt(2).usub_ov(
		Op.getConstantOperandAPInt(1), Overflow);
Optional<unsigned> PredPattern =		Optional<unsigned> PredPattern =
getSVEPredPatternFromNumElements(NumActiveElems);		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		if ((PredPattern != None) && !Overflow &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
		}
		return SDValue();
		}
		case Intrinsic::aarch64_sve_whilelt: {
		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
		paulwalker-armUnsubmitted Not Done Reply Inline Actions This isn't quite what I had in mind when suggesting you can increase commonality. I had in mind a helper function that takes parameters like `IsSigned` and `IsLess` or perhaps even makes use of the existing `AArch64CC::CondCode` enum? paulwalker-arm: This isn't quite what I had in mind when suggesting you can increase commonality. I had in…
		isa<ConstantSDNode>(Op.getOperand(2))) {
		unsigned MinSVEVectorSize =
		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		APInt NumActiveElems =
		Op.getConstantOperandAPInt(2) - Op.getConstantOperandAPInt(1);
		Optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		if ((PredPattern != None) &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
		}
		return SDValue();
		}
		case Intrinsic::aarch64_sve_whilels: {
		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
		isa<ConstantSDNode>(Op.getOperand(2))) {
		unsigned MinSVEVectorSize =
		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		bool Overflow;
		APInt Op1 = Op.getConstantOperandAPInt(1);
		APInt NumActiveElems =
		Op.getConstantOperandAPInt(2).usub_ov(Op1, Overflow);
		if (Overflow)
		return SDValue();
		APInt One(NumActiveElems.getBitWidth(), 1, false);
		NumActiveElems = NumActiveElems.uadd_ov(One, Overflow);
		Optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		if ((PredPattern != None) && !Overflow &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
		}
		return SDValue();
		}
		case Intrinsic::aarch64_sve_whilele: {
		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
		isa<ConstantSDNode>(Op.getOperand(2))) {
		unsigned MinSVEVectorSize =
		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		APInt NumActiveElems =
		Op.getConstantOperandAPInt(2) - Op.getConstantOperandAPInt(1) + 1;
		Optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
if ((PredPattern != None) &&		if ((PredPattern != None) &&
NumActiveElems <= (MinSVEVectorSize / ElementSize))		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
		}
		return SDValue();
		}
		case Intrinsic::aarch64_sve_whilege: {
		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
		isa<ConstantSDNode>(Op.getOperand(2))) {
		unsigned MinSVEVectorSize =
		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		APInt NumActiveElems =
		Op.getConstantOperandAPInt(1) - Op.getConstantOperandAPInt(2) + 1;
		Optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		if ((PredPattern != None) &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
		}
		return SDValue();
		}
		case Intrinsic::aarch64_sve_whilegt: {
		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
		isa<ConstantSDNode>(Op.getOperand(2))) {
		unsigned MinSVEVectorSize =
		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		APInt NumActiveElems =
		Op.getConstantOperandAPInt(1) - Op.getConstantOperandAPInt(2);
		Optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		if ((PredPattern != None) &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
		}
		return SDValue();
		}
		case Intrinsic::aarch64_sve_whilehs: {
		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
		isa<ConstantSDNode>(Op.getOperand(2))) {
		unsigned MinSVEVectorSize =
		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		bool Overflow;
		APInt Op2 = Op.getConstantOperandAPInt(2);
		APInt NumActiveElems =
		Op.getConstantOperandAPInt(1).usub_ov(Op2, Overflow);
		if (Overflow)
		return SDValue();
		APInt One(NumActiveElems.getBitWidth(), 1, false);
		NumActiveElems = NumActiveElems.uadd_ov(One, Overflow);
		Optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		if ((PredPattern != None) && !Overflow &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
		}
		return SDValue();
		}
		case Intrinsic::aarch64_sve_whilehi: {
		if (isa<ConstantSDNode>(Op.getOperand(1)) &&
		isa<ConstantSDNode>(Op.getOperand(2))) {
		unsigned MinSVEVectorSize =
		std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		bool Overflow;
		APInt Op2 = Op.getConstantOperandAPInt(2);
		APInt NumActiveElems =
		Op.getConstantOperandAPInt(1).usub_ov(Op2, Overflow);
		Optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		if ((PredPattern != None) && !Overflow &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
}		}
		sdesmalenUnsubmitted Not Done Reply Inline Actions Is it worth just moving all the behaviour into that function, and passing IsSigned, IsLess and IsEqual as parameters, such that you get: case Intrinsic::aarch64_sve_whilelo: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/false, /*IsLess=*/true, /*IsEqual=*/false); case Intrinsic::aarch64_sve_whilels: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/false, /*IsLess=*/true, /*IsEqual=*/true); case Intrinsic::aarch64_sve_whilelt: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/true, /*IsLess=*/true, /*IsEqual=*/false); case Intrinsic::aarch64_sve_whilele: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/true, /*IsLess=*/true, /*IsEqual=*/true); ... sdesmalen: Is it worth just moving all the behaviour into that function, and passing IsSigned, IsLess and…
return SDValue();		return SDValue();
}		}
case Intrinsic::aarch64_sve_sunpkhi:		case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),		return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));		Op.getOperand(1));
case Intrinsic::aarch64_sve_sunpklo:		case Intrinsic::aarch64_sve_sunpklo:
return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),		return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));		Op.getOperand(1));
▲ Show 20 Lines • Show All 18,608 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

	Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilele_d_xx:			; CHECK-LABEL: whilele_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilele p0.d, x0, x1			; CHECK-NEXT: whilele p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilele_d_ii() {
				; CHECK-LABEL: whilele_d_ii:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilele p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 0, i64 3)
				sdesmalenUnsubmitted Done Reply Inline Actions I guess this doesn't match: ptrue p0.d, vl4 because `NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize)` evaluates to false. Can you clarify the reason these intrinsics don't fold in the name of these tests (e.g. `whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec`) and/or add a comment to explain it? sdesmalen: I guess this doesn't match: ptrue p0.d, vl4 because `NumActiveElems.getZExtValue() <=…
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_ii1() {
				; CHECK-LABEL: whilele_b_ii1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 -2, i64 3)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_ii2() {
				sdesmalenUnsubmitted Done Reply Inline Actions Same question here: rename this to `whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9` or something? sdesmalen: Same question here: rename this to `whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9` or…
				; CHECK-LABEL: whilele_b_ii2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilele p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_ii3() vscale_range(16, 16) {
				; CHECK-LABEL: whilele_b_ii3:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255)
				ret <vscale x 16 x i1> %out
				}

	;			;
				sdesmalenUnsubmitted Done Reply Inline Actions same question about renaming this test, e.g. `whilele_b_ii_dont_fold_to_ptrue_overflow` ? sdesmalen: same question about renaming this test, e.g. `whilele_b_ii_dont_fold_to_ptrue_overflow` ?
	; WHILELO			; WHILELO
	;			;

	define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilelo_b_ww:			; CHECK-LABEL: whilelo_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelo p0.b, w0, w1			; CHECK-NEXT: whilelo p0.b, w0, w1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilelo_d_xx:			; CHECK-LABEL: whilelo_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelo p0.d, x0, x1			; CHECK-NEXT: whilelo p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilelo_d_ii() {
				; CHECK-LABEL: whilelo_d_ii:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilelo p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 3)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_ii1() {
				; CHECK-LABEL: whilelo_b_ii1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 2, i64 8)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_ii2() {
				; CHECK-LABEL: whilelo_b_ii2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilelo p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_ii3() vscale_range(16, 16) {
				; CHECK-LABEL: whilelo_b_ii3:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_ii4() {
				; CHECK-LABEL: whilelo_b_ii4:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: mov x9, #-2
				; CHECK-NEXT: whilelo p0.b, x9, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 -2, i64 3)
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILELS			; WHILELS
	;			;

	define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilels_b_ww:			; CHECK-LABEL: whilels_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilels p0.b, w0, w1			; CHECK-NEXT: whilels p0.b, w0, w1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
				sdesmalenUnsubmitted Done Reply Inline Actions For whilelo (unsigned), this is the same as -1, and is therefore not much different than the test above. I would suggest keeping this test (because it's unsigned) and removing the test above sdesmalen: For whilelo (unsigned), this is the same as -1, and is therefore not much different than the…
	%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %a, i32 %b)			%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %a, i32 %b)
	ret <vscale x 16 x i1> %out			ret <vscale x 16 x i1> %out
	}			}

	define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {			define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
	; CHECK-LABEL: whilels_b_xx:			; CHECK-LABEL: whilels_b_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilels p0.b, x0, x1			; CHECK-NEXT: whilels p0.b, x0, x1
	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilels_d_xx:			; CHECK-LABEL: whilels_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilels p0.d, x0, x1			; CHECK-NEXT: whilels p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilels_d_ii() {
				; CHECK-LABEL: whilels_d_ii:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilels p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 0, i64 3)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii1() {
				; CHECK-LABEL: whilels_b_ii1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl7
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 2, i64 8)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii2() {
				; CHECK-LABEL: whilels_b_ii2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilels p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii3() vscale_range(16, 16) {
				; CHECK-LABEL: whilels_b_ii3:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii4() {
				; CHECK-LABEL: whilels_b_ii4:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: mov x9, #-2
				; CHECK-NEXT: whilels p0.b, x9, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 -2, i64 3)
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILELT			; WHILELT
	;			;

	define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilelt_b_ww:			; CHECK-LABEL: whilelt_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelt p0.b, w0, w1			; CHECK-NEXT: whilelt p0.b, w0, w1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
				sdesmalenUnsubmitted Done Reply Inline Actions For whilels (unsigned), this is the same as `-1`, and is therefore not much different than the test above. I would suggest keeping this test (because it's unsigned) and removing the test above. sdesmalen: For whilels (unsigned), this is the same as `-1`, and is therefore not much different than the…
	%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %a, i32 %b)			%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 %a, i32 %b)
	ret <vscale x 16 x i1> %out			ret <vscale x 16 x i1> %out
	}			}

	define <vscale x 16 x i1> @whilelt_b_xx(i64 %a, i64 %b) {			define <vscale x 16 x i1> @whilelt_b_xx(i64 %a, i64 %b) {
	; CHECK-LABEL: whilelt_b_xx:			; CHECK-LABEL: whilelt_b_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelt p0.b, x0, x1			; CHECK-NEXT: whilelt p0.b, x0, x1
	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilelt_d_xx:			; CHECK-LABEL: whilelt_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelt p0.d, x0, x1			; CHECK-NEXT: whilelt p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilelt_d_ii() {
				; CHECK-LABEL: whilelt_d_ii:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilelt p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 3)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilelt_b_ii1() {
				; CHECK-LABEL: whilelt_b_ii1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl5
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 -2, i64 3)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelt_b_ii2() {
				; CHECK-LABEL: whilelt_b_ii2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilelt p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelt_b_ii3() vscale_range(16, 16) {
				; CHECK-LABEL: whilelt_b_ii3:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256)
				ret <vscale x 16 x i1> %out
				}

	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32)
	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64, i64)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32, i32)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32, i32)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32, i32)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32, i32)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64, i64)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64, i64)
	Show All 27 Lines

llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll

	Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilege_d_xx:			; CHECK-LABEL: whilege_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilege p0.d, x0, x1			; CHECK-NEXT: whilege p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilege_d_ii() {
				; CHECK-LABEL: whilege_d_ii:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilege p0.d, x8, xzr
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii1() {
				; CHECK-LABEL: whilege_b_ii1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii2() {
				; CHECK-LABEL: whilege_b_ii2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilege p0.b, x8, xzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii3() vscale_range(16, 16) {
				; CHECK-LABEL: whilege_b_ii3:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0)
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILEHS			; WHILEHS
	;			;

	define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilehs_b_ww:			; CHECK-LABEL: whilehs_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehs p0.b, w0, w1			; CHECK-NEXT: whilehs p0.b, w0, w1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 %a, i32 %b)			%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 %a, i32 %b)
	ret <vscale x 16 x i1> %out			ret <vscale x 16 x i1> %out
	}			}

	define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {			define <vscale x 16 x i1> @whilehs_b_xx(i64 %a, i64 %b) {
	; CHECK-LABEL: whilehs_b_xx:			; CHECK-LABEL: whilehs_b_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehs p0.b, x0, x1			; CHECK-NEXT: whilehs p0.b, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 %a, i64 %b)			%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 %a, i64 %b)
	ret <vscale x 16 x i1> %out			ret <vscale x 16 x i1> %out
	}			}
				sdesmalenUnsubmitted Done Reply Inline Actions Similar comment as for whilegt, it's better to pick some numbers that actually end up as a valid vl if you remove the overflow check, e.g. @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 -2147483641, i32 2147483647) sdesmalen: Similar comment as for whilegt, it's better to pick some numbers that actually end up as a…

	define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {			define <vscale x 8 x i1> @whilehs_h_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilehs_h_ww:			; CHECK-LABEL: whilehs_h_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehs p0.h, w0, w1			; CHECK-NEXT: whilehs p0.h, w0, w1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 %a, i32 %b)			%out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilehs.nxv8i1.i32(i32 %a, i32 %b)
	ret <vscale x 8 x i1> %out			ret <vscale x 8 x i1> %out
	Show All 39 Lines
	; CHECK-LABEL: whilehs_d_xx:			; CHECK-LABEL: whilehs_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehs p0.d, x0, x1			; CHECK-NEXT: whilehs p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				; Constant operands (3, 0) with an nxv2i1 result: the expected assembly
				; still contains a `whilehs p0.d` instruction (the constant 3 is
				; materialised in w8 and the zero operand is xzr), i.e. no ptrue is emitted.
				; NOTE(review): presumably the implied element count does not fit the
				; minimum number of .d lanes - confirm against the lowering code.
				define <vscale x 2 x i1> @whilehs_d_ii() {
				; CHECK-LABEL: whilehs_d_ii:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilehs p0.d, x8, xzr
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0)
				ret <vscale x 2 x i1> %out
				}

				; Constant operands (8, 2) with an nxv16i1 result: the expected assembly is
				; a single `ptrue p0.b, vl7` and no whilehs instruction.
				define <vscale x 16 x i1> @whilehs_b_ii1() {
				; CHECK-LABEL: whilehs_b_ii1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl7
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2)
				ret <vscale x 16 x i1> %out
				}

				; Constant operands (9, 0) with an nxv16i1 result: the expected assembly
				; keeps the `whilehs p0.b` instruction (constant 9 in w8, zero as xzr)
				; instead of a ptrue.
				; NOTE(review): presumably the implied element count has no matching
				; ptrue vl pattern - confirm against the lowering code.
				define <vscale x 16 x i1> @whilehs_b_ii2() {
				; CHECK-LABEL: whilehs_b_ii2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilehs p0.b, x8, xzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Constant operands (255, 0) in a function annotated vscale_range(16, 16):
				; the expected assembly is a single `ptrue p0.b, vl256`.
				define <vscale x 16 x i1> @whilehs_b_ii3() vscale_range(16, 16) {
				; CHECK-LABEL: whilehs_b_ii3:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Constant operands (-2, 3) with an nxv16i1 result: the expected assembly
				; materialises both constants (3 in w8, -2 in x9) and keeps the
				; `whilehs p0.b, x9, x8` instruction, i.e. no ptrue is emitted.
				define <vscale x 16 x i1> @whilehs_b_ii4() {
				; CHECK-LABEL: whilehs_b_ii4:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: mov x9, #-2
				; CHECK-NEXT: whilehs p0.b, x9, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 -2, i64 3)
				sdesmalenUnsubmitted Done Reply Inline Actions Similar comment as for whilehi, this doesn't overflow. You could use @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 6, i32 4294967295) ; this would wrap around to vl8 sdesmalen: Similar comment as for whilehi, this doesn't overflow. You could use @llvm.aarch64.sve.
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILEGT			; WHILEGT
	;			;

	define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilegt_b_ww:			; CHECK-LABEL: whilegt_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilegt p0.b, w0, w1			; CHECK-NEXT: whilegt p0.b, w0, w1
	▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilehi_s_ww:			; CHECK-LABEL: whilehi_s_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehi p0.s, w0, w1			; CHECK-NEXT: whilehi p0.s, w0, w1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 %a, i32 %b)			%out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i32(i32 %a, i32 %b)
	ret <vscale x 4 x i1> %out			ret <vscale x 4 x i1> %out
	}			}

	; Register-operand case: the whilehi intrinsic on two i64 arguments, producing
	; an nxv4i1 predicate, is expected to select a single `whilehi p0.s, x0, x1`.
	define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {			define <vscale x 4 x i1> @whilehi_s_xx(i64 %a, i64 %b) {
				sdesmalenUnsubmitted Done Reply Inline Actions It's better to pick some numbers that actually end up as a valid `vl` if you remove the overflow check, e.g. @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 -2147483641, i32 2147483647) Can you use similar numbers whilege? sdesmalen: It's better to pick some numbers that actually end up as a valid `vl` if you remove the…
	; CHECK-LABEL: whilehi_s_xx:			; CHECK-LABEL: whilehi_s_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehi p0.s, x0, x1			; CHECK-NEXT: whilehi p0.s, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 %a, i64 %b)			%out = call <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64 %a, i64 %b)
	ret <vscale x 4 x i1> %out			ret <vscale x 4 x i1> %out
	}			}

	Show All 10 Lines
	; CHECK-LABEL: whilehi_d_xx:			; CHECK-LABEL: whilehi_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehi p0.d, x0, x1			; CHECK-NEXT: whilehi p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				; Constant operands (3, 0) with an nxv2i1 result: the expected assembly
				; still contains a `whilehi p0.d` instruction (the constant 3 is
				; materialised in w8 and the zero operand is xzr), i.e. no ptrue is emitted.
				; NOTE(review): presumably the implied element count does not fit the
				; minimum number of .d lanes - confirm against the lowering code.
				define <vscale x 2 x i1> @whilehi_d_ii() {
				; CHECK-LABEL: whilehi_d_ii:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilehi p0.d, x8, xzr
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0)
				ret <vscale x 2 x i1> %out
				}

				; Constant operands (8, 2) with an nxv16i1 result: the expected assembly is
				; a single `ptrue p0.b, vl6` and no whilehi instruction.
				define <vscale x 16 x i1> @whilehi_b_ii1() {
				; CHECK-LABEL: whilehi_b_ii1:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2)
				ret <vscale x 16 x i1> %out
				}

				; Constant operands (9, 0) with an nxv16i1 result: the expected assembly
				; keeps the `whilehi p0.b` instruction (constant 9 in w8, zero as xzr)
				; instead of a ptrue.
				; NOTE(review): presumably the implied element count has no matching
				; ptrue vl pattern - confirm against the lowering code.
				define <vscale x 16 x i1> @whilehi_b_ii2() {
				; CHECK-LABEL: whilehi_b_ii2:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilehi p0.b, x8, xzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Constant operands (256, 0) in a function annotated vscale_range(16, 16):
				; the expected assembly is a single `ptrue p0.b, vl256`.
				define <vscale x 16 x i1> @whilehi_b_ii3() vscale_range(16, 16) {
				; CHECK-LABEL: whilehi_b_ii3:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Constant operands (-2, 3) with an nxv16i1 result: the expected assembly
				; materialises both constants (3 in w8, -2 in x9) and keeps the
				; `whilehi p0.b, x9, x8` instruction, i.e. no ptrue is emitted.
				define <vscale x 16 x i1> @whilehi_b_ii4() {
				; CHECK-LABEL: whilehi_b_ii4:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: mov x9, #-2
				; CHECK-NEXT: whilehi p0.b, x9, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 -2, i64 3)
				sdesmalenUnsubmitted Done Reply Inline Actions This doesn't overflow right? 4294967295 - 6 => 4294967289 (no overflow here). For whilehi, it would overflow if the start value would be lower than the end value, e.g. @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 7, i32 4294967295) ; this would wrap around to vl8 sdesmalen: This doesn't overflow right? 4294967295 - 6 => 4294967289 (no overflow here). For whilehi, it…
				ret <vscale x 16 x i1> %out
				}

	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)
	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64, i64)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32, i32)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32, i32)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32, i32)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32, i32)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64, i64)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64, i64)
	Show All 27 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Allow to lower WHILEop operations with constant operands to PTRUE
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 479135

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Allow to lower WHILEop operations with constant operands to PTRUE
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 479135

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll

[AArch64][SVE] Allow to lower WHILEop operations with constant operands to PTRUE
ClosedPublic