Diff 483793

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,361 Lines • ▼ Show 20 Lines	if (OrigTy.getSizeInBits() >= 64)
return N;		return N;

// Must extend size to at least 64 bits to be used as an operand for VMULL.		// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);		EVT NewVT = getExtensionTo64Bits(OrigTy);

return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);		return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}		}

// Returns lane if Op extracts from a two-element vector and lane is constant		// Returns lane if Op extracts from a two-element vector and lane is constant
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: Move `ElementSize` closer to its use. sdesmalen: nit: Move `ElementSize` closer to its use.
// (i.e., extractelt(<2 x Ty> %v, ConstantLane)), and std::nullopt otherwise.		// (i.e., extractelt(<2 x Ty> %v, ConstantLane)), and std::nullopt otherwise.
static std::optional<uint64_t>		static std::optional<uint64_t>
		sdesmalenUnsubmitted Not Done Reply Inline Actions nit: Can you move these out into separate variables, e.g. X and Y? sdesmalen: nit: Can you move these out into separate variables, e.g. X and Y?
getConstantLaneNumOfExtractHalfOperand(SDValue &Op) {		getConstantLaneNumOfExtractHalfOperand(SDValue &Op) {
SDNode *OpNode = Op.getNode();		SDNode *OpNode = Op.getNode();
if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)		if (OpNode->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return std::nullopt;		return std::nullopt;

EVT VT = OpNode->getOperand(0).getValueType();		EVT VT = OpNode->getOperand(0).getValueType();
ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));		ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpNode->getOperand(1));
if (!VT.isFixedLengthVector() \|\| VT.getVectorNumElements() != 2 \|\| !C)		if (!VT.isFixedLengthVector() \|\| VT.getVectorNumElements() != 2 \|\| !C)
return std::nullopt;		return std::nullopt;

		sdesmalenUnsubmitted Not Done Reply Inline Actions Should this Overflow test be moved below the `if (IsOpEqualOrSame) { ... }` condition? sdesmalen: Should this Overflow test be moved below the `if (IsOpEqualOrSame) { ... }` condition?
return C->getZExtValue();		return C->getZExtValue();
}		}

static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,		static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {		bool isSigned) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);

if (N->getOpcode() != ISD::BUILD_VECTOR)		if (N->getOpcode() != ISD::BUILD_VECTOR)
▲ Show 20 Lines • Show All 203 Lines • ▼ Show 20 Lines	SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
bool OverrideNEON = VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\|		bool OverrideNEON = VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\|
Subtarget->forceStreamingCompatibleSVE();		Subtarget->forceStreamingCompatibleSVE();

if (VT.isScalableVector() \|\| useSVEForFixedLengthVectorVT(VT, OverrideNEON))		if (VT.isScalableVector() \|\| useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);		return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);

// Multiplications are only custom-lowered for 128-bit vectors so that		// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.		// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
assert(VT.is128BitVector() && VT.isInteger() &&		assert(VT.is128BitVector() && VT.isInteger() &&
		sdesmalenUnsubmitted Done Reply Inline Actions nit: add a newline before the if? sdesmalen: nit: add a newline before the if?
"unexpected type for custom-lowering ISD::MUL");		"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();		SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();		SDNode *N1 = Op.getOperand(1).getNode();
		sdesmalenUnsubmitted Done Reply Inline Actions I guess this can still overflow right? I think you had the right code for that before, it only needed moving the `if (Overflow) return SDValue();` after the increment. It would also be good to add a test for the overflow case (both signed and unsigned, and both 'less' and 'less or equal') sdesmalen: I guess this can still overflow right? I think you had the right code for that before, it only…
		dtemirbulatovAuthorUnsubmitted Done Reply Inline Actions With the increment case, I don't think we have to worry about overflow/underflow. It would either be 0 or a number too large/too small to be representable with the PTRUE VL instruction. dtemirbulatov: With the increment case, I don't think we have to worry about overflow/underflow. it either…
		dtemirbulatovAuthorUnsubmitted Done Reply Inline Actions With the increment case, I don't think we have to worry about overflow/underflow. It would either be 0 or a number too large/too small to be representable with the PTRUE VL instruction. dtemirbulatov: With the increment case, I don't think we have to worry about overflow/underflow. it either…
bool isMLA = false;		bool isMLA = false;
SDLoc DL(Op);		SDLoc DL(Op);
unsigned NewOpc = selectUmullSmull(N0, N1, DAG, DL, isMLA);		unsigned NewOpc = selectUmullSmull(N0, N1, DAG, DL, isMLA);

if (!NewOpc) {		if (!NewOpc) {
if (VT == MVT::v2i64)		if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.		// Fall through to expand this. It is not legal.
return SDValue();		return SDValue();
Show All 28 Lines
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,		static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {		int Pattern) {
if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)		if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
return DAG.getConstant(1, DL, MVT::nxv1i1);		return DAG.getConstant(1, DL, MVT::nxv1i1);
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,		return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));		DAG.getTargetConstant(Pattern, DL, MVT::i32));
}		}

		// Tries to fold an SVE `while*` intrinsic node (Op) whose two scalar
		// operands are constants into a fixed-pattern PTRUE built by getPTrue.
		// Returns an empty SDValue whenever the fold is not performed.
		//
		//   IsSigned - use signed (true) or unsigned (false) overflow-checked
		//              arithmetic when computing the element count.
		//   IsLess   - subtraction order: Y - X when true, X - Y when false, where
		//              X is operand 1 and Y is operand 2 of Op.
		//   IsEqual  - the comparison is inclusive, so the count is incremented by
		//              one.
		//
		// NOTE(review): for the !IsLess callers (whilege/whilegt/whilehs/whilehi),
		// confirm that the instruction activates the same low-numbered lanes that a
		// PTRUE VLn pattern does. If those forms fill the predicate from the highest
		// lane downwards, this fold is only valid for the IsLess forms - TODO verify
		// against the architecture reference.
		static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
		bool IsLess, bool IsEqual) {
		// Only constant bounds can be folded.
		if (!isa<ConstantSDNode>(Op.getOperand(1)) \|\|
		!isa<ConstantSDNode>(Op.getOperand(2)))
		return SDValue();

		SDLoc dl(Op);
		APInt X = Op.getConstantOperandAPInt(1);
		APInt Y = Op.getConstantOperandAPInt(2);
		APInt NumActiveElems;
		bool Overflow;
		// Distance between the two bounds, computed with overflow detection.
		if (IsLess)
		NumActiveElems = IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
		else
		NumActiveElems = IsSigned ? X.ssub_ov(Y, Overflow) : X.usub_ov(Y, Overflow);

		// Give up if the subtraction overflowed.
		if (Overflow)
		return SDValue();

		sdesmalenUnsubmitted Done Reply Inline Actions nit: add newline above. sdesmalen: nit: add newline above.
		// Inclusive comparisons cover one more element; the increment is overflow
		// checked as well.
		if (IsEqual) {
		APInt One(NumActiveElems.getBitWidth(), 1, IsSigned);
		NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
		: NumActiveElems.uadd_ov(One, Overflow);
		sdesmalenUnsubmitted Done Reply Inline Actions You didn't add any tests for this case, so it's currently untested. sdesmalen: You didn't add any tests for this case, so it's currently untested.
		if (Overflow)
		return SDValue();
		}

		// Fold only when the count maps to a predicate pattern and does not exceed
		// the number of elements in the minimum SVE vector (treated as at least 128
		// bits). The count is read zero-extended, so a negative signed result shows
		// up as a very large value and is rejected by the size check.
		std::optional<unsigned> PredPattern =
		getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
		unsigned MinSVEVectorSize = std::max(
		DAG.getSubtarget<AArch64Subtarget>().getMinSVEVectorSizeInBits(), 128u);
		unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
		if (PredPattern != std::nullopt &&
		NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
		return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);

		return SDValue();
		}

// Returns a safe bitcast between two scalable vector predicates, where		// Returns a safe bitcast between two scalable vector predicates, where
// any newly created lanes from a widening bitcast are defined as zero.		// any newly created lanes from a widening bitcast are defined as zero.
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {		static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);		SDLoc DL(Op);
EVT InVT = Op.getValueType();		EVT InVT = Op.getValueType();

assert(InVT.getVectorElementType() == MVT::i1 &&		assert(InVT.getVectorElementType() == MVT::i1 &&
VT.getVectorElementType() == MVT::i1 &&		VT.getVectorElementType() == MVT::i1 &&
▲ Show 20 Lines • Show All 235 Lines • ▼ Show 20 Lines	case Intrinsic::aarch64_neon_scalar_uqxtn: {
assert(Op.getValueType() == MVT::i32 \|\| Op.getValueType() == MVT::f32);		assert(Op.getValueType() == MVT::i32 \|\| Op.getValueType() == MVT::f32);
if (Op.getValueType() == MVT::i32)		if (Op.getValueType() == MVT::i32)
return DAG.getNode(ISD::BITCAST, dl, MVT::i32,		return DAG.getNode(ISD::BITCAST, dl, MVT::i32,
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::f32,		DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::f32,
Op.getOperand(0),		Op.getOperand(0),
DAG.getNode(ISD::BITCAST, dl, MVT::f64,		DAG.getNode(ISD::BITCAST, dl, MVT::f64,
Op.getOperand(1))));		Op.getOperand(1))));
return SDValue();		return SDValue();
}		}
case Intrinsic::aarch64_sve_whilelo: {		case Intrinsic::aarch64_sve_whilelo:
		sdesmalenUnsubmitted Not Done Reply Inline Actions Is it worth just moving all the behaviour into that function, and passing IsSigned, IsLess and IsEqual as parameters, such that you get: case Intrinsic::aarch64_sve_whilelo: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/false, /*IsLess=*/true, /*IsEqual=*/false); case Intrinsic::aarch64_sve_whilels: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/false, /*IsLess=*/true, /*IsEqual=*/true); case Intrinsic::aarch64_sve_whilelt: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/true, /*IsLess=*/true, /*IsEqual=*/false); case Intrinsic::aarch64_sve_whilele: return optimizeWhile(Op.getOperand(1), Op.getOperand(2), /*IsSigned=*/true, /*IsLess=*/true, /*IsEqual=*/true); ... sdesmalen: Is it worth just moving all the behaviour into that function, and passing IsSigned, IsLess and…
if (isa<ConstantSDNode>(Op.getOperand(1)) &&		return optimizeWhile(Op, DAG, /IsSigned=/false, /IsLess=/true,
isa<ConstantSDNode>(Op.getOperand(2))) {		/IsEqual=/false);
unsigned MinSVEVectorSize =		case Intrinsic::aarch64_sve_whilelt:
std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);		return optimizeWhile(Op, DAG, /IsSigned=/true, /IsLess=/true,
unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();		/IsEqual=/false);
unsigned NumActiveElems =		case Intrinsic::aarch64_sve_whilels:
Op.getConstantOperandVal(2) - Op.getConstantOperandVal(1);		return optimizeWhile(Op, DAG, /IsSigned=/false, /IsLess=/true,
std::optional<unsigned> PredPattern =		/IsEqual=/true);
getSVEPredPatternFromNumElements(NumActiveElems);		case Intrinsic::aarch64_sve_whilele:
if ((PredPattern != std::nullopt) &&		return optimizeWhile(Op, DAG, /IsSigned=/true, /IsLess=/true,
NumActiveElems <= (MinSVEVectorSize / ElementSize))		/IsEqual=/true);
		paulwalker-armUnsubmitted Not Done Reply Inline Actions This isn't quite what I had in mind when suggesting you can increase commonality. I had in mind a helper function that takes parameters like `IsSigned` and `IsLess` or perhaps even makes use of the existing `AArch64CC::CondCode` enum? paulwalker-arm: This isn't quite what I had in mind when suggesting you can increase commonality. I had in…
return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);		case Intrinsic::aarch64_sve_whilege:
}		return optimizeWhile(Op, DAG, /IsSigned=/true, /IsLess=/false,
return SDValue();		/IsEqual=/true);
}		case Intrinsic::aarch64_sve_whilegt:
		return optimizeWhile(Op, DAG, /IsSigned=/true, /IsLess=/false,
		/IsEqual=/false);
		case Intrinsic::aarch64_sve_whilehs:
		return optimizeWhile(Op, DAG, /IsSigned=/false, /IsLess=/false,
		/IsEqual=/true);
		case Intrinsic::aarch64_sve_whilehi:
		return optimizeWhile(Op, DAG, /IsSigned=/false, /IsLess=/false,
		/IsEqual=/false);
case Intrinsic::aarch64_sve_sunpkhi:		case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),		return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));		Op.getOperand(1));
case Intrinsic::aarch64_sve_sunpklo:		case Intrinsic::aarch64_sve_sunpklo:
return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),		return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));		Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpkhi:		case Intrinsic::aarch64_sve_uunpkhi:
return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),		return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
▲ Show 20 Lines • Show All 18,780 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

	Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilele_d_xx:			; CHECK-LABEL: whilele_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilele p0.d, x0, x1			; CHECK-NEXT: whilele p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilele p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 0, i64 3)
				sdesmalenUnsubmitted Done Reply Inline Actions I guess this doesn't match: ptrue p0.d, vl4 because `NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize)` evaluates to false. Can you clarify the reason these intrinsics don't fold in the name of these tests (e.g. `whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec`) and/or add a comment to explain it? sdesmalen: I guess this doesn't match: ptrue p0.d, vl4 because `NumActiveElems.getZExtValue() <=…
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_ii() {
				; CHECK-LABEL: whilele_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 -2, i64 3)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
				sdesmalenUnsubmitted Done Reply Inline Actions Same question here: rename this to `whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9` or something? sdesmalen: Same question here: rename this to `whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9` or…
				; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilele p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilele_b_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_overflow() {
				sdesmalenUnsubmitted Done Reply Inline Actions same question about renaming this test, e.g. `whilele_b_ii_dont_fold_to_ptrue_overflow` ? sdesmalen: same question about renaming this test, e.g. `whilele_b_ii_dont_fold_to_ptrue_overflow` ?
				; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #2
				; CHECK-NEXT: mov w9, #2147483647
				; CHECK-NEXT: movk w8, #32768, lsl #16
				; CHECK-NEXT: whilele p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 2147483647, i32 -2147483646)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_increment_overflow() {
				; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_increment_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #2147483647
				; CHECK-NEXT: whilele p0.b, wzr, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 0, i32 2147483647)
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILELO			; WHILELO
	;			;

	define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilelo_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilelo_b_ww:			; CHECK-LABEL: whilelo_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelo p0.b, w0, w1			; CHECK-NEXT: whilelo p0.b, w0, w1
	▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilelo_d_xx:			; CHECK-LABEL: whilelo_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelo p0.d, x0, x1			; CHECK-NEXT: whilelo p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilelo p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 3)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_ii() {
				; CHECK-LABEL: whilelo_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 2, i64 8)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
				; CHECK-LABEL: whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilelo p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilelo_b_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelo_b_ii_dont_fold_to_ptrue_overflow() {
				; CHECK-LABEL: whilelo_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #6
				; CHECK-NEXT: mov w9, #-1
				; CHECK-NEXT: whilelo p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 4294967295, i32 6)
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILELS			; WHILELS
	;			;

	define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilels_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilels_b_ww:			; CHECK-LABEL: whilels_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilels p0.b, w0, w1			; CHECK-NEXT: whilels p0.b, w0, w1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
				sdesmalenUnsubmitted Done Reply Inline Actions For whilelo (unsigned), this is the same as -1, and is therefore not much different than the test above. I would suggest keeping this test (because it's unsigned) and removing the test above sdesmalen: For whilelo (unsigned), this is the same as -1, and is therefore not much different than the…
	%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %a, i32 %b)			%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 %a, i32 %b)
	ret <vscale x 16 x i1> %out			ret <vscale x 16 x i1> %out
	}			}

	define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {			define <vscale x 16 x i1> @whilels_b_xx(i64 %a, i64 %b) {
	; CHECK-LABEL: whilels_b_xx:			; CHECK-LABEL: whilels_b_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilels p0.b, x0, x1			; CHECK-NEXT: whilels p0.b, x0, x1
	▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilels_d_xx:			; CHECK-LABEL: whilels_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilels p0.d, x0, x1			; CHECK-NEXT: whilels p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilels p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 0, i64 3)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii() {
				; CHECK-LABEL: whilels_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl7
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 2, i64 8)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
				; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilels p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilels_b_ii_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_overflow() {
				; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #6
				; CHECK-NEXT: mov w9, #-1
				; CHECK-NEXT: whilels p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 4294967295, i32 6)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_increment_overflow() {
				; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_increment_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #-1
				; CHECK-NEXT: whilels p0.b, wzr, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 0, i32 4294967295)
				ret <vscale x 16 x i1> %out
				sdesmalenUnsubmitted Done Reply Inline Actions For whilels (unsigned), this is the same as `-1`, and is therefore not much different than the test above. I would suggest keeping this test (because it's unsigned) and removing the test above. sdesmalen: For whilels (unsigned), this is the same as `-1`, and is therefore not much different than the…
				}

	;			;
	; WHILELT			; WHILELT
	;			;

	define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilelt_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilelt_b_ww:			; CHECK-LABEL: whilelt_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelt p0.b, w0, w1			; CHECK-NEXT: whilelt p0.b, w0, w1
	▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilelt_d_xx:			; CHECK-LABEL: whilelt_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilelt p0.d, x0, x1			; CHECK-NEXT: whilelt p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilelt p0.d, xzr, x8
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 3)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilelt_b_ii() {
				; CHECK-LABEL: whilelt_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl5
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 -2, i64 3)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
				; CHECK-LABEL: whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilelt p0.b, xzr, x8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 9)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelt_b_ii_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilelt_b_ii_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilelt_b_ii_dont_fold_to_ptrue_overflow() {
				; CHECK-LABEL: whilelt_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #2
				; CHECK-NEXT: mov w9, #2147483647
				; CHECK-NEXT: movk w8, #32768, lsl #16
				; CHECK-NEXT: whilelt p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 2147483647, i32 -2147483646)
				ret <vscale x 16 x i1> %out
				}

	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32)
	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64, i64)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32, i32)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i32(i32, i32)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32, i32)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i32(i32, i32)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64, i64)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64, i64)
	Show All 27 Lines

llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll

	Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilege_d_xx:			; CHECK-LABEL: whilege_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilege p0.d, x0, x1			; CHECK-NEXT: whilege p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				define <vscale x 2 x i1> @whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilege p0.d, x8, xzr
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0)
				ret <vscale x 2 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii() {
				; CHECK-LABEL: whilege_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
				; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilege p0.b, x8, xzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilege_b_ii_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_overflow() {
				; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #2
				; CHECK-NEXT: mov w9, #2147483647
				; CHECK-NEXT: movk w8, #32768, lsl #16
				; CHECK-NEXT: whilege p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 2147483647, i32 -2147483646)
				ret <vscale x 16 x i1> %out
				}

				define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_increment_overflow() {
				; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_increment_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #2147483647
				; CHECK-NEXT: mov w9, #-2147483641
				; CHECK-NEXT: whilege p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				sdesmalenUnsubmitted Done Reply Inline Actions Similar comment as for whilegt, it's better to pick some numbers that actually end up as a valid vl if you remove the overflow check, e.g. @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 -2147483641, i32 2147483647) sdesmalen: Similar comment as for whilegt, it's better to pick some numbers that actually end up as a…
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 -2147483641, i32 2147483647)
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILEHS			; WHILEHS
	;			;

	define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilehs_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilehs_b_ww:			; CHECK-LABEL: whilehs_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehs p0.b, w0, w1			; CHECK-NEXT: whilehs p0.b, w0, w1
	▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilehs_d_xx:			; CHECK-LABEL: whilehs_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehs p0.d, x0, x1			; CHECK-NEXT: whilehs p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				; Negative test: whilehs with the constant operands (3, 0) producing an nxv2i1
				; predicate. The expected output keeps the whilehs instruction. Per the test
				; name, the requested element count is larger than the minimum vector length
				; can hold, so a ptrue would not be equivalent.
				define <vscale x 2 x i1> @whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilehs p0.d, x8, xzr
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0)
				ret <vscale x 2 x i1> %out
				}

				; Positive test: whilehs with the constant operands (8, 2) is expected to be
				; lowered to `ptrue p0.b, vl7` (consistent with 8 - 2 + 1 = 7 elements).
				define <vscale x 16 x i1> @whilehs_b_ii() {
				; CHECK-LABEL: whilehs_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl7
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2)
				ret <vscale x 16 x i1> %out
				}

				; Negative test: whilehs with the constant operands (9, 0). The expected
				; output keeps the whilehs instruction; per the test name, the resulting
				; element count has no matching ptrue `vl` pattern.
				define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
				; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilehs p0.b, x8, xzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Positive test at the upper bound: with vscale_range(16, 16), whilehs with
				; the constant operands (255, 0) is expected to be lowered to
				; `ptrue p0.b, vl256` (consistent with 255 - 0 + 1 = 256 elements).
				define <vscale x 16 x i1> @whilehs_b_ii_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilehs_b_ii_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Negative test: whilehs with the i32 constant operands (6, 4294967295); the
				; second operand is printed as #-1 in the expected output. The first operand
				; is smaller than the second as an unsigned value, and the expected output
				; keeps the whilehs instruction rather than producing a ptrue.
				define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_overflow() {
				; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #-1
				; CHECK-NEXT: mov w9, #6
				; CHECK-NEXT: whilehs p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 6, i32 4294967295)
				sdesmalenUnsubmitted Done Reply Inline Actions Similar comment as for whilehi, this doesn't overflow. You could use @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 6, i32 4294967295) ; this would wrap around to vl8 sdesmalen: Similar comment as for whilehi, this doesn't overflow. You could use @llvm.aarch64.sve.
				ret <vscale x 16 x i1> %out
				}

				; Negative test: whilehs with the i32 constant operands (4294967295, 0); the
				; first operand is printed as #-1 in the expected output. The whilehs
				; instruction is expected to be kept. Judging by the test name, adding one to
				; the operand difference would overflow 32 bits here.
				define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_increment_overflow() {
				; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_increment_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #-1
				; CHECK-NEXT: whilehs p0.b, w8, wzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 4294967295, i32 0)
				ret <vscale x 16 x i1> %out
				}

	;			;
	; WHILEGT			; WHILEGT
	;			;

	define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilegt_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilegt_b_ww:			; CHECK-LABEL: whilegt_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilegt p0.b, w0, w1			; CHECK-NEXT: whilegt p0.b, w0, w1
	▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilegt_d_xx:			; CHECK-LABEL: whilegt_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilegt p0.d, x0, x1			; CHECK-NEXT: whilegt p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				; Negative test: whilegt with the constant operands (3, 0) producing an nxv2i1
				; predicate. The expected output keeps the whilegt instruction. Per the test
				; name, the requested element count is larger than the minimum vector length
				; can hold, so a ptrue would not be equivalent.
				define <vscale x 2 x i1> @whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilegt p0.d, x8, xzr
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 3, i64 0)
				ret <vscale x 2 x i1> %out
				}

				; Positive test: whilegt with the signed constant operands (3, -2) is expected
				; to be lowered to `ptrue p0.b, vl5` (consistent with 3 - (-2) = 5 elements).
				define <vscale x 16 x i1> @whilegt_b_ii() {
				; CHECK-LABEL: whilegt_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl5
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 3, i32 -2)
				ret <vscale x 16 x i1> %out
				}

				; Negative test: whilegt with the constant operands (9, 0). The expected
				; output keeps the whilegt instruction, i.e. nothing is folded to ptrue.
				; NOTE(review): the function name says "fold_to_ptrue", but the expected
				; output and the sibling whilehs/whilehi tests ("dont_fold_to_ptrue_...")
				; suggest it was meant to read "dont_fold" - confirm before renaming.
				define <vscale x 16 x i1> @whilegt_b_ii_fold_to_ptrue_nonexistent_vl9() {
				; CHECK-LABEL: whilegt_b_ii_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilegt p0.b, x8, xzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 9, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Positive test at the upper bound: with vscale_range(16, 16), whilegt with
				; the constant operands (256, 0) is expected to be lowered to
				; `ptrue p0.b, vl256`.
				define <vscale x 16 x i1> @whilegt_b_ii_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilegt_b_ii_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 256, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Negative test: whilegt with the i32 constant operands
				; (-2147483641, 2147483647). The expected output materialises both constants
				; and keeps the whilegt instruction. Judging by the test name, the operand
				; difference overflows 32-bit signed arithmetic for these values.
				define <vscale x 16 x i1> @whilegt_b_ii_dont_fold_to_ptrue_overflow() {
				; CHECK-LABEL: whilegt_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #2147483647
				; CHECK-NEXT: mov w9, #-2147483641
				; CHECK-NEXT: whilegt p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 -2147483641, i32 2147483647)
				ret <vscale x 16 x i1> %out
				sdesmalenUnsubmitted Done Reply Inline Actions It's better to pick some numbers that actually end up as a valid `vl` if you remove the overflow check, e.g. @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 -2147483641, i32 2147483647) Can you use similar numbers whilege? sdesmalen: It's better to pick some numbers that actually end up as a valid `vl` if you remove the…
				}

	;			;
	; WHILEHI			; WHILEHI
	;			;

	define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {			define <vscale x 16 x i1> @whilehi_b_ww(i32 %a, i32 %b) {
	; CHECK-LABEL: whilehi_b_ww:			; CHECK-LABEL: whilehi_b_ww:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehi p0.b, w0, w1			; CHECK-NEXT: whilehi p0.b, w0, w1
	▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
	; CHECK-LABEL: whilehi_d_xx:			; CHECK-LABEL: whilehi_d_xx:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: whilehi p0.d, x0, x1			; CHECK-NEXT: whilehi p0.d, x0, x1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 %a, i64 %b)			%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 %a, i64 %b)
	ret <vscale x 2 x i1> %out			ret <vscale x 2 x i1> %out
	}			}

				; Negative test: whilehi with the constant operands (3, 0) producing an nxv2i1
				; predicate. The expected output keeps the whilehi instruction. Per the test
				; name, the requested element count is larger than the minimum vector length
				; can hold, so a ptrue would not be equivalent.
				define <vscale x 2 x i1> @whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
				; CHECK-LABEL: whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec:
				; CHECK: // %bb.0:
				; CHECK-NEXT: mov w8, #3
				; CHECK-NEXT: whilehi p0.d, x8, xzr
				; CHECK-NEXT: ret
				%out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0)
				ret <vscale x 2 x i1> %out
				}

				; Positive test: whilehi with the constant operands (8, 2) is expected to be
				; lowered to `ptrue p0.b, vl6` (consistent with 8 - 2 = 6 elements).
				define <vscale x 16 x i1> @whilehi_b_ii() {
				; CHECK-LABEL: whilehi_b_ii:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: ptrue p0.b, vl6
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2)
				ret <vscale x 16 x i1> %out
				}

				; Negative test: whilehi with the constant operands (9, 0). The expected
				; output keeps the whilehi instruction; per the test name, the resulting
				; element count has no matching ptrue `vl` pattern.
				define <vscale x 16 x i1> @whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
				; CHECK-LABEL: whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #9
				; CHECK-NEXT: whilehi p0.b, x8, xzr
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Positive test at the upper bound: with vscale_range(16, 16), whilehi with
				; the constant operands (256, 0) is expected to be lowered to
				; `ptrue p0.b, vl256`.
				define <vscale x 16 x i1> @whilehi_b_ii_vl_maximum() vscale_range(16, 16) {
				; CHECK-LABEL: whilehi_b_ii_vl_maximum:
				; CHECK: // %bb.0:
				; CHECK-NEXT: ptrue p0.b, vl256
				; CHECK-NEXT: ret
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
				ret <vscale x 16 x i1> %out
				}

				; Negative test: whilehi with the i32 constant operands (7, 4294967295); the
				; second operand is printed as #-1 in the expected output. The first operand
				; is smaller than the second as an unsigned value, and the expected output
				; keeps the whilehi instruction rather than producing a ptrue.
				; NOTE(review): "whilelhi" in the function name looks like a typo for
				; "whilehi" - confirm before renaming, the label line must change with it.
				define <vscale x 16 x i1> @whilelhi_b_ii_dont_fold_to_ptrue_overflow() {
				; CHECK-LABEL: whilelhi_b_ii_dont_fold_to_ptrue_overflow:
				; CHECK: // %bb.0: // %entry
				; CHECK-NEXT: mov w8, #-1
				; CHECK-NEXT: mov w9, #7
				; CHECK-NEXT: whilehi p0.b, w9, w8
				; CHECK-NEXT: ret
				entry:
				%out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 7, i32 4294967295)
				sdesmalenUnsubmitted Done Reply Inline Actions This doesn't overflow right? 4294967295 - 6 => 4294967289 (no overflow here). For whilehi, it would overflow if the start value would be lower than the end value, e.g. @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 7, i32 4294967295) ; this would wrap around to vl8 sdesmalen: This doesn't overflow right? 4294967295 - 6 => 4294967289 (no overflow here). For whilehi, it…
				ret <vscale x 16 x i1> %out
				}

	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)
	declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)			declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64, i64)			declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32, i32)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i32(i32, i32)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32, i32)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i32(i32, i32)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64, i64)			declare <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64, i64)
	Show All 27 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Allow to lower WHILEop operations with constant operands to PTRUE
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 483793

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Allow to lower WHILEop operations with constant operands to PTRUE
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 483793

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll

llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll

[AArch64][SVE] Allow to lower WHILEop operations with constant operands to PTRUE
ClosedPublic