Diff 501488

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 539 Lines • ▼ Show 20 Lines	public:
/// vector loop, which can avoid the need to emit a scalar epilogue loop.		/// vector loop, which can avoid the need to emit a scalar epilogue loop.
bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,		AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,		DominatorTree *DT,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) const;		InterleavedAccessInfo *IAI) const;

/// Query the target what the preferred style of tail folding is.		/// Query the target what the preferred style of tail folding is.
TailFoldingStyle getPreferredTailFoldingStyle() const;		/// \param IVUpdateMayOverflow Tells whether it is known if the IV update
		/// may (or will never) overflow for the suggested VF/UF in the given loop.
		/// Targets can use this information to select a more optimal tail folding
		/// style. The value conservatively defaults to true, such that no assumptions
		/// are made on overflow.
		TailFoldingStyle
		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;

// Parameters that control the loop peeling transformation		// Parameters that control the loop peeling transformation
struct PeelingPreferences {		struct PeelingPreferences {
/// A forced peeling factor (the number of bodied of the original loop		/// A forced peeling factor (the number of bodied of the original loop
/// that should be peeled off before the loop body). When set to 0, the		/// that should be peeled off before the loop body). When set to 0, the
/// a peeling factor based on profile information and other factors.		/// a peeling factor based on profile information and other factors.
unsigned PeelCount;		unsigned PeelCount;
/// Allow peeling off loop iterations.		/// Allow peeling off loop iterations.
▲ Show 20 Lines • Show All 1,088 Lines • ▼ Show 20 Lines	virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,		AssumptionCache &AC,
TargetLibraryInfo *LibInfo,		TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;		HardwareLoopInfo &HWLoopInfo) = 0;
virtual bool		virtual bool
preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,		preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,		AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree DT, LoopVectorizationLegality LVL,		DominatorTree DT, LoopVectorizationLegality LVL,
InterleavedAccessInfo *IAI) = 0;		InterleavedAccessInfo *IAI) = 0;
virtual TailFoldingStyle getPreferredTailFoldingStyle() = 0;		virtual TailFoldingStyle
		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
virtual std::optional<Instruction *> instCombineIntrinsic(		virtual std::optional<Instruction *> instCombineIntrinsic(
InstCombiner &IC, IntrinsicInst &II) = 0;		InstCombiner &IC, IntrinsicInst &II) = 0;
virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(		virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,		InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
KnownBits & Known, bool &KnownBitsComputed) = 0;		KnownBits & Known, bool &KnownBitsComputed) = 0;
virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(		virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,		InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,		APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
▲ Show 20 Lines • Show All 384 Lines • ▼ Show 20 Lines	public:
}		}
bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,		AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,		DominatorTree *DT,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) override {		InterleavedAccessInfo *IAI) override {
return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);		return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
}		}
TailFoldingStyle getPreferredTailFoldingStyle() override {		TailFoldingStyle
return Impl.getPreferredTailFoldingStyle();		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
		return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}		}
std::optional<Instruction *>		std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {		instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
return Impl.instCombineIntrinsic(IC, II);		return Impl.instCombineIntrinsic(IC, II);
}		}
std::optional<Value *>		std::optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,		simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,		APInt DemandedMask, KnownBits &Known,
▲ Show 20 Lines • Show All 667 Lines • Show Last 20 Lines

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Show First 20 Lines • Show All 165 Lines • ▼ Show 20 Lines	public:
bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,		AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,		DominatorTree *DT,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) const {		InterleavedAccessInfo *IAI) const {
return false;		return false;
}		}

TailFoldingStyle getPreferredTailFoldingStyle() const {		TailFoldingStyle
		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
return TailFoldingStyle::DataWithoutLaneMask;		return TailFoldingStyle::DataWithoutLaneMask;
}		}

std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,		std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {		IntrinsicInst &II) const {
return std::nullopt;		return std::nullopt;
}		}

▲ Show 20 Lines • Show All 1,120 Lines • Show Last 20 Lines

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Show First 20 Lines • Show All 624 Lines • ▼ Show 20 Lines	public:
bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,		AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,		DominatorTree *DT,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI) {		InterleavedAccessInfo *IAI) {
return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);		return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
}		}

TailFoldingStyle getPreferredTailFoldingStyle() {		TailFoldingStyle
return BaseT::getPreferredTailFoldingStyle();		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) {
		return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}		}

std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,		std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) {		IntrinsicInst &II) {
return BaseT::instCombineIntrinsic(IC, II);		return BaseT::instCombineIntrinsic(IC, II);
}		}

std::optional<Value *>		std::optional<Value *>
▲ Show 20 Lines • Show All 1,825 Lines • Show Last 20 Lines

llvm/lib/Analysis/TargetTransformInfo.cpp

	Show First 20 Lines • Show All 306 Lines • ▼ Show 20 Lines

	bool TargetTransformInfo::preferPredicateOverEpilogue(			bool TargetTransformInfo::preferPredicateOverEpilogue(
	Loop L, LoopInfo LI, ScalarEvolution &SE, AssumptionCache &AC,			Loop L, LoopInfo LI, ScalarEvolution &SE, AssumptionCache &AC,
	TargetLibraryInfo TLI, DominatorTree DT, LoopVectorizationLegality *LVL,			TargetLibraryInfo TLI, DominatorTree DT, LoopVectorizationLegality *LVL,
	InterleavedAccessInfo *IAI) const {			InterleavedAccessInfo *IAI) const {
	return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);			return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
	}			}

	TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle() const {			TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle(
	return TTIImpl->getPreferredTailFoldingStyle();			bool IVUpdateMayOverflow) const {
				return TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow);
	}			}

	std::optional<Instruction *>			std::optional<Instruction *>
	TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,			TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,
	IntrinsicInst &II) const {			IntrinsicInst &II) const {
	return TTIImpl->instCombineIntrinsic(IC, II);			return TTIImpl->instCombineIntrinsic(IC, II);
	}			}

	▲ Show 20 Lines • Show All 926 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Show First 20 Lines • Show All 341 Lines • ▼ Show 20 Lines	public:
unsigned getGISelRematGlobalCost() const {		unsigned getGISelRematGlobalCost() const {
return 2;		return 2;
}		}

unsigned getMinTripCountTailFoldingThreshold() const {		unsigned getMinTripCountTailFoldingThreshold() const {
return ST->hasSVE() ? 5 : 0;		return ST->hasSVE() ? 5 : 0;
}		}

TailFoldingStyle getPreferredTailFoldingStyle() const {		TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
if (ST->hasSVE())		if (ST->hasSVE())
return TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;		return IVUpdateMayOverflow
		? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
		: TailFoldingStyle::DataAndControlFlow;

return TailFoldingStyle::DataWithoutLaneMask;		return TailFoldingStyle::DataWithoutLaneMask;
}		}

bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,		bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,		AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,		DominatorTree *DT,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI);		InterleavedAccessInfo *IAI);
Show All 39 Lines

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Show First 20 Lines • Show All 306 Lines • ▼ Show 20 Lines	bool preferPredicateOverEpilogue(Loop L, LoopInfo LI, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo *TLI,		AssumptionCache &AC, TargetLibraryInfo *TLI,
DominatorTree *DT,		DominatorTree *DT,
LoopVectorizationLegality *LVL,		LoopVectorizationLegality *LVL,
InterleavedAccessInfo *IAI);		InterleavedAccessInfo *IAI);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,		void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,		TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);		OptimizationRemarkEmitter *ORE);

TailFoldingStyle getPreferredTailFoldingStyle() const;		TailFoldingStyle
		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;

void getPeelingPreferences(Loop *L, ScalarEvolution &SE,		void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);		TTI::PeelingPreferences &PP);
bool shouldBuildLookupTablesForConstant(Constant *C) const {		bool shouldBuildLookupTablesForConstant(Constant *C) const {
// In the ROPI and RWPI relocation models we can't have pointers to global		// In the ROPI and RWPI relocation models we can't have pointers to global
// variables or functions in constant data, so don't convert switches to		// variables or functions in constant data, so don't convert switches to
// lookup tables if any of the values would need relocation.		// lookup tables if any of the values would need relocation.
if (ST->isROPI() \|\| ST->isRWPI())		if (ST->isROPI() \|\| ST->isRWPI())
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Show First 20 Lines • Show All 2,280 Lines • ▼ Show 20 Lines	if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "		LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"a candidate.\n");		"a candidate.\n");
return false;		return false;
}		}

return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI());		return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI());
}		}

TailFoldingStyle ARMTTIImpl::getPreferredTailFoldingStyle() const {		TailFoldingStyle
		ARMTTIImpl::getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
if (!ST->hasMVEIntegerOps() \|\| !EnableTailPredication)		if (!ST->hasMVEIntegerOps() \|\| !EnableTailPredication)
return TailFoldingStyle::DataWithoutLaneMask;		return TailFoldingStyle::DataWithoutLaneMask;

// Intrinsic @llvm.get.active.lane.mask is supported.		// Intrinsic @llvm.get.active.lane.mask is supported.
// It is used in the MVETailPredication pass, which requires the number of		// It is used in the MVETailPredication pass, which requires the number of
// elements processed by this vector loop to setup the tail-predicated		// elements processed by this vector loop to setup the tail-predicated
// loop.		// loop.
return TailFoldingStyle::Data;		return TailFoldingStyle::Data;
▲ Show 20 Lines • Show All 159 Lines • Show Last 20 Lines

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines	InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
const APInt &Imm, Type *Ty,		const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind);		TTI::TargetCostKind CostKind);

TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth);		TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

bool shouldExpandReduction(const IntrinsicInst *II) const;		bool shouldExpandReduction(const IntrinsicInst *II) const;
bool supportsScalableVectors() const { return ST->hasVInstructions(); }		bool supportsScalableVectors() const { return ST->hasVInstructions(); }
bool enableScalableVectorization() const { return ST->hasVInstructions(); }		bool enableScalableVectorization() const { return ST->hasVInstructions(); }
TailFoldingStyle getPreferredTailFoldingStyle() const {		TailFoldingStyle
		getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
return ST->hasVInstructions() ? TailFoldingStyle::Data		return ST->hasVInstructions() ? TailFoldingStyle::Data
: TailFoldingStyle::DataWithoutLaneMask;		: TailFoldingStyle::DataWithoutLaneMask;
}		}
std::optional<unsigned> getMaxVScale() const;		std::optional<unsigned> getMaxVScale() const;
std::optional<unsigned> getVScaleForTuning() const;		std::optional<unsigned> getVScaleForTuning() const;

TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;		TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;

▲ Show 20 Lines • Show All 250 Lines • Show Last 20 Lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,562 Lines • ▼ Show 20 Lines	public:
}		}

/// Returns true if a scalar epilogue is not allowed due to optsize or a		/// Returns true if a scalar epilogue is not allowed due to optsize or a
/// loop hint annotation.		/// loop hint annotation.
bool isScalarEpilogueAllowed() const {		bool isScalarEpilogueAllowed() const {
return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed;		return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed;
}		}

/// Returns the TailFoldingStyle that is best for the current loop.		/// Returns the TailFoldingStyle that is best for the current loop.
		david-arm (Done): Can you add some `///` comments here please?
TailFoldingStyle getTailFoldingStyle() const {		TailFoldingStyle
		getTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
if (!CanFoldTailByMasking)		if (!CanFoldTailByMasking)
return TailFoldingStyle::None;		return TailFoldingStyle::None;

if (ForceTailFoldingStyle.getNumOccurrences())		if (ForceTailFoldingStyle.getNumOccurrences())
return ForceTailFoldingStyle;		return ForceTailFoldingStyle;

return TTI.getPreferredTailFoldingStyle();		return TTI.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}		}
		paulwalker-arm (Done): It seems weird to ask the target for their preferred style and then immediately override it. Is there a reason not to pass `IVUpdateCannotOverflow` to `getPreferredTailFoldingStyle`?

/// Returns true if all loop blocks should be masked to fold tail loop.		/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const {		bool foldTailByMasking() const {
return getTailFoldingStyle() != TailFoldingStyle::None;		return getTailFoldingStyle() != TailFoldingStyle::None;
}		}

/// Returns true if the instructions in this block requires predication		/// Returns true if the instructions in this block requires predication
/// for any reason, e.g. because tail folding now requires a predicate		/// for any reason, e.g. because tail folding now requires a predicate
▲ Show 20 Lines • Show All 1,002 Lines • ▼ Show 20 Lines	if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
return MaxVScale;		return MaxVScale;

if (F.hasFnAttribute(Attribute::VScaleRange))		if (F.hasFnAttribute(Attribute::VScaleRange))
return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();		return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();

return std::nullopt;		return std::nullopt;
}		}

		/// Decide whether the runtime induction-variable overflow check is known to
		/// evaluate to false for the given \p VF and \p UF, in which case the check
		/// can be removed.
		///
		/// \param Cost Cost model providing the loop, its legality information,
		///             the scalar-evolution handle and the target hooks.
		/// \param VF   Vectorization factor under consideration (fixed or scalable).
		/// \param UF   Unroll (interleave) factor, if already known. When absent,
		///             the target's maximum interleave factor for \p VF is used as
		///             a conservative upper bound.
		/// \returns true iff a non-zero constant maximum trip count is known and
		///          (max trip count + VF * UF) does not overflow in the widest
		///          induction type, i.e. the IV update cannot overflow. Returns
		///          false whenever this cannot be proven.
		static bool isIndvarOverflowCheckKnownFalse(
		    const LoopVectorizationCostModel *Cost,
		    ElementCount VF, std::optional<unsigned> UF = std::nullopt) {
		  // Always be conservative if we don't know the exact unroll factor.
		  unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF);

		  // All-ones mask of the widest induction type, i.e. its maximum unsigned
		  // value.
		  Type *IdxTy = Cost->Legal->getWidestInductionType();
		  APInt MaxUIntTripCount = cast<IntegerType>(IdxTy)->getMask();

		  // We know the runtime overflow check is known false iff the (max) trip-count
		  // is known and (max) trip-count + (VF * UF) does not overflow in the type of
		  // the vector loop induction variable.
		  if (unsigned TC =
		          Cost->PSE.getSE()->getSmallConstantMaxTripCount(Cost->TheLoop)) {
		    uint64_t MaxVF = VF.getKnownMinValue();
		    if (VF.isScalable()) {
		      std::optional<unsigned> MaxVScale =
		          getMaxVScale(*Cost->TheFunction, Cost->TTI);
		      // Without an upper bound on vscale nothing can be proven.
		      if (!MaxVScale)
		        return false;
		      // The largest possible element count is the known minimum scaled by
		      // the maximum vscale; the optional must be dereferenced and multiplied
		      // in, not assigned over the known-minimum value.
		      MaxVF *= *MaxVScale;
		    }

		    // Headroom left above the max trip count must exceed one vector step.
		    return (MaxUIntTripCount - TC).ugt(MaxVF * MaxUF);
		  }

		  // Unknown (zero) constant max trip count: stay conservative.
		  return false;
		}

void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def,		void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def,
const VPIteration &Instance,		const VPIteration &Instance,
VPTransformState &State) {		VPTransformState &State) {
Value *ScalarInst = State.get(Def, Instance);		Value *ScalarInst = State.get(Def, Instance);
Value *VectorValue = State.get(Def, Instance.Part);		Value *VectorValue = State.get(Def, Instance.Part);
VectorValue = Builder.CreateInsertElement(		VectorValue = Builder.CreateInsertElement(
VectorValue, ScalarInst,		VectorValue, ScalarInst,
Instance.Lane.getAsRuntimeExpr(State.Builder, VF));		Instance.Lane.getAsRuntimeExpr(State.Builder, VF));
▲ Show 20 Lines • Show All 435 Lines • ▼ Show 20 Lines	auto CreateStep = [&]() -> Value * {
Value *MinProfTC =		Value *MinProfTC =
createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);		createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
if (!VF.isScalable())		if (!VF.isScalable())
return MinProfTC;		return MinProfTC;
return Builder.CreateBinaryIntrinsic(		return Builder.CreateBinaryIntrinsic(
Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF));		Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF));
};		};

TailFoldingStyle Style = Cost->getTailFoldingStyle();		TailFoldingStyle Style = Cost->getTailFoldingStyle();
		paulwalker-arm (Done): Up to you, but personally I think for all instances the reverse polarity `IVUpdateCanOverflow` reads better.
if (Style == TailFoldingStyle::None)		if (Style == TailFoldingStyle::None)
		david-arm (Done): This is just a thought, but I think you can probably simplify this to just:
		    TailFoldingStyle Style = Cost->getTailFoldingStyle();
		    if (Style == TailFoldingStyle::None)
		      CheckMinIters = Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
		    else if (VF.isScalable() && Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck && !Cost->isIndvarOverflowCheckKnownFalse(VF, UF)) { ...
		That way you only need to call `isIndvarOverflowCheckKnownFalse` just before you're about to actually create the checks. What do you think?
CheckMinIters =		CheckMinIters =
Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");		Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
else if (VF.isScalable() &&		else if (VF.isScalable() &&
		!isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {		Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
// vscale is not necessarily a power-of-2, which means we cannot guarantee		// vscale is not necessarily a power-of-2, which means we cannot guarantee
// an overflow to zero when updating induction variables and so an		// an overflow to zero when updating induction variables and so an
// additional overflow check is required before entering the vector loop.		// additional overflow check is required before entering the vector loop.

// Get the maximum unsigned value for the type.		// Get the maximum unsigned value for the type.
Value *MaxUIntTripCount =		Value *MaxUIntTripCount =
ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());		ConstantInt::get(CountTy, cast<IntegerType>(CountTy)->getMask());
▲ Show 20 Lines • Show All 422 Lines • ▼ Show 20 Lines	if (Instruction *V = CSEMap.lookup(&In)) {
In.eraseFromParent();		In.eraseFromParent();
continue;		continue;
}		}

CSEMap[&In] = &In;		CSEMap[&In] = &In;
}		}
}		}

InstructionCost LoopVectorizationCostModel::getVectorCallCost(		InstructionCost LoopVectorizationCostModel::getVectorCallCost(
		paulwalker-arm (Done): Forgive my ignorance here, but this doesn't look like a cost model question?
CallInst CI, ElementCount VF, Function Variant, bool NeedsMask) const {		CallInst CI, ElementCount VF, Function Variant, bool NeedsMask) const {
Function *F = CI->getCalledFunction();		Function *F = CI->getCalledFunction();
Type *ScalarRetTy = CI->getType();		Type *ScalarRetTy = CI->getType();
SmallVector<Type *, 4> Tys, ScalarTys;		SmallVector<Type *, 4> Tys, ScalarTys;
for (auto &ArgOp : CI->args())		for (auto &ArgOp : CI->args())
ScalarTys.push_back(ArgOp->getType());		ScalarTys.push_back(ArgOp->getType());

// Estimate cost of scalarized vector call. The source operands are assumed		// Estimate cost of scalarized vector call. The source operands are assumed
// to be vectors, so we need to extract individual elements from there,		// to be vectors, so we need to extract individual elements from there,
// execute VF scalar calls, and then gather the result into the vector return		// execute VF scalar calls, and then gather the result into the vector return
// value.		// value.
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;		TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
		paulwalker-arm (Done): `num-elts-per-iteration` or `(VF * UF)`?
InstructionCost ScalarCallCost =		InstructionCost ScalarCallCost =
TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, CostKind);		TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, CostKind);
		david-arm (Done): I wonder if this is safe, because `getSmallBestKnownTC` also returns a value from profiling for the expected trip count, which is more of a hint than a definite value?
		paulwalker-arm (Done): This doesn't look entirely safe. I think you really want `SE.getSmallConstantTripCount`?
if (VF.isScalar())		if (VF.isScalar())
		paulwalker-arm (Done): `MaxVF`? Because Elts is implied.
return ScalarCallCost;		return ScalarCallCost;

		paulwalker-arm (Done): We so need to get rid of `TTI::getMaxVScale()`. I think function attributes should take precedence, but then this is the order used within `getMaxLegalScalableVF`, so perhaps best fixed separately. Up to you.
		sdesmalen (author, Done): I'd rather fix that separately so that the order is consistent between `getMaxLegalScalableVF` and here.
// Compute corresponding vector type for return value and arguments.		// Compute corresponding vector type for return value and arguments.
Type *RetTy = ToVectorTy(ScalarRetTy, VF);		Type *RetTy = ToVectorTy(ScalarRetTy, VF);
for (Type *ScalarTy : ScalarTys)		for (Type *ScalarTy : ScalarTys)
		paulwalker-arm (Done): I suppose this can overflow. Perhaps make `MaxVFElts` a `uint64_t`, given that's what `ugt` will promote to anyway.
Tys.push_back(ToVectorTy(ScalarTy, VF));		Tys.push_back(ToVectorTy(ScalarTy, VF));

// Compute costs of unpacking argument values for the scalar calls and		// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.		// packing the return values to a vector.
InstructionCost ScalarizationCost =		InstructionCost ScalarizationCost =
getScalarizationOverhead(CI, VF, CostKind);		getScalarizationOverhead(CI, VF, CostKind);

InstructionCost Cost =		InstructionCost Cost =
▲ Show 20 Lines • Show All 5,457 Lines • ▼ Show 20 Lines	VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");		VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");		VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);		VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");		auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
VPBlockUtils::insertBlockAfter(TopRegion, Preheader);		VPBlockUtils::insertBlockAfter(TopRegion, Preheader);
VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");		VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);		VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);

		// Don't use getDecisionAndClampRange here, because we don't know the UF
		// so this function is better to be conservative, rather than to split
		// it up into different VPlans.
		bool IVUpdateMayOverflow = false;
		for (ElementCount VF = Range.Start;
		ElementCount::isKnownLT(VF, Range.End); VF *= 2)
		IVUpdateMayOverflow \|= !isIndvarOverflowCheckKnownFalse(&CM, VF);
		paulwalker-arm (Not Done): Is `addCanonicalIVRecipes` being passed the "wrong" tail folding style a functional issue, or just a corner case that might affect performance?
		sdesmalen (author, Done): This is not a functional issue. In the worst case it generates both the runtime check and computes the slightly more expensive runtime trip-count in the preheader.

Instruction *DLInst =		Instruction *DLInst =
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());		getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(),		addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(),
DLInst ? DLInst->getDebugLoc() : DebugLoc(),		DLInst ? DLInst->getDebugLoc() : DebugLoc(),
CM.getTailFoldingStyle());		CM.getTailFoldingStyle(IVUpdateMayOverflow));

// Scan the body of the loop in a topological order to visit each basic block		// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.		// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);		LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);		DFS.perform(LI);

VPBasicBlock *VPBB = HeaderVPBB;		VPBasicBlock *VPBB = HeaderVPBB;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {		for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
▲ Show 20 Lines • Show All 1,718 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

	Show First 20 Lines • Show All 57 Lines • ▼ Show 20 Lines
	; CHECK: pred.load.continue6:			; CHECK: pred.load.continue6:
	; CHECK-NEXT: [[TMP24]] = phi <4 x i16> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]			; CHECK-NEXT: [[TMP24]] = phi <4 x i16> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
	; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP24]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>			; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP24]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
	; CHECK-NEXT: [[TMP26:%.*]] = sext <4 x i16> [[TMP25]] to <4 x i32>			; CHECK-NEXT: [[TMP26:%.*]] = sext <4 x i16> [[TMP25]] to <4 x i32>
	; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0			; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
	; CHECK-NEXT: [[TMP28:%.]] = getelementptr i32, ptr [[B:%.]], i64 [[TMP27]]			; CHECK-NEXT: [[TMP28:%.]] = getelementptr i32, ptr [[B:%.]], i64 [[TMP27]]
	; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i32 0			; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i32 0
	; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP29]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])			; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP29]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
	; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 998)
	; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4			; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX_NEXT]], i64 1002)
	; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>			; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
	; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>			; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
	; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0			; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0
	; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3			; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3
	; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP24]], i32 2			; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP24]], i32 2
	; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
	Show All 36 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll

	; RUN: opt -passes=loop-vectorize -S < %s \| FileCheck %s			; RUN: opt -passes=loop-vectorize -S < %s \| FileCheck %s

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {			define void @trip7_i64(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
	; CHECK-LABEL: @trip7_i64(			; CHECK-LABEL: @trip7_i64(
	; CHECK: vector.ph:
	; CHECK: [[N_MINUS_VF:%.]] = sub i64 7, [[VSCALE_X_VF:%.]]
	; CHECK: [[CMP:%.*]] = icmp ugt i64 7, [[VSCALE_X_VF]]
	; CHECK: [[TRIP_COUNT:%.*]] = select i1 [[CMP]], i64 [[N_MINUS_VF]], i64 0
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.]], %vector.body ]			; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.]], %vector.body ]
	; CHECK: [[ACTIVE_LANE_MASK:%.]] = phi <vscale x 2 x i1> [ {{%.}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ]			; CHECK: [[ACTIVE_LANE_MASK:%.]] = phi <vscale x 2 x i1> [ {{%.}}, %vector.ph ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %vector.body ]
	; CHECK: {{%.}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)			; CHECK: {{%.}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
	; CHECK: {{%.}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)			; CHECK: {{%.}} = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr {{%.}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
	; CHECK: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> {{%.}}, ptr {{%.}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])			; CHECK: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> {{%.}}, ptr {{%.}}, i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
	; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TRIP_COUNT]])
	; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()			; CHECK: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2			; CHECK-NEXT: [[VF:%.*]] = mul i64 [[VSCALE]], 2
	; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]]			; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[VF]]
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 7)
	; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)			; CHECK-NEXT: [[ACTIVE_LANE_MASK_NOT:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
	; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NOT]], i32 0			; CHECK-NEXT: [[COND:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NOT]], i32 0
	; CHECK-NEXT: br i1 [[COND]], label %middle.block, label %vector.body			; CHECK-NEXT: br i1 [[COND]], label %middle.block, label %vector.body
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.body: ; preds = %entry, %for.body			for.body: ; preds = %entry, %for.body
	▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll

	Show All 10 Lines
	; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2			; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
	; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2			; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
	; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
	; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
	; CHECK-NEXT: [[TMP7:%.*]] = sub i64 1024, [[TMP6]]
	; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 1024, [[TMP6]]
	; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
	; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1024)			; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1024)
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0			; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
	; CHECK-NEXT: [[TMP11:%.]] = getelementptr inbounds i64, i64 [[SRC:%.*]], i64 [[TMP10]]			; CHECK-NEXT: [[TMP6:%.]] = getelementptr inbounds i64, i64 [[SRC:%.*]], i64 [[TMP5]]
	; CHECK-NEXT: [[TMP12:%.]] = getelementptr inbounds i64, i64 [[TMP11]], i32 0			; CHECK-NEXT: [[TMP7:%.]] = getelementptr inbounds i64, i64 [[TMP6]], i32 0
	; CHECK-NEXT: [[TMP13:%.]] = bitcast i64 [[TMP12]] to <vscale x 2 x i64>*			; CHECK-NEXT: [[TMP8:%.]] = bitcast i64 [[TMP7]] to <vscale x 2 x i64>*
	; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)			; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[TMP8]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
	; CHECK-NEXT: [[TMP14:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)			; CHECK-NEXT: [[TMP9:%.*]] = shl nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
	; CHECK-NEXT: [[TMP15:%.]] = getelementptr inbounds i64, i64 [[DST:%.*]], i64 [[TMP10]]			; CHECK-NEXT: [[TMP10:%.]] = getelementptr inbounds i64, i64 [[DST:%.*]], i64 [[TMP5]]
	; CHECK-NEXT: [[TMP16:%.]] = getelementptr inbounds i64, i64 [[TMP15]], i32 0			; CHECK-NEXT: [[TMP11:%.]] = getelementptr inbounds i64, i64 [[TMP10]], i32 0
	; CHECK-NEXT: [[TMP17:%.]] = bitcast i64 [[TMP16]] to <vscale x 2 x i64>*			; CHECK-NEXT: [[TMP12:%.]] = bitcast i64 [[TMP11]] to <vscale x 2 x i64>*
	; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[TMP17]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)			; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[TMP12]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
	; CHECK-NEXT: [[TMP18:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD1]], [[TMP14]]			; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 2 x i64> [[WIDE_MASKED_LOAD1]], [[TMP9]]
	; CHECK-NEXT: [[TMP19:%.]] = bitcast i64 [[TMP16]] to <vscale x 2 x i64>*			; CHECK-NEXT: [[TMP14:%.]] = bitcast i64 [[TMP11]] to <vscale x 2 x i64>*
	; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[TMP18]], <vscale x 2 x i64>* [[TMP19]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])			; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[TMP13]], <vscale x 2 x i64>* [[TMP14]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
	; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP9]])			; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2
	; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 2			; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]]
	; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP21]]			; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1024)
	; CHECK-NEXT: [[TMP22:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)			; CHECK-NEXT: [[TMP17:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
	; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 2 x i1> [[TMP22]], i32 0			; CHECK-NEXT: [[TMP18:%.*]] = extractelement <vscale x 2 x i1> [[TMP17]], i32 0
	; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; CHECK: scalar.ph:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
	; CHECK-NEXT: br label [[FOR_BODY:%.*]]			; CHECK-NEXT: br label [[FOR_BODY:%.*]]
	; CHECK: for.body:			; CHECK: for.body:
	; CHECK-NEXT: [[I_06:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]			; CHECK-NEXT: [[I_06:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]
	; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i64, i64 [[SRC]], i64 [[I_06]]			; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i64, i64 [[SRC]], i64 [[I_06]]
	; CHECK-NEXT: [[TMP24:%.]] = load i64, i64 [[ARRAYIDX]], align 8			; CHECK-NEXT: [[TMP19:%.]] = load i64, i64 [[ARRAYIDX]], align 8
	; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[TMP24]], 1			; CHECK-NEXT: [[MUL:%.*]] = shl nsw i64 [[TMP19]], 1
	; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i64, i64 [[DST]], i64 [[I_06]]			; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i64, i64 [[DST]], i64 [[I_06]]
	; CHECK-NEXT: [[TMP25:%.]] = load i64, i64 [[ARRAYIDX1]], align 8			; CHECK-NEXT: [[TMP20:%.]] = load i64, i64 [[ARRAYIDX1]], align 8
	; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[MUL]]			; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], [[MUL]]
	; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8			; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8
	; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_06]], 1			; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_06]], 1
	; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1024			; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1024
	; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; CHECK: for.end:			; CHECK: for.end:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	Show All 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt -passes='loop-vectorize,instcombine' -sve-tail-folding=all -S < %s | FileCheck %s

				target triple = "aarch64"

				; Test that this uses the 'DataAndControlFlow' style of tail folding
				; where it performs the loop-indvar increment before the active.lane.mask
				; and uses %N directly for the tripcount.
				define void @cannot_overflow_i32_induction_var(ptr noalias %dst, ptr readonly %src, i32 %N) #0 {
				; CHECK-LABEL: @cannot_overflow_i32_induction_var(
				; CHECK-NEXT: entry:
				; CHECK-NEXT: [[CMP6_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0
				; CHECK-NEXT: br i1 [[CMP6_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
				; CHECK: for.body.preheader:
				; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
				; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
				; CHECK: vector.ph:
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
				; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
				; CHECK: vector.body:
				; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
				; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP0]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
				; CHECK-NEXT: [[TMP1:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 42, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
				; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
				; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP1]], ptr [[TMP2]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
				; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
				; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
				; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
				; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
				; CHECK: middle.block:
				; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
				; CHECK: scalar.ph:
				; CHECK-NEXT: br label [[FOR_BODY:%.*]]
				; CHECK: for.body:
				; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
				; CHECK: for.cond.cleanup.loopexit:
				; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
				; CHECK: for.cond.cleanup:
				; CHECK-NEXT: ret void
				;
				entry:
				%cmp6.not = icmp eq i32 %N, 0
				david-arm: nit: Maybe the entry blocks in both tests can be removed to simplify the IR and CHECK lines? I think the only thing that matters here is the `zext` in the `for.body.preheader`, right?
				sdesmalen (author): When I change that, ScalarEvolution doesn't recognise the maximum number of iterations that way, and so it vectorizes with the different tail folding style.
				br i1 %cmp6.not, label %for.cond.cleanup, label %for.body.preheader

				for.body.preheader:
				%wide.trip.count = zext i32 %N to i64
				br label %for.body

				for.body:
				%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
				%arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
				%0 = load i32, ptr %arrayidx, align 4
				%add = add nsw i32 %0, 42
				%arrayidx2 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
				store i32 %add, ptr %arrayidx2, align 4
				%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
				%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

				for.cond.cleanup:
				ret void
				}

				; Test that this uses the 'DataAndControlFlowWithoutRuntimeCheck' style of
				; tail folding where it uses an updated trip count and does the loop-indvar
				; increment after the active.lane.mask.
				define void @can_overflow_i64_induction_var(ptr noalias %dst, ptr readonly %src, i64 %N) #0 {
				; CHECK-LABEL: @can_overflow_i64_induction_var(
				; CHECK-NEXT: entry:
				; CHECK-NEXT: [[CMP6_NOT:%.*]] = icmp eq i64 [[N:%.*]], 0
				; CHECK-NEXT: br i1 [[CMP6_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
				; CHECK: for.body.preheader:
				; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
				; CHECK: vector.ph:
				; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
				; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP1]])
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]])
				; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
				; CHECK: vector.body:
				; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
				; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]]
				; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP3]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
				; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 42, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
				; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[INDEX]]
				; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP4]], ptr [[TMP5]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
				; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP2]])
				; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2
				; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
				; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
				; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
				; CHECK: middle.block:
				; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
				; CHECK: scalar.ph:
				; CHECK-NEXT: br label [[FOR_BODY:%.*]]
				; CHECK: for.body:
				; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
				; CHECK: for.cond.cleanup.loopexit:
				; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
				; CHECK: for.cond.cleanup:
				; CHECK-NEXT: ret void
				;
				entry:
				%cmp6.not = icmp eq i64 %N, 0
				br i1 %cmp6.not, label %for.cond.cleanup, label %for.body.preheader

				for.body.preheader:
				br label %for.body

				for.body:
				%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
				%arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
				%0 = load i32, ptr %arrayidx, align 4
				%add = add nsw i32 %0, 42
				%arrayidx2 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
				store i32 %add, ptr %arrayidx2, align 4
				%indvars.iv.next = add nuw i64 %indvars.iv, 1
				%exitcond.not = icmp eq i64 %indvars.iv.next, %N
				br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

				for.cond.cleanup:
				ret void
				}

				attributes #0 = { vscale_range(1,16) "target-features"="+sve" }

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -passes=loop-vectorize -riscv-v-vector-bits-min=128 -scalable-vectorization=on -force-target-instruction-cost=1 -S < %s \| FileCheck %s			; RUN: opt -passes=loop-vectorize -riscv-v-vector-bits-min=128 -scalable-vectorization=on -force-target-instruction-cost=1 -S < %s \| FileCheck %s

	target triple = "riscv64"			target triple = "riscv64"

	define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {			define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
	; CHECK-LABEL: @trip5_i8(			; CHECK-LABEL: @trip5_i8(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
				; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
				; CHECK: vector.ph:
	; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8			; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
	; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 -6, [[TMP1]]			; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]			; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
	; CHECK: vector.ph:			; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
	; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP4]]
	; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
	; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
	; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
	; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP7]]
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0			; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
	; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP8]], i64 5)			; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP5]], i64 5)
	; CHECK-NEXT: [[TMP9:%.]] = getelementptr inbounds i8, ptr [[SRC:%.]], i64 [[TMP8]]			; CHECK-NEXT: [[TMP6:%.]] = getelementptr inbounds i8, ptr [[SRC:%.]], i64 [[TMP5]]
				; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0
				; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP7]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)
				; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 8 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
				; CHECK-NEXT: [[TMP9:%.]] = getelementptr inbounds i8, ptr [[DST:%.]], i64 [[TMP5]]
	; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0			; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
	; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP10]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)			; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP10]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)
	; CHECK-NEXT: [[TMP11:%.*]] = shl <vscale x 8 x i8> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)			; CHECK-NEXT: [[TMP11:%.*]] = add <vscale x 8 x i8> [[TMP8]], [[WIDE_MASKED_LOAD1]]
	; CHECK-NEXT: [[TMP12:%.]] = getelementptr inbounds i8, ptr [[DST:%.]], i64 [[TMP8]]			; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr [[TMP10]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
	; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0			; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr [[TMP13]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]], <vscale x 8 x i8> poison)			; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8
	; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 8 x i8> [[TMP11]], [[WIDE_MASKED_LOAD1]]			; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]
	; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP13]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
	; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 8
	; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]]
	; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]			; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; CHECK: scalar.ph:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
	; CHECK-NEXT: br label [[FOR_BODY:%.*]]			; CHECK-NEXT: br label [[FOR_BODY:%.*]]
	; CHECK: for.body:			; CHECK: for.body:
	; CHECK-NEXT: [[I_08:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]			; CHECK-NEXT: [[I_08:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.]], [[FOR_BODY]] ]
	; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]			; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[I_08]]
	; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX]], align 1			; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
	; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP17]], 1			; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP14]], 1
	; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]			; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
	; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1			; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
	; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP18]]			; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP15]]
	; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1			; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
	; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1			; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
	; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5			; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
	; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]			; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; CHECK: for.end:			; CHECK: for.end:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.body: ; preds = %entry, %for.body			for.body: ; preds = %entry, %for.body
	%i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]			%i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
	Show All 16 Lines

llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t \| FileCheck %s -check-prefix=CHECK		; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t \| FileCheck %s -check-prefix=CHECK

; Exercise tail folding on RISCV w/scalable vectors.		; Exercise tail folding on RISCV w/scalable vectors.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"		target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"		target triple = "riscv64"

define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {		define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @vector_add(		; CHECK-LABEL: @vector_add(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1		; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]		; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]		; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]		; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0		; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)		; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]		; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0		; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)		; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 1 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]		; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 1 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[TMP8]], ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])		; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[TMP6]], ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]		; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]		; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:		; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]		; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]		; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8		; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]		; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8		; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1		; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024		; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]		; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end:		; CHECK: for.end:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
br label %for.body		br label %for.body

for.body:		for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]		%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
Show All 9 Lines	for.end:
ret void		ret void
}		}


; a[b[i]] = v, exercise scatter support		; a[b[i]] = v, exercise scatter support
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {		define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_store(		; CHECK-LABEL: @indexed_store(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1		; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]		; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]		; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]		; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0		; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)		; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]]		; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0		; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)		; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_MASKED_LOAD]]		; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP8]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])		; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[TMP6]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]		; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]		; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:		; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]		; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]		; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
Show All 21 Lines

for.end:		for.end:
ret void		ret void
}		}

define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {		define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @indexed_load(		; CHECK-LABEL: @indexed_load(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1		; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]		; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]		; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]		; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0		; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)		; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]]		; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0		; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)		; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_MASKED_LOAD]]		; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 1 x i64> [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP8]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)		; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP6]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)
; CHECK-NEXT: [[TMP9]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]		; CHECK-NEXT: [[TMP7]] = add <vscale x 1 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> [[TMP9]], <vscale x 1 x i64> [[VEC_PHI]]		; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> [[TMP7]], <vscale x 1 x i64> [[VEC_PHI]]
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]		; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[TMP10]])		; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64(<vscale x 1 x i64> [[TMP8]])
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]		; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:		; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]		; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]		; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]		; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]		; CHECK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8		; CHECK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]		; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8		; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1		; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]		; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024		; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]		; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: for.end:		; CHECK: for.end:
; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]		; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]		; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;		;
entry:		entry:
br label %for.body		br label %for.body

for.body:		for.body:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]		%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%sum = phi i64 [0, %entry], [%sum.next, %for.body]		%sum = phi i64 [0, %entry], [%sum.next, %for.body]
%baddr = getelementptr inbounds i64, ptr %b, i64 %iv		%baddr = getelementptr inbounds i64, ptr %b, i64 %iv
%aidx = load i64, ptr %baddr		%aidx = load i64, ptr %baddr
%aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx		%aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
%elem = load i64, ptr %aaddr		%elem = load i64, ptr %aaddr
%iv.next = add nuw nsw i64 %iv, 1		%iv.next = add nuw nsw i64 %iv, 1
%sum.next = add i64 %sum, %elem		%sum.next = add i64 %sum, %elem
%exitcond.not = icmp eq i64 %iv.next, 1024		%exitcond.not = icmp eq i64 %iv.next, 1024
br i1 %exitcond.not, label %for.end, label %for.body		br i1 %exitcond.not, label %for.end, label %for.body

for.end:		for.end:
ret i64 %sum.next		ret i64 %sum.next
}		}

define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {		define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; CHECK-LABEL: @splat_int(		; CHECK-LABEL: @splat_int(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1		; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]		; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]		; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]		; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0		; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)		; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]		; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0		; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])		; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]		; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]		; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:		; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]		; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]		; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
Show All 17 Lines

for.end:		for.end:
ret void		ret void
}		}

define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {		define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; CHECK-LABEL: @uniform_store(		; CHECK-LABEL: @uniform_store(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:		; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1		; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]		; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]		; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]		; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0		; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer		; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]		; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:		; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]		; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0		; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)		; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8		; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]		; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0		; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])		; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()		; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]		; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]		; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]		; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:		; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]		; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:		; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]		; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]		; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: store i64 [[V]], ptr [[B]], align 8		; CHECK-NEXT: store i64 [[V]], ptr [[B]], align 8
▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll

	Show All 25 Lines
	; CHECK: loop:			; CHECK: loop:
	; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]			; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
	; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]			; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]
	; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4			; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4
	; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1			; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1
	; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4			; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4
	; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1			; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
	; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], 3			; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], 3
	; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]			; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
	; CHECK: exit:			; CHECK: exit:
	; CHECK-NEXT: ret void			; CHECK-NEXT: ret void
	;			;
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	%iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]			%iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ]
	%gep = getelementptr inbounds i32, ptr %a, i32 %iv			%gep = getelementptr inbounds i32, ptr %a, i32 %iv
	%v = load i32, ptr %gep, align 4			%v = load i32, ptr %gep, align 4
	%add = add nsw i32 %v, 1			%add = add nsw i32 %v, 1
	store i32 %add, ptr %gep, align 4			store i32 %add, ptr %gep, align 4
	%iv.next = add i32 %iv, 1			%iv.next = add i32 %iv, 1
	%cond = icmp eq i32 %iv, 3			%cond = icmp eq i32 %iv, 3
	br i1 %cond, label %exit, label %loop			br i1 %cond, label %exit, label %loop

	exit:			exit:
	ret void			ret void
	}			}

	define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_range(1,1024) {			define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_range(1,1024) {
	; CHECK-LABEL: @small_trip_count_min_vlen_32(			; CHECK-LABEL: @small_trip_count_min_vlen_32(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
				; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
				; CHECK: vector.ph:
	; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()			; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
	; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2			; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2
	; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 -5, [[TMP1]]			; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
	; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2
	; CHECK: vector.ph:			; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
	; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()			; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]]
	; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 2			; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
	; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32()
	; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
	; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], 1
	; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP7]]
	; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP4]]
	; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]			; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
	; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]			; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0			; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0
	; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP8]], i32 4)			; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP5]], i32 4)
	; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP8]]			; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]]
	; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0			; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
	; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP10]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i32> poison)			; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0(ptr [[TMP7]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i32> poison)
	; CHECK-NEXT: [[TMP11:%.*]] = add nsw <vscale x 2 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)			; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 2 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
	; CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP11]], ptr [[TMP10]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])			; CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP8]], ptr [[TMP7]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
	; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()			; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
	; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], 2			; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 2
	; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP13]]			; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]]
	; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]			; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
	; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]			; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
	; CHECK: middle.block:			; CHECK: middle.block:
	; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]			; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
	; CHECK: scalar.ph:			; CHECK: scalar.ph:
	; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]			; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
	; CHECK-NEXT: br label [[LOOP:%.*]]			; CHECK-NEXT: br label [[LOOP:%.*]]
	; CHECK: loop:			; CHECK: loop:
	; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]			; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
	; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]			; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]]
	Show All 25 Lines

llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll

	Show All 40 Lines
	; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; SCALABLE: for.body:			; SCALABLE: for.body:
	; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]			; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
	; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8			; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
	; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]			; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
	; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8			; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
	; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]			; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; SCALABLE: for.end:			; SCALABLE: for.end:
	; SCALABLE-NEXT: ret void			; SCALABLE-NEXT: ret void
	;			;
	; FIXEDLEN-LABEL: @uniform_load(			; FIXEDLEN-LABEL: @uniform_load(
	; FIXEDLEN-NEXT: entry:			; FIXEDLEN-NEXT: entry:
	; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; FIXEDLEN: vector.ph:			; FIXEDLEN: vector.ph:
	; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]			; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
	Show All 21 Lines
	; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]			; FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
	; FIXEDLEN: for.body:			; FIXEDLEN: for.body:
	; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]			; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
	; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8			; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
	; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]			; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
	; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8			; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
	; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]			; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; FIXEDLEN: for.end:			; FIXEDLEN: for.end:
	; FIXEDLEN-NEXT: ret void			; FIXEDLEN-NEXT: ret void
	;			;
	; TF-SCALABLE-LABEL: @uniform_load(			; TF-SCALABLE-LABEL: @uniform_load(
	; TF-SCALABLE-NEXT: entry:			; TF-SCALABLE-NEXT: entry:
	; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
	; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; TF-SCALABLE: vector.ph:			; TF-SCALABLE: vector.ph:
	; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
	; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
	; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]			; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
	; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
	; TF-SCALABLE: vector.body:			; TF-SCALABLE: vector.body:
	; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0			; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
	; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)			; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
	; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 8			; TF-SCALABLE-NEXT: [[TMP4:%.*]] = load i64, ptr [[B:%.*]], align 8
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP4]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]			; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
	; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0			; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
	; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])			; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
	; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]			; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
	; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
	; TF-SCALABLE: middle.block:			; TF-SCALABLE: middle.block:
	; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; TF-SCALABLE: scalar.ph:			; TF-SCALABLE: scalar.ph:
	; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]			; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
	; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; TF-SCALABLE: for.body:			; TF-SCALABLE: for.body:
	; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]			; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8			; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
	; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]			; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
	; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8			; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
	; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; TF-SCALABLE: for.end:			; TF-SCALABLE: for.end:
	; TF-SCALABLE-NEXT: ret void			; TF-SCALABLE-NEXT: ret void
	;			;
	; TF-FIXEDLEN-LABEL: @uniform_load(			; TF-FIXEDLEN-LABEL: @uniform_load(
	; TF-FIXEDLEN-NEXT: entry:			; TF-FIXEDLEN-NEXT: entry:
	; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; TF-FIXEDLEN: vector.ph:			; TF-FIXEDLEN: vector.ph:
	; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
	Show All 17 Lines
	; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]			; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
	; TF-FIXEDLEN: for.body:			; TF-FIXEDLEN: for.body:
	; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]			; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
	; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8			; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
	; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]			; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
	; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8			; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
	; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]			; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
	; TF-FIXEDLEN: for.end:			; TF-FIXEDLEN: for.end:
	; TF-FIXEDLEN-NEXT: ret void			; TF-FIXEDLEN-NEXT: ret void
	;			;
	entry:			entry:
	br label %for.body			br label %for.body

	for.body:			for.body:
	%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]			%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
	▲ Show 20 Lines • Show All 269 Lines • ▼ Show 20 Lines
	; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]			; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
	; FIXEDLEN: for.end:			; FIXEDLEN: for.end:
	; FIXEDLEN-NEXT: ret void			; FIXEDLEN-NEXT: ret void
	;			;
	; TF-SCALABLE-LABEL: @conditional_uniform_load(			; TF-SCALABLE-LABEL: @conditional_uniform_load(
	; TF-SCALABLE-NEXT: entry:			; TF-SCALABLE-NEXT: entry:
	; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
	; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; TF-SCALABLE: vector.ph:			; TF-SCALABLE: vector.ph:
	; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
	; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
	; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]			; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
	; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()			; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
	; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add <vscale x 1 x i64> [[TMP5]], zeroinitializer			; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add <vscale x 1 x i64> [[TMP3]], zeroinitializer
	; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)			; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul <vscale x 1 x i64> [[TMP4]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
	; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP7]]			; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP5]]
	; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]			; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]]
	; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i64 0			; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP7]], i64 0
	; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.*]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
	; TF-SCALABLE: vector.body:			; TF-SCALABLE: vector.body:
	; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0			; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
	; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024)			; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024)
	; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)			; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
				; TF-SCALABLE-NEXT: [[TMP10:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP9]], <vscale x 1 x i1> zeroinitializer
				; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 1 x i1> [[TMP10]], <vscale x 1 x i64> poison)
				; TF-SCALABLE-NEXT: [[TMP11:%.*]] = xor <vscale x 1 x i1> [[TMP9]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
	; TF-SCALABLE-NEXT: [[TMP12:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP11]], <vscale x 1 x i1> zeroinitializer			; TF-SCALABLE-NEXT: [[TMP12:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP11]], <vscale x 1 x i1> zeroinitializer
	; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 1 x i1> [[TMP12]], <vscale x 1 x i64> poison)			; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP10]], <vscale x 1 x i64> [[WIDE_MASKED_GATHER]], <vscale x 1 x i64> zeroinitializer
	; TF-SCALABLE-NEXT: [[TMP13:%.*]] = xor <vscale x 1 x i1> [[TMP11]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)			; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]]
	; TF-SCALABLE-NEXT: [[TMP14:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP13]], <vscale x 1 x i1> zeroinitializer			; TF-SCALABLE-NEXT: [[TMP14:%.*]] = or <vscale x 1 x i1> [[TMP10]], [[TMP12]]
	; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP12]], <vscale x 1 x i64> [[WIDE_MASKED_GATHER]], <vscale x 1 x i64> zeroinitializer			; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
	; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]			; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[PREDPHI]], ptr [[TMP15]], i32 8, <vscale x 1 x i1> [[TMP14]])
	; TF-SCALABLE-NEXT: [[TMP16:%.*]] = or <vscale x 1 x i1> [[TMP12]], [[TMP14]]			; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP15]], i32 0			; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]]
	; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[PREDPHI]], ptr [[TMP17]], i32 8, <vscale x 1 x i1> [[TMP16]])
	; TF-SCALABLE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP18]]
	; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]			; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
	; TF-SCALABLE-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; TF-SCALABLE-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
	; TF-SCALABLE: middle.block:			; TF-SCALABLE: middle.block:
	; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; TF-SCALABLE: scalar.ph:			; TF-SCALABLE: scalar.ph:
	; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]			; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
	; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; TF-SCALABLE: for.body:			; TF-SCALABLE: for.body:
	; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]			; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
	; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10			; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
	▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines
	; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]			; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
	; FIXEDLEN: for.end:			; FIXEDLEN: for.end:
	; FIXEDLEN-NEXT: ret void			; FIXEDLEN-NEXT: ret void
	;			;
	; TF-SCALABLE-LABEL: @uniform_load_unaligned(			; TF-SCALABLE-LABEL: @uniform_load_unaligned(
	; TF-SCALABLE-NEXT: entry:			; TF-SCALABLE-NEXT: entry:
	; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
	; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	; TF-SCALABLE: vector.ph:			; TF-SCALABLE: vector.ph:
	; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
	; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
	; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]			; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
	; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
	; TF-SCALABLE: vector.body:			; TF-SCALABLE: vector.body:
	; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0			; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
	; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)			; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
	; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 1			; TF-SCALABLE-NEXT: [[TMP4:%.*]] = load i64, ptr [[B:%.*]], align 1
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP6]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP4]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: [[TMP7:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP5]]			; TF-SCALABLE-NEXT: [[TMP5:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP3]]
	; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0			; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
	; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])			; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
	; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]			; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
	; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
	; TF-SCALABLE: middle.block:			; TF-SCALABLE: middle.block:
	; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; TF-SCALABLE: scalar.ph:			; TF-SCALABLE: scalar.ph:
	; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]			; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
	; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; TF-SCALABLE: for.body:			; TF-SCALABLE: for.body:
	; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[FOR_BODY]] ]			; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[FOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1			; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
	▲ Show 20 Lines • Show All 133 Lines • ▼ Show 20 Lines
	; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]			; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
	; FIXEDLEN: for.end:			; FIXEDLEN: for.end:
	; FIXEDLEN-NEXT: ret void			; FIXEDLEN-NEXT: ret void
	;			;
	; TF-SCALABLE-LABEL: @uniform_store(			; TF-SCALABLE-LABEL: @uniform_store(
	; TF-SCALABLE-NEXT: entry:			; TF-SCALABLE-NEXT: entry:
	; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
	; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE: vector.ph:			; TF-SCALABLE: vector.ph:
	; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
	; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
	; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]			; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
	; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
	; TF-SCALABLE: vector.body:			; TF-SCALABLE: vector.body:
	; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0			; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
	; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)			; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
	; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8			; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
	; TF-SCALABLE-NEXT: [[TMP6:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP5]]			; TF-SCALABLE-NEXT: [[TMP4:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP3]]
	; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0			; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
	; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])			; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
	; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]			; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
	; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; TF-SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
	; TF-SCALABLE: middle.block:			; TF-SCALABLE: middle.block:
	; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; TF-SCALABLE: scalar.ph:			; TF-SCALABLE: scalar.ph:
	; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]			; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
	; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; TF-SCALABLE: for.body:			; TF-SCALABLE: for.body:
	; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[FOR_BODY]] ]			; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[FOR_BODY]] ]
	; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8			; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
	▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines
	; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]			; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
	; FIXEDLEN: for.end:			; FIXEDLEN: for.end:
	; FIXEDLEN-NEXT: ret void			; FIXEDLEN-NEXT: ret void
	;			;
	; TF-SCALABLE-LABEL: @uniform_store_of_loop_varying(			; TF-SCALABLE-LABEL: @uniform_store_of_loop_varying(
	; TF-SCALABLE-NEXT: entry:			; TF-SCALABLE-NEXT: entry:
	; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
	; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE: vector.ph:			; TF-SCALABLE: vector.ph:
	; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
	; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
	; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]			; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
	; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()			; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
	; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add <vscale x 1 x i64> [[TMP5]], zeroinitializer			; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add <vscale x 1 x i64> [[TMP3]], zeroinitializer
	; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)			; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul <vscale x 1 x i64> [[TMP4]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
	; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP7]]			; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP5]]
	; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]			; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]]
	; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i64 0			; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP7]], i64 0
	; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
	; TF-SCALABLE: vector.body:			; TF-SCALABLE: vector.body:
	; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[VEC_IND:%.]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[VEC_IND:%.]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0			; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
	; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024)			; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024)
	; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[VEC_IND]], <vscale x 1 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])			; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[VEC_IND]], <vscale x 1 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
	; TF-SCALABLE-NEXT: [[TMP11:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP10]]			; TF-SCALABLE-NEXT: [[TMP9:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP8]]
	; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0			; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
	; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP12]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])			; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP10]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
	; TF-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]]			; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
	; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]			; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
	; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
	; TF-SCALABLE: middle.block:			; TF-SCALABLE: middle.block:
	; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; TF-SCALABLE: scalar.ph:			; TF-SCALABLE: scalar.ph:
	; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]			; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
	; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; TF-SCALABLE: for.body:			; TF-SCALABLE: for.body:
	; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[FOR_BODY]] ]			; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[FOR_BODY]] ]
	; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8			; TF-SCALABLE-NEXT: store i64 [[IV]], ptr [[B]], align 8
	▲ Show 20 Lines • Show All 170 Lines • ▼ Show 20 Lines
	; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]			; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
	; FIXEDLEN: for.end:			; FIXEDLEN: for.end:
	; FIXEDLEN-NEXT: ret void			; FIXEDLEN-NEXT: ret void
	;			;
	; TF-SCALABLE-LABEL: @conditional_uniform_store(			; TF-SCALABLE-LABEL: @conditional_uniform_store(
	; TF-SCALABLE-NEXT: entry:			; TF-SCALABLE-NEXT: entry:
	; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
	; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE: vector.ph:			; TF-SCALABLE: vector.ph:
	; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
	; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
	; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]			; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
	; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()			; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
	; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add <vscale x 1 x i64> [[TMP5]], zeroinitializer			; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add <vscale x 1 x i64> [[TMP3]], zeroinitializer
	; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul <vscale x 1 x i64> [[TMP6]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)			; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul <vscale x 1 x i64> [[TMP4]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
	; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP7]]			; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP5]]
	; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]			; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]]
	; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP9]], i64 0			; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP7]], i64 0
	; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.]] = insertelement <vscale x 1 x ptr> poison, ptr [[B:%.]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
	; TF-SCALABLE: vector.body:			; TF-SCALABLE: vector.body:
	; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[VEC_IND:%.]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[VEC_IND:%.]] = phi <vscale x 1 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0			; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
	; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP10]], i64 1024)			; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024)
	; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)			; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp ugt <vscale x 1 x i64> [[VEC_IND]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 10, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
	; TF-SCALABLE-NEXT: [[TMP12:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP11]], <vscale x 1 x i1> zeroinitializer			; TF-SCALABLE-NEXT: [[TMP10:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP9]], <vscale x 1 x i1> zeroinitializer
	; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 1 x i1> [[TMP12]])			; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x ptr> [[BROADCAST_SPLAT2]], i32 8, <vscale x 1 x i1> [[TMP10]])
	; TF-SCALABLE-NEXT: [[TMP13:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP10]]			; TF-SCALABLE-NEXT: [[TMP11:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP8]]
	; TF-SCALABLE-NEXT: [[TMP14:%.*]] = xor <vscale x 1 x i1> [[TMP11]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)			; TF-SCALABLE-NEXT: [[TMP12:%.*]] = xor <vscale x 1 x i1> [[TMP9]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
	; TF-SCALABLE-NEXT: [[TMP15:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP14]], <vscale x 1 x i1> zeroinitializer			; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i1> [[TMP12]], <vscale x 1 x i1> zeroinitializer
	; TF-SCALABLE-NEXT: [[TMP16:%.*]] = or <vscale x 1 x i1> [[TMP12]], [[TMP15]]			; TF-SCALABLE-NEXT: [[TMP14:%.*]] = or <vscale x 1 x i1> [[TMP10]], [[TMP13]]
	; TF-SCALABLE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0			; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0
	; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP17]], i32 8, <vscale x 1 x i1> [[TMP16]])			; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP15]], i32 8, <vscale x 1 x i1> [[TMP14]])
	; TF-SCALABLE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP18]]			; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]]
	; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]			; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
	; TF-SCALABLE-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; TF-SCALABLE-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
	; TF-SCALABLE: middle.block:			; TF-SCALABLE: middle.block:
	; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; TF-SCALABLE: scalar.ph:			; TF-SCALABLE: scalar.ph:
	; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]			; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.]] ]
	; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; TF-SCALABLE: for.body:			; TF-SCALABLE: for.body:
	; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[LATCH:%.*]] ]			; TF-SCALABLE-NEXT: [[IV:%.]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.]], [[LATCH:%.*]] ]
	; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10			; TF-SCALABLE-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
	▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines
	; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1			; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
	; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024			; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
	; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]			; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
	; FIXEDLEN: for.end:			; FIXEDLEN: for.end:
	; FIXEDLEN-NEXT: ret void			; FIXEDLEN-NEXT: ret void
	;			;
	; TF-SCALABLE-LABEL: @uniform_store_unaligned(			; TF-SCALABLE-LABEL: @uniform_store_unaligned(
	; TF-SCALABLE-NEXT: entry:			; TF-SCALABLE-NEXT: entry:
	; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE-NEXT: [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
	; TF-SCALABLE-NEXT: br i1 [[TMP1]], label [[SCALAR_PH:%.]], label [[VECTOR_PH:%.]]
	; TF-SCALABLE: vector.ph:			; TF-SCALABLE: vector.ph:
	; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1			; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
	; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]			; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
	; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]			; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
	; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]			; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0			; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.]], i64 0
	; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer			; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
	; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
	; TF-SCALABLE: vector.body:			; TF-SCALABLE: vector.body:
	; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]			; TF-SCALABLE-NEXT: [[INDEX:%.]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.]], [[VECTOR_BODY]] ]
	; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0			; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
	; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP5]], i64 1024)			; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
	; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1			; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1
	; TF-SCALABLE-NEXT: [[TMP6:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP5]]			; TF-SCALABLE-NEXT: [[TMP4:%.]] = getelementptr inbounds i64, ptr [[A:%.]], i64 [[TMP3]]
	; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0			; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
	; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])			; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
	; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()			; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
	; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]			; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
	; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]			; TF-SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
	; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]			; TF-SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
	; TF-SCALABLE: middle.block:			; TF-SCALABLE: middle.block:
	; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]			; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
	; TF-SCALABLE: scalar.ph:			; TF-SCALABLE: scalar.ph:
	; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]			; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
	; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]			; TF-SCALABLE-NEXT: br label [[FOR_BODY:%.*]]
	; TF-SCALABLE: for.body:			; TF-SCALABLE: for.body:
	; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]			; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
	; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1			; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1
	▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Use overflow-check analysis to improve tail-folding.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 501488

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll

llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll

llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LoopVectorize] Use overflow-check analysis to improve tail-folding. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 501488

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

llvm/include/llvm/CodeGen/BasicTTIImpl.h

llvm/lib/Analysis/TargetTransformInfo.cpp

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-low-trip-count.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-overflow-checks.ll

llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll

llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll

llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll

llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll

[LoopVectorize] Use overflow-check analysis to improve tail-folding.
ClosedPublic