Diff 247662

llvm/include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 488 Lines • ▼ Show 20 Lines	struct UnrollingPreferences {
bool AllowExpensiveTripCount;		bool AllowExpensiveTripCount;
/// Apply loop unroll on any kind of loop		/// Apply loop unroll on any kind of loop
/// (mainly to loops that fail runtime unrolling).		/// (mainly to loops that fail runtime unrolling).
bool Force;		bool Force;
/// Allow using trip count upper bound to unroll loops.		/// Allow using trip count upper bound to unroll loops.
bool UpperBound;		bool UpperBound;
/// Allow peeling off loop iterations.		/// Allow peeling off loop iterations.
bool AllowPeeling;		bool AllowPeeling;
		/// Allow peeling off loop iterations for loop nests.
		bool AllowLoopNestsPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.		/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;		bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.		/// Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollAndJam;		bool UnrollAndJam;
/// Allow peeling basing on profile. Uses to enable peeling off all		/// Allow peeling basing on profile. Uses to enable peeling off all
/// iterations basing on provided profile.		/// iterations basing on provided profile.
/// If the value is true the peeling cost model can decide to peel only		/// If the value is true the peeling cost model can decide to peel only
/// some iterations and in this case it will set this to false.		/// some iterations and in this case it will set this to false.
▲ Show 20 Lines • Show All 1,487 Lines • Show Last 20 Lines

llvm/include/llvm/Transforms/Utils/LoopUtils.h

	Show All 18 Lines
	#include "llvm/ADT/SetVector.h"			#include "llvm/ADT/SetVector.h"
	#include "llvm/ADT/SmallPtrSet.h"			#include "llvm/ADT/SmallPtrSet.h"
	#include "llvm/ADT/SmallVector.h"			#include "llvm/ADT/SmallVector.h"
	#include "llvm/ADT/StringRef.h"			#include "llvm/ADT/StringRef.h"
	#include "llvm/Analysis/AliasAnalysis.h"			#include "llvm/Analysis/AliasAnalysis.h"
	#include "llvm/Analysis/DemandedBits.h"			#include "llvm/Analysis/DemandedBits.h"
	#include "llvm/Analysis/EHPersonalities.h"			#include "llvm/Analysis/EHPersonalities.h"
	#include "llvm/Analysis/IVDescriptors.h"			#include "llvm/Analysis/IVDescriptors.h"
				#include "llvm/Analysis/LoopPass.h"
	#include "llvm/Analysis/MustExecute.h"			#include "llvm/Analysis/MustExecute.h"
	#include "llvm/Analysis/TargetTransformInfo.h"			#include "llvm/Analysis/TargetTransformInfo.h"
	#include "llvm/IR/Dominators.h"			#include "llvm/IR/Dominators.h"
	#include "llvm/IR/InstrTypes.h"			#include "llvm/IR/InstrTypes.h"
	#include "llvm/IR/Operator.h"			#include "llvm/IR/Operator.h"
	#include "llvm/IR/ValueHandle.h"			#include "llvm/IR/ValueHandle.h"
	#include "llvm/Support/Casting.h"			#include "llvm/Support/Casting.h"
				#include "llvm/Transforms/Utils/ValueMapper.h"

	namespace llvm {			namespace llvm {

	class AliasSet;			class AliasSet;
	class AliasSetTracker;			class AliasSetTracker;
	class BasicBlock;			class BasicBlock;
	class DataLayout;			class DataLayout;
	class IRBuilderBase;			class IRBuilderBase;
	▲ Show 20 Lines • Show All 379 Lines • ▼ Show 20 Lines
	/// in LoopInfo, iterated in reverse. This is because the loops are stored in			/// in LoopInfo, iterated in reverse. This is because the loops are stored in
	/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,			/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
	/// loop deletion, and LICM, we largely want to work forward across the CFG so			/// loop deletion, and LICM, we largely want to work forward across the CFG so
	/// that we visit defs before uses and can propagate simplifications from one			/// that we visit defs before uses and can propagate simplifications from one
	/// loop nest into the next. Calls appendReversedLoopsToWorklist with the			/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
	/// already reversed loops in LI.			/// already reversed loops in LI.
	/// FIXME: Consider changing the order in LoopInfo.			/// FIXME: Consider changing the order in LoopInfo.
	void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);			void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);

				/// Recursively clone the loop \p L and all of its sub-loops, using \p VM
				/// to map each original block to its copy.
				///
				/// \param L   The loop to clone.
				/// \param PL  Parent for the new loop; when null the new loop is
				///            registered in \p LI as a top-level loop.
				/// \param VM  Map from original blocks to their clones.
				/// \param LI  LoopInfo that allocates and records the new loops.
				/// \param LPM Optional legacy loop pass manager; when non-null every
				///            newly created loop is added to it.
				/// \returns the newly created loop corresponding to \p L.
				Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
				                LoopInfo *LI, LPPassManager *LPM);

	} // end namespace llvm			} // end namespace llvm

	#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H			#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

Show First 20 Lines • Show All 148 Lines • ▼ Show 20 Lines	cl::desc("If the runtime tripcount for the loop is lower than the "
"threshold, the loop is considered as flat and will be less "		"threshold, the loop is considered as flat and will be less "
"aggressively unrolled."));		"aggressively unrolled."));

static cl::opt<bool>		static cl::opt<bool>
UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,		UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
cl::desc("Allows loops to be peeled when the dynamic "		cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));		"trip count is known to be low."));

		static cl::opt<bool> UnrollAllowLoopNestsPeeling(
		"unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden,
		xbolva00Unsubmitted Not Done Reply Inline Actions enable by default? or at least for x86? xbolva00: enable by default? or at least for x86?
		fhahnUnsubmitted Not Done Reply Inline Actions I don't think that would be a good idea, without at least some analysis on the impact on code size on targets turning this on. The cost-model currently assumes innermost loops exclusively. fhahn: I don't think that would be a good idea, without at least some analysis on the impact on code…
		ashlykovAuthorUnsubmitted Not Done Reply Inline Actions The change is intended to be enabled on per-target basis via TTI where it's profitable. If someone could supply enough benchmarking for e.g. x86 target we can proceed further with enabling it/adjusting cost-model. ashlykov: The change is intended to be enabled on per-target basis via TTI where it's profitable. If…
		cl::desc("Allows loop nests to be peeled."));

static cl::opt<bool> UnrollUnrollRemainder(		static cl::opt<bool> UnrollUnrollRemainder(
"unroll-remainder", cl::Hidden,		"unroll-remainder", cl::Hidden,
cl::desc("Allow the loop remainder to be unrolled."));		cl::desc("Allow the loop remainder to be unrolled."));

// This option isn't ever intended to be enabled, it serves to allow		// This option isn't ever intended to be enabled, it serves to allow
// experiments to check the assumptions about when this kind of revisit is		// experiments to check the assumptions about when this kind of revisit is
// necessary.		// necessary.
static cl::opt<bool> UnrollRevisitChildLoops(		static cl::opt<bool> UnrollRevisitChildLoops(
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines	TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.Partial = false;		UP.Partial = false;
UP.Runtime = false;		UP.Runtime = false;
UP.AllowRemainder = true;		UP.AllowRemainder = true;
UP.UnrollRemainder = false;		UP.UnrollRemainder = false;
UP.AllowExpensiveTripCount = false;		UP.AllowExpensiveTripCount = false;
UP.Force = false;		UP.Force = false;
UP.UpperBound = false;		UP.UpperBound = false;
UP.AllowPeeling = true;		UP.AllowPeeling = true;
		UP.AllowLoopNestsPeeling = false;
UP.UnrollAndJam = false;		UP.UnrollAndJam = false;
UP.PeelProfiledIterations = true;		UP.PeelProfiledIterations = true;
UP.UnrollAndJamInnerLoopThreshold = 60;		UP.UnrollAndJamInnerLoopThreshold = 60;

// Override with any target specific settings		// Override with any target specific settings
TTI.getUnrollingPreferences(L, SE, UP);		TTI.getUnrollingPreferences(L, SE, UP);

// Apply size attributes		// Apply size attributes
Show All 24 Lines	TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
if (UnrollAllowRemainder.getNumOccurrences() > 0)		if (UnrollAllowRemainder.getNumOccurrences() > 0)
UP.AllowRemainder = UnrollAllowRemainder;		UP.AllowRemainder = UnrollAllowRemainder;
if (UnrollRuntime.getNumOccurrences() > 0)		if (UnrollRuntime.getNumOccurrences() > 0)
UP.Runtime = UnrollRuntime;		UP.Runtime = UnrollRuntime;
if (UnrollMaxUpperBound == 0)		if (UnrollMaxUpperBound == 0)
UP.UpperBound = false;		UP.UpperBound = false;
if (UnrollAllowPeeling.getNumOccurrences() > 0)		if (UnrollAllowPeeling.getNumOccurrences() > 0)
UP.AllowPeeling = UnrollAllowPeeling;		UP.AllowPeeling = UnrollAllowPeeling;
		if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
		UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
if (UnrollUnrollRemainder.getNumOccurrences() > 0)		if (UnrollUnrollRemainder.getNumOccurrences() > 0)
UP.UnrollRemainder = UnrollUnrollRemainder;		UP.UnrollRemainder = UnrollUnrollRemainder;

// Apply user values provided by argument		// Apply user values provided by argument
if (UserThreshold.hasValue()) {		if (UserThreshold.hasValue()) {
UP.Threshold = *UserThreshold;		UP.Threshold = *UserThreshold;
UP.PartialThreshold = *UserThreshold;		UP.PartialThreshold = *UserThreshold;
}		}
▲ Show 20 Lines • Show All 1,208 Lines • Show Last 20 Lines

llvm/lib/Transforms/Scalar/LoopUnswitch.cpp

Show First 20 Lines • Show All 897 Lines • ▼ Show 20 Lines	LLVM_DEBUG(dbgs() << "NOT unswitching loop %"
<< ". Condition is divergent.\n");		<< ". Condition is divergent.\n");
return false;		return false;
}		}

unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI);		unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI);
return true;		return true;
}		}

/// Recursively clone the loop \p L and all of its sub-loops, using \p VM to
/// map each original block to its copy.
///
/// A fresh Loop is allocated from \p LI and attached under \p PL, or registered
/// as a top-level loop when \p PL is null. Every newly created loop is also
/// added to \p LPM, which is dereferenced unconditionally and therefore must
/// not be null.
///
/// \returns the newly created loop corresponding to \p L.
static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI,
                       LPPassManager *LPM) {
  Loop &New = *LI->AllocateLoop();
  if (PL)
    PL->addChildLoop(&New);
  else
    LI->addTopLevelLoop(&New);
  LPM->addLoop(New);

  // Add all of the blocks in L to the new loop. Blocks whose innermost loop is
  // not L are skipped here; the recursive calls below handle the sub-loops.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I)
    if (LI->getLoopFor(*I) == L)
      New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);

  // Add all of the subloops to the new loop.
  for (Loop *I : *L)
    cloneLoop(I, &New, VM, LI, LPM);

  return &New;
}

/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,		/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
/// otherwise branch to FalseDest. Insert the code immediately before OldBranch		/// otherwise branch to FalseDest. Insert the code immediately before OldBranch
/// and remove (but not erase!) it from the function.		/// and remove (but not erase!) it from the function.
void LoopUnswitch::emitPreheaderBranchOnCondition(Value LIC, Constant Val,		void LoopUnswitch::emitPreheaderBranchOnCondition(Value LIC, Constant Val,
BasicBlock *TrueDest,		BasicBlock *TrueDest,
BasicBlock *FalseDest,		BasicBlock *FalseDest,
BranchInst *OldBranch,		BranchInst *OldBranch,
Instruction *TI) {		Instruction *TI) {
▲ Show 20 Lines • Show All 731 Lines • Show Last 20 Lines

llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp

Show First 20 Lines • Show All 282 Lines • ▼ Show 20 Lines	void llvm::computePeelCount(Loop *L, unsigned LoopSize,
unsigned &TripCount, ScalarEvolution &SE) {		unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");		assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the UP.PeelCount value set by the target in		// Save the UP.PeelCount value set by the target in
// TTI.getUnrollingPreferences or by the flag -unroll-peel-count.		// TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
unsigned TargetPeelCount = UP.PeelCount;		unsigned TargetPeelCount = UP.PeelCount;
UP.PeelCount = 0;		UP.PeelCount = 0;
if (!canPeel(L))		if (!canPeel(L))
return;		return;

// Only try to peel innermost loops.		// Only try to peel innermost loops by default.
		fhahnUnsubmitted Done Reply Inline Actions Update comment. fhahn: Update comment.
if (!L->empty())		// The constraint can be relaxed by the target in TTI.getUnrollingPreferences
		// or by the flag -unroll-allow-loop-nests-peeling.
		if (!UP.AllowLoopNestsPeeling && !L->empty())
return;		return;

// If the user provided a peel count, use that.		// If the user provided a peel count, use that.
bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;		bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
if (UserPeelCount) {		if (UserPeelCount) {
LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount		LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
<< " iterations.\n");		<< " iterations.\n");
UP.PeelCount = UnrollForcePeelCount;		UP.PeelCount = UnrollForcePeelCount;
▲ Show 20 Lines • Show All 201 Lines • ▼ Show 20 Lines	static void cloneLoopBlocks(
Loop *ParentLoop = L->getParentLoop();		Loop *ParentLoop = L->getParentLoop();

// For each block in the original loop, create a new copy,		// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.		// and update the value map with the newly created values.
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {		for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
BasicBlock NewBB = CloneBasicBlock(BB, VMap, ".peel", F);		BasicBlock NewBB = CloneBasicBlock(BB, VMap, ".peel", F);
NewBlocks.push_back(NewBB);		NewBlocks.push_back(NewBB);

if (ParentLoop)		// If an original block is an immediate child of the loop L, its copy
		// is a child of a ParentLoop after peeling. If a block is a child of
		// a nested loop, it is handled in the cloneLoop() call below.
		if (ParentLoop && LI->getLoopFor(*BB) == L)
ParentLoop->addBasicBlockToLoop(NewBB, *LI);		ParentLoop->addBasicBlockToLoop(NewBB, *LI);

VMap[*BB] = NewBB;		VMap[*BB] = NewBB;

// If dominator tree is available, insert nodes to represent cloned blocks.		// If dominator tree is available, insert nodes to represent cloned blocks.
if (DT) {		if (DT) {
if (Header == *BB)		if (Header == *BB)
DT->addNewBlock(NewBB, InsertTop);		DT->addNewBlock(NewBB, InsertTop);
else {		else {
DomTreeNode IDom = DT->getNode(BB)->getIDom();		DomTreeNode IDom = DT->getNode(BB)->getIDom();
// VMap must contain entry for IDom, as the iteration order is RPO.		// VMap must contain entry for IDom, as the iteration order is RPO.
DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));		DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
}		}
}		}
}		}

		// Recursively create the new Loop objects for nested loops, if any,
		// to preserve LoopInfo.
		for (Loop ChildLoop : L) {
		cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr);
		}

// Hook-up the control flow for the newly inserted blocks.		// Hook-up the control flow for the newly inserted blocks.
// The new header is hooked up directly to the "top", which is either		// The new header is hooked up directly to the "top", which is either
// the original loop preheader (for the first iteration) or the previous		// the original loop preheader (for the first iteration) or the previous
// iteration's exiting block (for every other iteration)		// iteration's exiting block (for every other iteration)
InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));		InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));

// Similarly, for the latch:		// Similarly, for the latch:
// The original exiting edge is still hooked up to the loop exit.		// The original exiting edge is still hooked up to the loop exit.
▲ Show 20 Lines • Show All 252 Lines • Show Last 20 Lines

llvm/lib/Transforms/Utils/LoopUtils.cpp

	Show First 20 Lines • Show All 1,493 Lines • ▼ Show 20 Lines
	template void			template void
	llvm::appendLoopsToWorklist<Loop &>(Loop &L,			llvm::appendLoopsToWorklist<Loop &>(Loop &L,
	SmallPriorityWorklist<Loop *, 4> &Worklist);			SmallPriorityWorklist<Loop *, 4> &Worklist);

	void llvm::appendLoopsToWorklist(LoopInfo &LI,			void llvm::appendLoopsToWorklist(LoopInfo &LI,
	SmallPriorityWorklist<Loop *, 4> &Worklist) {			SmallPriorityWorklist<Loop *, 4> &Worklist) {
	appendReversedLoopsToWorklist(LI, Worklist);			appendReversedLoopsToWorklist(LI, Worklist);
	}			}

				/// Recursively clone the loop \p L and all of its sub-loops, using \p VM
				/// to map each original block to its copy.
				///
				/// A fresh Loop is allocated from \p LI and attached under \p PL, or
				/// registered as a top-level loop when \p PL is null. When \p LPM is
				/// non-null, every newly created loop is also added to it.
				///
				/// \returns the newly created loop corresponding to \p L.
				Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
				                      LoopInfo *LI, LPPassManager *LPM) {
				  Loop &New = *LI->AllocateLoop();
				  if (PL)
				    PL->addChildLoop(&New);
				  else
				    LI->addTopLevelLoop(&New);

				  // The pass manager is optional: callers may pass nullptr.
				  if (LPM)
				    LPM->addLoop(New);

				  // Add all of the blocks in L to the new loop. Blocks whose innermost
				  // loop is not L are skipped here; the recursive calls below handle
				  // the sub-loops.
				  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
				       I != E; ++I)
				    if (LI->getLoopFor(*I) == L)
				      New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI);

				  // Add all of the subloops to the new loop.
				  for (Loop *I : *L)
				    cloneLoop(I, &New, VM, LI, LPM);

				  return &New;
				}

llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info \| FileCheck %s		; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info \| FileCheck %s

		fhahnUnsubmitted Done Reply Inline Actions I think it would be better to add tests for this in a separate file. fhahn: I think it would be better to add tests for this in a separate file.
declare void @f1()		declare void @f1()
declare void @f2()		declare void @f2()

; Check that we can peel off iterations that make conditions true.		; Check that we can peel off iterations that make conditions true.
define void @test1(i32 %k) {		define void @test1(i32 %k) {
; CHECK-LABEL: @test1(		; CHECK-LABEL: @test1(
; CHECK-NEXT: for.body.lr.ph:		; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
▲ Show 20 Lines • Show All 386 Lines • ▼ Show 20 Lines	for.inc:
%inc = add nsw i32 %i.05, 1		%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %k		%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %for.end		br i1 %cmp, label %for.body, label %for.end

for.end:		for.end:
ret void		ret void
}		}

; In this case we cannot peel the inner loop, because the condition involves
; the outer induction variable.
define void @test5(i32 %k) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
; CHECK: outer.header:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then:
; CHECK-NEXT: call void @f1()
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else:
; CHECK-NEXT: call void @f2()
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
; CHECK: outer.inc:
; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1
; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
for.body.lr.ph:
br label %outer.header

outer.header:
%j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
br label %for.body

for.body:
%i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
%cmp1 = icmp ult i32 %j, 2
br i1 %cmp1, label %if.then, label %if.else

if.then:
call void @f1()
br label %for.inc

if.else:
call void @f2()
br label %for.inc

for.inc:
%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %outer.inc

outer.inc:
%j.inc = add nsw i32 %j, 1
%outer.cmp = icmp slt i32 %j.inc, %k
br i1 %outer.cmp, label %outer.header, label %for.end


for.end:
ret void
}

; In this test, the condition involves 2 AddRecs. Without evaluating both		; In this test, the condition involves 2 AddRecs. Without evaluating both
; AddRecs, we cannot prove that the condition becomes known in the loop body		; AddRecs, we cannot prove that the condition becomes known in the loop body
; after peeling.		; after peeling.
define void @test6(i32 %k) {		define void @test5(i32 %k) {
; CHECK-LABEL: @test6(		; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.]] = phi i32 [ 0, [[ENTRY:%.]] ], [ [[INC:%.]], [[FOR_INC:%.]] ]		; CHECK-NEXT: [[I_05:%.]] = phi i32 [ 0, [[ENTRY:%.]] ], [ [[INC:%.]], [[FOR_INC:%.]] ]
; CHECK-NEXT: [[J:%.]] = phi i32 [ 4, [[ENTRY]] ], [ [[J_INC:%.]], [[FOR_INC]] ]		; CHECK-NEXT: [[J:%.]] = phi i32 [ 4, [[ENTRY]] ], [ [[J_INC:%.]], [[FOR_INC]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_05]], [[J]]		; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_05]], [[J]]
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.]], label [[IF_ELSE:%.]]		; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.]], label [[IF_ELSE:%.]]
; CHECK: if.then:		; CHECK: if.then:
Show All 32 Lines	for.inc:
%j.inc = add nsw i32 %j, 1		%j.inc = add nsw i32 %j, 1
%cmp = icmp slt i32 %inc, %k		%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %for.end		br i1 %cmp, label %for.body, label %for.end

for.end:		for.end:
ret void		ret void
}		}

define void @test7(i32 %k) {		define void @test6(i32 %k) {
; CHECK-LABEL: @test7(		; CHECK-LABEL: @test6(
; CHECK-NEXT: for.body.lr.ph:		; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:		; CHECK: for.body.peel.begin:
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; CHECK: for.body.peel:		; CHECK: for.body.peel:
; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ne i32 0, 3		; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ne i32 0, 3
; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.]], label [[FOR_INC_PEEL:%.]]		; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.]], label [[FOR_INC_PEEL:%.]]
; CHECK: if.then.peel:		; CHECK: if.then.peel:
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines	for.inc:
%inc = add nsw i32 %i.05, 1		%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %k		%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %for.end		br i1 %cmp, label %for.body, label %for.end

for.end:		for.end:
ret void		ret void
}		}

define void @test8(i32 %k) {		define void @test7(i32 %k) {
; CHECK-LABEL: @test8(		; CHECK-LABEL: @test7(
; CHECK-NEXT: for.body.lr.ph:		; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:		; CHECK: for.body.peel.begin:
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; CHECK: for.body.peel:		; CHECK: for.body.peel:
; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp eq i32 0, 3		; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp eq i32 0, 3
; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.]], label [[FOR_INC_PEEL:%.]]		; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.]], label [[FOR_INC_PEEL:%.]]
; CHECK: if.then.peel:		; CHECK: if.then.peel:
▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines	for.inc:
br i1 %cmp, label %for.body, label %for.end		br i1 %cmp, label %for.body, label %for.end

for.end:		for.end:
ret void		ret void
}		}

; Comparison with non-monotonic predicate due to possible wrapping, loop		; Comparison with non-monotonic predicate due to possible wrapping, loop
; body cannot be simplified.		; body cannot be simplified.
define void @test9(i32 %k) {		define void @test8(i32 %k) {
; CHECK-LABEL: @test9(		; CHECK-LABEL: @test8(
; CHECK-NEXT: for.body.lr.ph:		; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.]] ], [ [[INC:%.]], [[FOR_INC:%.]] ]		; CHECK-NEXT: [[I_05:%.]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.]] ], [ [[INC:%.]], [[FOR_INC:%.]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_05]], 3		; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_05]], 3
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]		; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:		; CHECK: if.then:
; CHECK-NEXT: call void @f1()		; CHECK-NEXT: call void @f1()
Show All 22 Lines	for.inc:
%cmp = icmp slt i32 %inc, %k		%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %for.end		br i1 %cmp, label %for.body, label %for.end

for.end:		for.end:
ret void		ret void
}		}
; CHECK-NOT: llvm.loop.unroll.disable		; CHECK-NOT: llvm.loop.unroll.disable

define void @test_10__peel_first_iter_via_slt_pred(i32 %len) {		define void @test_9__peel_first_iter_via_slt_pred(i32 %len) {
; CHECK-LABEL: @test_10__peel_first_iter_via_slt_pred(		; CHECK-LABEL: @test_9__peel_first_iter_via_slt_pred(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0		; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:		; CHECK: for.body.peel.begin:
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; CHECK: for.body.peel:		; CHECK: for.body.peel:
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines

if.end: ; preds = %if.then, %for.body		if.end: ; preds = %if.then, %for.body
call void @sink()		call void @sink()
%inc = add nuw nsw i32 %i.06, 1		%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, %len		%exitcond = icmp eq i32 %inc, %len
br i1 %exitcond, label %for.cond.cleanup, label %for.body		br i1 %exitcond, label %for.cond.cleanup, label %for.body
}		}

define void @test_11__peel_first_iter_via_sgt_pred(i32 %len) {		define void @test_10__peel_first_iter_via_sgt_pred(i32 %len) {
; CHECK-LABEL: @test_11__peel_first_iter_via_sgt_pred(		; CHECK-LABEL: @test_10__peel_first_iter_via_sgt_pred(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0		; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:		; CHECK: for.body.peel.begin:
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; CHECK: for.body.peel:		; CHECK: for.body.peel:
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	if.end: ; preds = %if.then, %for.body
call void @sink()		call void @sink()
%inc = add nuw nsw i32 %i.06, 1		%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, %len		%exitcond = icmp eq i32 %inc, %len
br i1 %exitcond, label %for.cond.cleanup, label %for.body		br i1 %exitcond, label %for.cond.cleanup, label %for.body
}		}

; NOTE: here we should only peel the first iteration,		; NOTE: here we should only peel the first iteration,
; i.e. all calls to sink() must stay in loop.		; i.e. all calls to sink() must stay in loop.
define void @test12__peel_first_iter_via_eq_pred(i32 %len) {		define void @test11__peel_first_iter_via_eq_pred(i32 %len) {
; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred(		; CHECK-LABEL: @test11__peel_first_iter_via_eq_pred(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0		; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:		; CHECK: for.body.peel.begin:
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; CHECK: for.body.peel:		; CHECK: for.body.peel:
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines	if.end: ; preds = %if.then, %for.body
call void @sink()		call void @sink()
%inc = add nuw nsw i32 %i.06, 1		%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, %len		%exitcond = icmp eq i32 %inc, %len
br i1 %exitcond, label %for.cond.cleanup, label %for.body		br i1 %exitcond, label %for.cond.cleanup, label %for.body
}		}

; NOTE: here we should only peel the first iteration,		; NOTE: here we should only peel the first iteration,
; i.e. all calls to sink() must stay in loop.		; i.e. all calls to sink() must stay in loop.
define void @test13__peel_first_iter_via_ne_pred(i32 %len) {		define void @test12__peel_first_iter_via_ne_pred(i32 %len) {
; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred(		; CHECK-LABEL: @test12__peel_first_iter_via_ne_pred(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0		; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]]
; CHECK: for.body.peel.begin:		; CHECK: for.body.peel.begin:
; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]		; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]]
; CHECK: for.body.peel:		; CHECK: for.body.peel:
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
if.end: ; preds = %if.then, %for.body		if.end: ; preds = %if.then, %for.body
call void @sink()		call void @sink()
%inc = add nuw nsw i32 %i.06, 1		%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, %len		%exitcond = icmp eq i32 %inc, %len
br i1 %exitcond, label %for.cond.cleanup, label %for.body		br i1 %exitcond, label %for.cond.cleanup, label %for.body
}		}

; No peeling is profitable here.		; No peeling is profitable here.
define void @test14__ivar_mod2_is_1(i32 %len) {		define void @test13__ivar_mod2_is_1(i32 %len) {
; CHECK-LABEL: @test14__ivar_mod2_is_1(		; CHECK-LABEL: @test13__ivar_mod2_is_1(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0		; CHECK-NEXT: [[CMP5:%.]] = icmp sgt i32 [[LEN:%.]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.]], label [[FOR_COND_CLEANUP:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:		; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:		; CHECK: for.cond.cleanup:
Show All 32 Lines
if.end: ; preds = %if.then, %for.body		if.end: ; preds = %if.then, %for.body
call void @sink()		call void @sink()
%inc = add nuw nsw i32 %i.06, 1		%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, %len		%exitcond = icmp eq i32 %inc, %len
br i1 %exitcond, label %for.cond.cleanup, label %for.body		br i1 %exitcond, label %for.cond.cleanup, label %for.body
}		}

; No peeling is profitable here.		; No peeling is profitable here.
define void @test15__ivar_mod2_is_0(i32 %len) {		define void @test14__ivar_mod2_is_0(i32 %len) {
; CHECK-LABEL: @test15__ivar_mod2_is_0(		; CHECK-LABEL: @test14__ivar_mod2_is_0(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0		; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0
; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]		; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:		; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:		; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:		; CHECK: for.cond.cleanup:
Show All 31 Lines

if.end: ; preds = %if.then, %for.body		if.end: ; preds = %if.then, %for.body
call void @sink()		call void @sink()
%inc = add nuw nsw i32 %i.06, 1		%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, %len		%exitcond = icmp eq i32 %inc, %len
br i1 %exitcond, label %for.cond.cleanup, label %for.body		br i1 %exitcond, label %for.cond.cleanup, label %for.body
}		}

; Similar to @test7, we need to peel one extra iteration, and we can't do that		; Similar to @test6, we need to peel one extra iteration, and we can't do that
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.		; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
define void @test16(i32 %k) {		define void @test15(i32 %k) {
; CHECK-LABEL: @test16(		; CHECK-LABEL: @test15(
; CHECK-NEXT: for.body.lr.ph:		; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]		; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[I_05]], 4		; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[I_05]], 4
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]		; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:		; CHECK: if.then:
; CHECK-NEXT: call void @f1()		; CHECK-NEXT: call void @f1()
Show All 21 Lines	for.inc:
%inc = add nsw i32 %i.05, 1		%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %k		%cmp = icmp slt i32 %inc, %k
br i1 %cmp, label %for.body, label %for.end		br i1 %cmp, label %for.body, label %for.end

for.end:		for.end:
ret void		ret void
}		}

; Similar to @test8, we need to peel one extra iteration, and we can't do that		; Similar to @test7, we need to peel one extra iteration, and we can't do that
; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.		; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all.
define void @test17(i32 %k) {		define void @test16(i32 %k) {
; CHECK-LABEL: @test17(		; CHECK-LABEL: @test16(
; CHECK-NEXT: for.body.lr.ph:		; CHECK-NEXT: for.body.lr.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]		; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I_05]], 4		; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I_05]], 4
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]		; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:		; CHECK: if.then:
; CHECK-NEXT: call void @f1()		; CHECK-NEXT: call void @f1()
Show All 31 Lines

llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
				; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s
				; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -unroll-allow-loop-nests-peeling -verify-dom-info | FileCheck %s --check-prefix PEELED

				declare void @f1()
				declare void @f2()

				; The branch condition in the inner loop (%cmp1) depends only on the outer
				; induction variable %j, so peeling the inner loop alone cannot simplify it.
				; CHECK:  without -unroll-allow-loop-nests-peeling the nest is left as is.
				; PEELED: with the flag, two iterations of the outer loop are peeled off and
				;         the condition becomes a constant false in the remaining loop.
				define void @test1(i32 %k) {
				; CHECK-LABEL: @test1(
				; CHECK-NEXT: for.body.lr.ph:
				; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
				; CHECK: outer.header:
				; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
				; CHECK-NEXT: br label [[FOR_BODY:%.*]]
				; CHECK: for.body:
				; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
				; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2
				; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
				; CHECK: if.then:
				; CHECK-NEXT: call void @f1()
				; CHECK-NEXT: br label [[FOR_INC]]
				; CHECK: if.else:
				; CHECK-NEXT: call void @f2()
				; CHECK-NEXT: br label [[FOR_INC]]
				; CHECK: for.inc:
				; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
				; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]]
				; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
				; CHECK: outer.inc:
				; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1
				; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
				; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]], !llvm.loop !{{.*}}
				; CHECK: for.end:
				; CHECK-NEXT: ret void
				;
				; PEELED-LABEL: @test1(
				; PEELED-NEXT: for.body.lr.ph:
				; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_BEGIN:%.*]]
				; PEELED: outer.header.peel.begin:
				; PEELED-NEXT: br label [[OUTER_HEADER_PEEL:%.*]]
				; PEELED: outer.header.peel:
				; PEELED-NEXT: br label [[FOR_BODY_PEEL:%.*]]
				; PEELED: for.body.peel:
				; PEELED-NEXT: [[I_05_PEEL:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL]] ], [ [[INC_PEEL:%.*]], [[FOR_INC_PEEL:%.*]] ]
				; PEELED-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2
				; PEELED-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]]
				; PEELED: if.else.peel:
				; PEELED-NEXT: call void @f2()
				; PEELED-NEXT: br label [[FOR_INC_PEEL]]
				; PEELED: if.then.peel:
				; PEELED-NEXT: call void @f1()
				; PEELED-NEXT: br label [[FOR_INC_PEEL]]
				; PEELED: for.inc.peel:
				; PEELED-NEXT: [[INC_PEEL]] = add nsw i32 [[I_05_PEEL]], 1
				; PEELED-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]]
				; PEELED-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL]], label [[OUTER_INC_PEEL:%.*]]
				; PEELED: outer.inc.peel:
				; PEELED-NEXT: [[J_INC_PEEL:%.*]] = add nsw i32 0, 1
				; PEELED-NEXT: [[OUTER_CMP_PEEL:%.*]] = icmp slt i32 [[J_INC_PEEL]], [[K]]
				; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL]], label [[OUTER_HEADER_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]]
				; Verify that MD_loop metadata is dropped.
				; PEELED-NOT: , !llvm.loop !{{[0-9]*}}
				; PEELED: outer.header.peel.next:
				; PEELED-NEXT: br label [[OUTER_HEADER_PEEL2:%.*]]
				; PEELED: outer.header.peel2:
				; PEELED-NEXT: br label [[FOR_BODY_PEEL3:%.*]]
				; PEELED: for.body.peel3:
				; PEELED-NEXT: [[I_05_PEEL4:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL2]] ], [ [[INC_PEEL9:%.*]], [[FOR_INC_PEEL8:%.*]] ]
				; PEELED-NEXT: [[CMP1_PEEL5:%.*]] = icmp ult i32 [[J_INC_PEEL]], 2
				; PEELED-NEXT: br i1 [[CMP1_PEEL5]], label [[IF_THEN_PEEL7:%.*]], label [[IF_ELSE_PEEL6:%.*]]
				; PEELED: if.else.peel6:
				; PEELED-NEXT: call void @f2()
				; PEELED-NEXT: br label [[FOR_INC_PEEL8]]
				; PEELED: if.then.peel7:
				; PEELED-NEXT: call void @f1()
				; PEELED-NEXT: br label [[FOR_INC_PEEL8]]
				; PEELED: for.inc.peel8:
				; PEELED-NEXT: [[INC_PEEL9]] = add nsw i32 [[I_05_PEEL4]], 1
				; PEELED-NEXT: [[CMP_PEEL10:%.*]] = icmp slt i32 [[INC_PEEL9]], [[K]]
				; PEELED-NEXT: br i1 [[CMP_PEEL10]], label [[FOR_BODY_PEEL3]], label [[OUTER_INC_PEEL11:%.*]]
				; PEELED: outer.inc.peel11:
				; PEELED-NEXT: [[J_INC_PEEL12:%.*]] = add nsw i32 [[J_INC_PEEL]], 1
				; PEELED-NEXT: [[OUTER_CMP_PEEL13:%.*]] = icmp slt i32 [[J_INC_PEEL12]], [[K]]
				; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL13]], label [[OUTER_HEADER_PEEL_NEXT1:%.*]], label [[FOR_END]]
				; Verify that MD_loop metadata is dropped.
				; PEELED-NOT: , !llvm.loop !{{[0-9]*}}
				; PEELED: outer.header.peel.next1:
				; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_NEXT14:%.*]]
				; PEELED: outer.header.peel.next14:
				; PEELED-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]]
				; PEELED: for.body.lr.ph.peel.newph:
				; PEELED-NEXT: br label [[OUTER_HEADER:%.*]]
				; PEELED: outer.header:
				; PEELED-NEXT: [[J:%.*]] = phi i32 [ [[J_INC_PEEL12]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ]
				; PEELED-NEXT: br label [[FOR_BODY:%.*]]
				; PEELED: for.body:
				; PEELED-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
				; PEELED-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
				; PEELED: if.then:
				; PEELED-NEXT: call void @f1()
				; PEELED-NEXT: br label [[FOR_INC]]
				; PEELED: if.else:
				; PEELED-NEXT: call void @f2()
				; PEELED-NEXT: br label [[FOR_INC]]
				; PEELED: for.inc:
				; PEELED-NEXT: [[INC]] = add nsw i32 [[I_05]], 1
				; PEELED-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]]
				; PEELED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]]
				; PEELED: outer.inc:
				; PEELED-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1
				; PEELED-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]]
				; PEELED-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}}
				; PEELED: for.end.loopexit:
				; PEELED-NEXT: br label [[FOR_END]]
				; PEELED: for.end:
				; PEELED-NEXT: ret void
				;
				for.body.lr.ph:
				br label %outer.header

				; Outer loop header: %j is the outer induction variable.
				outer.header:
				%j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ]
				br label %for.body

				; Inner loop header: %i.05 is the inner induction variable.
				for.body:
				%i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ]
				; The condition uses only %j, so it is invariant in the inner loop.
				%cmp1 = icmp ult i32 %j, 2
				br i1 %cmp1, label %if.then, label %if.else

				if.then:
				call void @f1()
				br label %for.inc

				if.else:
				call void @f2()
				br label %for.inc

				; Inner loop latch.
				for.inc:
				%inc = add nsw i32 %i.05, 1
				%cmp = icmp slt i32 %inc, %k
				br i1 %cmp, label %for.body, label %outer.inc

				; Outer loop latch; it carries the !llvm.loop metadata that the PEELED
				; checks expect to be absent from the peeled copies' exit branches.
				outer.inc:
				%j.inc = add nsw i32 %j, 1
				%outer.cmp = icmp slt i32 %j.inc, %k
				br i1 %outer.cmp, label %outer.header, label %for.end, !llvm.loop !0

				for.end:
				ret void
				}

				!0 = distinct !{!0}

This is an archive of the discontinued LLVM Phabricator instance.

[Loop Peeling] Add possibility to enable peeling on loop nests.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 247662

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Transforms/Utils/LoopUtils.h

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

llvm/lib/Transforms/Scalar/LoopUnswitch.cpp

llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp

llvm/lib/Transforms/Utils/LoopUtils.cpp

llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll

llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll

This is an archive of the discontinued LLVM Phabricator instance.

[Loop Peeling] Add possibility to enable peeling on loop nests.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 247662

llvm/include/llvm/Analysis/TargetTransformInfo.h

llvm/include/llvm/Transforms/Utils/LoopUtils.h

llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp

llvm/lib/Transforms/Scalar/LoopUnswitch.cpp

llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp

llvm/lib/Transforms/Utils/LoopUtils.cpp

llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll

llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll

[Loop Peeling] Add possibility to enable peeling on loop nests.
ClosedPublic