Diff 306843

llvm/lib/Transforms/Scalar/LoopFlatten.cpp

Show All 29 Lines
#include "llvm/Analysis/AssumptionCache.h"		#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"		#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"		#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"		#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"		#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"		#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"		#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
		#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"		#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"		#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Verifier.h"		#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"		#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"		#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"		#include "llvm/Transforms/Scalar.h"
Show All 33 Lines	struct FlattenInfo {
Value *OuterLimit = nullptr;		Value *OuterLimit = nullptr;
BinaryOperator *InnerIncrement = nullptr;		BinaryOperator *InnerIncrement = nullptr;
BinaryOperator *OuterIncrement = nullptr;		BinaryOperator *OuterIncrement = nullptr;
BranchInst *InnerBranch = nullptr;		BranchInst *InnerBranch = nullptr;
BranchInst *OuterBranch = nullptr;		BranchInst *OuterBranch = nullptr;
SmallPtrSet<Value *, 4> LinearIVUses;		SmallPtrSet<Value *, 4> LinearIVUses;
SmallPtrSet<PHINode *, 4> InnerPHIsToTransform;		SmallPtrSet<PHINode *, 4> InnerPHIsToTransform;

		// Whether this holds the flatten info before or after widening.
		bool Widened = false;

FlattenInfo(Loop OL, Loop IL) : OuterLoop(OL), InnerLoop(IL) {};		FlattenInfo(Loop OL, Loop IL) : OuterLoop(OL), InnerLoop(IL) {};
};		};

// Finds the induction variable, increment and limit for a simple loop that we		// Finds the induction variable, increment and limit for a simple loop that we
// can flatten.		// can flatten.
static bool findLoopComponents(		static bool findLoopComponents(
Loop L, SmallPtrSetImpl<Instruction > &IterationInstructions,		Loop L, SmallPtrSetImpl<Instruction > &IterationInstructions,
PHINode &InductionPHI, Value &Limit, BinaryOperator *&Increment,		PHINode &InductionPHI, Value &Limit, BinaryOperator *&Increment,
▲ Show 20 Lines • Show All 235 Lines • ▼ Show 20 Lines	static bool checkIVUsers(struct FlattenInfo &FI) {
//		//
// (OuterPHI * InnerLimit) + InnerPHI		// (OuterPHI * InnerLimit) + InnerPHI
//		//
// Any uses of the induction variables not matching that pattern would		// Any uses of the induction variables not matching that pattern would
// require a div/mod to reconstruct in the flattened loop, so the		// require a div/mod to reconstruct in the flattened loop, so the
// transformation wouldn't be profitable.		// transformation wouldn't be profitable.

Value *InnerLimit = FI.InnerLimit;		Value *InnerLimit = FI.InnerLimit;
if (auto *I = dyn_cast<SExtInst>(InnerLimit))		if (FI.Widened &&
InnerLimit = I->getOperand(0);		(isa<SExtInst>(InnerLimit) \|\| isa<ZExtInst>(InnerLimit)))
		dmgreenUnsubmitted Not Done Reply Inline Actions dyn_cast -> isa dmgreen: dyn_cast -> isa
		InnerLimit = cast<Instruction>(InnerLimit)->getOperand(0);
		dmgreenUnsubmitted Not Done Reply Inline Actions dyn_cast -> cast dmgreen: dyn_cast -> cast

// Check that all uses of the inner loop's induction variable match the		// Check that all uses of the inner loop's induction variable match the
// expected pattern, recording the uses of the outer IV.		// expected pattern, recording the uses of the outer IV.
SmallPtrSet<Value *, 4> ValidOuterPHIUses;		SmallPtrSet<Value *, 4> ValidOuterPHIUses;
for (User *U : FI.InnerInductionPHI->users()) {		for (User *U : FI.InnerInductionPHI->users()) {
if (U == FI.InnerIncrement)		if (U == FI.InnerIncrement)
continue;		continue;

// After widening the IVs, a trunc instruction might have been introduced, so		// After widening the IVs, a trunc instruction might have been introduced, so
// look through truncs.		// look through truncs.
if (dyn_cast<TruncInst>(U) ) {		if (isa<TruncInst>(U)) {
if (!U->hasOneUse())		if (!U->hasOneUse())
return false;		return false;
U = *U->user_begin();		U = *U->user_begin();
}		}

LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());		LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());

Value *MatchedMul;		Value *MatchedMul;
▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines	static bool DoFlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);		cast<User>(FI.OuterBranch->getCondition())->setOperand(1, NewTripCount);

// Replace the inner loop backedge with an unconditional branch to the exit.		// Replace the inner loop backedge with an unconditional branch to the exit.
BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();		BasicBlock *InnerExitBlock = FI.InnerLoop->getExitBlock();
BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();		BasicBlock *InnerExitingBlock = FI.InnerLoop->getExitingBlock();
InnerExitingBlock->getTerminator()->eraseFromParent();		InnerExitingBlock->getTerminator()->eraseFromParent();
BranchInst::Create(InnerExitBlock, InnerExitingBlock);		BranchInst::Create(InnerExitBlock, InnerExitingBlock);
DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());		DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader());

auto HasSExtUser = [] (Value V) -> Value {
for (User *U : V->users() )
if (dyn_cast<SExtInst>(U))
return U;
return nullptr;
};

// Replace all uses of the polynomial calculated from the two induction		// Replace all uses of the polynomial calculated from the two induction
		dmgreenUnsubmitted Not Done Reply Inline Actions Some of the formatting is still a little off here. dmgreen: Some of the formatting is still a little off here.
		xbolva00Unsubmitted Not Done Reply Inline Actions isa? xbolva00: isa?
		dmgreenUnsubmitted Not Done Reply Inline Actions Is it better to introduce a new trunc of FI.OuterInductionPHI to the correct bitwidth? I'm a little worried that this is just finding _some_ trunc, not necessarily one that it should. It may introduce more truncs but they should get cleared up. It would also prevent using something that did not dominate. Maybe it is fine like this, if it is known that the widening will have introduced a trunc. Can it at least check the type of the trunc is correct? And add a comment saying it should have been added by widening. dmgreen: Is it better to introduce a new trunc of FI.OuterInductionPHI to the correct bitwidth? I'm a…
		SjoerdMeijerAuthorUnsubmitted Done Reply Inline Actions Since we promote the IV there has to be a trunc back to its users. What I see is that there is 1 trunc instruction, and then different zext instructions of the IV value that go to different users if they have different types. This means there is 1 trunc instruction, but you're right that this is not the whole story and something is missing at the moment. So, we will need a generic way to map the different values with the different users, if there are any. I think, for now, I will add a check a bit earlier in the pipeline to bail if we find more than 1 zext user of that trunc. That won't be optimal, but I am keen to start somewhere with this. And of course this patch in its current shape is still running into an assert when I just tried it out with different trunc users slightly modifying your example case: void test(char n, char m) { for(char i = 0; i < n; i++) for(char j = 0; j < m; j++) { char x = i*m+j; use_32(x); use_16(x); } } SjoerdMeijer: Since we promote the IV there has to be a trunc back to its users. What I see is that there is…
// variables with the one new one.		// variables with the one new one.
		IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator());
for (Value *V : FI.LinearIVUses) {		for (Value *V : FI.LinearIVUses) {
// If the induction variable has been widened, look through the SExt.		Value *OuterValue = FI.OuterInductionPHI;
if (Value *U = HasSExtUser(V))		if (FI.Widened)
		dmgreenUnsubmitted Not Done Reply Inline Actions I'm not sure I understand any more. Should we not be replacing it with trunc(OuterInductionPHI) ? Can you add a test where it (the o*I+i value) is not zext or sext? dmgreen: I'm not sure I understand any more. Should we not be replacing it with trunc(OuterInductionPHI)…
		SjoerdMeijerAuthorUnsubmitted Done Reply Inline Actions I'm not sure I understand any more. Should we not be replacing it with trunc(OuterInductionPHI) ? After widening we have e.g. this pattern: %indvar = phi i64 [ %indvar.next, %for.body3.us ], [ 0, %for.cond1.preheader.us ] %3 = trunc i64 %indvar to i32 %add.us = add i32 %3, %mul.us %idxprom.us = zext i32 %add.us to i64 The linear IV user in this example is: %add.us = add i32 %3, %mul.us We don't want to be replacing this `%add.us` value which is an i32 value, because it will indeed be replaced by OuterInductionPhi, which is an i64 value after widening. This was the assertion I was talking about. After widening, the value that we should be replacing is the zext user which is `%idxprom.us` in this case. After widening, we have this IV -> Trunc->LinearIV ->Ext pattern, which is what we are matching here. I will add a comment to clarify this. Can you add a test where it (the o*I+i value) is not zext or sext? I think these cases are present in the original test test/Transforms/LoopFlatten/loop-flatten.ll. SjoerdMeijer: > I'm not sure I understand any more. Should we not be replacing it with trunc…
		dmgreenUnsubmitted Not Done Reply Inline Actions Hmm. But, don't we start with something that looks like: for i32 outer = 0..n for i32 inner = 0..m use(outer * m + inner) We widen the IV's so they become: for i64 outer = 0..zext(n) for i64 inner = 0..zext(m) use(trunc(outer) * m + trunc(inner)) And we want to replace that with a single for i64 outer = 0..zext(n)*zext(m) use(trunc(outer)) We have not proved that the original did not overflow, so if it does we need to use the original truncated i32 value, not the i64 version of it directly. dmgreen: Hmm. But, don't we start with something that looks like: for i32 outer = 0..n for i32…
		SjoerdMeijerAuthorUnsubmitted Done Reply Inline Actions Do you mean overflow in the original outer * m + inner Expression? Is that relevant? Will check when I am back at my desk, but I think after widening we have: Use(outer) Without the trunc. SjoerdMeijer: Do you mean overflow in the original outer * m + inner Expression? Is that relevant? Will…
		dmgreenUnsubmitted Not Done Reply Inline Actions Hmm. But Use in this case is an i32. We need to do something to outer (an i64) to get it back to an i32. This patch seems to be assuming that Use will be either a sext or a zext to the widened type. I think if it's not, it will still hit the assert (and if it is, would use the wrong value once `outer * m + inner` doesn't fit into the smaller type.) dmgreen: Hmm. But Use in this case is an i32. We need to do something to outer (an i64) to get it back…
		SjoerdMeijerAuthorUnsubmitted Done Reply Inline Actions This pass is very restrictive in what it currently supports; it is pattern matching very specific patterns. If there are other users this pass will bail, for example Found use of inner induction variable: Did not match expected pattern, bailing or Found use of outer induction variable Did not match expected pattern, bailing The assumptions are covered with checks. And when it comes to replacing values, we are safe because we are only replacing values in the loop update which have been widened. SjoerdMeijer: This pass is very restrictive in what it currently supports; it is pattern matching very…
		dmgreenUnsubmitted Not Done Reply Inline Actions What happens in the zext test if it is changed from: %arrayidx.us = getelementptr inbounds i16, i16* %A, i64 %idxprom.us to %arrayidx.us = getelementptr inbounds i16, i16* %A, i32 %add.us Also, consider a simpler example where we have: for i8 outer = 0..n for i8 inner = 0..m use(i32 zext(outer * m + inner)) If we widen the IV's to i32's for example, the call to use() should still get a value between [0..255], even if the n*m was higher than that (and so outer * m + inner overflows). It would wrap in the original, and still needs to wrap in the final version. For i32->i64 the values will be a lot higher, but the same principle applies. I'm guessing that if it was a trunc(outer * m + inner) instead, the sext(trunc(..)) in the original case would not naturally simplify nicely? dmgreen: What happens in the zext test if it is changed from: %arrayidx.us = getelementptr inbounds…
		SjoerdMeijerAuthorUnsubmitted Done Reply Inline Actions What happens in the zext test if it is changed from: %arrayidx.us = getelementptr inbounds i16, i16* %A, i64 %idxprom.us to %arrayidx.us = getelementptr inbounds i16, i16* %A, i32 %add.us We will end up with: %indvar = phi i64 [ 0, %for.cond1.preheader.us ] %3 = trunc i64 %indvar to i32 %add.us = add i32 %3, %mul.us %idxprom.us = zext i32 %add.us to i64 %arrayidx.us = getelementptr inbounds i16, i16* %A, i32 %add.us if this is the snippet you're interested in, because . Replacing: %idxprom.us = zext i32 %add.us to i64 with: %indvar1 = phi i64 [ %indvar.next2, %for.cond1.for.inc7_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] Which leaves `%idxprom.us` dead. Also, consider a simpler example where we have: for i8 outer = 0..n for i8 inner = 0..m use(i32 zext(outer * m + inner)) Like I said, the pass is very restrictive, and we match very specific patterns. The ZExts are in the way here, we don't recognise the increment and we bail. SjoerdMeijer: > What happens in the zext test if it is changed from: > > %arrayidx.us = getelementptr…
		dmgreenUnsubmitted Not Done Reply Inline Actions Finding the `Value *OuterValue = FI.OuterInductionPHI; if (...` can be outside of the loop. dmgreen: Finding the `Value *OuterValue = FI.OuterInductionPHI; if (...` can be outside of the loop.
V = U;		OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
V->replaceAllUsesWith(FI.OuterInductionPHI);		"flatten.trunciv");

		LLVM_DEBUG(dbgs() << "Replacing: "; V->dump();
		dbgs() << "with: "; OuterValue->dump());
		V->replaceAllUsesWith(OuterValue);
}		}

// Tell LoopInfo, SCEV and the pass manager that the inner loop has been		// Tell LoopInfo, SCEV and the pass manager that the inner loop has been
// deleted, and any information they have about the outer loop invalidated.		// deleted, and any information they have about the outer loop invalidated.
SE->forgetLoop(FI.OuterLoop);		SE->forgetLoop(FI.OuterLoop);
SE->forgetLoop(FI.InnerLoop);		SE->forgetLoop(FI.InnerLoop);
LI->erase(FI.InnerLoop);		LI->erase(FI.InnerLoop);
return true;		return true;
Show All 39 Lines	PHINode *WidePhi = createWideIV(WideIVs[i], LI, SE, Rewriter, DT, DeadInsts,
true /* UsePostIncrementRanges */);		true /* UsePostIncrementRanges */);
if (!WidePhi)		if (!WidePhi)
return false;		return false;
LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());		LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIVs[i].NarrowIV->dump());		LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIVs[i].NarrowIV->dump());
RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV);		RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV);
}		}
// After widening, rediscover all the loop components.		// After widening, rediscover all the loop components.
		assert(Widened && "Widenend IV expected");
		FI.Widened = true;
return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);		return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
}		}

static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,		static bool FlattenLoopPair(struct FlattenInfo &FI, DominatorTree *DT,
LoopInfo LI, ScalarEvolution SE,		LoopInfo LI, ScalarEvolution SE,
AssumptionCache *AC,		AssumptionCache *AC,
const TargetTransformInfo *TTI) {		const TargetTransformInfo *TTI) {
LLVM_DEBUG(		LLVM_DEBUG(
▲ Show 20 Lines • Show All 100 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopFlatten/widen-iv.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -loop-flatten -loop-flatten-widen-iv=true -verify-loop-info -verify-dom-info -verify-scev -verify \| FileCheck %s --check-prefix=CHECK		; RUN: opt < %s -S -loop-flatten -loop-flatten-widen-iv=true -verify-loop-info -verify-dom-info -verify-scev -verify \| FileCheck %s --check-prefix=CHECK
; RUN: opt < %s -S -loop-flatten -loop-flatten-widen-iv=false -verify-loop-info -verify-dom-info -verify-scev -verify \| FileCheck %s --check-prefix=DONTWIDEN		; RUN: opt < %s -S -loop-flatten -loop-flatten-widen-iv=false -verify-loop-info -verify-dom-info -verify-scev -verify \| FileCheck %s --check-prefix=DONTWIDEN

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"		target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

		; DONTWIDEN-NOT: %flatten.tripcount
		; DONTWIDEN-NOT: %flatten.trunciv

; Function Attrs: nounwind		; Function Attrs: nounwind
define void @foo(i32* %A, i32 %N, i32 %M) {		define void @foo(i32* %A, i32 %N, i32 %M) {
; CHECK-LABEL: @foo(		; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP17:%.]] = icmp sgt i32 [[N:%.]], 0		; CHECK-NEXT: [[CMP17:%.]] = icmp sgt i32 [[N:%.]], 0
; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_COND1_PREHEADER_LR_PH:%.]], label [[FOR_COND_CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_COND1_PREHEADER_LR_PH:%.]], label [[FOR_COND_CLEANUP:%.]]
; CHECK: for.cond1.preheader.lr.ph:		; CHECK: for.cond1.preheader.lr.ph:
; CHECK-NEXT: [[CMP215:%.]] = icmp sgt i32 [[M:%.]], 0		; CHECK-NEXT: [[CMP215:%.]] = icmp sgt i32 [[M:%.]], 0
; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND_CLEANUP]]		; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND_CLEANUP]]
; CHECK: for.cond1.preheader.us.preheader:		; CHECK: for.cond1.preheader.us.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[M]] to i64		; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[M]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[N]] to i64		; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[N]] to i64
; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]		; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]		; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK: for.cond1.preheader.us:		; CHECK: for.cond1.preheader.us:
; CHECK-NEXT: [[INDVAR1:%.]] = phi i64 [ [[INDVAR_NEXT2:%.]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]		; CHECK-NEXT: [[INDVAR1:%.]] = phi i64 [ [[INDVAR_NEXT2:%.]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32		; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32
; CHECK-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[TMP2]], [[M]]		; CHECK-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[TMP2]], [[M]]
		; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32
; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]]		; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]]
; CHECK: for.body4.us:		; CHECK: for.body4.us:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ]		; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ]
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32		; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32
; CHECK-NEXT: [[ADD_US:%.*]] = add nsw i32 [[TMP3]], [[MUL_US]]		; CHECK-NEXT: [[ADD_US:%.*]] = add nsw i32 [[TMP3]], [[MUL_US]]
; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64		; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64
; CHECK-NEXT: [[ARRAYIDX_US:%.]] = getelementptr inbounds i32, i32 [[A:%.*]], i64 [[INDVAR1]]		; CHECK-NEXT: [[ARRAYIDX_US:%.]] = getelementptr inbounds i32, i32 [[A:%.*]], i64 [[IDXPROM_US]]
; CHECK-NEXT: tail call void @f(i32* [[ARRAYIDX_US]])		; CHECK-NEXT: tail call void @f(i32* [[ARRAYIDX_US]])
; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1		; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]]		; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]]
; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]		; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us:		; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us:
; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1		; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1
; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]]		; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]]
; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]		; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.cond.cleanup.loopexit:		; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:		; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
; DONTWIDEN-LABEL: @foo(
; DONTWIDEN-NEXT: entry:
; DONTWIDEN-NEXT: [[CMP17:%.]] = icmp sgt i32 [[N:%.]], 0
; DONTWIDEN-NEXT: br i1 [[CMP17]], label [[FOR_COND1_PREHEADER_LR_PH:%.]], label [[FOR_COND_CLEANUP:%.]]
; DONTWIDEN: for.cond1.preheader.lr.ph:
; DONTWIDEN-NEXT: [[CMP215:%.]] = icmp sgt i32 [[M:%.]], 0
; DONTWIDEN-NEXT: br i1 [[CMP215]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND_CLEANUP]]
; DONTWIDEN: for.cond1.preheader.us.preheader:
; DONTWIDEN-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
; DONTWIDEN: for.cond1.preheader.us:
; DONTWIDEN-NEXT: [[I_018_US:%.]] = phi i32 [ [[INC6_US:%.]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; DONTWIDEN-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[I_018_US]], [[M]]
; DONTWIDEN-NEXT: br label [[FOR_BODY4_US:%.*]]
; DONTWIDEN: for.body4.us:
; DONTWIDEN-NEXT: [[J_016_US:%.]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.]], [[FOR_BODY4_US]] ]
; DONTWIDEN-NEXT: [[ADD_US:%.*]] = add nsw i32 [[J_016_US]], [[MUL_US]]
; DONTWIDEN-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64
; DONTWIDEN-NEXT: [[ARRAYIDX_US:%.]] = getelementptr inbounds i32, i32 [[A:%.*]], i64 [[IDXPROM_US]]
; DONTWIDEN-NEXT: tail call void @f(i32* [[ARRAYIDX_US]])
; DONTWIDEN-NEXT: [[INC_US]] = add nuw nsw i32 [[J_016_US]], 1
; DONTWIDEN-NEXT: [[CMP2_US:%.*]] = icmp slt i32 [[INC_US]], [[M]]
; DONTWIDEN-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
; DONTWIDEN: for.cond1.for.cond.cleanup3_crit_edge.us:
; DONTWIDEN-NEXT: [[INC6_US]] = add nuw nsw i32 [[I_018_US]], 1
; DONTWIDEN-NEXT: [[CMP_US:%.*]] = icmp slt i32 [[INC6_US]], [[N]]
; DONTWIDEN-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
; DONTWIDEN: for.cond.cleanup.loopexit:
; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]]
; DONTWIDEN: for.cond.cleanup:
; DONTWIDEN-NEXT: ret void
;
entry:		entry:
%cmp17 = icmp sgt i32 %N, 0		%cmp17 = icmp sgt i32 %N, 0
br i1 %cmp17, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup		br i1 %cmp17, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup

for.cond1.preheader.lr.ph:		for.cond1.preheader.lr.ph:
%cmp215 = icmp sgt i32 %M, 0		%cmp215 = icmp sgt i32 %M, 0
br i1 %cmp215, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup		br i1 %cmp215, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup

Show All 19 Lines	for.cond1.for.cond.cleanup3_crit_edge.us:
%inc6.us = add nuw nsw i32 %i.018.us, 1		%inc6.us = add nuw nsw i32 %i.018.us, 1
%cmp.us = icmp slt i32 %inc6.us, %N		%cmp.us = icmp slt i32 %inc6.us, %N
br i1 %cmp.us, label %for.cond1.preheader.us, label %for.cond.cleanup		br i1 %cmp.us, label %for.cond1.preheader.us, label %for.cond.cleanup

for.cond.cleanup:		for.cond.cleanup:
ret void		ret void
}		}

		define void @zext(i32 %N, i16* nocapture %A, i16 %val) {
		; CHECK-LABEL: @zext(
		; CHECK-NEXT: entry:
		; CHECK-NEXT: [[CMP20_NOT:%.]] = icmp eq i32 [[N:%.]], 0
		; CHECK-NEXT: br i1 [[CMP20_NOT]], label [[FOR_END9:%.]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.]]
		; CHECK: for.cond1.preheader.us.preheader:
		; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64
		; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64
		; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]
		; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]]
		; CHECK: for.cond1.preheader.us:
		; CHECK-NEXT: [[INDVAR1:%.]] = phi i64 [ [[INDVAR_NEXT2:%.]], [[FOR_COND1_FOR_INC7_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
		; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32
		; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP2]], [[N]]
		; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32
		; CHECK-NEXT: br label [[FOR_BODY3_US:%.*]]
		; CHECK: for.body3.us:
		; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ]
		; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32
		; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP3]], [[MUL_US]]
		; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[FLATTEN_TRUNCIV]] to i64
		; CHECK-NEXT: [[ARRAYIDX_US:%.]] = getelementptr inbounds i16, i16 [[A:%.*]], i64 [[IDXPROM_US]]
		; CHECK-NEXT: [[TMP4:%.]] = load i16, i16 [[ARRAYIDX_US]], align 2
		; CHECK-NEXT: [[ADD5_US:%.]] = add i16 [[TMP4]], [[VAL:%.]]
		; CHECK-NEXT: store i16 [[ADD5_US]], i16* [[ARRAYIDX_US]], align 2
		; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
		; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]]
		; CHECK-NEXT: br label [[FOR_COND1_FOR_INC7_CRIT_EDGE_US]]
		; CHECK: for.cond1.for.inc7_crit_edge.us:
		; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1
		; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]]
		; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END9_LOOPEXIT:%.*]]
		; CHECK: for.end9.loopexit:
		; CHECK-NEXT: br label [[FOR_END9]]
		; CHECK: for.end9:
		; CHECK-NEXT: ret void
		;
		entry:
		%cmp20.not = icmp eq i32 %N, 0
		br i1 %cmp20.not, label %for.end9, label %for.cond1.preheader.us.preheader

		for.cond1.preheader.us.preheader:
		br label %for.cond1.preheader.us

		for.cond1.preheader.us:
		%i.021.us = phi i32 [ %inc8.us, %for.cond1.for.inc7_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
		%mul.us = mul i32 %i.021.us, %N
		br label %for.body3.us

		for.body3.us:
		%j.019.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body3.us ]
		%add.us = add i32 %j.019.us, %mul.us
		%idxprom.us = zext i32 %add.us to i64
		%arrayidx.us = getelementptr inbounds i16, i16* %A, i64 %idxprom.us
		%0 = load i16, i16* %arrayidx.us, align 2
		%add5.us = add i16 %0, %val
		store i16 %add5.us, i16* %arrayidx.us, align 2
		%inc.us = add nuw i32 %j.019.us, 1
		%cmp2.us = icmp ult i32 %inc.us, %N
		br i1 %cmp2.us, label %for.body3.us, label %for.cond1.for.inc7_crit_edge.us

		for.cond1.for.inc7_crit_edge.us:
		%inc8.us = add i32 %i.021.us, 1
		%cmp.us = icmp ult i32 %inc8.us, %N
		br i1 %cmp.us, label %for.cond1.preheader.us, label %for.end9.loopexit

		for.end9.loopexit:
		br label %for.end9

		for.end9:
		ret void
		}

		; This IR corresponds to this input:
		;
		; void test(char n, char m) {
		; for(char i = 0; i < n; i++)
		; for(char j = 0; j < m; j++) {
		; char x = i*m+j;
		; use_32(x);
		; }
		; }
		;
		define void @test(i8 %n, i8 %m) {
		; CHECK-LABEL: @test(
		; CHECK-NEXT: entry:
		; CHECK-NEXT: [[CMP25_NOT:%.]] = icmp eq i8 [[N:%.]], 0
		; CHECK-NEXT: br i1 [[CMP25_NOT]], label [[FOR_COND_CLEANUP:%.]], label [[FOR_COND3_PREHEADER_LR_PH:%.]]
		; CHECK: for.cond3.preheader.lr.ph:
		; CHECK-NEXT: [[CMP623_NOT:%.]] = icmp eq i8 [[M:%.]], 0
		; CHECK-NEXT: br i1 [[CMP623_NOT]], label [[FOR_COND3_PREHEADER_PREHEADER:%.]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.]]
		; CHECK: for.cond3.preheader.preheader:
		; CHECK-NEXT: br label [[FOR_COND3_PREHEADER:%.*]]
		; CHECK: for.cond3.preheader.us.preheader:
		; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[M]] to i64
		; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[N]] to i64
		; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]
		; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]]
		; CHECK: for.cond3.preheader.us:
		; CHECK-NEXT: [[INDVAR2:%.]] = phi i64 [ [[INDVAR_NEXT3:%.]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ]
		; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i8
		; CHECK-NEXT: [[MUL_US:%.*]] = mul i8 [[TMP2]], [[M]]
		; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i8
		; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]]
		; CHECK: for.body9.us:
		; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ]
		; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i8
		; CHECK-NEXT: [[ADD_US:%.*]] = add i8 [[TMP3]], [[MUL_US]]
		; CHECK-NEXT: [[CONV14_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i32
		; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
		; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
		; CHECK-NEXT: [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]]
		; CHECK-NEXT: br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]]
		; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us:
		; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1
		; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
		; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]]
		; CHECK: for.cond3.preheader:
		; CHECK-NEXT: [[I_026:%.]] = phi i8 [ [[INC16:%.]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ]
		; CHECK-NEXT: [[INC16]] = add i8 [[I_026]], 1
		; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[INC16]], [[N]]
		; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
		; CHECK: for.cond.cleanup.loopexit:
		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
		; CHECK: for.cond.cleanup.loopexit1:
		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
		; CHECK: for.cond.cleanup:
		; CHECK-NEXT: ret void
		;
		entry:
		%cmp25.not = icmp eq i8 %n, 0
		br i1 %cmp25.not, label %for.cond.cleanup, label %for.cond3.preheader.lr.ph

		for.cond3.preheader.lr.ph:
		%cmp623.not = icmp eq i8 %m, 0
		br i1 %cmp623.not, label %for.cond3.preheader.preheader, label %for.cond3.preheader.us.preheader

		for.cond3.preheader.preheader:
		br label %for.cond3.preheader

		for.cond3.preheader.us.preheader:
		br label %for.cond3.preheader.us

		for.cond3.preheader.us:
		%i.026.us = phi i8 [ %inc16.us, %for.cond3.for.cond.cleanup8_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ]
		%mul.us = mul i8 %i.026.us, %m
		br label %for.body9.us

		for.body9.us:
		%j.024.us = phi i8 [ 0, %for.cond3.preheader.us ], [ %inc.us, %for.body9.us ]
		%add.us = add i8 %j.024.us, %mul.us
		%conv14.us = zext i8 %add.us to i32
		%call.us = tail call i32 @use_32(i32 %conv14.us) #2
		%inc.us = add nuw i8 %j.024.us, 1
		%cmp6.us = icmp ult i8 %inc.us, %m
		br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us

		for.cond3.for.cond.cleanup8_crit_edge.us:
		%inc16.us = add i8 %i.026.us, 1
		%cmp.us = icmp ult i8 %inc16.us, %n
		br i1 %cmp.us, label %for.cond3.preheader.us, label %for.cond.cleanup

		for.cond3.preheader:
		%i.026 = phi i8 [ %inc16, %for.cond3.preheader ], [ 0, %for.cond3.preheader.preheader ]
		%inc16 = add i8 %i.026, 1
		%cmp = icmp ult i8 %inc16, %n
		br i1 %cmp, label %for.cond3.preheader, label %for.cond.cleanup

		for.cond.cleanup:
		ret void
		}

		; This IR corresponds to this input:
		;
		; void test3(char n, char m) {
		;   for(char i = 0; i < n; i++)
		;     for(char j = 0; j < m; j++) {
		;       char x = i*m+j;
		;       use_32(x);
		;       use_16(x);
		;       use_32(x);
		;       use_16(x);
		;       use_64(x);
		;     }
		; }
		;
		; The linearised index i*m+j is zero-extended to i32, i16 and i64, and the
		; i32/i16 extensions are each used by two calls. The assertions below expect:
		;  - both i8 bounds zero-extended to i64 in the loop preheader and multiplied
		;    into a single trip count (captured as FLATTEN_TRIPCOUNT);
		;  - an i64 outer induction variable compared against that trip count;
		;  - the inner loop's back-edge replaced by an unconditional branch to the
		;    outer latch;
		;  - every zext of the linearised index fed by a trunc of the wide outer
		;    induction variable (captured as FLATTEN_TRUNCIV) instead of the add.
		; The second nest (for.cond3.preheader), which has no inner body, is expected
		; to keep its i8 induction variable.
		define void @test3(i8 %n, i8 %m) {
		; CHECK-LABEL: @test3(
		; CHECK-NEXT: entry:
		; CHECK-NEXT: [[CMP37_NOT:%.*]] = icmp eq i8 [[N:%.*]], 0
		; CHECK-NEXT: br i1 [[CMP37_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND3_PREHEADER_LR_PH:%.*]]
		; CHECK: for.cond3.preheader.lr.ph:
		; CHECK-NEXT: [[CMP635_NOT:%.*]] = icmp eq i8 [[M:%.*]], 0
		; CHECK-NEXT: br i1 [[CMP635_NOT]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]]
		; CHECK: for.cond3.preheader.preheader:
		; CHECK-NEXT: br label [[FOR_COND3_PREHEADER:%.*]]
		; CHECK: for.cond3.preheader.us.preheader:
		; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[M]] to i64
		; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[N]] to i64
		; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]
		; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]]
		; CHECK: for.cond3.preheader.us:
		; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ]
		; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i8
		; CHECK-NEXT: [[MUL_US:%.*]] = mul i8 [[TMP2]], [[M]]
		; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i8
		; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]]
		; CHECK: for.body9.us:
		; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ]
		; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i8
		; CHECK-NEXT: [[ADD_US:%.*]] = add i8 [[TMP3]], [[MUL_US]]
		; CHECK-NEXT: [[CONV14_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i32
		; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
		; CHECK-NEXT: [[CONV15_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i16
		; CHECK-NEXT: [[CALL16_US:%.*]] = tail call i32 @use_16(i16 [[CONV15_US]])
		; CHECK-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
		; CHECK-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_16(i16 [[CONV15_US]])
		; CHECK-NEXT: [[CONV21_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i64
		; CHECK-NEXT: [[CALL22_US:%.*]] = tail call i32 @use_64(i64 [[CONV21_US]])
		; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
		; CHECK-NEXT: [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]]
		; CHECK-NEXT: br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]]
		; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us:
		; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1
		; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
		; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]]
		; CHECK: for.cond3.preheader:
		; CHECK-NEXT: [[I_038:%.*]] = phi i8 [ [[INC24:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ]
		; CHECK-NEXT: [[INC24]] = add i8 [[I_038]], 1
		; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[INC24]], [[N]]
		; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
		; CHECK: for.cond.cleanup.loopexit:
		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
		; CHECK: for.cond.cleanup.loopexit1:
		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
		; CHECK: for.cond.cleanup:
		; CHECK-NEXT: ret void
		;
		entry:
		%cmp37.not = icmp eq i8 %n, 0
		br i1 %cmp37.not, label %for.cond.cleanup, label %for.cond3.preheader.lr.ph

		for.cond3.preheader.lr.ph:
		%cmp635.not = icmp eq i8 %m, 0
		br i1 %cmp635.not, label %for.cond3.preheader.preheader, label %for.cond3.preheader.us.preheader

		for.cond3.preheader.preheader:
		br label %for.cond3.preheader

		for.cond3.preheader.us.preheader:
		br label %for.cond3.preheader.us

		for.cond3.preheader.us:
		%i.038.us = phi i8 [ %inc24.us, %for.cond3.for.cond.cleanup8_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ]
		%mul.us = mul i8 %i.038.us, %m
		br label %for.body9.us

		for.body9.us:
		%j.036.us = phi i8 [ 0, %for.cond3.preheader.us ], [ %inc.us, %for.body9.us ]
		%add.us = add i8 %j.036.us, %mul.us
		%conv14.us = zext i8 %add.us to i32
		%call.us = tail call i32 @use_32(i32 %conv14.us)
		%conv15.us = zext i8 %add.us to i16
		%call16.us = tail call i32 @use_16(i16 %conv15.us)
		%call18.us = tail call i32 @use_32(i32 %conv14.us)
		%call20.us = tail call i32 @use_16(i16 %conv15.us)
		%conv21.us = zext i8 %add.us to i64
		%call22.us = tail call i32 @use_64(i64 %conv21.us)
		%inc.us = add nuw i8 %j.036.us, 1
		%cmp6.us = icmp ult i8 %inc.us, %m
		br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us

		for.cond3.for.cond.cleanup8_crit_edge.us:
		%inc24.us = add i8 %i.038.us, 1
		%cmp.us = icmp ult i8 %inc24.us, %n
		br i1 %cmp.us, label %for.cond3.preheader.us, label %for.cond.cleanup

		for.cond3.preheader:
		%i.038 = phi i8 [ %inc24, %for.cond3.preheader ], [ 0, %for.cond3.preheader.preheader ]
		%inc24 = add i8 %i.038, 1
		%cmp = icmp ult i8 %inc24, %n
		br i1 %cmp, label %for.cond3.preheader, label %for.cond.cleanup

		for.cond.cleanup:
		ret void
		}

		; This IR corresponds to this input:
		;
		; void test4(short n, short m) {
		;   for(short i = 0; i < n; i++)
		;     for(short j = 0; j < m; j++) {
		;       short x = i*m+j;
		;       use_32(x);
		;       use_16(x);
		;       use_32(x);
		;       use_16(x);
		;       use_64(x);
		;     }
		; }
		;
		; Signed i16 counterpart of the i8 case: the bounds are compared with
		; sgt/slt and the linearised index is sign-extended (or passed as-is to
		; use_16). Two sets of assertions follow:
		;  - The first set expects the widened form: both bounds sign-extended to i64
		;    and multiplied into one trip count, an i64 outer induction variable, the
		;    inner back-edge replaced by an unconditional branch, and all uses of the
		;    linearised index fed by a trunc of the wide outer induction variable.
		;  - The DONTWIDEN set expects the nest to be left as written: i16 induction
		;    variables and a conditional inner back-edge. Presumably this prefix is
		;    driven by a RUN line with widening disabled; the RUN lines are not
		;    visible in this part of the file.
		; NOTE(review): the calls in for.body9.us carry attribute group #2, whose
		; definition is not visible in this part of the file. Confirm it exists.
		define void @test4(i16 %n, i16 %m) {
		; CHECK-LABEL: @test4(
		; CHECK-NEXT: entry:
		; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i16 [[N:%.*]], 0
		; CHECK-NEXT: br i1 [[CMP38]], label [[FOR_COND3_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
		; CHECK: for.cond3.preheader.lr.ph:
		; CHECK-NEXT: [[CMP636:%.*]] = icmp sgt i16 [[M:%.*]], 0
		; CHECK-NEXT: br i1 [[CMP636]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]]
		; CHECK: for.cond3.preheader.preheader:
		; CHECK-NEXT: br label [[FOR_COND3_PREHEADER:%.*]]
		; CHECK: for.cond3.preheader.us.preheader:
		; CHECK-NEXT: [[TMP0:%.*]] = sext i16 [[M]] to i64
		; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[N]] to i64
		; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]]
		; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]]
		; CHECK: for.cond3.preheader.us:
		; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ]
		; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i16
		; CHECK-NEXT: [[MUL_US:%.*]] = mul i16 [[TMP2]], [[M]]
		; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i16
		; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]]
		; CHECK: for.body9.us:
		; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ]
		; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i16
		; CHECK-NEXT: [[ADD_US:%.*]] = add i16 [[TMP3]], [[MUL_US]]
		; CHECK-NEXT: [[CONV14_US:%.*]] = sext i16 [[FLATTEN_TRUNCIV]] to i32
		; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
		; CHECK-NEXT: [[CALL15_US:%.*]] = tail call i32 @use_16(i16 [[FLATTEN_TRUNCIV]])
		; CHECK-NEXT: [[CALL17_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
		; CHECK-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_16(i16 [[FLATTEN_TRUNCIV]])
		; CHECK-NEXT: [[CONV19_US:%.*]] = sext i16 [[FLATTEN_TRUNCIV]] to i64
		; CHECK-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_64(i64 [[CONV19_US]])
		; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1
		; CHECK-NEXT: [[CMP6_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]]
		; CHECK-NEXT: br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]]
		; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us:
		; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1
		; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
		; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
		; CHECK: for.cond3.preheader:
		; CHECK-NEXT: [[I_039:%.*]] = phi i16 [ [[INC22:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ]
		; CHECK-NEXT: [[INC22]] = add i16 [[I_039]], 1
		; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC22]], [[N]]
		; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]]
		; CHECK: for.cond.cleanup.loopexit:
		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
		; CHECK: for.cond.cleanup.loopexit1:
		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
		; CHECK: for.cond.cleanup:
		; CHECK-NEXT: ret void
		;
		; DONTWIDEN-LABEL: @test4(
		; DONTWIDEN-NEXT: entry:
		; DONTWIDEN-NEXT: [[CMP38:%.*]] = icmp sgt i16 [[N:%.*]], 0
		; DONTWIDEN-NEXT: br i1 [[CMP38]], label [[FOR_COND3_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
		; DONTWIDEN: for.cond3.preheader.lr.ph:
		; DONTWIDEN-NEXT: [[CMP636:%.*]] = icmp sgt i16 [[M:%.*]], 0
		; DONTWIDEN-NEXT: br i1 [[CMP636]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]]
		; DONTWIDEN: for.cond3.preheader.preheader:
		; DONTWIDEN-NEXT: br label [[FOR_COND3_PREHEADER:%.*]]
		; DONTWIDEN: for.cond3.preheader.us.preheader:
		; DONTWIDEN-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]]
		; DONTWIDEN: for.cond3.preheader.us:
		; DONTWIDEN-NEXT: [[I_039_US:%.*]] = phi i16 [ [[INC22_US:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ]
		; DONTWIDEN-NEXT: [[MUL_US:%.*]] = mul i16 [[I_039_US]], [[M]]
		; DONTWIDEN-NEXT: br label [[FOR_BODY9_US:%.*]]
		; DONTWIDEN: for.body9.us:
		; DONTWIDEN-NEXT: [[J_037_US:%.*]] = phi i16 [ 0, [[FOR_COND3_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY9_US]] ]
		; DONTWIDEN-NEXT: [[ADD_US:%.*]] = add i16 [[J_037_US]], [[MUL_US]]
		; DONTWIDEN-NEXT: [[CONV14_US:%.*]] = sext i16 [[ADD_US]] to i32
		; DONTWIDEN-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
		; DONTWIDEN-NEXT: [[CALL15_US:%.*]] = tail call i32 @use_16(i16 [[ADD_US]])
		; DONTWIDEN-NEXT: [[CALL17_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]])
		; DONTWIDEN-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_16(i16 [[ADD_US]])
		; DONTWIDEN-NEXT: [[CONV19_US:%.*]] = sext i16 [[ADD_US]] to i64
		; DONTWIDEN-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_64(i64 [[CONV19_US]])
		; DONTWIDEN-NEXT: [[INC_US]] = add nuw nsw i16 [[J_037_US]], 1
		; DONTWIDEN-NEXT: [[CMP6_US:%.*]] = icmp slt i16 [[INC_US]], [[M]]
		; DONTWIDEN-NEXT: br i1 [[CMP6_US]], label [[FOR_BODY9_US]], label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]]
		; DONTWIDEN: for.cond3.for.cond.cleanup8_crit_edge.us:
		; DONTWIDEN-NEXT: [[INC22_US]] = add i16 [[I_039_US]], 1
		; DONTWIDEN-NEXT: [[CMP_US:%.*]] = icmp slt i16 [[INC22_US]], [[N]]
		; DONTWIDEN-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
		; DONTWIDEN: for.cond3.preheader:
		; DONTWIDEN-NEXT: [[I_039:%.*]] = phi i16 [ [[INC22:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ]
		; DONTWIDEN-NEXT: [[INC22]] = add i16 [[I_039]], 1
		; DONTWIDEN-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC22]], [[N]]
		; DONTWIDEN-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]]
		; DONTWIDEN: for.cond.cleanup.loopexit:
		; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]]
		; DONTWIDEN: for.cond.cleanup.loopexit1:
		; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]]
		; DONTWIDEN: for.cond.cleanup:
		; DONTWIDEN-NEXT: ret void
		;
		entry:
		%cmp38 = icmp sgt i16 %n, 0
		br i1 %cmp38, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup

		for.cond3.preheader.lr.ph:
		%cmp636 = icmp sgt i16 %m, 0
		br i1 %cmp636, label %for.cond3.preheader.us.preheader, label %for.cond3.preheader.preheader

		for.cond3.preheader.preheader:
		br label %for.cond3.preheader

		for.cond3.preheader.us.preheader:
		br label %for.cond3.preheader.us

		for.cond3.preheader.us:
		%i.039.us = phi i16 [ %inc22.us, %for.cond3.for.cond.cleanup8_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ]
		%mul.us = mul i16 %i.039.us, %m
		br label %for.body9.us

		for.body9.us:
		%j.037.us = phi i16 [ 0, %for.cond3.preheader.us ], [ %inc.us, %for.body9.us ]
		%add.us = add i16 %j.037.us, %mul.us
		%conv14.us = sext i16 %add.us to i32
		%call.us = tail call i32 @use_32(i32 %conv14.us) #2
		%call15.us = tail call i32 @use_16(i16 %add.us) #2
		%call17.us = tail call i32 @use_32(i32 %conv14.us) #2
		%call18.us = tail call i32 @use_16(i16 %add.us) #2
		%conv19.us = sext i16 %add.us to i64
		%call20.us = tail call i32 @use_64(i64 %conv19.us) #2
		%inc.us = add nuw nsw i16 %j.037.us, 1
		%cmp6.us = icmp slt i16 %inc.us, %m
		br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us

		for.cond3.for.cond.cleanup8_crit_edge.us:
		%inc22.us = add i16 %i.039.us, 1
		%cmp.us = icmp slt i16 %inc22.us, %n
		br i1 %cmp.us, label %for.cond3.preheader.us, label %for.cond.cleanup

		for.cond3.preheader:
		%i.039 = phi i16 [ %inc22, %for.cond3.preheader ], [ 0, %for.cond3.preheader.preheader ]
		%inc22 = add i16 %i.039, 1
		%cmp = icmp slt i16 %inc22, %n
		br i1 %cmp, label %for.cond3.preheader, label %for.cond.cleanup

		for.cond.cleanup:
		ret void
		}

		; External functions called from the loop bodies of the tests in this file
		; with the linearised index extended to 32, 16 and 64 bits respectively.
		; Only their declarations are provided here.
		declare dso_local i32 @use_32(i32)
		declare dso_local i32 @use_16(i16)
		declare dso_local i32 @use_64(i64)

declare dso_local void @f(i32* %0) local_unnamed_addr #1		declare dso_local void @f(i32* %0) local_unnamed_addr #1

This is an archive of the discontinued LLVM Phabricator instance.

[LoopFlatten] Widen IV, cont'd
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 306843

llvm/lib/Transforms/Scalar/LoopFlatten.cpp

llvm/test/Transforms/LoopFlatten/widen-iv.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LoopFlatten] Widen IV, cont'dClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 306843

llvm/lib/Transforms/Scalar/LoopFlatten.cpp

llvm/test/Transforms/LoopFlatten/widen-iv.ll

[LoopFlatten] Widen IV, cont'd
ClosedPublic