diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Verifier.h" @@ -66,7 +67,7 @@ static cl::opt WidenIV("loop-flatten-widen-iv", cl::Hidden, - cl::init(false), + cl::init(true), cl::desc("Widen the loop induction variables, if possible, so " "overflow checks won't reject flattening")); @@ -84,6 +85,9 @@ SmallPtrSet LinearIVUses; SmallPtrSet InnerPHIsToTransform; + // Whether this holds the flatten info before or after widening. + bool Widened = false; + FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; }; @@ -335,8 +339,9 @@ // transformation wouldn't be profitable. Value *InnerLimit = FI.InnerLimit; - if (auto *I = dyn_cast(InnerLimit)) - InnerLimit = I->getOperand(0); + if (FI.Widened && + (isa(InnerLimit) || isa(InnerLimit))) + InnerLimit = cast(InnerLimit)->getOperand(0); // Check that all uses of the inner loop's induction variable match the // expected pattern, recording the uses of the outer IV. @@ -347,7 +352,7 @@ // After widening the IVs, a trunc instruction might have been introduced, so // look through truncs. - if (dyn_cast(U) ) { + if (isa(U)) { if (!U->hasOneUse()) return false; U = *U->user_begin(); @@ -544,20 +549,18 @@ BranchInst::Create(InnerExitBlock, InnerExitingBlock); DT->deleteEdge(InnerExitingBlock, FI.InnerLoop->getHeader()); - auto HasSExtUser = [] (Value *V) -> Value * { - for (User *U : V->users() ) - if (dyn_cast(U)) - return U; - return nullptr; - }; - // Replace all uses of the polynomial calculated from the two induction // variables with the one new one. 
+ IRBuilder<> Builder(FI.OuterInductionPHI->getParent()->getTerminator()); for (Value *V : FI.LinearIVUses) { - // If the induction variable has been widened, look through the SExt. - if (Value *U = HasSExtUser(V)) - V = U; - V->replaceAllUsesWith(FI.OuterInductionPHI); + Value *OuterValue = FI.OuterInductionPHI; + if (FI.Widened) + OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(), + "flatten.trunciv"); + + LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); + dbgs() << "with: "; OuterValue->dump()); + V->replaceAllUsesWith(OuterValue); } // Tell LoopInfo, SCEV and the pass manager that the inner loop has been @@ -613,6 +616,8 @@ RecursivelyDeleteDeadPHINode(WideIVs[i].NarrowIV); } // After widening, rediscover all the loop components. + assert(Widened && "Widened IV expected"); + FI.Widened = true; return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); } diff --git a/llvm/test/Transforms/LoopFlatten/widen-iv.ll b/llvm/test/Transforms/LoopFlatten/widen-iv.ll --- a/llvm/test/Transforms/LoopFlatten/widen-iv.ll +++ b/llvm/test/Transforms/LoopFlatten/widen-iv.ll @@ -4,6 +4,9 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +; DONTWIDEN-NOT: %flatten.tripcount +; DONTWIDEN-NOT: %flatten.trunciv + ; Function Attrs: nounwind define void @foo(i32* %A, i32 %N, i32 %M) { ; CHECK-LABEL: @foo( @@ -22,13 +25,14 @@ ; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32 ; CHECK-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[TMP2]], [[M]] +; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32 ; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] ; CHECK: for.body4.us: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add nsw i32 [[TMP3]], [[MUL_US]] -; CHECK-NEXT: 
[[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 -; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVAR1]] +; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64 +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @f(i32* [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]] @@ -42,37 +46,6 @@ ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; -; DONTWIDEN-LABEL: @foo( -; DONTWIDEN-NEXT: entry: -; DONTWIDEN-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; DONTWIDEN-NEXT: br i1 [[CMP17]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; DONTWIDEN: for.cond1.preheader.lr.ph: -; DONTWIDEN-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[M:%.*]], 0 -; DONTWIDEN-NEXT: br i1 [[CMP215]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND_CLEANUP]] -; DONTWIDEN: for.cond1.preheader.us.preheader: -; DONTWIDEN-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] -; DONTWIDEN: for.cond1.preheader.us: -; DONTWIDEN-NEXT: [[I_018_US:%.*]] = phi i32 [ [[INC6_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] -; DONTWIDEN-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[I_018_US]], [[M]] -; DONTWIDEN-NEXT: br label [[FOR_BODY4_US:%.*]] -; DONTWIDEN: for.body4.us: -; DONTWIDEN-NEXT: [[J_016_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ] -; DONTWIDEN-NEXT: [[ADD_US:%.*]] = add nsw i32 [[J_016_US]], [[MUL_US]] -; DONTWIDEN-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 -; DONTWIDEN-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]] -; DONTWIDEN-NEXT: tail call void @f(i32* [[ARRAYIDX_US]]) -; DONTWIDEN-NEXT: [[INC_US]] = add nuw nsw i32 [[J_016_US]], 1 -; DONTWIDEN-NEXT: [[CMP2_US:%.*]] = icmp slt i32 
[[INC_US]], [[M]] -; DONTWIDEN-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] -; DONTWIDEN: for.cond1.for.cond.cleanup3_crit_edge.us: -; DONTWIDEN-NEXT: [[INC6_US]] = add nuw nsw i32 [[I_018_US]], 1 -; DONTWIDEN-NEXT: [[CMP_US:%.*]] = icmp slt i32 [[INC6_US]], [[N]] -; DONTWIDEN-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; DONTWIDEN: for.cond.cleanup.loopexit: -; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]] -; DONTWIDEN: for.cond.cleanup: -; DONTWIDEN-NEXT: ret void -; entry: %cmp17 = icmp sgt i32 %N, 0 br i1 %cmp17, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup @@ -108,4 +81,452 @@ ret void } +define void @zext(i32 %N, i16* nocapture %A, i16 %val) { +; CHECK-LABEL: @zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP20_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP20_NOT]], label [[FOR_END9:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] +; CHECK: for.cond1.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK: for.cond1.preheader.us: +; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_INC7_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32 +; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP2]], [[N]] +; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32 +; CHECK-NEXT: br label [[FOR_BODY3_US:%.*]] +; CHECK: for.body3.us: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32 +; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP3]], [[MUL_US]] +; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[FLATTEN_TRUNCIV]] to i64 +; CHECK-NEXT: 
[[ARRAYIDX_US:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[IDXPROM_US]] +; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_US]], align 2 +; CHECK-NEXT: [[ADD5_US:%.*]] = add i16 [[TMP4]], [[VAL:%.*]] +; CHECK-NEXT: store i16 [[ADD5_US]], i16* [[ARRAYIDX_US]], align 2 +; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: br label [[FOR_COND1_FOR_INC7_CRIT_EDGE_US]] +; CHECK: for.cond1.for.inc7_crit_edge.us: +; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END9_LOOPEXIT:%.*]] +; CHECK: for.end9.loopexit: +; CHECK-NEXT: br label [[FOR_END9]] +; CHECK: for.end9: +; CHECK-NEXT: ret void +; +entry: + %cmp20.not = icmp eq i32 %N, 0 + br i1 %cmp20.not, label %for.end9, label %for.cond1.preheader.us.preheader + +for.cond1.preheader.us.preheader: + br label %for.cond1.preheader.us + +for.cond1.preheader.us: + %i.021.us = phi i32 [ %inc8.us, %for.cond1.for.inc7_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + %mul.us = mul i32 %i.021.us, %N + br label %for.body3.us + +for.body3.us: + %j.019.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body3.us ] + %add.us = add i32 %j.019.us, %mul.us + %idxprom.us = zext i32 %add.us to i64 + %arrayidx.us = getelementptr inbounds i16, i16* %A, i64 %idxprom.us + %0 = load i16, i16* %arrayidx.us, align 2 + %add5.us = add i16 %0, %val + store i16 %add5.us, i16* %arrayidx.us, align 2 + %inc.us = add nuw i32 %j.019.us, 1 + %cmp2.us = icmp ult i32 %inc.us, %N + br i1 %cmp2.us, label %for.body3.us, label %for.cond1.for.inc7_crit_edge.us + +for.cond1.for.inc7_crit_edge.us: + %inc8.us = add i32 %i.021.us, 1 + %cmp.us = icmp ult i32 %inc8.us, %N + br i1 %cmp.us, label %for.cond1.preheader.us, label %for.end9.loopexit + +for.end9.loopexit: + br label 
%for.end9 + +for.end9: + ret void +} + +; This IR corresponds to this input: +; +; void test(char n, char m) { +; for(char i = 0; i < n; i++) +; for(char j = 0; j < m; j++) { +; char x = i*m+j; +; use_32(x); +; } +; } +; +define void @test(i8 %n, i8 %m) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP25_NOT:%.*]] = icmp eq i8 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP25_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND3_PREHEADER_LR_PH:%.*]] +; CHECK: for.cond3.preheader.lr.ph: +; CHECK-NEXT: [[CMP623_NOT:%.*]] = icmp eq i8 [[M:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP623_NOT]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]] +; CHECK: for.cond3.preheader.preheader: +; CHECK-NEXT: br label [[FOR_COND3_PREHEADER:%.*]] +; CHECK: for.cond3.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[M]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[N]] to i64 +; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] +; CHECK: for.cond3.preheader.us: +; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i8 +; CHECK-NEXT: [[MUL_US:%.*]] = mul i8 [[TMP2]], [[M]] +; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i8 +; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]] +; CHECK: for.body9.us: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i8 +; CHECK-NEXT: [[ADD_US:%.*]] = add i8 [[TMP3]], [[MUL_US]] +; CHECK-NEXT: [[CONV14_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i32 +; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) +; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: br label 
[[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] +; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: +; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]] +; CHECK: for.cond3.preheader: +; CHECK-NEXT: [[I_026:%.*]] = phi i8 [ [[INC16:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ] +; CHECK-NEXT: [[INC16]] = add i8 [[I_026]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[INC16]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup.loopexit1: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +entry: + %cmp25.not = icmp eq i8 %n, 0 + br i1 %cmp25.not, label %for.cond.cleanup, label %for.cond3.preheader.lr.ph + +for.cond3.preheader.lr.ph: + %cmp623.not = icmp eq i8 %m, 0 + br i1 %cmp623.not, label %for.cond3.preheader.preheader, label %for.cond3.preheader.us.preheader + +for.cond3.preheader.preheader: + br label %for.cond3.preheader + +for.cond3.preheader.us.preheader: + br label %for.cond3.preheader.us + +for.cond3.preheader.us: + %i.026.us = phi i8 [ %inc16.us, %for.cond3.for.cond.cleanup8_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ] + %mul.us = mul i8 %i.026.us, %m + br label %for.body9.us + +for.body9.us: + %j.024.us = phi i8 [ 0, %for.cond3.preheader.us ], [ %inc.us, %for.body9.us ] + %add.us = add i8 %j.024.us, %mul.us + %conv14.us = zext i8 %add.us to i32 + %call.us = tail call i32 @use_32(i32 %conv14.us) #2 + %inc.us = add nuw i8 %j.024.us, 1 + %cmp6.us = icmp ult i8 %inc.us, %m + br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us + +for.cond3.for.cond.cleanup8_crit_edge.us: + %inc16.us = add 
i8 %i.026.us, 1 + %cmp.us = icmp ult i8 %inc16.us, %n + br i1 %cmp.us, label %for.cond3.preheader.us, label %for.cond.cleanup + +for.cond3.preheader: + %i.026 = phi i8 [ %inc16, %for.cond3.preheader ], [ 0, %for.cond3.preheader.preheader ] + %inc16 = add i8 %i.026, 1 + %cmp = icmp ult i8 %inc16, %n + br i1 %cmp, label %for.cond3.preheader, label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +; This IR corresponds to this input: +; +; void test3(char n, char m) { +; for(char i = 0; i < n; i++) +; for(char j = 0; j < m; j++) { +; char x = i*m+j; +; use_32(x); +; use_16(x); +; use_32(x); +; use_16(x); +; use_64(x); +; } +; } +; +define void @test3(i8 %n, i8 %m) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP37_NOT:%.*]] = icmp eq i8 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP37_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND3_PREHEADER_LR_PH:%.*]] +; CHECK: for.cond3.preheader.lr.ph: +; CHECK-NEXT: [[CMP635_NOT:%.*]] = icmp eq i8 [[M:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP635_NOT]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]] +; CHECK: for.cond3.preheader.preheader: +; CHECK-NEXT: br label [[FOR_COND3_PREHEADER:%.*]] +; CHECK: for.cond3.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[M]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[N]] to i64 +; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] +; CHECK: for.cond3.preheader.us: +; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i8 +; CHECK-NEXT: [[MUL_US:%.*]] = mul i8 [[TMP2]], [[M]] +; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i8 +; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]] +; CHECK: for.body9.us: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] 
] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i8 +; CHECK-NEXT: [[ADD_US:%.*]] = add i8 [[TMP3]], [[MUL_US]] +; CHECK-NEXT: [[CONV14_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i32 +; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) +; CHECK-NEXT: [[CONV15_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i16 +; CHECK-NEXT: [[CALL16_US:%.*]] = tail call i32 @use_16(i16 [[CONV15_US]]) +; CHECK-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) +; CHECK-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_16(i16 [[CONV15_US]]) +; CHECK-NEXT: [[CONV21_US:%.*]] = zext i8 [[FLATTEN_TRUNCIV]] to i64 +; CHECK-NEXT: [[CALL22_US:%.*]] = tail call i32 @use_64(i64 [[CONV21_US]]) +; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[CMP6_US:%.*]] = icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: br label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] +; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: +; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]] +; CHECK: for.cond3.preheader: +; CHECK-NEXT: [[I_038:%.*]] = phi i8 [ [[INC24:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ] +; CHECK-NEXT: [[INC24]] = add i8 [[I_038]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[INC24]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup.loopexit1: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +entry: + %cmp37.not = icmp eq i8 %n, 0 + br i1 %cmp37.not, label %for.cond.cleanup, label %for.cond3.preheader.lr.ph + +for.cond3.preheader.lr.ph: + %cmp635.not = icmp eq i8 %m, 0 + br i1 %cmp635.not, label 
%for.cond3.preheader.preheader, label %for.cond3.preheader.us.preheader + +for.cond3.preheader.preheader: + br label %for.cond3.preheader + +for.cond3.preheader.us.preheader: + br label %for.cond3.preheader.us + +for.cond3.preheader.us: + %i.038.us = phi i8 [ %inc24.us, %for.cond3.for.cond.cleanup8_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ] + %mul.us = mul i8 %i.038.us, %m + br label %for.body9.us + +for.body9.us: + %j.036.us = phi i8 [ 0, %for.cond3.preheader.us ], [ %inc.us, %for.body9.us ] + %add.us = add i8 %j.036.us, %mul.us + %conv14.us = zext i8 %add.us to i32 + %call.us = tail call i32 @use_32(i32 %conv14.us) + %conv15.us = zext i8 %add.us to i16 + %call16.us = tail call i32 @use_16(i16 %conv15.us) + %call18.us = tail call i32 @use_32(i32 %conv14.us) + %call20.us = tail call i32 @use_16(i16 %conv15.us) + %conv21.us = zext i8 %add.us to i64 + %call22.us = tail call i32 @use_64(i64 %conv21.us) + %inc.us = add nuw i8 %j.036.us, 1 + %cmp6.us = icmp ult i8 %inc.us, %m + br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us + +for.cond3.for.cond.cleanup8_crit_edge.us: + %inc24.us = add i8 %i.038.us, 1 + %cmp.us = icmp ult i8 %inc24.us, %n + br i1 %cmp.us, label %for.cond3.preheader.us, label %for.cond.cleanup + +for.cond3.preheader: + %i.038 = phi i8 [ %inc24, %for.cond3.preheader ], [ 0, %for.cond3.preheader.preheader ] + %inc24 = add i8 %i.038, 1 + %cmp = icmp ult i8 %inc24, %n + br i1 %cmp, label %for.cond3.preheader, label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +; This IR corresponds to this input: +; +; void test4(short n, short m) { +; for(short i = 0; i < n; i++) +; for(short j = 0; j < m; j++) { +; short x = i*m+j; +; use_32(x); +; use_16(x); +; use_32(x); +; use_16(x); +; use_64(x); +; } +; } +; +define void @test4(i16 %n, i16 %m) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP38:%.*]] = icmp sgt i16 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP38]], label 
[[FOR_COND3_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond3.preheader.lr.ph: +; CHECK-NEXT: [[CMP636:%.*]] = icmp sgt i16 [[M:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP636]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]] +; CHECK: for.cond3.preheader.preheader: +; CHECK-NEXT: br label [[FOR_COND3_PREHEADER:%.*]] +; CHECK: for.cond3.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = sext i16 [[M]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[N]] to i64 +; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] +; CHECK: for.cond3.preheader.us: +; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR2]] to i16 +; CHECK-NEXT: [[MUL_US:%.*]] = mul i16 [[TMP2]], [[M]] +; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i16 +; CHECK-NEXT: br label [[FOR_BODY9_US:%.*]] +; CHECK: for.body9.us: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND3_PREHEADER_US]] ] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i16 +; CHECK-NEXT: [[ADD_US:%.*]] = add i16 [[TMP3]], [[MUL_US]] +; CHECK-NEXT: [[CONV14_US:%.*]] = sext i16 [[FLATTEN_TRUNCIV]] to i32 +; CHECK-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) +; CHECK-NEXT: [[CALL15_US:%.*]] = tail call i32 @use_16(i16 [[FLATTEN_TRUNCIV]]) +; CHECK-NEXT: [[CALL17_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) +; CHECK-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_16(i16 [[FLATTEN_TRUNCIV]]) +; CHECK-NEXT: [[CONV19_US:%.*]] = sext i16 [[FLATTEN_TRUNCIV]] to i64 +; CHECK-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_64(i64 [[CONV19_US]]) +; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[CMP6_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: br label 
[[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] +; CHECK: for.cond3.for.cond.cleanup8_crit_edge.us: +; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond3.preheader: +; CHECK-NEXT: [[I_039:%.*]] = phi i16 [ [[INC22:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ] +; CHECK-NEXT: [[INC22]] = add i16 [[I_039]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC22]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup.loopexit1: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +; DONTWIDEN-LABEL: @test4( +; DONTWIDEN-NEXT: entry: +; DONTWIDEN-NEXT: [[CMP38:%.*]] = icmp sgt i16 [[N:%.*]], 0 +; DONTWIDEN-NEXT: br i1 [[CMP38]], label [[FOR_COND3_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; DONTWIDEN: for.cond3.preheader.lr.ph: +; DONTWIDEN-NEXT: [[CMP636:%.*]] = icmp sgt i16 [[M:%.*]], 0 +; DONTWIDEN-NEXT: br i1 [[CMP636]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]] +; DONTWIDEN: for.cond3.preheader.preheader: +; DONTWIDEN-NEXT: br label [[FOR_COND3_PREHEADER:%.*]] +; DONTWIDEN: for.cond3.preheader.us.preheader: +; DONTWIDEN-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] +; DONTWIDEN: for.cond3.preheader.us: +; DONTWIDEN-NEXT: [[I_039_US:%.*]] = phi i16 [ [[INC22_US:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] +; DONTWIDEN-NEXT: [[MUL_US:%.*]] = mul i16 [[I_039_US]], [[M]] +; DONTWIDEN-NEXT: br label [[FOR_BODY9_US:%.*]] +; DONTWIDEN: for.body9.us: +; DONTWIDEN-NEXT: [[J_037_US:%.*]] = phi i16 [ 0, 
[[FOR_COND3_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY9_US]] ] +; DONTWIDEN-NEXT: [[ADD_US:%.*]] = add i16 [[J_037_US]], [[MUL_US]] +; DONTWIDEN-NEXT: [[CONV14_US:%.*]] = sext i16 [[ADD_US]] to i32 +; DONTWIDEN-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) +; DONTWIDEN-NEXT: [[CALL15_US:%.*]] = tail call i32 @use_16(i16 [[ADD_US]]) +; DONTWIDEN-NEXT: [[CALL17_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) +; DONTWIDEN-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_16(i16 [[ADD_US]]) +; DONTWIDEN-NEXT: [[CONV19_US:%.*]] = sext i16 [[ADD_US]] to i64 +; DONTWIDEN-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_64(i64 [[CONV19_US]]) +; DONTWIDEN-NEXT: [[INC_US]] = add nuw nsw i16 [[J_037_US]], 1 +; DONTWIDEN-NEXT: [[CMP6_US:%.*]] = icmp slt i16 [[INC_US]], [[M]] +; DONTWIDEN-NEXT: br i1 [[CMP6_US]], label [[FOR_BODY9_US]], label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] +; DONTWIDEN: for.cond3.for.cond.cleanup8_crit_edge.us: +; DONTWIDEN-NEXT: [[INC22_US]] = add i16 [[I_039_US]], 1 +; DONTWIDEN-NEXT: [[CMP_US:%.*]] = icmp slt i16 [[INC22_US]], [[N]] +; DONTWIDEN-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; DONTWIDEN: for.cond3.preheader: +; DONTWIDEN-NEXT: [[I_039:%.*]] = phi i16 [ [[INC22:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ] +; DONTWIDEN-NEXT: [[INC22]] = add i16 [[I_039]], 1 +; DONTWIDEN-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC22]], [[N]] +; DONTWIDEN-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]] +; DONTWIDEN: for.cond.cleanup.loopexit: +; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]] +; DONTWIDEN: for.cond.cleanup.loopexit1: +; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]] +; DONTWIDEN: for.cond.cleanup: +; DONTWIDEN-NEXT: ret void +; +entry: + %cmp38 = icmp sgt i16 %n, 0 + br i1 %cmp38, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup + +for.cond3.preheader.lr.ph: + %cmp636 = icmp sgt i16 %m, 0 
+ br i1 %cmp636, label %for.cond3.preheader.us.preheader, label %for.cond3.preheader.preheader + +for.cond3.preheader.preheader: + br label %for.cond3.preheader + +for.cond3.preheader.us.preheader: + br label %for.cond3.preheader.us + +for.cond3.preheader.us: + %i.039.us = phi i16 [ %inc22.us, %for.cond3.for.cond.cleanup8_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ] + %mul.us = mul i16 %i.039.us, %m + br label %for.body9.us + +for.body9.us: + %j.037.us = phi i16 [ 0, %for.cond3.preheader.us ], [ %inc.us, %for.body9.us ] + %add.us = add i16 %j.037.us, %mul.us + %conv14.us = sext i16 %add.us to i32 + %call.us = tail call i32 @use_32(i32 %conv14.us) #2 + %call15.us = tail call i32 @use_16(i16 %add.us) #2 + %call17.us = tail call i32 @use_32(i32 %conv14.us) #2 + %call18.us = tail call i32 @use_16(i16 %add.us) #2 + %conv19.us = sext i16 %add.us to i64 + %call20.us = tail call i32 @use_64(i64 %conv19.us) #2 + %inc.us = add nuw nsw i16 %j.037.us, 1 + %cmp6.us = icmp slt i16 %inc.us, %m + br i1 %cmp6.us, label %for.body9.us, label %for.cond3.for.cond.cleanup8_crit_edge.us + +for.cond3.for.cond.cleanup8_crit_edge.us: + %inc22.us = add i16 %i.039.us, 1 + %cmp.us = icmp slt i16 %inc22.us, %n + br i1 %cmp.us, label %for.cond3.preheader.us, label %for.cond.cleanup + +for.cond3.preheader: + %i.039 = phi i16 [ %inc22, %for.cond3.preheader ], [ 0, %for.cond3.preheader.preheader ] + %inc22 = add i16 %i.039, 1 + %cmp = icmp slt i16 %inc22, %n + br i1 %cmp, label %for.cond3.preheader, label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +declare dso_local i32 @use_32(i32) +declare dso_local i32 @use_16(i16) +declare dso_local i32 @use_64(i64) + declare dso_local void @f(i32* %0) local_unnamed_addr #1