diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -94,6 +94,11 @@ // Whether this holds the flatten info before or after widening. bool Widened = false; + // Holds the old/narrow induction phis, i.e. the Phis before IV widening has + // been applied. This bookkeeping is used so we can skip some checks on these + // phi nodes. + SmallPtrSet OldInductionPHIs; + FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {}; }; @@ -263,6 +268,8 @@ // them specially when doing the transformation. if (&InnerPHI == FI.InnerInductionPHI) continue; + if (FI.Widened && FI.OldInductionPHIs.count(&InnerPHI)) + continue; // Each inner loop PHI node must have two incoming values/blocks - one // from the pre-header, and one from the latch. @@ -308,6 +315,8 @@ } for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) { + if (FI.Widened && FI.OldInductionPHIs.count(&OuterPHI)) + continue; if (!SafeOuterPHIs.count(&OuterPHI)) { LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump()); return false; @@ -398,8 +407,8 @@ if (U == FI.InnerIncrement) continue; - // After widening the IVs, a trunc instruction might have been introduced, so - // look through truncs. + // After widening the IVs, a trunc instruction might have been introduced, + // so look through truncs. if (isa(U)) { if (!U->hasOneUse()) return false; @@ -424,11 +433,23 @@ // Matches the same pattern as above, except it also looks for truncs // on the phi, which can be the result of widening the induction variables. 
- bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)), - m_Value(MatchedMul))) && - match(MatchedMul, - m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)), - m_Value(MatchedItCount))); + bool IsAddTrunc = + match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)), + m_Value(MatchedMul))) && + match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)), + m_Value(MatchedItCount))); + + if (!MatchedItCount) + return false; + // Look through extends if the IV has been widened. + if (FI.Widened && + (isa(MatchedItCount) || isa(MatchedItCount))) { + assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() && + "Unexpected type mismatch in types after widening"); + MatchedItCount = isa(MatchedItCount) + ? dyn_cast(MatchedItCount)->getOperand(0) + : dyn_cast(MatchedItCount)->getOperand(0); + } if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) { LLVM_DEBUG(dbgs() << "Use is optimisable\n"); @@ -668,14 +689,11 @@ } SCEVExpander Rewriter(*SE, DL, "loopflatten"); - SmallVector WideIVs; SmallVector DeadInsts; - WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false }); - WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false }); unsigned ElimExt = 0; unsigned Widened = 0; - for (const auto &WideIV : WideIVs) { + auto CreateWideIV = [&] (WideIVInfo WideIV, bool &Deleted) -> bool { PHINode *WidePhi = createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts, ElimExt, Widened, true /* HasGuards */, true /* UsePostIncrementRanges */); @@ -683,11 +701,28 @@ return false; LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump()); LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump()); - RecursivelyDeleteDeadPHINode(WideIV.NarrowIV); - } - // After widening, rediscover all the loop components. 
+ Deleted = RecursivelyDeleteDeadPHINode(WideIV.NarrowIV); + return true; + }; + + bool Deleted; + if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted)) + return false; + // If the inner Phi node cannot be trivially deleted, we need to at least + // bring it into a consistent state. + if (!Deleted) + FI.InnerInductionPHI->removeIncomingValue(FI.InnerLoop->getLoopLatch()); + if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted)) + return false; + assert(Widened && "Widened IV expected"); FI.Widened = true; + + // Save the old/narrow induction phis, which we need to ignore in CheckPHIs. + FI.OldInductionPHIs.insert(FI.InnerInductionPHI); + FI.OldInductionPHIs.insert(FI.OuterInductionPHI); + + // After widening, rediscover all the loop components. return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI); } diff --git a/llvm/test/Transforms/LoopFlatten/widen-iv.ll b/llvm/test/Transforms/LoopFlatten/widen-iv.ll --- a/llvm/test/Transforms/LoopFlatten/widen-iv.ll +++ b/llvm/test/Transforms/LoopFlatten/widen-iv.ll @@ -1,6 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -loop-simplify -loop-flatten -loop-flatten-widen-iv=true -verify-loop-info -verify-dom-info -verify-scev -verify | FileCheck %s --check-prefix=CHECK -; RUN: opt < %s -S -loop-simplify -loop-flatten -loop-flatten-widen-iv=false -verify-loop-info -verify-dom-info -verify-scev -verify | FileCheck %s --check-prefix=DONTWIDEN + +; RUN: opt < %s -S -loop-simplify -loop-flatten -loop-flatten-widen-iv=true \ +; RUN: -verify-loop-info -verify-dom-info -verify-scev -verify \ +; RUN: -loop-flatten-cost-threshold=6 | \ +; RUN: FileCheck %s --check-prefix=CHECK + +; RUN: opt < %s -S -loop-simplify -loop-flatten -loop-flatten-widen-iv=false \ +; RUN: -verify-loop-info -verify-dom-info -verify-scev -verify | \ +; RUN: FileCheck %s --check-prefix=DONTWIDEN target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -81,6 
+88,228 @@ ret void } +; This test case corresponds to this input: +; +; for (int i = 0; i < N; ++i) +; for (int j = 0; j < M; ++j) +; f(A[i*M+j]); +; +; It is very similar to test case @foo above, but the CFG is slightly +; different, making the analysis slightly different. +; +define void @foo2_sext(i32* nocapture readonly %A, i32 %N, i32 %M) { +; CHECK-LABEL: @foo2_sext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP17]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond1.preheader.lr.ph: +; CHECK-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[M:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]] +; CHECK: for.cond1.preheader.preheader: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK: for.cond1.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[M]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[M]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[N]] to i64 +; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP2]] +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK: for.cond1.preheader.us: +; CHECK-NEXT: [[INDVAR2:%.*]] = phi i64 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[I_018_US:%.*]] = phi i32 [ [[INC6_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[TMP3:%.*]] = mul nsw i64 [[INDVAR2]], [[TMP1]] +; CHECK-NEXT: [[MUL_US:%.*]] = mul nsw i32 [[I_018_US]], [[M]] +; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[MUL_US]] to i64 +; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR2]] to i32 +; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] +; CHECK: for.body4.us: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] +; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ 0, 
[[FOR_COND1_PREHEADER_US]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[INDVAR]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[J_016_US]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[ADD_US:%.*]] = add nsw i32 [[J_016_US]], [[MUL_US]] +; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[FLATTEN_TRUNCIV]] to i64 +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVAR2]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: tail call void @g(i32 [[TMP8]]) +; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[INC_US:%.*]] = add nuw nsw i32 [[J_016_US]], 1 +; CHECK-NEXT: [[CMP2_US:%.*]] = icmp slt i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: +; CHECK-NEXT: [[INDVAR_NEXT3]] = add i64 [[INDVAR2]], 1 +; CHECK-NEXT: [[INC6_US]] = add nuw nsw i32 [[I_018_US]], 1 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: [[I_018:%.*]] = phi i32 [ [[INC6:%.*]], [[FOR_COND1_PREHEADER]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ] +; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[I_018]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC6]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT19:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup.loopexit19: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +entry: + %cmp17 = icmp sgt i32 %N, 0 + br i1 %cmp17, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup + +for.cond1.preheader.lr.ph: + %cmp215 = icmp sgt i32 %M, 0 + br i1 %cmp215, label 
%for.cond1.preheader.us.preheader, label %for.cond1.preheader.preheader + +for.cond1.preheader.preheader: + br label %for.cond1.preheader + +for.cond1.preheader.us.preheader: + br label %for.cond1.preheader.us + +for.cond1.preheader.us: + %i.018.us = phi i32 [ %inc6.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + %mul.us = mul nsw i32 %i.018.us, %M + br label %for.body4.us + +for.body4.us: + %j.016.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ] + %add.us = add nsw i32 %j.016.us, %mul.us + %idxprom.us = sext i32 %add.us to i64 + %arrayidx.us = getelementptr inbounds i32, i32* %A, i64 %idxprom.us + %0 = load i32, i32* %arrayidx.us, align 4 + tail call void @g(i32 %0) + %inc.us = add nuw nsw i32 %j.016.us, 1 + %cmp2.us = icmp slt i32 %inc.us, %M + br i1 %cmp2.us, label %for.body4.us, label %for.cond1.for.cond.cleanup3_crit_edge.us + +for.cond1.for.cond.cleanup3_crit_edge.us: + %inc6.us = add nuw nsw i32 %i.018.us, 1 + %cmp.us = icmp slt i32 %inc6.us, %N + br i1 %cmp.us, label %for.cond1.preheader.us, label %for.cond.cleanup.loopexit + +for.cond1.preheader: + %i.018 = phi i32 [ %inc6, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ] + %inc6 = add nuw nsw i32 %i.018, 1 + %cmp = icmp slt i32 %inc6, %N + br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup.loopexit19 + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup.loopexit19: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + +; This test case corresponds to this input: +; +; void foo2_zext(unsigned *A, ..) 
{ +; for (unsigned i = 0; i < N; ++i) +; for (unsigned j = 0; j < M; ++j) +; f(A[i*M+j]); +; +define void @foo2_zext(i32* nocapture readonly %A, i32 %N, i32 %M) { +; CHECK-LABEL: @foo2_zext( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP17_NOT:%.*]] = icmp eq i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP17_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]] +; CHECK: for.cond1.preheader.lr.ph: +; CHECK-NEXT: [[CMP215_NOT:%.*]] = icmp eq i32 [[M:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP215_NOT]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] +; CHECK: for.cond1.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[M]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul i64 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK: for.cond1.preheader.preheader: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK: for.cond1.preheader.us: +; CHECK-NEXT: [[INDVAR1:%.*]] = phi i64 [ [[INDVAR_NEXT2:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVAR1]] to i32 +; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP2]], [[M]] +; CHECK-NEXT: [[FLATTEN_TRUNCIV:%.*]] = trunc i64 [[INDVAR1]] to i32 +; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]] +; CHECK: for.body4.us: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US]] ] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVAR]] to i32 +; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP3]], [[MUL_US]] +; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[FLATTEN_TRUNCIV]] to i64 +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM_US]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: tail call void @g(i32 [[TMP4]]) +; CHECK-NEXT: [[INDVAR_NEXT:%.*]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[CMP2_US:%.*]] = 
icmp ult i64 [[INDVAR_NEXT]], [[TMP0]] +; CHECK-NEXT: br label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: +; CHECK-NEXT: [[INDVAR_NEXT2]] = add i64 [[INDVAR1]], 1 +; CHECK-NEXT: [[CMP_US:%.*]] = icmp ult i64 [[INDVAR_NEXT2]], [[FLATTEN_TRIPCOUNT]] +; CHECK-NEXT: br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT19:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: [[I_018:%.*]] = phi i32 [ [[INC6:%.*]], [[FOR_COND1_PREHEADER]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ] +; CHECK-NEXT: [[INC6]] = add i32 [[I_018]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC6]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup.loopexit19: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +entry: + %cmp17.not = icmp eq i32 %N, 0 + br i1 %cmp17.not, label %for.cond.cleanup, label %for.cond1.preheader.lr.ph + +for.cond1.preheader.lr.ph: + %cmp215.not = icmp eq i32 %M, 0 + br i1 %cmp215.not, label %for.cond1.preheader.preheader, label %for.cond1.preheader.us.preheader + +for.cond1.preheader.us.preheader: + br label %for.cond1.preheader.us + +for.cond1.preheader.preheader: + br label %for.cond1.preheader + +for.cond1.preheader.us: + %i.018.us = phi i32 [ %inc6.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ] + %mul.us = mul i32 %i.018.us, %M + br label %for.body4.us + +for.body4.us: + %j.016.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ] + %add.us = add i32 %j.016.us, %mul.us + %idxprom.us = zext i32 %add.us to i64 + %arrayidx.us = getelementptr inbounds i32, i32* %A, i64 %idxprom.us + %0 = load i32, i32* %arrayidx.us, align 4 + tail call void @g(i32 %0) + %inc.us = add nuw i32 %j.016.us, 1 + %cmp2.us = icmp ult i32 
%inc.us, %M + br i1 %cmp2.us, label %for.body4.us, label %for.cond1.for.cond.cleanup3_crit_edge.us + +for.cond1.for.cond.cleanup3_crit_edge.us: + %inc6.us = add i32 %i.018.us, 1 + %cmp.us = icmp ult i32 %inc6.us, %N + br i1 %cmp.us, label %for.cond1.preheader.us, label %for.cond.cleanup.loopexit19 + +for.cond1.preheader: + %i.018 = phi i32 [ %inc6, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ] + %inc6 = add i32 %i.018, 1 + %cmp = icmp ult i32 %inc6, %N + br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup.loopexit + +for.cond.cleanup.loopexit: + br label %for.cond.cleanup + +for.cond.cleanup.loopexit19: + br label %for.cond.cleanup + +for.cond.cleanup: + ret void +} + define void @zext(i32 %N, i16* nocapture %A, i16 %val) { ; CHECK-LABEL: @zext( ; CHECK-NEXT: entry: @@ -433,50 +662,6 @@ ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; -; DONTWIDEN-LABEL: @test4( -; DONTWIDEN-NEXT: entry: -; DONTWIDEN-NEXT: [[CMP38:%.*]] = icmp sgt i16 [[N:%.*]], 0 -; DONTWIDEN-NEXT: br i1 [[CMP38]], label [[FOR_COND3_PREHEADER_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; DONTWIDEN: for.cond3.preheader.lr.ph: -; DONTWIDEN-NEXT: [[CMP636:%.*]] = icmp sgt i16 [[M:%.*]], 0 -; DONTWIDEN-NEXT: br i1 [[CMP636]], label [[FOR_COND3_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND3_PREHEADER_PREHEADER:%.*]] -; DONTWIDEN: for.cond3.preheader.preheader: -; DONTWIDEN-NEXT: br label [[FOR_COND3_PREHEADER:%.*]] -; DONTWIDEN: for.cond3.preheader.us.preheader: -; DONTWIDEN-NEXT: br label [[FOR_COND3_PREHEADER_US:%.*]] -; DONTWIDEN: for.cond3.preheader.us: -; DONTWIDEN-NEXT: [[I_039_US:%.*]] = phi i16 [ [[INC22_US:%.*]], [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND3_PREHEADER_US_PREHEADER]] ] -; DONTWIDEN-NEXT: [[MUL_US:%.*]] = mul i16 [[I_039_US]], [[M]] -; DONTWIDEN-NEXT: br label [[FOR_BODY9_US:%.*]] -; DONTWIDEN: for.body9.us: -; DONTWIDEN-NEXT: [[J_037_US:%.*]] = phi i16 [ 0, [[FOR_COND3_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY9_US]] ] 
-; DONTWIDEN-NEXT: [[ADD_US:%.*]] = add i16 [[J_037_US]], [[MUL_US]] -; DONTWIDEN-NEXT: [[CONV14_US:%.*]] = sext i16 [[ADD_US]] to i32 -; DONTWIDEN-NEXT: [[CALL_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) -; DONTWIDEN-NEXT: [[CALL15_US:%.*]] = tail call i32 @use_16(i16 [[ADD_US]]) -; DONTWIDEN-NEXT: [[CALL17_US:%.*]] = tail call i32 @use_32(i32 [[CONV14_US]]) -; DONTWIDEN-NEXT: [[CALL18_US:%.*]] = tail call i32 @use_16(i16 [[ADD_US]]) -; DONTWIDEN-NEXT: [[CONV19_US:%.*]] = sext i16 [[ADD_US]] to i64 -; DONTWIDEN-NEXT: [[CALL20_US:%.*]] = tail call i32 @use_64(i64 [[CONV19_US]]) -; DONTWIDEN-NEXT: [[INC_US]] = add nuw nsw i16 [[J_037_US]], 1 -; DONTWIDEN-NEXT: [[CMP6_US:%.*]] = icmp slt i16 [[INC_US]], [[M]] -; DONTWIDEN-NEXT: br i1 [[CMP6_US]], label [[FOR_BODY9_US]], label [[FOR_COND3_FOR_COND_CLEANUP8_CRIT_EDGE_US]] -; DONTWIDEN: for.cond3.for.cond.cleanup8_crit_edge.us: -; DONTWIDEN-NEXT: [[INC22_US]] = add i16 [[I_039_US]], 1 -; DONTWIDEN-NEXT: [[CMP_US:%.*]] = icmp slt i16 [[INC22_US]], [[N]] -; DONTWIDEN-NEXT: br i1 [[CMP_US]], label [[FOR_COND3_PREHEADER_US]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; DONTWIDEN: for.cond3.preheader: -; DONTWIDEN-NEXT: [[I_039:%.*]] = phi i16 [ [[INC22:%.*]], [[FOR_COND3_PREHEADER]] ], [ 0, [[FOR_COND3_PREHEADER_PREHEADER]] ] -; DONTWIDEN-NEXT: [[INC22]] = add i16 [[I_039]], 1 -; DONTWIDEN-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC22]], [[N]] -; DONTWIDEN-NEXT: br i1 [[CMP]], label [[FOR_COND3_PREHEADER]], label [[FOR_COND_CLEANUP_LOOPEXIT1:%.*]] -; DONTWIDEN: for.cond.cleanup.loopexit: -; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]] -; DONTWIDEN: for.cond.cleanup.loopexit1: -; DONTWIDEN-NEXT: br label [[FOR_COND_CLEANUP]] -; DONTWIDEN: for.cond.cleanup: -; DONTWIDEN-NEXT: ret void -; entry: %cmp38 = icmp sgt i16 %n, 0 br i1 %cmp38, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup @@ -574,5 +759,6 @@ declare dso_local i32 @use_32(i32) declare dso_local i32 @use_16(i16) declare dso_local i32 @use_64(i64) 
+declare dso_local void @g(i32) declare dso_local void @f(i32* %0) local_unnamed_addr #1