diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -830,6 +830,11 @@
   /// unrolling pass is run more than once (which it generally is).
   void setLoopAlreadyUnrolled();
 
+  /// Add llvm.loop.mustprogress to this loop's loop id metadata.
+  ///
+  /// This is a no-op when the loop id already carries that option.
+  void setLoopMustProgress();
+
   void dump() const;
   void dumpVerbose() const;
 
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -176,9 +176,9 @@
       function_ref<AssumptionCache &(Function &)> GetAssumptionCache = nullptr,
       ProfileSummaryInfo *PSI = nullptr,
       BlockFrequencyInfo *CallerBFI = nullptr,
-      BlockFrequencyInfo *CalleeBFI = nullptr)
+      BlockFrequencyInfo *CalleeBFI = nullptr, LoopInfo *CallerLI = nullptr)
       : CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI),
-        CallerBFI(CallerBFI), CalleeBFI(CalleeBFI) {}
+        CallerBFI(CallerBFI), CalleeBFI(CalleeBFI), CallerLI(CallerLI) {}
 
   /// If non-null, InlineFunction will update the callgraph to reflect the
   /// changes it makes.
@@ -186,6 +186,10 @@
   function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
   ProfileSummaryInfo *PSI;
   BlockFrequencyInfo *CallerBFI, *CalleeBFI;
+
+  /// Loop information for the caller. If non-null, InlineFunction may use it
+  /// to attach llvm.loop.mustprogress to the caller's loops.
+  LoopInfo *CallerLI;
 
   /// InlineFunction fills this in with all static allocas that get copied into
   /// the caller.
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -535,6 +535,24 @@
   setLoopID(NewLoopID);
 }
 
+/// Tag this loop with the llvm.loop.mustprogress option. Does nothing if the
+/// loop id already carries that option.
+void Loop::setLoopMustProgress() {
+  // Already tagged: leave the existing loop id untouched.
+  if (findOptionMDForLoop(this, "llvm.loop.mustprogress"))
+    return;
+
+  LLVMContext &Context = getHeader()->getContext();
+  MDNode *MustProgressMD =
+      MDNode::get(Context, MDString::get(Context, "llvm.loop.mustprogress"));
+
+  // Derive the new loop id from the current one with the mustprogress option
+  // added, then install it on this loop.
+  MDNode *NewLoopID = makePostTransformationMetadata(Context, getLoopID(), {},
+                                                     {MustProgressMD});
+  setLoopID(NewLoopID);
+}
+
 bool Loop::isAnnotatedParallel() const {
   MDNode *DesiredLoopIdMetadata = getLoopID();
 
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -846,7 +847,8 @@
       InlineFunctionInfo IFI(
           /*cg=*/nullptr, GetAssumptionCache, PSI,
           &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
-          &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+          &FAM.getResult<BlockFrequencyAnalysis>(Callee),
+          &FAM.getResult<LoopAnalysis>(*(CB->getCaller())));
 
       InlineResult IR = InlineFunction(*CB, IFI);
       if (!IR.isSuccess()) {
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -27,11 +27,12 @@
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
@@ -61,6 +62,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <cassert>
@@ -2300,6 +2302,18 @@
           IFI.InlinedCallSites.push_back(CB);
   }
 
+  // When a maynotprogress callee is inlined into a caller that is not
+  // maynotprogress, attach llvm.loop.mustprogress to every loop the caller's
+  // LoopInfo knows about (nested loops included), so that those loops stay
+  // marked as required to make progress. Skipped when no LoopInfo was
+  // provided.
+  if (IFI.CallerLI &&
+      CB.getCalledFunction()->hasFnAttribute(Attribute::MayNotProgress) &&
+      !CB.getCaller()->hasFnAttribute(Attribute::MayNotProgress)) {
+    for (Loop *L : IFI.CallerLI->getLoopsInPreorder())
+      L->setLoopMustProgress();
+  }
+
   // If we cloned in _exactly one_ basic block, and if that block ends in a
   // return instruction, we splice the body of the inlined callee directly into
   // the calling basic block.
diff --git a/llvm/test/Transforms/Inline/inlined-mustprogress-loop-metadata.ll b/llvm/test/Transforms/Inline/inlined-mustprogress-loop-metadata.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/inlined-mustprogress-loop-metadata.ll @@ -0,0 +1,503 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --force-update +; RUN: opt < %s -S -passes="inline" | FileCheck %s + +define void @callee(i32 %a, i32 %b) #0 { +entry: + br label %for.cond +for.cond: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond, !llvm.loop !0 +for.end: + br label %while.body +while.body: + br label %while.body +} + +define void @caller(i32 %a, i32 %b) #1 { +; CHECK: Function Attrs: noinline maynotprogress +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) [[ATTR1:#.*]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:!.*]] +; CHECK: for.end: +; CHECK-NEXT: br label [[FOR_COND_I:%.*]] +; CHECK: for.cond.i: +; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP0:!.*]] +; CHECK: callee.exit: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond +for.cond: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond +for.end: + call void @callee(i32 0, i32 5) + ret void +} + +define void @callee_no_metadata(i32 %a, i32 %b) { +entry: + br label %for.cond +for.cond: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond +for.end: + br label %while.body +while.body: + br label %while.body +} + +define void @caller_no_metadata(i32 %a, i32 %b) { +; CHECK-LABEL: define 
{{[^@]+}}@caller_no_metadata +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: br label [[FOR_COND_I:%.*]] +; CHECK: for.cond.i: +; CHECK-NEXT: br label [[FOR_COND_I]] +; CHECK: callee_no_metadata.exit: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond +for.cond: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond +for.end: + call void @callee_no_metadata(i32 0, i32 5) + ret void +} + +define void @callee_maynotprogress(i32 %a, i32 %b) #0 { +entry: + br label %for.cond +for.cond: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond +for.end: + br label %while.body +while.body: + br label %while.body +} + +define void @caller_maynotprogress(i32 %a, i32 %b) #0 { +; CHECK: Function Attrs: maynotprogress +; CHECK-LABEL: define {{[^@]+}}@caller_maynotprogress +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) [[ATTR0:#.*]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], [[B]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: br label [[FOR_COND_I:%.*]] +; CHECK: for.cond.i: +; CHECK-NEXT: br label [[FOR_COND_I]] +; CHECK: callee_maynotprogress.exit: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond +for.cond: + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond +for.end: + call void @callee_maynotprogress(i32 0, i32 5) + ret void +} + +define void @callee_multiple(i32 %a, i32 %b) #0 { +entry: + %a.addr = 
alloca i32, align 4 + %b.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + br label %for.cond +for.cond: + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond, !llvm.loop !2 +for.end: + store i32 0, i32* %i, align 4 + br label %for.cond1 +for.cond1: + %2 = load i32, i32* %i, align 4 + %cmp2 = icmp slt i32 %2, 10 + br i1 %cmp2, label %for.body3, label %for.end4 +for.body3: + br label %for.inc +for.inc: + %3 = load i32, i32* %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond1, !llvm.loop !4 +for.end4: + br label %while.body +while.body: + br label %while.body +} + +define void @caller_multiple(i32 %a, i32 %b) #1 { +; CHECK: Function Attrs: noinline maynotprogress +; CHECK-LABEL: define {{[^@]+}}@caller_multiple +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) [[ATTR1:#.*]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR_I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR_I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:!.*]] +; CHECK: for.end: +; CHECK-NEXT: store i32 0, i32* [[I]], align 4 +; CHECK-NEXT: br label 
[[FOR_COND1:%.*]] +; CHECK: for.cond1: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 10 +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END4:%.*]] +; CHECK: for.body3: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP4:!.*]] +; CHECK: for.end4: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[A_ADDR_I]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[B_ADDR_I]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[I_I]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) +; CHECK-NEXT: store i32 0, i32* [[A_ADDR_I]], align 4 +; CHECK-NEXT: store i32 5, i32* [[B_ADDR_I]], align 4 +; CHECK-NEXT: br label [[FOR_COND_I:%.*]] +; CHECK: for.cond.i: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_ADDR_I]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[B_ADDR_I]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[FOR_END_I:%.*]] +; CHECK: for.body.i: +; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP3:!.*]] +; CHECK: for.end.i: +; CHECK-NEXT: store i32 0, i32* [[I_I]], align 4 +; CHECK-NEXT: br label [[FOR_COND1_I:%.*]] +; CHECK: for.cond1.i: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[I_I]], align 4 +; CHECK-NEXT: [[CMP2_I:%.*]] = icmp slt i32 [[TMP9]], 10 +; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY3_I:%.*]], label [[FOR_END4_I:%.*]] +; CHECK: for.body3.i: +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[I_I]], align 4 +; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP10]], 1 +; 
CHECK-NEXT: store i32 [[INC_I]], i32* [[I_I]], align 4 +; CHECK-NEXT: br label [[FOR_COND1_I]], !llvm.loop [[LOOP4:!.*]] +; CHECK: for.end4.i: +; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]] +; CHECK: while.body.i: +; CHECK-NEXT: br label [[WHILE_BODY_I]] +; CHECK: callee_multiple.exit: +; CHECK-NEXT: ret void +; +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + br label %for.cond +for.cond: + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond +for.end: + store i32 0, i32* %i, align 4 + br label %for.cond1 +for.cond1: + %2 = load i32, i32* %i, align 4 + %cmp2 = icmp slt i32 %2, 10 + br i1 %cmp2, label %for.body3, label %for.end4 +for.body3: + br label %for.inc +for.inc: + %3 = load i32, i32* %i, align 4 + %inc = add nsw i32 %3, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond1 +for.end4: + call void @callee_multiple(i32 0, i32 5) + ret void +} + +define void @callee_nested(i32 %a, i32 %b) #0 { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + br label %for.cond +for.cond: + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +for.body: + br label %for.cond, !llvm.loop !0 +for.end: + store i32 0, i32* %i, align 4 + br label %for.cond1 +for.cond1: + %2 = load i32, i32* %i, align 4 + %cmp2 = icmp slt i32 %2, 10 + br i1 %cmp2, label %for.body3, label %for.end8 +for.body3: + br label %for.cond4 +for.cond4: + %3 = load i32, i32* %b.addr, align 4 + %4 = load i32, i32* %a.addr, align 4 + %cmp5 = icmp slt i32 %3, %4 + br i1 %cmp5, label %for.body6, label %for.end7 +for.body6: + br label 
%for.cond4, !llvm.loop !2 +for.end7: + br label %for.inc +for.inc: + %5 = load i32, i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond1, !llvm.loop !3 +for.end8: + br label %while.body +while.body: + br label %while.body +} + +define void @caller_nested(i32 %a, i32 %b) #1 { +; CHECK: Function Attrs: noinline maynotprogress +; CHECK-LABEL: define {{[^@]+}}@caller_nested +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) [[ATTR1:#.*]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR_I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR_I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I9:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]] +; CHECK: for.body: +; CHECK-NEXT: store i32 0, i32* [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND1:%.*]] +; CHECK: for.cond1: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 10 +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END7:%.*]] +; CHECK: for.body3: +; CHECK-NEXT: br label [[FOR_COND4:%.*]] +; CHECK: for.cond4: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_ADDR]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6:%.*]], label [[FOR_END:%.*]] +; CHECK: 
for.body6: +; CHECK-NEXT: br label [[FOR_COND4]], !llvm.loop [[LOOP10:!.*]] +; CHECK: for.end: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +; CHECK-NEXT: store i32 [[INC]], i32* [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP11:!.*]] +; CHECK: for.end7: +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:!.*]] +; CHECK: for.end8: +; CHECK-NEXT: store i32 0, i32* [[I9]], align 4 +; CHECK-NEXT: br label [[FOR_COND10:%.*]] +; CHECK: for.cond10: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[I9]], align 4 +; CHECK-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP6]], 10 +; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END15:%.*]] +; CHECK: for.body12: +; CHECK-NEXT: br label [[FOR_INC13:%.*]] +; CHECK: for.inc13: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[I9]], align 4 +; CHECK-NEXT: [[INC14:%.*]] = add nsw i32 [[TMP7]], 1 +; CHECK-NEXT: store i32 [[INC14]], i32* [[I9]], align 4 +; CHECK-NEXT: br label [[FOR_COND10]], !llvm.loop [[LOOP13:!.*]] +; CHECK: for.end15: +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A_ADDR_I]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[B_ADDR_I]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP9]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[I_I]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) +; CHECK-NEXT: store i32 0, i32* [[A_ADDR_I]], align 4 +; CHECK-NEXT: store i32 5, i32* [[B_ADDR_I]], align 4 +; CHECK-NEXT: br label [[FOR_COND_I:%.*]] +; CHECK: for.cond.i: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR_I]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR_I]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label 
[[FOR_END_I:%.*]] +; CHECK: for.body.i: +; CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP0]] +; CHECK: for.end.i: +; CHECK-NEXT: store i32 0, i32* [[I_I]], align 4 +; CHECK-NEXT: br label [[FOR_COND1_I:%.*]] +; CHECK: for.cond1.i: +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[I_I]], align 4 +; CHECK-NEXT: [[CMP2_I:%.*]] = icmp slt i32 [[TMP13]], 10 +; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY3_I:%.*]], label [[FOR_END8_I:%.*]] +; CHECK: for.body3.i: +; CHECK-NEXT: br label [[FOR_COND4_I:%.*]] +; CHECK: for.cond4.i: +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[B_ADDR_I]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[A_ADDR_I]], align 4 +; CHECK-NEXT: [[CMP5_I:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[CMP5_I]], label [[FOR_BODY6_I:%.*]], label [[FOR_END7_I:%.*]] +; CHECK: for.body6.i: +; CHECK-NEXT: br label [[FOR_COND4_I]], !llvm.loop [[LOOP4:!.*]] +; CHECK: for.end7.i: +; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[I_I]], align 4 +; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP16]], 1 +; CHECK-NEXT: store i32 [[INC_I]], i32* [[I_I]], align 4 +; CHECK-NEXT: br label [[FOR_COND1_I]], !llvm.loop [[LOOP5:!.*]] +; CHECK: for.end8.i: +; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]] +; CHECK: while.body.i: +; CHECK-NEXT: br label [[WHILE_BODY_I]] +; CHECK: callee_nested.exit: +; CHECK-NEXT: ret void +; +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %i = alloca i32, align 4 + %i9 = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + br label %for.cond +for.cond: + %0 = load i32, i32* %a.addr, align 4 + %1 = load i32, i32* %b.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end8 +for.body: + store i32 0, i32* %i, align 4 + br label %for.cond1 +for.cond1: + %2 = load i32, i32* %i, align 4 + %cmp2 = icmp slt i32 %2, 10 + br i1 %cmp2, label %for.body3, label %for.end7 +for.body3: + br label %for.cond4 +for.cond4: + %3 = 
load i32, i32* %b.addr, align 4 + %4 = load i32, i32* %a.addr, align 4 + %cmp5 = icmp slt i32 %3, %4 + br i1 %cmp5, label %for.body6, label %for.end +for.body6: + br label %for.cond4 +for.end: + br label %for.inc +for.inc: + %5 = load i32, i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond1 +for.end7: + br label %for.cond +for.end8: + store i32 0, i32* %i9, align 4 + br label %for.cond10 +for.cond10: + %6 = load i32, i32* %i9, align 4 + %cmp11 = icmp slt i32 %6, 10 + br i1 %cmp11, label %for.body12, label %for.end15 +for.body12: + br label %for.inc13 +for.inc13: + %7 = load i32, i32* %i9, align 4 + %inc14 = add nsw i32 %7, 1 + store i32 %inc14, i32* %i9, align 4 + br label %for.cond10 +for.end15: + call void @callee_nested(i32 0, i32 5) + ret void +} + +; CHECK: attributes [[ATTR0]] = { maynotprogress } +; CHECK: attributes [[ATTR1]] = { noinline maynotprogress } + +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[GEN1:!.*]]} +; CHECK: [[GEN1]] = !{!"llvm.loop.mustprogress"} +; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[GEN1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[GEN1]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[GEN1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[GEN1]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[GEN1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[GEN1]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[GEN1]]} + +attributes #0 = { maynotprogress } +attributes #1 = { noinline } +attributes #2 = { noinline maynotprogress } + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.mustprogress"} +!2 = distinct !{!2, !1} +!3 = distinct !{!3, !1} +!4 = distinct !{!4, !1} +!5 = distinct !{!5, !1} +!6 = distinct !{!6, !1} +!7 = distinct !{!7, !1}