diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -47,6 +47,7 @@ class Function; class GlobalValue; class InstCombiner; +class OptimizationRemarkEmitter; class IntrinsicInst; class LoadInst; class LoopAccessInfo; @@ -506,7 +507,8 @@ /// transformation. The caller will initialize UP with the current /// target-independent defaults. void getUnrollingPreferences(Loop *L, ScalarEvolution &, - UnrollingPreferences &UP) const; + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) const; /// Query the target whether it would be profitable to convert the given loop /// into a hardware loop. @@ -1458,7 +1460,8 @@ Value *NewV) const = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, - UnrollingPreferences &UP) = 0; + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) = 0; virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) = 0; virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, @@ -1788,8 +1791,9 @@ return Impl.isLoweredToCall(F); } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - UnrollingPreferences &UP) override { - return Impl.getUnrollingPreferences(L, SE, UP); + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) override { + return Impl.getUnrollingPreferences(L, SE, UP, ORE); } void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) override { diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -187,7 +187,8 @@ } void getUnrollingPreferences(Loop *, ScalarEvolution &, - TTI::UnrollingPreferences &) const {} + 
TTI::UnrollingPreferences &, + OptimizationRemarkEmitter *) const {} void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const {} diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -484,7 +485,8 @@ int getInlinerVectorBonusPercent() { return 150; } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: @@ -526,6 +528,15 @@ continue; } + if (ORE) { + ORE->emit([&]() { + return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(), + L->getHeader()) + << "advising against unrolling the loop because it " + "contains a " + << ore::NV("Call", &I); + }); + } return; } } diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -117,7 +117,8 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount); diff --git 
a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -317,8 +317,9 @@ } void TargetTransformInfo::getUnrollingPreferences( - Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const { - return TTIImpl->getUnrollingPreferences(L, SE, UP); + Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) const { + return TTIImpl->getUnrollingPreferences(L, SE, UP, ORE); } void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -209,7 +209,8 @@ InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1705,9 +1705,10 @@ } void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { // Enable partial unrolling and runtime unrolling. 
- BaseT::getUnrollingPreferences(L, SE, UP); + BaseT::getUnrollingPreferences(L, SE, UP, ORE); // For inner loop, it is more likely to be a hot one, and the runtime check // can be promoted out from LICM pass, so the overhead is less, let's try diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -53,7 +53,8 @@ explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); @@ -108,7 +109,8 @@ bool useGPUDivergenceAnalysis() const; void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); @@ -239,7 +241,8 @@ const AMDGPUTargetLowering *getTLI() const { return TLI; } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); unsigned getHardwareNumberOfRegisters(bool Vec) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -101,7 +101,8 @@ TLI(ST->getTargetLowering()) {} void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { const Function &F = 
*L->getHeader()->getParent(); UP.Threshold = AMDGPU::getIntegerAttribute(F, "amdgpu-unroll-threshold", 300); UP.MaxCount = std::numeric_limits<unsigned>::max(); @@ -1224,8 +1225,9 @@ } void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { - CommonTTI.getUnrollingPreferences(L, SE, UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { + CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); } void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, @@ -1350,8 +1352,9 @@ } void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { - CommonTTI.getUnrollingPreferences(L, SE, UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { + CommonTTI.getUnrollingPreferences(L, SE, UP, ORE); } void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -279,7 +279,8 @@ DominatorTree *DT, const LoopAccessInfo *LAI); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); bool emitGetActiveLaneMask() const; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2146,14 +2146,15 @@ return true; } void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { // Enable Upper bound unrolling universally, not dependant upon the conditions // below. UP.UpperBound = true; // Only currently enable these preferences for M-Class cores. 
if (!ST->isMClass()) - return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP); + return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE); // Disable loop unrolling for Oz and Os. UP.OptSizeThreshold = 0; diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -61,7 +61,8 @@ // The Hexagon target can unroll loops with run-time trip counts. void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -64,7 +64,8 @@ // The Hexagon target can unroll loops with run-time trip counts. 
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { UP.Runtime = UP.Partial = true; } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -99,7 +99,8 @@ const Instruction *CxtI = nullptr); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -402,8 +402,9 @@ } void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { - BaseT::getUnrollingPreferences(L, SE, UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { + BaseT::getUnrollingPreferences(L, SE, UP, ORE); // Enable partial unrolling and runtime unrolling, but reduce the // threshold. 
This partially unrolls small loops which are often diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -72,7 +72,8 @@ TargetLibraryInfo *LibInfo); bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -787,7 +787,8 @@ } void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { if (ST->getCPUDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling // helps expose latency-hiding opportunities to the instruction scheduler. 
@@ -798,7 +799,7 @@ UP.AllowExpensiveTripCount = true; } - BaseT::getUnrollingPreferences(L, SE, UP); + BaseT::getUnrollingPreferences(L, SE, UP, ORE); } void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -52,7 +52,8 @@ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP); + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE); void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -243,7 +243,8 @@ } void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { // Find out if L contains a call, what the machine instruction count // estimate is, and how many stores there are. 
bool HasCall = false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -49,7 +49,8 @@ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) const; + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) const; /// @} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -114,7 +114,8 @@ } void WebAssemblyTTIImpl::getUnrollingPreferences( - Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) const { + Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) const { // Scan the loop: don't unroll loops with calls. This is a standard approach // for most (all?) targets. 
for (BasicBlock *BB : L->blocks()) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -286,8 +286,8 @@ AssumptionCache &AC, DependenceInfo &DI, OptimizationRemarkEmitter &ORE, int OptLevel) { TargetTransformInfo::UnrollingPreferences UP = - gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None, - None, None, None, None, None); + gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel, + None, None, None, None, None, None); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(L, SE, TTI, None, None); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -184,7 +184,8 @@ /// flags, TTI overrides and user specified parameters. 
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + OptimizationRemarkEmitter &ORE, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) { @@ -214,7 +215,7 @@ UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze; // Override with any target specific settings - TTI.getUnrollingPreferences(L, SE, UP); + TTI.getUnrollingPreferences(L, SE, UP, &ORE); // Apply size attributes bool OptForSize = L->getHeader()->getParent()->hasOptSize() || @@ -1079,7 +1080,7 @@ bool NotDuplicatable; bool Convergent; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, + L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedFullUnrollMaxCount); TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences( diff --git a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll @@ -0,0 +1,45 @@ +; RUN: opt -debugify -loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s +; RUN: opt -debugify -loop-unroll -mcpu=znver3 -pass-remarks=TTI -pass-remarks-analysis=TTI < %s -S 2>&1 | FileCheck --check-prefixes=ALL,TTI %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; TTI: remark: <stdin>:7:1: advising against unrolling the loop because it contains a call +; UNROLL: remark: <stdin>:14:1: 
unrolled loop by a factor of 8 with run-time trip count + +define void @contains_external_call(i32 %count) { +; ALL-LABEL: @contains_external_call( +; ALL-NOT: unroll +entry: + %cmp.not3 = icmp eq i32 %count, 0 + br i1 %cmp.not3, label %for.cond.cleanup, label %for.body + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + tail call void @sideeffect() + %inc = add nuw nsw i32 %i.04, 1 + %cmp.not = icmp eq i32 %inc, %count + br i1 %cmp.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +declare void @sideeffect() + +define i32 @no_external_calls(i32 %count) { +; ALL-LABEL: @no_external_calls( +; ALL: unroll +entry: + %cmp.not5 = icmp eq i32 %count, 0 + br i1 %cmp.not5, label %for.end, label %for.body + +for.body: + %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %inc = add nuw nsw i32 %i.06, 1 + %cmp.not = icmp eq i32 %inc, %count + br i1 %cmp.not, label %for.end, label %for.body + +for.end: + ret i32 %count +}