Index: llvm/lib/CodeGen/HardwareLoops.cpp =================================================================== --- llvm/lib/CodeGen/HardwareLoops.cpp +++ llvm/lib/CodeGen/HardwareLoops.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -75,8 +76,44 @@ STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); +#ifndef NDEBUG +static void debugHWLoopFailure(const StringRef DebugMsg, + Instruction *I) { + dbgs() << "HWLoops: " << DebugMsg; + if (I != nullptr) + dbgs() << " " << *I; + else + dbgs() << '.'; + dbgs() << '\n'; +} +#endif + +static OptimizationRemarkAnalysis +createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) { + Value *CodeRegion = L->getHeader(); + DebugLoc DL = L->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); + R << "hardware-loop not created: "; + return R; +} + namespace { + void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) { + LLVM_DEBUG(debugHWLoopFailure(Msg, I)); + ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg); + } + using TTI = TargetTransformInfo; class HardwareLoops : public FunctionPass { @@ -97,6 +134,7 @@ AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); } // Try to convert the given Loop into a hardware loop. 
@@ -110,6 +148,7 @@ ScalarEvolution *SE = nullptr; LoopInfo *LI = nullptr; const DataLayout *DL = nullptr; + OptimizationRemarkEmitter *ORE = nullptr; const TargetTransformInfo *TTI = nullptr; DominatorTree *DT = nullptr; bool PreserveLCSSA = false; @@ -143,8 +182,9 @@ public: HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, - const DataLayout &DL) : - SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()), + const DataLayout &DL, + OptimizationRemarkEmitter *ORE) : + SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), @@ -157,6 +197,7 @@ private: ScalarEvolution &SE; const DataLayout &DL; + OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; const SCEV *ExitCount = nullptr; @@ -182,6 +223,7 @@ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); + ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); @@ -209,23 +251,24 @@ if (!HWLoopInfo.canAnalyze(*LI)) return false; - if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) || - ForceHardwareLoops) { - - // Allow overriding of the counter width and loop decrement value. - if (CounterBitWidth.getNumOccurrences()) - HWLoopInfo.CountType = - IntegerType::get(M->getContext(), CounterBitWidth); + if (!TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) && + !ForceHardwareLoops) { + reportHWLoopFailure("it's not profitable to create a hardware-loop", + "HWLoopNotProfitable", ORE, L); + return false; + } - if (LoopDecrement.getNumOccurrences()) - HWLoopInfo.LoopDecrement = - ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); + // Allow overriding of the counter width and loop decrement value. 
+ if (CounterBitWidth.getNumOccurrences()) + HWLoopInfo.CountType = + IntegerType::get(M->getContext(), CounterBitWidth); - MadeChange |= TryConvertLoop(HWLoopInfo); - return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); - } + if (LoopDecrement.getNumOccurrences()) + HWLoopInfo.LoopDecrement = + ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); - return false; + MadeChange |= TryConvertLoop(HWLoopInfo); + return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); } bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { @@ -234,8 +277,13 @@ LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, - ForceHardwareLoopPHI)) + ForceHardwareLoopPHI)) { + // TODO: there can be many reasons why a loop is not considered a + // candidate, so we should let isHardwareLoopCandidate fill in the + // reason, and report a better message here. + reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L); return false; + } assert( (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && @@ -249,7 +297,7 @@ if (!Preheader) return false; - HardwareLoop HWLoop(HWLoopInfo, *SE, *DL); + HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); HWLoop.Create(); ++NumHWLoops; return true; @@ -257,10 +305,13 @@ void HardwareLoop::Create() { LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); - + Value *LoopCountInit = InitLoopCount(); - if (!LoopCountInit) + if (!LoopCountInit) { + reportHWLoopFailure("could not safely create a loop count expression", + "HWLoopNotSafe", ORE, L); return; + } InsertIterationSetup(LoopCountInit); Index: llvm/test/CodeGen/ARM/O3-pipeline.ll =================================================================== --- llvm/test/CodeGen/ARM/O3-pipeline.ll +++ llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -51,6 +51,9 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: 
Scalar Evolution Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Hardware Loop Insertion ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager Index: llvm/test/Transforms/HardwareLoops/ARM/opt-remarks.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/HardwareLoops/ARM/opt-remarks.ll @@ -0,0 +1,24 @@ +; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -hardware-loops \ +; RUN: -disable-arm-loloops=true -pass-remarks-analysis=hardware-loops \ +; RUN: -S %s -o - 2>&1 | FileCheck %s + +; CHECK: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop + +define void @while_lt(i32 %i, i32 %N, i32* nocapture %A) { +entry: + %cmp4 = icmp ult i32 %i, %N + br i1 %cmp4, label %while.body, label %while.end + +while.body: + %i.addr.05 = phi i32 [ %inc, %while.body ], [ %i, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.addr.05 + store i32 %i.addr.05, i32* %arrayidx, align 4 + %inc = add nuw i32 %i.addr.05, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %while.end, label %while.body + +while.end: + ret void +} + + Index: llvm/test/Transforms/HardwareLoops/unconditional-latch.ll =================================================================== --- llvm/test/Transforms/HardwareLoops/unconditional-latch.ll +++ llvm/test/Transforms/HardwareLoops/unconditional-latch.ll @@ -1,6 +1,12 @@ ; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW ; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW -; RUN: opt 
-force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH +; +; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 \ +; RUN: -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true \ +; RUN: -hardware-loops -S -pass-remarks-analysis=hardware-loops %s -o - \ +; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LATCH + +; CHECK-LATCH: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate ; CHECK-LABEL: not_rotated ; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations