diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -75,8 +76,44 @@ STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); +#ifndef NDEBUG +static void debugHWLoopFailure(const StringRef DebugMsg, + Instruction *I) { + dbgs() << "HWLoops: " << DebugMsg; + if (I) + dbgs() << ' ' << *I; + else + dbgs() << '.'; + dbgs() << '\n'; +} +#endif + +static OptimizationRemarkAnalysis +createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) { + Value *CodeRegion = L->getHeader(); + DebugLoc DL = L->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); + R << "hardware-loop not created: "; + return R; +} + namespace { + void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) { + LLVM_DEBUG(debugHWLoopFailure(Msg, I)); + ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg); + } + using TTI = TargetTransformInfo; class HardwareLoops : public FunctionPass { @@ -97,6 +134,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } // Try to convert the given Loop into a hardware loop. @@ -110,6 +148,7 @@ ScalarEvolution *SE = nullptr; LoopInfo *LI = nullptr; const DataLayout *DL = nullptr; + OptimizationRemarkEmitter *ORE = nullptr; const TargetTransformInfo *TTI = nullptr; DominatorTree *DT = nullptr; bool PreserveLCSSA = false; @@ -143,8 +182,9 @@ public: HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, - const DataLayout &DL) : - SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()), + const DataLayout &DL, + OptimizationRemarkEmitter *ORE) : + SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), @@ -157,6 +197,7 @@ private: ScalarEvolution &SE; const DataLayout &DL; + OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; const SCEV *ExitCount = nullptr; @@ -182,6 +223,7 @@ DT = &getAnalysis().getDomTree(); TTI = &getAnalysis().getTTI(F); DL = &F.getParent()->getDataLayout(); + ORE = &getAnalysis().getORE(); auto *TLIP = getAnalysisIfAvailable(); LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); @@ -201,31 +243,39 @@ // converted and the parent loop doesn't support containing a hardware loop. bool HardwareLoops::TryConvertLoop(Loop *L) { // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - if (TryConvertLoop(*I)) + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + if (TryConvertLoop(*I)) { + reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", + ORE, L); return true; // Stop search. + } + } HardwareLoopInfo HWLoopInfo(L); - if (!HWLoopInfo.canAnalyze(*LI)) + if (!HWLoopInfo.canAnalyze(*LI)) { + reportHWLoopFailure("cannot analyze loop, irreducible control flow", + "HWLoopCannotAnalyze", ORE, L); return false; + } - if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) || - ForceHardwareLoops) { - - // Allow overriding of the counter width and loop decrement value. - if (CounterBitWidth.getNumOccurrences()) - HWLoopInfo.CountType = - IntegerType::get(M->getContext(), CounterBitWidth); + if (!ForceHardwareLoops && + !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) { + reportHWLoopFailure("it's not profitable to create a hardware-loop", + "HWLoopNotProfitable", ORE, L); + return false; + } - if (LoopDecrement.getNumOccurrences()) - HWLoopInfo.LoopDecrement = - ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); + // Allow overriding of the counter width and loop decrement value. + if (CounterBitWidth.getNumOccurrences()) + HWLoopInfo.CountType = + IntegerType::get(M->getContext(), CounterBitWidth); - MadeChange |= TryConvertLoop(HWLoopInfo); - return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); - } + if (LoopDecrement.getNumOccurrences()) + HWLoopInfo.LoopDecrement = + ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); - return false; + MadeChange |= TryConvertLoop(HWLoopInfo); + return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); } bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { @@ -234,8 +284,13 @@ LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, - ForceHardwareLoopPHI)) + ForceHardwareLoopPHI)) { + // TODO: there can be many reasons a loop is not considered a + // candidate, so we should let isHardwareLoopCandidate fill in the + // reason and then report a better message here. + reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L); return false; + } assert( (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && @@ -249,7 +304,7 @@ if (!Preheader) return false; - HardwareLoop HWLoop(HWLoopInfo, *SE, *DL); + HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); HWLoop.Create(); ++NumHWLoops; return true; @@ -257,10 +312,13 @@ void HardwareLoop::Create() { LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); - + Value *LoopCountInit = InitLoopCount(); - if (!LoopCountInit) + if (!LoopCountInit) { + reportHWLoopFailure("could not safely create a loop count expression", + "HWLoopNotSafe", ORE, L); return; + } InsertIterationSetup(LoopCountInit); diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -52,6 +52,9 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Hardware Loop Insertion ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager diff --git a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll --- a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll +++ b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll @@ -1,7 +1,8 @@ ; RUN: opt -mtriple=thumbv8.1m.main-arm-none-eabi -hardware-loops %s -S -o - | FileCheck %s -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - -pass-remarks-analysis=hardware-loops 2>&1 | FileCheck %s --check-prefix=CHECK-LLC ; RUN: opt -mtriple=thumbv8.1m.main -loop-unroll -unroll-remainder=false -S < %s | llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL +; CHECK-LLC: remark: :0:0: hardware-loop not created: it's not profitable to create a hardware-loop ; CHECK-LABEL: early_exit ; CHECK-NOT: llvm.set.loop.iterations ; CHECK-NOT: llvm.loop.decrement @@ -46,6 +47,7 @@ ; CHECK-NOT: [[LOOP_DEC1:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1) ; CHECK-NOT: br i1 [[LOOP_DEC1]], label %while.cond1.preheader.us, label %while.end7 +; CHECK-LLC: remark: :0:0: hardware-loop not created: nested hardware-loops not supported ; CHECK-LLC: nested: ; CHECK-LLC-NOT: mov lr, r1 ; CHECK-LLC: dls lr, r1 @@ -176,6 +178,9 @@ ret void } + +; CHECK-LLC: remark: :0:0: hardware-loop not created: loop is not a candidate +; CHECK-LLC: remark: :0:0: hardware-loop not created: nested hardware-loops not supported ; CHECK-LABEL: not_rotated ; CHECK-NOT: call void @llvm.set.loop.iterations ; CHECK-NOT: call i32 @llvm.loop.decrement.i32 diff --git a/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll --- a/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll +++ b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll @@ -1,6 +1,12 @@ ; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW ; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW -; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH +; +; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 \ +; RUN: -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true \ +; RUN: -hardware-loops -S -pass-remarks-analysis=hardware-loops %s -o - \ +; RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-LATCH + +; CHECK-LATCH: remark: :0:0: hardware-loop not created: loop is not a candidate ; CHECK-LABEL: not_rotated ; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations