Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h @@ -99,7 +99,8 @@ // produces an i1 to guard the loop entry. bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop = false, - bool ForceHardwareLoopPHI = false); + bool ForceHardwareLoopPHI = false, + bool ForceGuardLoopEntry = false); bool canAnalyze(LoopInfo &LI); }; Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp +++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp @@ -20,6 +20,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include using namespace llvm; @@ -55,7 +58,8 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop, - bool ForceHardwareLoopPHI) { + bool ForceHardwareLoopPHI, + bool ForceGuardLoopEntry) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -134,6 +138,33 @@ if (!ExitBlock) return false; + + BasicBlock *Preheader = L->getLoopPreheader(); + + // If we don't have a preheader, then insert one. 
+ if (!Preheader) + Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, true); + if (!Preheader) + return false; + + // Make sure we have a valid Loop Count + if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) + ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); + + ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); + + BasicBlock *BB = L->getLoopPreheader(); + + if (ForceGuardLoopEntry && BB->getSinglePredecessor() && + cast(BB->getTerminator())->isUnconditional()) + BB = BB->getSinglePredecessor(); + + if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { + LLVM_DEBUG(dbgs() << "Not a Hardware Loop: unsafe to expand ExitCount " + << *ExitCount << "\n"); + return false; + } + return true; } Index: llvm/trunk/lib/CodeGen/HardwareLoops.cpp =================================================================== --- llvm/trunk/lib/CodeGen/HardwareLoops.cpp +++ llvm/trunk/lib/CodeGen/HardwareLoops.cpp @@ -15,7 +15,6 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" #include "llvm/PassRegistry.h" #include "llvm/PassSupport.h" #include "llvm/ADT/Statistic.h" @@ -36,10 +35,8 @@ #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopUtils.h" #define DEBUG_TYPE "hardware-loops" @@ -112,7 +109,6 @@ const DataLayout *DL = nullptr; const TargetTransformInfo *TTI = nullptr; DominatorTree *DT = nullptr; - bool PreserveLCSSA = false; AssumptionCache *AC = nullptr; TargetLibraryInfo *LibInfo = nullptr; Module *M = nullptr; @@ -184,7 +180,6 @@ DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable(); LibInfo = TLIP ? 
&TLIP->getTLI() : nullptr; - PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); AC = &getAnalysis().getAssumptionCache(F); M = F.getParent(); @@ -230,25 +225,19 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { - Loop *L = HWLoopInfo.L; - LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); + LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " + << *HWLoopInfo.L); if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, - ForceHardwareLoopPHI)) + ForceHardwareLoopPHI, + ForceGuardLoopEntry)) return false; assert( (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && "Hardware Loop must have set exit info."); - BasicBlock *Preheader = L->getLoopPreheader(); - - // If we don't have a preheader, then insert one. - if (!Preheader) - Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA); - if (!Preheader) - return false; - + // Now start converting... HardwareLoop HWLoop(HWLoopInfo, *SE, *DL); HWLoop.Create(); ++NumHWLoops; @@ -257,10 +246,10 @@ void HardwareLoop::Create() { LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); - + Value *LoopCountInit = InitLoopCount(); - if (!LoopCountInit) - return; + + assert(LoopCountInit && "Hardware Loop must have a loop count"); InsertIterationSetup(LoopCountInit); @@ -320,32 +309,22 @@ // loop counter and tests that is not zero? SCEVExpander SCEVE(SE, DL, "loopcnt"); - if (!ExitCount->getType()->isPointerTy() && - ExitCount->getType() != CountType) - ExitCount = SE.getZeroExtendExpr(ExitCount, CountType); - - ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType)); // If we're trying to use the 'test and set' form of the intrinsic, we need // to replace a conditional branch that is controlling entry to the loop. It // is likely (guaranteed?) that the preheader has an unconditional branch to // the loop header, so also check if it has a single predecessor. 
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount, - SE.getZero(ExitCount->getType()))) { - LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); + SE.getZero(ExitCount->getType()))) UseLoopGuard |= ForceGuardLoopEntry; - } else + else UseLoopGuard = false; BasicBlock *BB = L->getLoopPreheader(); if (UseLoopGuard && BB->getSinglePredecessor() && - cast(BB->getTerminator())->isUnconditional()) + cast(BB->getTerminator())->isUnconditional()) { + LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n"); BB = BB->getSinglePredecessor(); - - if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) { - LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount " - << *ExitCount << "\n"); - return nullptr; } Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,