Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -112,7 +112,7 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - bool, OptimizationRemarkEmitter *); + bool, int &, OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -124,7 +124,7 @@ /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, + MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, int &, OptimizationRemarkEmitter *); /// This function deletes dead loops. The caller of this function needs to @@ -277,6 +277,7 @@ Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, bool NoOfMemAccessesTooLarge, + int *EnableLicmCapCounter = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -106,17 +106,19 @@ LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), cl::desc("How many instruction to cross product using AA")); -// Experimental option to allow imprecision in LICM (use MemorySSA cap) in -// pathological cases, in exchange for faster compile. This is to be removed -// if MemorySSA starts to address the same issue. This flag applies only when -// LICM uses MemorySSA instead on AliasSetTracker. 
When the flag is disabled -// (default), LICM calls MemorySSAWalker's getClobberingMemoryAccess, which -// gets perfect accuracy. When flag is enabled, LICM will call into MemorySSA's -// getDefiningAccess, which may not be precise, since optimizeUses is capped. -static cl::opt EnableLicmCap( - "enable-licm-cap", cl::init(false), cl::Hidden, - cl::desc("Enable imprecision in LICM (uses MemorySSA cap) in " - "pathological cases, in exchange for faster compile")); +// Experimental option to allow imprecision in LICM in pathological cases, in +// exchange for faster compile. This is to be removed if MemorySSA starts to +// address the same issue. This flag applies only when LICM uses MemorySSA +// instead of AliasSetTracker. LICM calls MemorySSAWalker's +// getClobberingMemoryAccess, up to the value of the Cap, getting perfect +// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess, +// which may not be precise, since optimizeUses is capped. The result is +// correct, but we may not get as "far up" as possible to get which access is +// clobbering the one queried. +static cl::opt EnableLicmCap( + "enable-licm-cap", cl::init(100), cl::Hidden, + cl::desc("Enable imprecision in LICM in pathological cases, in exchange " + "for faster compile. Caps the MemorySSA clobbering calls.")); // Experimentally, memory promotion carries less importance than sinking and // hoisting. 
Limit when we do promotion when using MemorySSA, in order to save @@ -149,7 +151,8 @@ AliasSetTracker *CurAST, Loop *CurLoop, AliasAnalysis *AA); static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop); + Loop *CurLoop, + int &EnableLicmCapCounter); static Instruction *CloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); @@ -307,6 +310,7 @@ std::unique_ptr CurAST; std::unique_ptr MSSAU; bool NoOfMemAccTooLarge = false; + int EnableLicmCapCounter = 0; if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); @@ -352,11 +356,11 @@ if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, - NoOfMemAccTooLarge, ORE); + NoOfMemAccTooLarge, EnableLicmCapCounter, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, - NoOfMemAccTooLarge, ORE); + NoOfMemAccTooLarge, EnableLicmCapCounter, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -461,6 +465,7 @@ TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge, + int &EnableLicmCapCounter, OptimizationRemarkEmitter *ORE) { // Verify inputs. 
@@ -505,7 +510,7 @@ bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, - NoOfMemAccTooLarge, ORE) && + NoOfMemAccTooLarge, &EnableLicmCapCounter, ORE) && !I.mayHaveSideEffects()) { if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE, FreeInLoop)) { if (!FreeInLoop) { @@ -760,6 +765,7 @@ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge, + int &EnableLicmCapCounter, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -813,7 +819,7 @@ // to that block. if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, - NoOfMemAccTooLarge, ORE) && + NoOfMemAccTooLarge, &EnableLicmCapCounter, ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { @@ -1041,12 +1047,15 @@ MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, bool NoOfMemAccTooLarge, + int *EnableLicmCapCounter, OptimizationRemarkEmitter *ORE) { // If we don't understand the instruction, bail early. if (!isHoistableAndSinkableInst(I)) return false; MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; + assert((!MSSA || EnableLicmCapCounter != nullptr) && + "Counter cannot be null."); // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { @@ -1073,7 +1082,8 @@ CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop); + MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, + *EnableLicmCapCounter); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. 
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1118,7 +1128,8 @@ CurAST, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop); + MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, + *EnableLicmCapCounter); if (Invalidated) return false; } @@ -1177,8 +1188,9 @@ } else { // MSSAU if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) return true; - if (!EnableLicmCap) { + if (*EnableLicmCapCounter < EnableLicmCap) { auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); + (*EnableLicmCapCounter)++; // If there are no clobbering Defs in the loop, we still need to check // for interfering Uses. If there are more accesses than the Promotion // cap, give up, we're not walking a list that long. Otherwise, walk the @@ -2195,13 +2207,16 @@ } static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, - Loop *CurLoop) { + Loop *CurLoop, + int &EnableLicmCapCounter) { MemoryAccess *Source; // See declaration of EnableLicmCap for usage details. - if (EnableLicmCap) + if (EnableLicmCapCounter >= EnableLicmCap) Source = MU->getDefiningAccess(); - else + else { Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); + EnableLicmCapCounter++; + } return !MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()); }