Index: llvm/trunk/include/llvm/Passes/PassBuilder.h =================================================================== --- llvm/trunk/include/llvm/Passes/PassBuilder.h +++ llvm/trunk/include/llvm/Passes/PassBuilder.h @@ -80,6 +80,14 @@ /// Tuning option to enable/disable loop vectorization. Its default value is /// that of the flag: `-vectorize-loops`. bool LoopVectorization; + + /// Tuning option to cap the number of calls to retrive clobbering accesses in + /// MemorySSA, in LICM. + unsigned LicmMssaOptCap; + + /// Tuning option to disable promotion to scalars in LICM with MemorySSA, if + /// the number of access is too large. + unsigned LicmMssaNoAccForPromotionCap; }; /// This class provides access to building LLVM's passes. Index: llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h =================================================================== --- llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ llvm/trunk/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -156,6 +156,8 @@ bool PrepareForThinLTO; bool PerformThinLTO; bool DivergentTarget; + unsigned LicmMssaOptCap; + unsigned LicmMssaNoAccForPromotionCap; /// Enable profile instrumentation pass. bool EnablePGOInstrGen; Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -137,6 +137,8 @@ // LICM - This pass is a loop invariant code motion and memory promotion pass. // Pass *createLICMPass(); +Pass *createLICMPass(unsigned LicmMssaOptCap, + unsigned LicmMssaNoAccForPromotionCap); //===----------------------------------------------------------------------===// // Index: llvm/trunk/include/llvm/Transforms/Scalar/LICM.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/LICM.h +++ llvm/trunk/include/llvm/Transforms/Scalar/LICM.h @@ -38,9 +38,21 @@ namespace llvm { +extern cl::opt SetLicmMssaOptCap; +extern cl::opt SetLicmMssaNoAccForPromotionCap; + /// Performs Loop Invariant Code Motion Pass. class LICMPass : public PassInfoMixin { + unsigned LicmMssaOptCap; + unsigned LicmMssaNoAccForPromotionCap; + public: + LICMPass() + : LicmMssaOptCap(SetLicmMssaOptCap), + LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {} + LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) + : LicmMssaOptCap(LicmMssaOptCap), + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; Index: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h @@ -101,6 +101,13 @@ bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); +struct SinkAndHoistLICMFlags { + bool NoOfMemAccTooLarge; + unsigned LicmMssaOptCounter; + unsigned LicmMssaOptCap; + unsigned LicmMssaNoAccForPromotionCap; +}; + /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in /// reverse depth first order w.r.t the DominatorTree. This allows us to visit @@ -112,7 +119,7 @@ bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - bool, int &, OptimizationRemarkEmitter *); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -124,8 +131,8 @@ /// ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - MemorySSAUpdater *, ICFLoopSafetyInfo *, bool, int &, - OptimizationRemarkEmitter *); + MemorySSAUpdater *, ICFLoopSafetyInfo *, + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. @@ -276,8 +283,7 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, - bool NoOfMemAccessesTooLarge, - int *LicmMssaOptCap = nullptr, + SinkAndHoistLICMFlags *LICMFlags = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. Index: llvm/trunk/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/trunk/lib/Passes/PassBuilder.cpp +++ llvm/trunk/lib/Passes/PassBuilder.cpp @@ -218,6 +218,8 @@ PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = EnableLoopInterleaving; LoopVectorization = EnableLoopVectorization; + LicmMssaOptCap = SetLicmMssaOptCap; + LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; } extern cl::opt EnableHotColdSplit; @@ -446,7 +448,7 @@ // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); - LPM1.addPass(LICMPass()); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); @@ -506,7 +508,9 @@ FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); FPM.addPass(DSEPass()); - FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); + FPM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + DebugLogging)); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); @@ -907,7 +911,9 @@ OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); OptimizePM.addPass(RequireAnalysisPass()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging)); + OptimizePM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + DebugLogging)); // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. Index: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Vectorize.h" @@ -170,6 +171,8 @@ // the LoopVectorize pass, to be consistent with the new pass manager. RerollLoops = RunLoopRerolling; NewGVN = RunNewGVN; + LicmMssaOptCap = SetLicmMssaOptCap; + LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; DisableGVNLoadPRE = false; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; VerifyInput = false; @@ -370,7 +373,7 @@ } // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); else @@ -415,7 +418,7 @@ MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass()); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -639,7 +642,7 @@ // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); } // We add a fresh GlobalsModRef run at this point. This is particularly @@ -696,7 +699,7 @@ MPM.add(createEarlyCSEPass()); MPM.add(createCorrelatedValuePropagationPass()); addInstructionCombiningPass(MPM); - MPM.add(createLICMPass()); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); @@ -738,7 +741,7 @@ // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. - MPM.add(createLICMPass()); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); } MPM.add(createWarnMissedTransformationsPass()); @@ -907,7 +910,7 @@ PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. Index: llvm/trunk/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp @@ -115,7 +115,7 @@ // which may not be precise, since optimizeUses is capped. The result is // correct, but we may not get as "far up" as possible to get which access is // clobbering the one queried. -static cl::opt LicmMssaOptCap( +cl::opt llvm::SetLicmMssaOptCap( "licm-mssa-optimization-cap", cl::init(100), cl::Hidden, cl::desc("Enable imprecision in LICM in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls.")); @@ -123,8 +123,8 @@ // Experimentally, memory promotion carries less importance than sinking and // hoisting. Limit when we do promotion when using MemorySSA, in order to save // compile time. -static cl::opt AccessCapForMSSAPromotion( - "max-acc-licm-promotion", cl::init(250), cl::Hidden, +cl::opt llvm::SetLicmMssaNoAccForPromotionCap( + "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden, cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " "effect. When MSSA in LICM is enabled, then this is the maximum " "number of accesses allowed to be present in a loop in order to " @@ -151,7 +151,7 @@ AliasAnalysis *AA); static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, - int &LicmMssaOptCounter); + SinkAndHoistLICMFlags &Flags); static Instruction *CloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); @@ -172,9 +172,15 @@ OptimizationRemarkEmitter *ORE, bool DeleteAST); ASTrackerMapTy &getLoopToAliasSetMap() { return LoopToAliasSetMap; } + LoopInvariantCodeMotion(unsigned LicmMssaOptCap, + unsigned LicmMssaNoAccForPromotionCap) + : LicmMssaOptCap(LicmMssaOptCap), + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} private: ASTrackerMapTy LoopToAliasSetMap; + unsigned LicmMssaOptCap; + unsigned LicmMssaNoAccForPromotionCap; std::unique_ptr collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AliasAnalysis *AA); @@ -185,7 +191,10 @@ struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid - LegacyLICMPass() : LoopPass(ID) { + LegacyLICMPass( + unsigned LicmMssaOptCap = SetLicmMssaOptCap, + unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) + : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); } @@ -267,7 +276,7 @@ report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not " "cached at a higher level"); - LoopInvariantCodeMotion LICM; + LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, ORE, true)) return PreservedAnalyses::all(); @@ -291,6 +300,10 @@ false) Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } +Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, + unsigned LicmMssaNoAccForPromotionCap) { + return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); +} /// Hoist expressions out of the specified loop. Note, alias info for inner /// loop is not preserved so it is not a good idea to run LICM multiple @@ -309,7 +322,7 @@ std::unique_ptr CurAST; std::unique_ptr MSSAU; bool NoOfMemAccTooLarge = false; - int LicmMssaOptCounter = 0; + unsigned LicmMssaOptCounter = 0; if (!MSSA) { LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n"); @@ -324,7 +337,7 @@ for (const auto &MA : *Accesses) { (void)MA; AccessCapCount++; - if (AccessCapCount > AccessCapForMSSAPromotion) { + if (AccessCapCount > LicmMssaNoAccForPromotionCap) { NoOfMemAccTooLarge = true; break; } @@ -351,15 +364,14 @@ // that we are guaranteed to see definitions before we see uses. This allows // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. - // + SinkAndHoistLICMFlags Flags = {NoOfMemAccTooLarge, LicmMssaOptCounter, + LicmMssaOptCap, LicmMssaNoAccForPromotionCap}; if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, - NoOfMemAccTooLarge, LicmMssaOptCounter, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST.get(), MSSAU.get(), &SafetyInfo, - NoOfMemAccTooLarge, LicmMssaOptCounter, ORE); + CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -463,8 +475,9 @@ DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge, - int &LicmMssaOptCounter, OptimizationRemarkEmitter *ORE) { + ICFLoopSafetyInfo *SafetyInfo, + SinkAndHoistLICMFlags &Flags, + OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -507,8 +520,8 @@ // bool FreeInLoop = false; if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, - NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, + ORE) && !I.mayHaveSideEffects()) { if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) { if (!FreeInLoop) { @@ -762,8 +775,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, - ICFLoopSafetyInfo *SafetyInfo, bool NoOfMemAccTooLarge, - int &LicmMssaOptCounter, + ICFLoopSafetyInfo *SafetyInfo, + SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -816,8 +829,8 @@ // and we have accurately duplicated the control flow from the loop header // to that block. if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, - NoOfMemAccTooLarge, &LicmMssaOptCounter, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags, + ORE) && isSafeToExecuteUnconditionally( I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { @@ -1048,7 +1061,7 @@ Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, - bool NoOfMemAccTooLarge, int *LicmMssaOptCounter, + SinkAndHoistLICMFlags *Flags, OptimizationRemarkEmitter *ORE) { // If we don't understand the instruction, bail early. if (!isHoistableAndSinkableInst(I)) @@ -1056,7 +1069,7 @@ MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; if (MSSA) - assert(LicmMssaOptCounter != nullptr && "Counter cannot be null."); + assert(Flags != nullptr && "Flags cannot be null."); // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { @@ -1083,8 +1096,7 @@ CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( - MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, - *LicmMssaOptCounter); + MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, *Flags); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) @@ -1130,7 +1142,7 @@ else Invalidated = pointerInvalidatedByLoopWithMSSA( MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, - *LicmMssaOptCounter); + *Flags); if (Invalidated) return false; } @@ -1191,10 +1203,10 @@ return true; // If there are more accesses than the Promotion cap, give up, we're not // walking a list that long. - if (NoOfMemAccTooLarge) + if (Flags->NoOfMemAccTooLarge) return false; // Check store only if there's still "quota" to check clobber. - if (*LicmMssaOptCounter >= LicmMssaOptCap) + if (Flags->LicmMssaOptCounter >= Flags->LicmMssaOptCap) return false; // If there are interfering Uses (i.e. their defining access is in the // loop), or ordered loads (stored as Defs!), don't move this store. @@ -1218,7 +1230,7 @@ } auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); - (*LicmMssaOptCounter)++; + Flags->LicmMssaOptCounter++; // If there are no clobbering Defs in the loop, store is safe to hoist. return MSSA->isLiveOnEntryDef(Source) || !CurLoop->contains(Source->getBlock()); @@ -2236,14 +2248,14 @@ static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, - int &LicmMssaOptCounter) { + SinkAndHoistLICMFlags &Flags) { MemoryAccess *Source; - // See declaration of LicmMssaOptCap for usage details. - if (LicmMssaOptCounter >= LicmMssaOptCap) + // See declaration of SetLicmMssaOptCap for usage details. + if (Flags.LicmMssaOptCounter >= Flags.LicmMssaOptCap) Source = MU->getDefiningAccess(); else { Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); - LicmMssaOptCounter++; + Flags.LicmMssaOptCounter++; } return !MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()); Index: llvm/trunk/lib/Transforms/Scalar/LoopSink.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopSink.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopSink.cpp @@ -303,7 +303,7 @@ // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false, false)) + if (!canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr, false)) continue; if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI)) Changed = true;