diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -44,6 +44,8 @@ class ScalarEvolution; class TargetLibraryInfo; class TargetTransformInfo; +class PostDominatorTree; +class DependenceInfo; /// The adaptor from a function pass to a loop pass computes these analyses and /// makes them available to the loop passes "for free". Each loop pass is @@ -58,6 +60,8 @@ TargetLibraryInfo &TLI; TargetTransformInfo &TTI; MemorySSA *MSSA; + PostDominatorTree &PDT; + DependenceInfo &DI; }; /// Extern template declaration for the analysis set for this IR unit. @@ -156,6 +160,6 @@ /// Returns the minimum set of Analyses that all loop passes must preserve. PreservedAnalyses getLoopPassPreservedAnalyses(); -} +} // namespace llvm #endif // LLVM_ANALYSIS_LOOPANALYSISMANAGER_H diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -41,10 +41,12 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -266,14 +268,17 @@ MemorySSA *MSSA = UseMemorySSA ? (&AM.getResult(F).getMSSA()) : nullptr; - LoopStandardAnalysisResults LAR = {AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - AM.getResult(F), - MSSA}; + LoopStandardAnalysisResults LAR = { + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + AM.getResult(F), + MSSA, + AM.getResult(F), + AM.getResult(F)}; // Setup the loop analysis manager from its proxy. It is important that // this is only done when there are loops to process and we have built the @@ -353,6 +358,8 @@ PA.preserve(); // We also preserve the set of standard analyses. PA.preserve(); + PA.preserve(); + PA.preserve(); PA.preserve(); PA.preserve(); if (UseMemorySSA) @@ -396,6 +403,6 @@ PreservedAnalyses run(Loop &L, LoopAnalysisManager &, LoopStandardAnalysisResults &, LPMUpdater &); }; -} +} // namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPPASSMANAGER_H diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h --- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h +++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h @@ -47,20 +47,19 @@ /// Return true if all instructions (except the terminator) in \p BB can be /// safely moved before \p InsertPoint. bool isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint, - DominatorTree &DT, const PostDominatorTree &PDT, + DominatorTree &DT, PostDominatorTree &PDT, DependenceInfo &DI); /// Move instructions, in an order-preserving manner, from \p FromBB to the /// beginning of \p ToBB when proven safe. void moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, - DominatorTree &DT, - const PostDominatorTree &PDT, + DominatorTree &DT, PostDominatorTree &PDT, DependenceInfo &DI); /// Move instructions, in an order-preserving manner, from \p FromBB to the end /// of \p ToBB when proven safe. void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, - DominatorTree &DT, const PostDominatorTree &PDT, + DominatorTree &DT, PostDominatorTree &PDT, DependenceInfo &DI); } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -15,7 +15,9 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/IVDescriptors.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -137,7 +139,8 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, + PostDominatorTree *, DependenceInfo *); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. @@ -192,9 +195,10 @@ /// @param OrigLoopID The loop ID of the loop before the transformation. /// @param FollowupAttrs List of attribute names that contain attributes to be /// added to the new loop ID. -/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited -/// from the original loop. The following values -/// are considered: +/// @param InheritOptionsAttrsPrefix Selects which attributes should be +/// inherited +/// from the original loop. The following +/// values are considered: /// nullptr : Inherit all attributes from @p OrigLoopID. /// "" : Do not inherit any attribute from @p OrigLoopID; only use /// those specified by a followup attribute. @@ -219,7 +223,8 @@ /// Look for the loop attribute that disables all transformation heuristic. bool hasDisableAllTransformsHint(const Loop *L); -/// Look for the loop attribute that disables the LICM transformation heuristics. +/// Look for the loop attribute that disables the LICM transformation +/// heuristics. bool hasDisableLICMTransformsHint(const Loop *L); /// The mode sets how eager a transformation should be applied. @@ -429,8 +434,8 @@ /// Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. -Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, - LoopInfo *LI, LPPassManager *LPM); +Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, + LPPassManager *LPM); /// Add code that checks at runtime if the accessed arrays in \p PointerChecks /// overlap. diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -37,6 +37,7 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/Loads.h" @@ -48,6 +49,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -72,6 +74,7 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeMoverUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -107,8 +110,8 @@ // instead of the cross product using AA to identify aliasing of the memory // location we are interested in. static cl::opt -LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), - cl::desc("How many instruction to cross product using AA")); + LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0), + cl::desc("How many instruction to cross product using AA")); // Experimental option to allow imprecision in LICM in pathological cases, in // exchange for faster compile. This is to be removed if MemorySSA starts to @@ -138,10 +141,11 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop); -static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, +static void hoist(Instruction &I, DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, PostDominatorTree *PDT, + DependenceInfo *DI); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE); @@ -166,14 +170,17 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater *MSSAU, ScalarEvolution *SE); + MemorySSAUpdater *MSSAU, ScalarEvolution *SE, + DominatorTree *DT, PostDominatorTree *PDT, + DependenceInfo *DI); namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, PostDominatorTree *PDT, + DependenceInfo *DI); LoopInvariantCodeMotion(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap) @@ -212,15 +219,17 @@ // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - return LICM.runOnLoop(L, - &getAnalysis().getAAResults(), - &getAnalysis().getLoopInfo(), - &getAnalysis().getDomTree(), - &getAnalysis().getTLI( - *L->getHeader()->getParent()), - &getAnalysis().getTTI( - *L->getHeader()->getParent()), - SE ? &SE->getSE() : nullptr, MSSA, &ORE); + return LICM.runOnLoop( + L, &getAnalysis().getAAResults(), + &getAnalysis().getLoopInfo(), + &getAnalysis().getDomTree(), + &getAnalysis().getTLI( + *L->getHeader()->getParent()), + &getAnalysis().getTTI( + *L->getHeader()->getParent()), + SE ? &SE->getSE() : nullptr, MSSA, &ORE, + &getAnalysis().getPostDomTree(), + &getAnalysis().getDI()); } /// This transformation requires natural loop information & requires that @@ -252,7 +261,7 @@ LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, - AR.MSSA, &ORE)) + AR.MSSA, &ORE, &AR.PDT, &AR.DI)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -287,7 +296,8 @@ bool LoopInvariantCodeMotion::runOnLoop( Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, - MemorySSA *MSSA, OptimizationRemarkEmitter *ORE) { + MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, PostDominatorTree *PDT, + DependenceInfo *DI) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -351,9 +361,9 @@ CurAST.get(), MSSAU.get(), &SafetyInfo, Flags, ORE); Flags.IsSink = false; if (Preheader) - Changed |= - hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, - CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, ORE); + Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, + CurAST.get(), MSSAU.get(), SE, &SafetyInfo, Flags, + ORE, PDT, DI); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -532,7 +542,8 @@ DominatorTree *DT; Loop *CurLoop; MemorySSAUpdater *MSSAU; - + PostDominatorTree *PDT; + DependenceInfo *DI; // A map of blocks in the loop to the block their instructions will be hoisted // to. DenseMap HoistDestinationMap; @@ -756,7 +767,8 @@ AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, PostDominatorTree *PDT, + DependenceInfo *DI) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -814,7 +826,7 @@ I, DT, CurLoop, SafetyInfo, ORE, CurLoop->getLoopPreheader()->getTerminator())) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, - MSSAU, SE, ORE); + MSSAU, SE, ORE, PDT, DI); HoistedInstructions.push_back(&I); Changed = true; continue; @@ -840,7 +852,7 @@ eraseInstruction(I, *SafetyInfo, CurAST, MSSAU); hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), - SafetyInfo, MSSAU, SE, ORE); + SafetyInfo, MSSAU, SE, ORE, PDT, DI); HoistedInstructions.push_back(ReciprocalDivisor); Changed = true; continue; @@ -859,7 +871,7 @@ CurLoop->hasLoopInvariantOperands(&I) && MustExecuteWithoutWritesBefore(I)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, - MSSAU, SE, ORE); + MSSAU, SE, ORE, PDT, DI); HoistedInstructions.push_back(&I); Changed = true; continue; @@ -873,7 +885,7 @@ PN->setIncomingBlock( i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, - MSSAU, SE, ORE); + MSSAU, SE, ORE, PDT, DI); assert(DT->dominates(PN, BB) && "Conditional PHIs not expected"); Changed = true; continue; @@ -908,9 +920,10 @@ HoistPoint = Dominator->getTerminator(); } LLVM_DEBUG(dbgs() << "LICM rehoisting to " - << HoistPoint->getParent()->getName() - << ": " << *I << "\n"); - moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE); + << HoistPoint->getParent()->getName() << ": " << *I + << "\n"); + moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE, DT, PDT, + DI); HoistPoint = I; Changed = true; } @@ -985,19 +998,6 @@ } namespace { -/// Return true if-and-only-if we know how to (mechanically) both hoist and -/// sink a given instruction out of a loop. Does not address legality -/// concerns such as aliasing or speculation safety. -bool isHoistableAndSinkableInst(Instruction &I) { - // Only these instructions are hoistable/sinkable. - return (isa(I) || isa(I) || isa(I) || - isa(I) || isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || isa(I) || - isa(I) || isa(I)); -} /// Return true if all of the alias sets within this AST are known not to /// contain a Mod, or if MSSA knows thare are no MemoryDefs in the loop. bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU, @@ -1033,7 +1033,7 @@ } return true; } -} +} // namespace bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, @@ -1041,9 +1041,6 @@ bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *Flags, OptimizationRemarkEmitter *ORE) { - // If we don't understand the instruction, bail early. - if (!isHoistableAndSinkableInst(I)) - return false; MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; if (MSSA) @@ -1051,9 +1048,6 @@ // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { - if (!LI->isUnordered()) - return false; // Don't sink/hoist volatile or ordered atomic loads! - // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) @@ -1087,14 +1081,6 @@ return !Invalidated; } else if (CallInst *CI = dyn_cast(&I)) { - // Don't sink or hoist dbg info; it's legal, but not useful. - if (isa(I)) - return false; - - // Don't sink calls which can throw. - if (CI->mayThrow()) - return false; - using namespace PatternMatch; if (match(CI, m_Intrinsic())) // Assumes don't actually alias anything or throw @@ -1160,11 +1146,6 @@ } else // MSSAU return isOnlyMemoryAccess(FI, CurLoop, MSSAU); } else if (auto *SI = dyn_cast(&I)) { - if (!SI->isUnordered()) - return false; // Don't sink/hoist volatile or ordered atomic store! - - // We can only hoist a store that we can prove writes a value which is not - // read or overwritten within the loop. For those cases, we fallback to // load store promotion instead. TODO: We can extend this to cases where // there is exactly one write to the location and that write dominates an // arbitrary number of reads in the loop. @@ -1405,8 +1386,11 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, - MemorySSAUpdater *MSSAU, - ScalarEvolution *SE) { + MemorySSAUpdater *MSSAU, ScalarEvolution *SE, + DominatorTree *DT, PostDominatorTree *PDT, + DependenceInfo *DI) { + if (!isSafeToMoveBefore(I, Dest, *DT, *PDT, *DI)) + return; SafetyInfo.removeInstruction(&I); SafetyInfo.insertInstructionTo(&I, Dest.getParent()); I.moveBefore(&Dest); @@ -1606,7 +1590,7 @@ // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. - SmallSetVector Users(I.user_begin(), I.user_end()); + SmallSetVector Users(I.user_begin(), I.user_end()); for (auto *UI : Users) { auto *User = cast(UI); @@ -1629,15 +1613,16 @@ /// When an instruction is found to only use loop invariant operands that /// is safe to hoist, this instruction is called to do the dirty work. /// -static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, +static void hoist(Instruction &I, DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, PostDominatorTree *PDT, + DependenceInfo *DI) { LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I << "\n"); ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " - << ore::NV("Inst", &I); + return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) + << "hoisting " << ore::NV("Inst", &I); }); // Metadata can be dependent on conditions we are hoisting above. @@ -1653,10 +1638,12 @@ if (isa(I)) // Move the new node to the end of the phi list in the destination block. - moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE); + moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE, + DT, PDT, DI); else // Move the new node to the destination block, before its terminator. - moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE); + moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE, DT, + PDT, DI); // Apply line 0 debug locations when we are moving instructions to different // basic blocks because we want to avoid jumpy line tables. @@ -1804,7 +1791,6 @@ } }; - /// Return true iff we can prove that a caller of this function can not inspect /// the contents of the provided object in a well defined program. bool isKnownNonEscaping(Value *Object, const TargetLibraryInfo *TLI) { @@ -1824,7 +1810,7 @@ // weaker condition and handle only AllocLikeFunctions (which are // known to be noalias). TODO return isAllocLikeFn(Object, TLI) && - !PointerMayBeCaptured(Object, true, true); + !PointerMayBeCaptured(Object, true, true); } } // namespace @@ -1944,13 +1930,12 @@ unsigned InstAlignment = Load->getAlignment(); if (!InstAlignment) - InstAlignment = - MDL.getABITypeAlignment(Load->getType()); + InstAlignment = MDL.getABITypeAlignment(Load->getType()); // Note that proving a load safe to speculate requires proving // sufficient alignment at the target location. Proving it guaranteed // to execute does as well. Thus we can increase our guaranteed - // alignment as well. + // alignment as well. if (!DereferenceableInPH || (InstAlignment > Alignment)) if (isSafeToExecuteUnconditionally(*Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator())) { @@ -2031,8 +2016,7 @@ // lower it. We're only guaranteed to be able to lower naturally aligned // atomics. auto *SomePtrElemType = SomePtr->getType()->getPointerElementType(); - if (SawUnorderedAtomic && - Alignment < MDL.getTypeStoreSize(SomePtrElemType)) + if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(SomePtrElemType)) return false; // If we couldn't prove we can hoist the load, bail. diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -40,6 +40,7 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -673,8 +674,7 @@ if (!L->getExitBlock()) return fail("MultipleExitBlocks", "multiple exit blocks"); if (!L->isLoopSimplifyForm()) - return fail("NotLoopSimplifyForm", - "loop is not in loop-simplify form"); + return fail("NotLoopSimplifyForm", "loop is not in loop-simplify form"); BasicBlock *PH = L->getLoopPreheader(); @@ -884,8 +884,9 @@ // failed. if (Forced) Ctx.diagnose(DiagnosticInfoOptimizationFailure( - *F, L->getStartLoc(), "loop not distributed: failed " - "explicitly specified loop distribution")); + *F, L->getStartLoc(), + "loop not distributed: failed " + "explicitly specified loop distribution")); return false; } @@ -958,7 +959,8 @@ DominatorTree *DT; ScalarEvolution *SE; OptimizationRemarkEmitter *ORE; - + PostDominatorTree *PDT; + DependenceAnalysis *DI; /// Indicates whether distribution is forced to be enabled/disabled for /// the loop. /// @@ -1054,11 +1056,14 @@ auto &AC = AM.getResult(F); auto &TTI = AM.getResult(F); auto &TLI = AM.getResult(F); + auto &PDT = AM.getResult(F); + auto &DI = AM.getResult(F); auto &LAM = AM.getResult(F).getManager(); std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, nullptr}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, nullptr, PDT, DI}; return LAM.getResult(L, AR); }; @@ -1085,4 +1090,6 @@ INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) -FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); } +FunctionPass *llvm::createLoopDistributePass() { + return new LoopDistributeLegacy(); +} diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -30,12 +30,14 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -165,7 +167,7 @@ public: LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, DominatorTree *DT, BlockFrequencyInfo *BFI, - ProfileSummaryInfo* PSI) + ProfileSummaryInfo *PSI) : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {} /// Look through the loop-carried and loop-independent dependences in @@ -386,9 +388,8 @@ // Collect the pointers of the candidate loads. // FIXME: SmallPtrSet does not work with std::inserter. std::set CandLoadPtrs; - transform(Candidates, - std::inserter(CandLoadPtrs, CandLoadPtrs.begin()), - std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr)); + transform(Candidates, std::inserter(CandLoadPtrs, CandLoadPtrs.begin()), + std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr)); const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks(); SmallVector Checks; @@ -548,9 +549,9 @@ auto *HeaderBB = L->getHeader(); auto *F = HeaderBB->getParent(); - bool OptForSize = F->hasOptSize() || - llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI, - PGSOQueryType::IRPass); + bool OptForSize = + F->hasOptSize() || llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI, + PGSOQueryType::IRPass); if (OptForSize) { LLVM_DEBUG( dbgs() << "Versioning is needed but not allowed when optimizing " @@ -643,9 +644,9 @@ auto &LAA = getAnalysis(); auto &DT = getAnalysis().getDomTree(); auto *PSI = &getAnalysis().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis().getBFI() : - nullptr; + auto *BFI = (PSI && PSI->hasProfileSummary()) + ? &getAnalysis().getBFI() + : nullptr; // Process each loop nest in the function. return eliminateLoadsAcrossLoops( @@ -696,10 +697,13 @@ auto &TLI = AM.getResult(F); auto &AA = AM.getResult(F); auto &AC = AM.getResult(F); + auto &PDT = AM.getResult(F); + auto &DI = AM.getResult(F); auto &MAMProxy = AM.getResult(F); auto *PSI = MAMProxy.getCachedResult(*F.getParent()); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &AM.getResult(F) : nullptr; + auto *BFI = (PSI && PSI->hasProfileSummary()) + ? &AM.getResult(F) + : nullptr; MemorySSA *MSSA = EnableMSSALoopDependency ? &AM.getResult(F).getMSSA() : nullptr; @@ -707,7 +711,8 @@ auto &LAM = AM.getResult(F).getManager(); bool Changed = eliminateLoadsAcrossLoops( F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, + TLI, TTI, MSSA, PDT, DI}; return LAM.getResult(L, AR); }); diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp --- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -330,7 +330,7 @@ } bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, - DominatorTree &DT, const PostDominatorTree &PDT, + DominatorTree &DT, PostDominatorTree &PDT, DependenceInfo &DI) { // check can we move Instruction if (!isSafeToMove(I)) @@ -410,7 +410,7 @@ } bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint, - DominatorTree &DT, const PostDominatorTree &PDT, + DominatorTree &DT, PostDominatorTree &PDT, DependenceInfo &DI) { return llvm::all_of(BB, [&](Instruction &I) { if (BB.getTerminator() == &I) @@ -422,7 +422,7 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, - const PostDominatorTree &PDT, + PostDominatorTree &PDT, DependenceInfo &DI) { for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) { Instruction *MovePos = ToBB.getFirstNonPHIOrDbg(); @@ -436,8 +436,7 @@ } void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, - DominatorTree &DT, - const PostDominatorTree &PDT, + DominatorTree &DT, PostDominatorTree &PDT, DependenceInfo &DI) { Instruction *MovePos = ToBB.getTerminator(); while (FromBB.size() > 1) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -82,6 +82,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" @@ -89,6 +90,7 @@ #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -700,6 +702,12 @@ /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; + /// Post Dominator Tree + PostDominatorTree *PDT; + + /// Dependence Info + DependenceInfo *DI + /// LoopVersioning. It's only set up (non-null) if memchecks were /// used. /// @@ -8071,6 +8079,8 @@ auto &AC = AM.getResult(F); auto &DB = AM.getResult(F); auto &ORE = AM.getResult(F); + auto &PDT = AM.getResult(F); + auto &DI = AM.getResult(F); MemorySSA *MSSA = EnableMSSALoopDependency ? &AM.getResult(F).getMSSA() : nullptr; @@ -8078,7 +8088,7 @@ auto &LAM = AM.getResult(F).getManager(); std::function GetLAA = [&](Loop &L) -> const LoopAccessInfo & { - LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA, PDT, DI}; return LAM.getResult(L, AR); }; auto &MAMProxy = AM.getResult(F);