diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -26,6 +26,7 @@ class DominatorTree; class Instruction; class Loop; +class MemoryDependenceResults; class PredicatedScalarEvolution; class ScalarEvolution; class SCEV; @@ -193,7 +194,8 @@ static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, MapVector &SinkAfter, - DominatorTree *DT); + DominatorTree *DT, + MemoryDependenceResults *MDR); RecurKind getRecurrenceKind() const { return Kind; } diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -41,6 +41,7 @@ class Function; class Loop; class LoopInfo; +class MemoryDependenceResults; class Metadata; class OptimizationRemarkEmitter; class PredicatedScalarEvolution; @@ -247,10 +248,11 @@ OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, - AssumptionCache *AC, BlockFrequencyInfo *BFI, + AssumptionCache *AC, MemoryDependenceResults *MDR, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs), - ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), + ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), MDR(MDR), BFI(BFI), PSI(PSI) {} /// ReductionList contains the reduction descriptors for all @@ -543,6 +545,10 @@ /// which a reduction can be computed. AssumptionCache *AC; + /// The memory dependence analysis is used to determine if stores can be + /// reordered past loads for fixed-order recurrences.
+ MemoryDependenceResults *MDR; + /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet MaskedOp; diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -71,6 +71,7 @@ class Loop; class LoopAccessInfoManager; class LoopInfo; +class MemoryDependenceResults; class OptimizationRemarkEmitter; class ProfileSummaryInfo; class ScalarEvolution; @@ -179,6 +180,7 @@ TargetLibraryInfo *TLI; DemandedBits *DB; AAResults *AA; + MemoryDependenceResults *MDR; AssumptionCache *AC; LoopAccessInfoManager *LAIs; OptimizationRemarkEmitter *ORE; @@ -193,8 +195,9 @@ runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_, - AAResults &AA_, AssumptionCache &AC_, LoopAccessInfoManager &LAIs_, - OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_); + AAResults &AA_, AssumptionCache &AC_, MemoryDependenceResults &MDR_, + LoopAccessInfoManager &LAIs_, OptimizationRemarkEmitter &ORE_, + ProfileSummaryInfo *PSI_); bool processLoop(Loop *L); }; diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -13,6 +13,7 @@ #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -923,7 +924,8 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence( PHINode *Phi, Loop *TheLoop, - MapVector &SinkAfter, DominatorTree *DT) { + MapVector &SinkAfter, DominatorTree *DT, + 
MemoryDependenceResults *MDR) { // Ensure the phi node is in the loop header and has two incoming values. if (Phi->getParent() != TheLoop->getHeader() || diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -722,7 +722,7 @@ } if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, - SinkAfter, DT)) { + SinkAfter, DT, MDR)) { AllowedExit.insert(Phi); FixedOrderRecurrences.insert(Phi); continue; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -86,6 +86,7 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -2216,9 +2217,10 @@ auto *DB = &getAnalysis().getDemandedBits(); auto *ORE = &getAnalysis().getORE(); auto *PSI = &getAnalysis().getPSI(); + auto *MDR = &getAnalysis().getMemDep(); return Impl - .runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, LAIs, *ORE, + .runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, *MDR, LAIs, *ORE, PSI) .MadeAnyChange; } @@ -2235,6 +2237,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); // We currently do not preserve loopinfo/dominator analyses with outer loop // vectorization. Until this is addressed, mark these analyses as preserved @@ -10181,7 +10184,7 @@ // Check if it is legal to vectorize the loop. 
LoopVectorizationRequirements Requirements; LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, *LAIs, LI, ORE, - &Requirements, &Hints, DB, AC, BFI, PSI); + &Requirements, &Hints, DB, AC, MDR, BFI, PSI); if (!LVL.canVectorize(EnableVPlanNativePath)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); Hints.emitRemarkWithHints(); @@ -10541,8 +10544,8 @@ Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_, AAResults &AA_, AssumptionCache &AC_, - LoopAccessInfoManager &LAIs_, OptimizationRemarkEmitter &ORE_, - ProfileSummaryInfo *PSI_) { + MemoryDependenceResults &MDR_, LoopAccessInfoManager &LAIs_, + OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) { SE = &SE_; LI = &LI_; TTI = &TTI_; @@ -10555,6 +10558,7 @@ DB = &DB_; ORE = &ORE_; PSI = PSI_; + MDR = &MDR_; // Don't attempt if // 1. the target claims to have no vector registers, and @@ -10619,13 +10623,14 @@ auto &AC = AM.getResult(F); auto &DB = AM.getResult(F); auto &ORE = AM.getResult(F); + auto &MDA = AM.getResult(F); LoopAccessInfoManager &LAIs = AM.getResult(F); auto &MAMProxy = AM.getResult(F); ProfileSummaryInfo *PSI = MAMProxy.getCachedResult(*F.getParent()); LoopVectorizeResult Result = - runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, LAIs, ORE, PSI); + runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, MDA, LAIs, ORE, PSI); if (!Result.MadeAnyChange) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -244,6 +244,8 @@ ; CHECK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis ; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis +; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-O1-NEXT: Running analysis: 
PhiValuesAnalysis ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -36,7 +36,7 @@ ; Postlink pipelines: ; RUN: opt -disable-verify -verify-cfg-preserved=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,%llvmcheckext +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-POSTLINK-O,CHECK-POSTLINK-O1,%llvmcheckext ; RUN: opt -disable-verify -verify-cfg-preserved=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-POSTLINK-O,%llvmcheckext,CHECK-POSTLINK-O2 @@ -221,6 +221,8 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-POSTLINK-O-NEXT: Running analysis: BlockFrequencyAnalysis ; CHECK-POSTLINK-O-NEXT: Running analysis: BranchProbabilityAnalysis +; CHECK-POSTLINK-O1-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-POSTLINK-O1-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -177,6 +177,8 @@ ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis on foo ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass +; CHECK-O1-NEXT: Running analysis: 
MemoryDependenceAnalysis +; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -189,6 +189,8 @@ ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis ; CHECK-O-NEXT: Running pass: InjectTLIMappings ; CHECK-O-NEXT: Running pass: LoopVectorizePass +; CHECK-O1-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-O1-NEXT: Running analysis: PhiValuesAnalysis ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass