Index: include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- include/llvm/Analysis/LoopAccessAnalysis.h +++ include/llvm/Analysis/LoopAccessAnalysis.h @@ -24,6 +24,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { @@ -193,11 +194,10 @@ const SmallVectorImpl &Instrs) const; }; - MemoryDepChecker(ScalarEvolution *Se, const Loop *L, - SCEVUnionPredicate &Preds) - : SE(Se), InnermostLoop(L), AccessIdx(0), + MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) + : PSE(PSE), InnermostLoop(L), AccessIdx(0), ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true), - RecordDependences(true), Preds(Preds) {} + RecordDependences(true) {} /// \brief Register the location (instructions are given increasing numbers) /// of a write access. @@ -266,7 +266,13 @@ bool isWrite) const; private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and + /// applies dynamic knowledge to simplify SCEV expressions and convert them + /// to a more usable form. We need this in case assumptions about SCEV + /// expressions need to be made in order to avoid unknown dependences. For + /// example we might assume a unit stride for a pointer in order to prove + /// that a memory access is strided and doesn't wrap. + PredicatedScalarEvolution &PSE; const Loop *InnermostLoop; /// \brief Maps access locations (ptr, read/write) to program order. @@ -317,15 +323,6 @@ /// \brief Check whether the data dependence could prevent store-load /// forwarding. bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize); - - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The dependence checker needs this in order to convert SCEVs of pointers - /// to more accurate expressions in the context of existing assumptions. - /// We also need this in case assumptions about SCEV expressions need to - /// be made in order to avoid unknown dependences. For example we might - /// assume a unit stride for a pointer in order to prove that a memory access - /// is strided and doesn't wrap. - SCEVUnionPredicate &Preds; }; /// \brief Holds information about the memory runtime legality checks to verify @@ -373,7 +370,7 @@ /// and change \p Preds. void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, - SCEVUnionPredicate &Preds); + PredicatedScalarEvolution &PSE); /// \brief No run-time memory checking is necessary. bool empty() const { return Pointers.empty(); } @@ -508,8 +505,8 @@ /// ScalarEvolution, we will generate run-time checks by emitting a /// SCEVUnionPredicate. /// -/// Checks for both memory dependences and SCEV predicates must be emitted in -/// order for the results of this analysis to be valid. +/// Checks for both memory dependences and the SCEV predicates contained in the +/// PSE must be emitted in order for the results of this analysis to be valid. class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, @@ -591,14 +588,12 @@ return StoreToLoopInvariantAddress; } - /// The SCEV predicate contains all the SCEV-related assumptions. - /// The is used to keep track of the minimal set of assumptions on SCEV - /// expressions that the analysis needs to make in order to return a - /// meaningful result. All SCEV expressions during the analysis should be - /// re-written (and therefore simplified) according to Preds. + /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts + /// them to a more usable form. All SCEV expressions during the analysis + /// should be re-written (and therefore simplified) according to PSE. /// A user of LoopAccessAnalysis will need to emit the runtime checks /// associated with this predicate. - SCEVUnionPredicate Preds; + PredicatedScalarEvolution PSE; private: /// \brief Analyze the loop. Substitute symbolic strides using Strides. @@ -619,7 +614,6 @@ MemoryDepChecker DepChecker; Loop *TheLoop; - ScalarEvolution *SE; const DataLayout &DL; const TargetLibraryInfo *TLI; AliasAnalysis *AA; @@ -654,18 +648,17 @@ /// If \p OrigPtr is not null, use it to look up the stride value instead of \p /// Ptr. \p PtrToStride provides the mapping between the pointer value and its /// stride as collected by LoopVectorizationLegality::collectStridedAccess. -const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, - SCEVUnionPredicate &Preds, Value *Ptr, - Value *OrigPtr = nullptr); + Value *Ptr, Value *OrigPtr = nullptr); /// \brief Check the stride of the pointer and ensure that it does not wrap in /// the address space, assuming \p Preds is true. /// /// If necessary this method will version the stride of the pointer according /// to \p PtrToStride and therefore add a new predicate to \p Preds. -int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap, SCEVUnionPredicate &Preds); +int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap); /// \brief This analysis provides dependence information for the memory accesses /// of a loop. Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -374,6 +374,58 @@ /// \brief Returns the instructions that use values defined in the loop. SmallVector findDefsUsedOutsideOfLoop(Loop *L); + +/// An interface layer with SCEV used to manage how we see SCEV expressions for +/// values in the context of existing predicates. We can add new predicates, +/// but we cannot remove them. +/// +/// This layer has multiple purposes: +/// - provides a simple interface for SCEV versioning. +/// - guarantees that the order of transformations applied on a SCEV +/// expression for a single Value is consistent across two different +/// getSCEV calls. This means that, for example, once we've obtained +/// an AddRec expression for a certain value through expression rewriting, +/// we will continue to get an AddRec expression for that Value. +/// - lowers the number of expression rewrites. +class PredicatedScalarEvolution { +public: + PredicatedScalarEvolution(ScalarEvolution &SE); + const SCEVUnionPredicate &getUnionPredicate() const; + /// \brief Returns the SCEV expression of V, in the context of the current + /// SCEV predicate. + /// The order of transformations applied on the expression of V returned + /// by ScalarEvolution is guaranteed to be preserved, even when adding new + /// predicates. + const SCEV *getSCEV(Value *V); + /// \brief Adds a new predicate. + void addPredicate(const SCEVPredicate &Pred); + /// \brief Returns the ScalarEvolution analysis used. + ScalarEvolution *getSE() const { return &SE; } + +private: + /// \brief Increments the version number of the predicate. + /// This needs to be called every time the SCEV predicate changes. + void updateGeneration(); + /// Holds a SCEV and the version number of the SCEV predicate used to + /// perform the rewrite of the expression. + typedef std::pair RewriteEntry; + /// Maps a SCEV to the rewrite result of that SCEV at a certain version + /// number. If this number doesn't match the current Generation, we will + /// need to do a rewrite. To preserve the transformation order of previous + /// rewrites, we will rewrite the previous result instead of the original + /// SCEV. + DenseMap RewriteMap; + /// The ScalarEvolution analysis. + ScalarEvolution &SE; + /// The SCEVPredicate that forms our context. We will rewrite all expressions + /// assuming that this predicate true. + SCEVUnionPredicate Preds; + /// Marks the version of the SCEV predicate used. When rewriting a SCEV + /// expression we mark it with the version of the predicate. We use this to + /// figure out if the predicate has changed from the last rewrite of the + /// SCEV. If so, we need to perform a new rewrite. + unsigned Generation; +}; } #endif Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -87,11 +87,10 @@ return V; } -const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, - SCEVUnionPredicate &Preds, Value *Ptr, Value *OrigPtr) { - const SCEV *OrigSCEV = SE->getSCEV(Ptr); + const SCEV *OrigSCEV = PSE.getSCEV(Ptr); // If there is an entry in the map return the SCEV of the pointer with the // symbolic stride replaced by one. @@ -108,16 +107,17 @@ ValueToValueMap RewriteMap; RewriteMap[StrideVal] = One; + ScalarEvolution *SE = PSE.getSE(); const auto *U = cast(SE->getSCEV(StrideVal)); const auto *CT = static_cast(SE->getOne(StrideVal->getType())); - Preds.add(SE->getEqualPredicate(U, CT)); + PSE.addPredicate(*SE->getEqualPredicate(U, CT)); + auto *Expr = PSE.getSCEV(Ptr); - const SCEV *ByOne = SE->rewriteUsingPredicate(OrigSCEV, Preds); - DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"); - return ByOne; + return Expr; } // Otherwise, just return the SCEV of the original pointer. @@ -127,11 +127,12 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, - SCEVUnionPredicate &Preds) { + PredicatedScalarEvolution &PSE) { // Get the stride replaced scev. - const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); + ScalarEvolution *SE = PSE.getSE(); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); const SCEV *ScStart = AR->getStart(); @@ -423,9 +424,10 @@ typedef SmallPtrSet MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA, SCEVUnionPredicate &Preds) + MemoryDepChecker::DepCandidates &DA, + PredicatedScalarEvolution &PSE) : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), - Preds(Preds) {} + PSE(PSE) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -512,16 +514,16 @@ bool IsRTCheckAnalysisNeeded; /// The SCEV predicate containing all the SCEV-related assumptions. - SCEVUnionPredicate &Preds; + PredicatedScalarEvolution &PSE; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, - Loop *L, SCEVUnionPredicate &Preds) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + Loop *L) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) return false; @@ -564,11 +566,11 @@ else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr, TheLoop, Preds) && + if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap, Preds) == 1)) { + isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -582,7 +584,7 @@ // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, Preds); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -817,9 +819,8 @@ } /// \brief Check whether the access through \p Ptr has a constant stride. -int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap, - SCEVUnionPredicate &Preds) { +int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -831,7 +832,7 @@ return 0; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Preds, Ptr); + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) { @@ -854,16 +855,16 @@ // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp); + bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " - << *Ptr << " SCEV: " << *PtrScev << "\n"); + << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } // Check the step is constant. - const SCEV *Step = AR->getStepRecurrence(*SE); + const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast(Step); @@ -1046,11 +1047,11 @@ BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides, Preds); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides, Preds); + int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1067,10 +1068,10 @@ std::swap(StrideAPtr, StrideBPtr); } - const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "(Induction step: " << StrideAPtr << ")\n"); DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"); @@ -1343,10 +1344,10 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(LoopAccessReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1447,7 +1448,7 @@ MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses, Preds); + AA, LI, DependentAccesses, PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1498,8 +1499,7 @@ // read a few words, modify, and write a few words, and some of the // words may be written to the same address. bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || - !isStridedPtr(SE, Ptr, TheLoop, Strides, Preds)) { + if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1529,7 +1529,7 @@ // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1556,6 +1556,7 @@ PtrRtChecking.reset(); PtrRtChecking.Need = true; + auto *SE = PSE.getSE(); CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); @@ -1598,7 +1599,7 @@ } bool LoopAccessInfo::isUniform(Value *V) const { - return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); + return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1679,7 +1680,7 @@ Instruction *Loc, const SmallVectorImpl &PointerChecks) const { - + auto *SE = PSE.getSE(); SCEVExpander Exp(*SE, DL, "induction"); auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); @@ -1749,7 +1750,7 @@ const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtChecking(SE), DepChecker(SE, L, Preds), TheLoop(L), SE(SE), DL(DL), + : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1786,7 +1787,7 @@ << "found in loop.\n"; OS.indent(Depth) << "SCEV assumptions:\n"; - Preds.print(OS, Depth); + PSE.getUnionPredicate().print(OS, Depth); } const LoopAccessInfo & Index: lib/Transforms/Scalar/LoopDistribute.cpp =================================================================== --- lib/Transforms/Scalar/LoopDistribute.cpp +++ lib/Transforms/Scalar/LoopDistribute.cpp @@ -761,7 +761,7 @@ } // Don't distribute the loop if we need too many SCEV run-time checks. - const SCEVUnionPredicate &Pred = LAI.Preds; + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); if (Pred.getComplexity() > DistributeSCEVCheckThreshold) { DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; @@ -790,7 +790,7 @@ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks)); LoopVersioning LVer(LAI, L, LI, DT, SE, false); LVer.setAliasChecks(std::move(Checks)); - LVer.setSCEVChecks(LAI.Preds); + LVer.setSCEVChecks(LAI.PSE.getUnionPredicate()); LVer.versionLoop(DefsUsedOutside); } Index: lib/Transforms/Scalar/LoopLoadElimination.cpp =================================================================== --- lib/Transforms/Scalar/LoopLoadElimination.cpp +++ lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -459,17 +459,18 @@ return false; } - if (LAI.Preds.getComplexity() > LoadElimSCEVCheckThreshold) { + if (LAI.PSE.getUnionPredicate().getComplexity() > + LoadElimSCEVCheckThreshold) { DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; } // Point of no-return, start the transformation. First, version the loop if // necessary. - if (!Checks.empty() || !LAI.Preds.isAlwaysTrue()) { + if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) { LoopVersioning LV(LAI, L, LI, DT, SE, false); LV.setAliasChecks(std::move(Checks)); - LV.setSCEVChecks(LAI.Preds); + LV.setSCEVChecks(LAI.PSE.getUnionPredicate()); LV.versionLoop(); } Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -727,3 +727,46 @@ return UsedOutside; } + +PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE) + : SE(SE), Generation(0) {} + +const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { + const SCEV *Expr = SE.getSCEV(V); + RewriteEntry &Entry = RewriteMap[Expr]; + + // If we already have an entry and the version matches, return it. + if (Entry.second && Generation == Entry.first) + return Entry.second; + + // We found an entry but it's stale. Rewrite the stale entry + // acording to the current predicate. + if (Entry.second) + Expr = Entry.second; + + const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds); + Entry = {Generation, NewSCEV}; + + return NewSCEV; +} + +void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { + if (Preds.implies(&Pred)) + return; + Preds.add(&Pred); + updateGeneration(); +} + +const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { + return Preds; +} + +void PredicatedScalarEvolution::updateGeneration() { + // If the generation number wrapped recompute everything. + if (++Generation == 0) { + for (auto &II : RewriteMap) { + const SCEV *Rewritten = II.second.second; + II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)}; + } + } +} Index: lib/Transforms/Utils/LoopVersioning.cpp =================================================================== --- lib/Transforms/Utils/LoopVersioning.cpp +++ lib/Transforms/Utils/LoopVersioning.cpp @@ -32,7 +32,7 @@ assert(L->getLoopPreheader() && "No preheader"); if (UseLAIChecks) { setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); - setSCEVChecks(LAI.Preds); + setSCEVChecks(LAI.PSE.getUnionPredicate()); } } @@ -58,7 +58,7 @@ LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); - const SCEVUnionPredicate &Pred = LAI.Preds; + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), "scev.check"); SCEVRuntimeCheck = Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -310,15 +310,16 @@ /// and reduction variables that were found to a given vectorization factor. class InnerLoopVectorizer { public: - InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, + InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, unsigned VecWidth, - unsigned UnrollFactor, SCEVUnionPredicate &Preds) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), + unsigned UnrollFactor) + : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), + VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr), - AddedSafetyChecks(false), Preds(Preds) {} + AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). // MinimumBitWidths maps scalar integer values to the smallest bitwidth they @@ -486,8 +487,10 @@ /// The original loop. Loop *OrigLoop; - /// Scev analysis to use. - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies + /// dynamic knowledge to simplify SCEV expressions and converts them to a + /// more usable form. + PredicatedScalarEvolution &PSE; /// Loop Info. LoopInfo *LI; /// Dominator Tree. @@ -551,23 +554,15 @@ // Record whether runtime check is added. bool AddedSafetyChecks; - - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify existing expressions in the - /// context of existing SCEV assumptions. Since legality checking is - /// not done here, we don't need to use this predicate to record - /// further assumptions. - SCEVUnionPredicate &Preds; }; class InnerLoopUnroller : public InnerLoopVectorizer { public: - InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, unsigned UnrollFactor, - SCEVUnionPredicate &Preds) - : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor, - Preds) {} + InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, unsigned UnrollFactor) + : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -789,9 +784,9 @@ /// between the member and the group in a map. class InterleavedAccessInfo { public: - InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT, - SCEVUnionPredicate &Preds) - : SE(SE), TheLoop(L), DT(DT), Preds(Preds) {} + InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, + DominatorTree *DT) + : PSE(PSE), TheLoop(L), DT(DT) {} ~InterleavedAccessInfo() { SmallSet DelSet; @@ -821,17 +816,14 @@ } private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. + /// Simplifies SCEV expressions in the context of existing SCEV assumptions. + /// The interleaved access analysis can also add new predicates (for example + /// by versioning strides of pointers). + PredicatedScalarEvolution &PSE; Loop *TheLoop; DominatorTree *DT; - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify SCEV expressions in the - /// context of existing SCEV assumptions. The interleaved access - /// analysis can also add new predicates (for example by versioning - /// strides of pointers). - SCEVUnionPredicate &Preds; - /// Holds the relationships between the members and the interleave group. DenseMap InterleaveGroupMap; @@ -1189,18 +1181,17 @@ /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - TargetLibraryInfo *TLI, AliasAnalysis *AA, - Function *F, const TargetTransformInfo *TTI, + LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, + DominatorTree *DT, TargetLibraryInfo *TLI, + AliasAnalysis *AA, Function *F, + const TargetTransformInfo *TTI, LoopAccessAnalysis *LAA, LoopVectorizationRequirements *R, - const LoopVectorizeHints *H, - SCEVUnionPredicate &Preds) - : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), - TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), - InterleaveInfo(SE, L, DT, Preds), Induction(nullptr), - WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R), Hints(H), - Preds(Preds) {} + const LoopVectorizeHints *H) + : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F), + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT), + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), + Requirements(R), Hints(H) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -1347,8 +1338,12 @@ /// The loop that we evaluate. Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. + /// Applies dynamic knowledge to simplify SCEV expressions in the context + /// of existing SCEV assumptions. The analysis will also add a minimal set + /// of new predicates if this is required to enable vectorization and + /// unrolling. + PredicatedScalarEvolution &PSE; /// Target Library Info. TargetLibraryInfo *TLI; /// Parent function @@ -1403,13 +1398,6 @@ /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet MaskedOp; - - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify SCEV expressions in the - /// context of existing SCEV assumptions. The analysis will also - /// add a minimal set of new predicates if this is required to - /// enable vectorization/unrolling. - SCEVUnionPredicate &Preds; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1427,8 +1415,7 @@ const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, const Function *F, const LoopVectorizeHints *Hints, - SmallPtrSetImpl &ValuesToIgnore, - SCEVUnionPredicate &Preds) + SmallPtrSetImpl &ValuesToIgnore) : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {} @@ -1758,12 +1745,12 @@ } } - SCEVUnionPredicate Preds; + PredicatedScalarEvolution PSE(*SE); // Check if it is legal to vectorize the loop. LoopVectorizationRequirements Requirements; - LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA, - &Requirements, &Hints, Preds); + LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA, + &Requirements, &Hints); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1781,8 +1768,8 @@ } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, DB, AC, F, &Hints, - ValuesToIgnore, Preds); + LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC, + F, &Hints, ValuesToIgnore); // Check the function attributes to find out if this function should be // optimized for size. @@ -1893,7 +1880,7 @@ assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop then // interleave it. - InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC, Preds); + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC); Unroller.vectorize(&LVL, CM.MinBWs); emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), @@ -1901,7 +1888,7 @@ Twine(IC) + ")"); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC, Preds); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC); LB.vectorize(&LVL, CM.MinBWs); ++LoopsVectorized; @@ -2002,6 +1989,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + auto *SE = PSE.getSE(); // Make sure that the pointer does not point to structs. if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; @@ -2031,7 +2019,7 @@ // Make sure that all of the index operands are loop invariant. for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; InductionDescriptor II = Inductions[Phi]; @@ -2044,14 +2032,14 @@ // operand. for (unsigned i = 0; i != NumOperands; ++i) if (i != InductionOperand && - !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; // We can emit wide load/stores only if the last non-zero index is the // induction variable. const SCEV *Last = nullptr; if (!Strides.count(Gep)) - Last = SE->getSCEV(Gep->getOperand(InductionOperand)); + Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); else { // Because of the multiplication by a stride we can have a s/zext cast. // We are going to replace this stride by 1 so the cast is safe to ignore. @@ -2062,7 +2050,7 @@ // %idxprom = zext i32 %mul to i64 << Safe cast. // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom // - Last = replaceSymbolicStrideSCEV(SE, Strides, Preds, + Last = replaceSymbolicStrideSCEV(PSE, Strides, Gep->getOperand(InductionOperand), Gep); if (const SCEVCastExpr *C = dyn_cast(Last)) Last = @@ -2420,8 +2408,9 @@ Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); - assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), - OrigLoop) && "Base ptr must be invariant"); + assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop) && + "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; @@ -2438,7 +2427,8 @@ if (i == InductionOperand || (GepOperandInst && OrigLoop->contains(GepOperandInst))) { assert((i == InductionOperand || - SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) && + PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst), + OrigLoop)) && "Must be last index or loop invariant"); VectorParts &GEPParts = getVectorValue(GepOperand); @@ -2658,6 +2648,7 @@ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. + ScalarEvolution *SE = PSE.getSE(); const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); @@ -2765,8 +2756,10 @@ // Generate the code to check that the SCEV assumptions that we made. // We want the new basic block to start at the first instruction in a // sequence of instructions that form a check. - SCEVExpander Exp(*SE, Bypass->getModule()->getDataLayout(), "scev.check"); - Value *SCEVCheck = Exp.expandCodeForPredicate(&Preds, BB->getTerminator()); + SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(), + "scev.check"); + Value *SCEVCheck = + Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator()); if (auto *C = dyn_cast(SCEVCheck)) if (C->isZero()) @@ -3785,8 +3778,9 @@ // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. Otherwise, use vector-select. - bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)), - OrigLoop); + auto *SE = PSE.getSE(); + bool InvariantCond = + SE->isLoopInvariant(PSE.getSCEV(it->getOperand(0)), OrigLoop); setDebugLocFromInst(Builder, &*it); // The condition can be loop invariant but still defined inside the @@ -3967,7 +3961,7 @@ void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. - SE->forgetLoop(OrigLoop); + PSE.getSE()->forgetLoop(OrigLoop); // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && @@ -4119,10 +4113,10 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(VectorizationReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(VectorizationReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); return false; } @@ -4162,7 +4156,7 @@ if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; - if (Preds.getComplexity() > SCEVThreshold) { + if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { emitAnalysis(VectorizationReport() << "Too many SCEV assumptions need to be made and checked " << "at runtime"); @@ -4268,7 +4262,7 @@ } InductionDescriptor ID; - if (InductionDescriptor::isInductionPHI(Phi, SE, ID)) { + if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) { Inductions[Phi] = ID; // Get the widest type. if (!WidestIndTy) @@ -4337,7 +4331,8 @@ // second argument is the same (i.e. loop invariant) if (CI && hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { - if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { + auto *SE = PSE.getSE(); + if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { emitAnalysis(VectorizationReport(&*it) << "intrinsic instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); @@ -4410,7 +4405,7 @@ else return; - Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop); + Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop); if (!Stride) return; @@ -4474,7 +4469,7 @@ } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); - Preds.add(&LAI->Preds); + PSE.addPredicate(LAI->PSE.getUnionPredicate()); return true; } @@ -4589,7 +4584,7 @@ StoreInst *SI = dyn_cast(I); Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); - int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides, Preds); + int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides); // The factor of the corresponding interleave group. unsigned Factor = std::abs(Stride); @@ -4598,7 +4593,7 @@ if (Factor < 2 || Factor > MaxInterleaveGroupFactor) continue; - const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); PointerType *PtrTy = dyn_cast(Ptr->getType()); unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType()); @@ -4685,8 +4680,8 @@ continue; // Calculate the distance and prepare for the rule 3. - const SCEVConstant *DistToA = - dyn_cast(SE->getMinusSCEV(DesB.Scev, DesA.Scev)); + const SCEVConstant *DistToA = dyn_cast( + PSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev)); if (!DistToA) continue;