Index: include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- include/llvm/Analysis/LoopAccessAnalysis.h +++ include/llvm/Analysis/LoopAccessAnalysis.h @@ -24,6 +24,7 @@ #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { @@ -193,11 +194,10 @@ const SmallVectorImpl &Instrs) const; }; - MemoryDepChecker(ScalarEvolution *Se, const Loop *L, - SCEVUnionPredicate &Preds) - : SE(Se), InnermostLoop(L), AccessIdx(0), + MemoryDepChecker(SCEVPredicatedLayer &PredSE, const Loop *L) + : PredSE(PredSE), InnermostLoop(L), AccessIdx(0), ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true), - RecordDependences(true), Preds(Preds) {} + RecordDependences(true) {} /// \brief Register the location (instructions are given increasing numbers) /// of a write access. @@ -266,7 +266,13 @@ bool isWrite) const; private: - ScalarEvolution *SE; + /// The SCEV predicated layer is used to add runtime SCEV checks, and applies + /// dynamic knowledge to simplify SCEV expressions and convert them to a more + /// usable form. We need this in case assumptions about SCEV expressions need + /// to be made in order to avoid unknown dependences. For example we might + /// assume a unit stride for a pointer in order to prove that a memory access + /// is strided and doesn't wrap. + SCEVPredicatedLayer &PredSE; const Loop *InnermostLoop; /// \brief Maps access locations (ptr, read/write) to program order. @@ -317,15 +323,6 @@ /// \brief Check whether the data dependence could prevent store-load /// forwarding. bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize); - - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The dependence checker needs this in order to convert SCEVs of pointers - /// to more accurate expressions in the context of existing assumptions. - /// We also need this in case assumptions about SCEV expressions need to - /// be made in order to avoid unknown dependences. For example we might - /// assume a unit stride for a pointer in order to prove that a memory access - /// is strided and doesn't wrap. - SCEVUnionPredicate &Preds; }; /// \brief Holds information about the memory runtime legality checks to verify @@ -373,7 +370,7 @@ /// and change \p Preds. void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, - SCEVUnionPredicate &Preds); + SCEVPredicatedLayer &PredSE); /// \brief No run-time memory checking is necessary. bool empty() const { return Pointers.empty(); } @@ -591,14 +588,13 @@ return StoreToLoopInvariantAddress; } - /// The SCEV predicate contains all the SCEV-related assumptions. - /// The is used to keep track of the minimal set of assumptions on SCEV - /// expressions that the analysis needs to make in order to return a - /// meaningful result. All SCEV expressions during the analysis should be + /// The SCEV predicated layer is used to add runtime SCEV checks, and applies + /// dynamic knowledge to simplify SCEV expressions and convert them to a more + /// usable form. All SCEV expressions during the analysis should be /// re-written (and therefore simplified) according to Preds. /// A user of LoopAccessAnalysis will need to emit the runtime checks /// associated with this predicate. - SCEVUnionPredicate Preds; + SCEVPredicatedLayer PredSE; private: /// \brief Analyze the loop. Substitute symbolic strides using Strides. @@ -619,7 +615,6 @@ MemoryDepChecker DepChecker; Loop *TheLoop; - ScalarEvolution *SE; const DataLayout &DL; const TargetLibraryInfo *TLI; AliasAnalysis *AA; @@ -654,18 +649,17 @@ /// If \p OrigPtr is not null, use it to look up the stride value instead of \p /// Ptr. \p PtrToStride provides the mapping between the pointer value and its /// stride as collected by LoopVectorizationLegality::collectStridedAccess. -const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *replaceSymbolicStrideSCEV(SCEVPredicatedLayer &PredSE, const ValueToValueMap &PtrToStride, - SCEVUnionPredicate &Preds, Value *Ptr, - Value *OrigPtr = nullptr); + Value *Ptr, Value *OrigPtr = nullptr); /// \brief Check the stride of the pointer and ensure that it does not wrap in /// the address space, assuming \p Preds is true. /// /// If necessary this method will version the stride of the pointer according /// to \p PtrToStride and therefore add a new predicate to \p Preds. -int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap, SCEVUnionPredicate &Preds); +int isStridedPtr(SCEVPredicatedLayer &PredSE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap); /// \brief This analysis provides dependence information for the memory accesses /// of a loop. Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -374,6 +374,43 @@ /// \brief Returns the instructions that use values defined in the loop. SmallVector findDefsUsedOutsideOfLoop(Loop *L); + +/// An iterface layer with SCEV used to manage how we see SCEV expressions for +/// values in the context of existing predicates. We can add new predicates, +/// but we cannot remove them. +/// +/// This layer has multiple purposes: +/// - provides a simple interface for SCEV versioning. +/// - guarantees that the order of transformations applied on a SCEV +/// expression for a single Value is consistent across two different +/// getSCEV calls. This means that, for example, once we've obtained +/// an AddRec expression for a certain value through expression rewriting, +/// we will continue to get an AddRec expresion for that Value. +/// - lowers the number of expression rewrites. +class SCEVPredicatedLayer { +public: + SCEVPredicatedLayer(ScalarEvolution &SE); + const SCEVUnionPredicate &getPredicate() const; + /// \brief Returns the SCEV expression of V, in the context of the current + /// SCEV predicate. + /// The order of transformations applied on the expression of V returned + /// by ScalarEvolution is guaranteed to be preserved, even when adding new + /// predicates. + const SCEV *getSCEV(Value *V); + /// \brief Adds a new predicate. + void addPredicate(const SCEVPredicate &Pred); + + ScalarEvolution *getSE() const { return &SE; } + +private: + void updateGeneration(); + + typedef std::pair RewriteEntry; + DenseMap RewriteMap; + ScalarEvolution &SE; + SCEVUnionPredicate Preds; + unsigned Generation; +}; } #endif Index: lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- lib/Analysis/LoopAccessAnalysis.cpp +++ lib/Analysis/LoopAccessAnalysis.cpp @@ -87,11 +87,10 @@ return V; } -const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *llvm::replaceSymbolicStrideSCEV(SCEVPredicatedLayer &PredSE, const ValueToValueMap &PtrToStride, - SCEVUnionPredicate &Preds, Value *Ptr, Value *OrigPtr) { - const SCEV *OrigSCEV = SE->getSCEV(Ptr); + const SCEV *OrigSCEV = PredSE.getSCEV(Ptr); // If there is an entry in the map return the SCEV of the pointer with the // symbolic stride replaced by one. @@ -108,16 +107,17 @@ ValueToValueMap RewriteMap; RewriteMap[StrideVal] = One; + ScalarEvolution *SE = PredSE.getSE(); const auto *U = cast(SE->getSCEV(StrideVal)); const auto *CT = static_cast(SE->getOne(StrideVal->getType())); - Preds.add(SE->getEqualPredicate(U, CT)); + PredSE.addPredicate(*SE->getEqualPredicate(U, CT)); + auto *Expr = PredSE.getSCEV(Ptr); - const SCEV *ByOne = SE->rewriteUsingPredicate(OrigSCEV, Preds); - DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"); - return ByOne; + return Expr; } // Otherwise, just return the SCEV of the original pointer. @@ -127,11 +127,12 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, - SCEVUnionPredicate &Preds) { + SCEVPredicatedLayer &PredSE) { // Get the stride replaced scev. - const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + const SCEV *Sc = replaceSymbolicStrideSCEV(PredSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); + ScalarEvolution *SE = PredSE.getSE(); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); const SCEV *ScStart = AR->getStart(); @@ -423,9 +424,10 @@ typedef SmallPtrSet MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA, SCEVUnionPredicate &Preds) + MemoryDepChecker::DepCandidates &DA, + SCEVPredicatedLayer &PredSE) : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), - Preds(Preds) {} + PredSE(PredSE) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -512,16 +514,16 @@ bool IsRTCheckAnalysisNeeded; /// The SCEV predicate containing all the SCEV-related assumptions. - SCEVUnionPredicate &Preds; + SCEVPredicatedLayer &PredSE; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, +static bool hasComputableBounds(SCEVPredicatedLayer &PredSE, const ValueToValueMap &Strides, Value *Ptr, - Loop *L, SCEVUnionPredicate &Preds) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + Loop *L) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PredSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) return false; @@ -564,11 +566,11 @@ else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr, TheLoop, Preds) && + if (hasComputableBounds(PredSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap, Preds) == 1)) { + isStridedPtr(PredSE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -582,7 +584,7 @@ // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, Preds); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PredSE); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -817,9 +819,8 @@ } /// \brief Check whether the access through \p Ptr has a constant stride. -int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap, - SCEVUnionPredicate &Preds) { +int llvm::isStridedPtr(SCEVPredicatedLayer &PredSE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -831,7 +832,7 @@ return 0; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Preds, Ptr); + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PredSE, StridesMap, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) { @@ -854,7 +855,7 @@ // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp); + bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PredSE.getSE(), Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " @@ -863,7 +864,7 @@ } // Check the step is constant. - const SCEV *Step = AR->getStepRecurrence(*SE); + const SCEV *Step = AR->getStepRecurrence(*PredSE.getSE()); // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast(Step); @@ -1046,11 +1047,11 @@ BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(PredSE, Strides, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(PredSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides, Preds); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides, Preds); + int StrideAPtr = isStridedPtr(PredSE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(PredSE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1067,7 +1068,7 @@ std::swap(StrideAPtr, StrideBPtr); } - const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + const SCEV *Dist = PredSE.getSE()->getMinusSCEV(Sink, Src); DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink << "(Induction step: " << StrideAPtr << ")\n"); @@ -1343,8 +1344,8 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { + const SCEV *ExitCount = PredSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PredSE.getSE()->getCouldNotCompute()) { emitAnalysis(LoopAccessReport() << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); @@ -1447,7 +1448,7 @@ MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses, Preds); + AA, LI, DependentAccesses, PredSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1499,7 +1500,7 @@ // words may be written to the same address. bool IsReadOnlyPtr = false; if (Seen.insert(Ptr).second || - !isStridedPtr(SE, Ptr, TheLoop, Strides, Preds)) { + !isStridedPtr(PredSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1529,7 +1530,7 @@ // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + Accesses.canCheckPtrAtRT(PtrRtChecking, PredSE.getSE(), TheLoop, Strides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1556,6 +1557,7 @@ PtrRtChecking.reset(); PtrRtChecking.Need = true; + auto *SE = PredSE.getSE(); CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); @@ -1598,7 +1600,7 @@ } bool LoopAccessInfo::isUniform(Value *V) const { - return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); + return (PredSE.getSE()->isLoopInvariant(PredSE.getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1679,7 +1681,7 @@ Instruction *Loc, const SmallVectorImpl &PointerChecks) const { - + auto *SE = PredSE.getSE(); SCEVExpander Exp(*SE, DL, "induction"); auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); @@ -1749,7 +1751,7 @@ const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtChecking(SE), DepChecker(SE, L, Preds), TheLoop(L), SE(SE), DL(DL), + : PredSE(*SE), PtrRtChecking(SE), DepChecker(PredSE, L), TheLoop(L), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1786,7 +1788,7 @@ << "found in loop.\n"; OS.indent(Depth) << "SCEV assumptions:\n"; - Preds.print(OS, Depth); + PredSE.getPredicate().print(OS, Depth); } const LoopAccessInfo & Index: lib/Transforms/Scalar/LoopDistribute.cpp =================================================================== --- lib/Transforms/Scalar/LoopDistribute.cpp +++ lib/Transforms/Scalar/LoopDistribute.cpp @@ -761,7 +761,7 @@ } // Don't distribute the loop if we need too many SCEV run-time checks. - const SCEVUnionPredicate &Pred = LAI.Preds; + const SCEVUnionPredicate &Pred = LAI.PredSE.getPredicate(); if (Pred.getComplexity() > DistributeSCEVCheckThreshold) { DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; @@ -790,7 +790,7 @@ DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks)); LoopVersioning LVer(LAI, L, LI, DT, SE, false); LVer.setAliasChecks(std::move(Checks)); - LVer.setSCEVChecks(LAI.Preds); + LVer.setSCEVChecks(LAI.PredSE.getPredicate()); LVer.versionLoop(DefsUsedOutside); } Index: lib/Transforms/Scalar/LoopLoadElimination.cpp =================================================================== --- lib/Transforms/Scalar/LoopLoadElimination.cpp +++ lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -459,17 +459,18 @@ return false; } - if (LAI.Preds.getComplexity() > LoadElimSCEVCheckThreshold) { + if (LAI.PredSE.getPredicate().getComplexity() > + LoadElimSCEVCheckThreshold) { DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; } // Point of no-return, start the transformation. First, version the loop if // necessary. - if (!Checks.empty() || !LAI.Preds.isAlwaysTrue()) { + if (!Checks.empty() || !LAI.PredSE.getPredicate().isAlwaysTrue()) { LoopVersioning LV(LAI, L, LI, DT, SE, false); LV.setAliasChecks(std::move(Checks)); - LV.setSCEVChecks(LAI.Preds); + LV.setSCEVChecks(LAI.PredSE.getPredicate()); LV.versionLoop(); } Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -727,3 +727,45 @@ return UsedOutside; } + +SCEVPredicatedLayer::SCEVPredicatedLayer(ScalarEvolution &SE) + : SE(SE), Generation(0) {} + +const SCEV *SCEVPredicatedLayer::getSCEV(Value *V) { + const SCEV *Expr = SE.getSCEV(V); + auto II = RewriteMap.find(Expr); + if (II == RewriteMap.end()) { + const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds); + RewriteMap[Expr] = {Generation, NewSCEV}; + return NewSCEV; + } + + if (Generation == II->second.first) + return II->second.second; + + const SCEV *NewExpr = SE.rewriteUsingPredicate(II->second.second, Preds); + RewriteMap[Expr] = {Generation, NewExpr}; + return NewExpr; +} + +void SCEVPredicatedLayer::addPredicate(const SCEVPredicate &Pred) { + if (Preds.implies(&Pred)) + return; + Preds.add(&Pred); + updateGeneration(); +} + +const SCEVUnionPredicate &SCEVPredicatedLayer::getPredicate() const { + return Preds; +} + +void SCEVPredicatedLayer::updateGeneration() { + Generation++; + // The generation number wrapped. Recompute everything. + if (Generation == 0) { + for (auto &II : RewriteMap) { + const SCEV *Rewritten = II.second.second; + II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)}; + } + } +} Index: lib/Transforms/Utils/LoopVersioning.cpp =================================================================== --- lib/Transforms/Utils/LoopVersioning.cpp +++ lib/Transforms/Utils/LoopVersioning.cpp @@ -33,7 +33,7 @@ assert(L->getLoopPreheader() && "No preheader"); if (UseLAIChecks) { setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); - setSCEVChecks(LAI.Preds); + setSCEVChecks(LAI.PredSE.getPredicate()); } } @@ -59,7 +59,7 @@ LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); - const SCEVUnionPredicate &Pred = LAI.Preds; + const SCEVUnionPredicate &Pred = LAI.PredSE.getPredicate(); SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), "scev.check"); SCEVRuntimeCheck = Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -310,15 +310,15 @@ /// and reduction variables that were found to a given vectorization factor. class InnerLoopVectorizer { public: - InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, + InnerLoopVectorizer(Loop *OrigLoop, SCEVPredicatedLayer &PredSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, unsigned VecWidth, - unsigned UnrollFactor, SCEVUnionPredicate &Preds) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), + unsigned UnrollFactor) + : OrigLoop(OrigLoop), PredSE(PredSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), + VF(VecWidth), UF(UnrollFactor), Builder(PredSE.getSE()->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr), - AddedSafetyChecks(false), Preds(Preds) {} + AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). // MinimumBitWidths maps scalar integer values to the smallest bitwidth they @@ -486,8 +486,10 @@ /// The original loop. Loop *OrigLoop; - /// Scev analysis to use. - ScalarEvolution *SE; + /// The SCEV predicated layer is used to add runtime SCEV checks, and applies + /// dynamic knowledge to simplify SCEV expressions and convert them to a more + /// usable form. + SCEVPredicatedLayer &PredSE; /// Loop Info. LoopInfo *LI; /// Dominator Tree. @@ -552,22 +554,15 @@ // Record whether runtime check is added. bool AddedSafetyChecks; - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify existing expressions in the - /// context of existing SCEV assumptions. Since legality checking is - /// not done here, we don't need to use this predicate to record - /// further assumptions. - SCEVUnionPredicate &Preds; }; class InnerLoopUnroller : public InnerLoopVectorizer { public: - InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, + InnerLoopUnroller(Loop *OrigLoop, SCEVPredicatedLayer &PredSE, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, unsigned UnrollFactor, - SCEVUnionPredicate &Preds) - : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor, - Preds) {} + const TargetTransformInfo *TTI, unsigned UnrollFactor) + : InnerLoopVectorizer(OrigLoop, PredSE, LI, DT, TLI, TTI, 1, + UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -788,9 +783,8 @@ /// between the member and the group in a map. class InterleavedAccessInfo { public: - InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT, - SCEVUnionPredicate &Preds) - : SE(SE), TheLoop(L), DT(DT), Preds(Preds) {} + InterleavedAccessInfo(SCEVPredicatedLayer &PredSE, Loop *L, DominatorTree *DT) + : PredSE(PredSE), TheLoop(L), DT(DT) {} ~InterleavedAccessInfo() { SmallSet DelSet; @@ -820,16 +814,14 @@ } private: - ScalarEvolution *SE; - Loop *TheLoop; - DominatorTree *DT; - /// The SCEV predicate containing all the SCEV-related assumptions. /// The predicate is used to simplify SCEV expressions in the /// context of existing SCEV assumptions. The interleaved access /// analysis can also add new predicates (for example by versioning /// strides of pointers). - SCEVUnionPredicate &Preds; + SCEVPredicatedLayer &PredSE; + Loop *TheLoop; + DominatorTree *DT; /// Holds the relationships between the members and the interleave group. DenseMap InterleaveGroupMap; @@ -1188,18 +1180,17 @@ /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - TargetLibraryInfo *TLI, AliasAnalysis *AA, - Function *F, const TargetTransformInfo *TTI, + LoopVectorizationLegality(Loop *L, SCEVPredicatedLayer &PredSE, + DominatorTree *DT, TargetLibraryInfo *TLI, + AliasAnalysis *AA, Function *F, + const TargetTransformInfo *TTI, LoopAccessAnalysis *LAA, LoopVectorizationRequirements *R, - const LoopVectorizeHints *H, - SCEVUnionPredicate &Preds) - : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), - TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), - InterleaveInfo(SE, L, DT, Preds), Induction(nullptr), - WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R), Hints(H), - Preds(Preds) {} + const LoopVectorizeHints *H) + : NumPredStores(0), TheLoop(L), PredSE(PredSE), TLI(TLI), TheFunction(F), + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PredSE, L, DT), + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), + Requirements(R), Hints(H) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -1343,8 +1334,12 @@ /// The loop that we evaluate. Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; + /// The SCEV predicated layer is used to add runtime SCEV checks, and applies + /// dynamic knowledge to simplify SCEV expressions and convert them to a more + /// context of existing SCEV assumptions. The analysis will also add a minimal + /// set of new predicates if this is required to enable vectorization and + /// unrolling. + SCEVPredicatedLayer &PredSE; /// Target Library Info. TargetLibraryInfo *TLI; /// Parent function @@ -1399,13 +1394,6 @@ /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet MaskedOp; - - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify SCEV expressions in the - /// context of existing SCEV assumptions. The analysis will also - /// add a minimal set of new predicates if this is required to - /// enable vectorization/unrolling. - SCEVUnionPredicate &Preds; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1423,8 +1411,7 @@ const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, const Function *F, const LoopVectorizeHints *Hints, - SmallPtrSetImpl &ValuesToIgnore, - SCEVUnionPredicate &Preds) + SmallPtrSetImpl &ValuesToIgnore) : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {} @@ -1754,12 +1741,12 @@ } } - SCEVUnionPredicate Preds; + SCEVPredicatedLayer PredSE(*SE); // Check if it is legal to vectorize the loop. LoopVectorizationRequirements Requirements; - LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA, - &Requirements, &Hints, Preds); + LoopVectorizationLegality LVL(L, PredSE, DT, TLI, AA, F, TTI, LAA, + &Requirements, &Hints); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1777,8 +1764,8 @@ } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, DB, AC, F, &Hints, - ValuesToIgnore, Preds); + LoopVectorizationCostModel CM(L, PredSE.getSE(), LI, &LVL, *TTI, TLI, DB, + AC, F, &Hints, ValuesToIgnore); // Check the function attributes to find out if this function should be // optimized for size. @@ -1889,7 +1876,7 @@ assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop then // interleave it. - InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC, Preds); + InnerLoopUnroller Unroller(L, PredSE, LI, DT, TLI, TTI, IC); Unroller.vectorize(&LVL, CM.MinBWs); emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), @@ -1897,7 +1884,7 @@ Twine(IC) + ")"); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC, Preds); + InnerLoopVectorizer LB(L, PredSE, LI, DT, TLI, TTI, VF.Width, IC); LB.vectorize(&LVL, CM.MinBWs); ++LoopsVectorized; @@ -1998,6 +1985,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + auto *SE = PredSE.getSE(); // Make sure that the pointer does not point to structs. if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; @@ -2027,7 +2015,7 @@ // Make sure that all of the index operands are loop invariant. for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + if (!SE->isLoopInvariant(PredSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; InductionDescriptor II = Inductions[Phi]; @@ -2040,14 +2028,14 @@ // operand. for (unsigned i = 0; i != NumOperands; ++i) if (i != InductionOperand && - !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + !SE->isLoopInvariant(PredSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; // We can emit wide load/stores only if the last non-zero index is the // induction variable. const SCEV *Last = nullptr; if (!Strides.count(Gep)) - Last = SE->getSCEV(Gep->getOperand(InductionOperand)); + Last = PredSE.getSCEV(Gep->getOperand(InductionOperand)); else { // Because of the multiplication by a stride we can have a s/zext cast. // We are going to replace this stride by 1 so the cast is safe to ignore. @@ -2058,7 +2046,7 @@ // %idxprom = zext i32 %mul to i64 << Safe cast. // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom // - Last = replaceSymbolicStrideSCEV(SE, Strides, Preds, + Last = replaceSymbolicStrideSCEV(PredSE, Strides, Gep->getOperand(InductionOperand), Gep); if (const SCEVCastExpr *C = dyn_cast(Last)) Last = @@ -2416,8 +2404,9 @@ Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); - assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), - OrigLoop) && "Base ptr must be invariant"); + assert(PredSE.getSE()->isLoopInvariant( + PredSE.getSCEV(Gep->getPointerOperand()), OrigLoop) && + "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; @@ -2434,7 +2423,8 @@ if (i == InductionOperand || (GepOperandInst && OrigLoop->contains(GepOperandInst))) { assert((i == InductionOperand || - SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) && + PredSE.getSE()->isLoopInvariant(PredSE.getSCEV(GepOperandInst), + OrigLoop)) && "Must be last index or loop invariant"); VectorParts &GEPParts = getVectorValue(GepOperand); @@ -2652,6 +2642,7 @@ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. + ScalarEvolution *SE = PredSE.getSE(); const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); @@ -2758,8 +2749,10 @@ // Generate the code to check that the SCEV assumptions that we made. // We want the new basic block to start at the first instruction in a // sequence of instructions that form a check. - SCEVExpander Exp(*SE, Bypass->getModule()->getDataLayout(), "scev.check"); - Value *SCEVCheck = Exp.expandCodeForPredicate(&Preds, BB->getTerminator()); + SCEVExpander Exp(*PredSE.getSE(), Bypass->getModule()->getDataLayout(), + "scev.check"); + Value *SCEVCheck = + Exp.expandCodeForPredicate(&PredSE.getPredicate(), BB->getTerminator()); if (auto *C = dyn_cast(SCEVCheck)) if (C->isZero()) @@ -3777,8 +3770,9 @@ // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. Otherwise, use vector-select. - bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)), - OrigLoop); + auto *SE = PredSE.getSE(); + bool InvariantCond = + SE->isLoopInvariant(PredSE.getSCEV(it->getOperand(0)), OrigLoop); setDebugLocFromInst(Builder, &*it); // The condition can be loop invariant but still defined inside the @@ -3958,7 +3952,7 @@ void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. - SE->forgetLoop(OrigLoop); + PredSE.getSE()->forgetLoop(OrigLoop); // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && @@ -4110,8 +4104,8 @@ } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { + const SCEV *ExitCount = PredSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PredSE.getSE()->getCouldNotCompute()) { emitAnalysis(VectorizationReport() << "could not determine number of loop iterations"); DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); @@ -4153,7 +4147,7 @@ if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; - if (Preds.getComplexity() > SCEVThreshold) { + if (PredSE.getPredicate().getComplexity() > SCEVThreshold) { emitAnalysis(VectorizationReport() << "Too many SCEV assumptions need to be made and checked " << "at runtime"); @@ -4259,7 +4253,7 @@ } InductionDescriptor ID; - if (InductionDescriptor::isInductionPHI(Phi, SE, ID)) { + if (InductionDescriptor::isInductionPHI(Phi, PredSE.getSE(), ID)) { Inductions[Phi] = ID; // Get the widest type. if (!WidestIndTy) @@ -4328,7 +4322,8 @@ // second argument is the same (i.e. loop invariant) if (CI && hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { - if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { + auto *SE = PredSE.getSE(); + if (!SE->isLoopInvariant(PredSE.getSCEV(CI->getOperand(1)), TheLoop)) { emitAnalysis(VectorizationReport(&*it) << "intrinsic instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); @@ -4401,7 +4396,7 @@ else return; - Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop); + Value *Stride = getStrideFromPointer(Ptr, PredSE.getSE(), TheLoop); if (!Stride) return; @@ -4465,7 +4460,7 @@ } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); - Preds.add(&LAI->Preds); + PredSE.addPredicate(LAI->PredSE.getPredicate()); return true; } @@ -4580,7 +4575,7 @@ StoreInst *SI = dyn_cast(I); Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); - int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides, Preds); + int Stride = isStridedPtr(PredSE, Ptr, TheLoop, Strides); // The factor of the corresponding interleave group. unsigned Factor = std::abs(Stride); @@ -4589,7 +4584,7 @@ if (Factor < 2 || Factor > MaxInterleaveGroupFactor) continue; - const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + const SCEV *Scev = replaceSymbolicStrideSCEV(PredSE, Strides, Ptr); PointerType *PtrTy = dyn_cast(Ptr->getType()); unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType()); @@ -4676,8 +4671,8 @@ continue; // Calculate the distance and prepare for the rule 3. - const SCEVConstant *DistToA = - dyn_cast(SE->getMinusSCEV(DesB.Scev, DesA.Scev)); + const SCEVConstant *DistToA = dyn_cast( + PredSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev)); if (!DistToA) continue;