Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" @@ -388,6 +389,15 @@ } } +/// \returns the AA location that is being access by the instruction. +static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) { + if (StoreInst *SI = dyn_cast(I)) + return AA->getLocation(SI); + if (LoadInst *LI = dyn_cast(I)) + return AA->getLocation(LI); + return AliasAnalysis::Location(); +} + /// Bottom Up SLP Vectorizer. class BoUpSLP { public: @@ -555,6 +565,36 @@ }; typedef SmallVector UserList; + /// Checks if two instructions may access the same memory. + /// + /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it + /// is invariant in the calling loop. + bool isAliased(const AliasAnalysis::Location &Loc1, Instruction *Inst1, + Instruction *Inst2) { + + // First check if the result is already in the cache. + AliasCacheKey key = std::make_pair(Inst1, Inst2); + Optional &result = AliasCache[key]; + if (result.hasValue()) { + return result.getValue(); + } + AliasAnalysis::Location Loc2 = getLocation(Inst2, AA); + bool aliased = true; + if (Loc1.Ptr && Loc2.Ptr) { + // Do the alias check. + aliased = AA->alias(Loc1, Loc2); + } + // Store the result in the cache. + result = aliased; + return aliased; + } + + typedef std::pair AliasCacheKey; + + /// Cache for alias results. + /// TODO: consider moving this to the AliasAnalysis itself. + DenseMap> AliasCache; + /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). UserList ExternalUses; @@ -791,7 +831,7 @@ /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. This is only a dry-run, no instructions are /// actually moved at this stage. - bool tryScheduleBundle(ArrayRef VL, AliasAnalysis *AA); + bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP); /// Un-bundles a group of instructions. void cancelScheduling(ArrayRef VL); @@ -808,7 +848,7 @@ /// Updates the dependency information of a bundle and of all instructions/ /// bundles which depend on the original bundle. void calculateDependencies(ScheduleData *SD, bool InsertInReadyList, - AliasAnalysis *AA); + BoUpSLP *SLP); /// Sets all instruction in the scheduling region to un-scheduled. void resetSchedule(); @@ -883,6 +923,13 @@ DominatorTree *DT; /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder; + + /// Temporary store for deleted instructions. Instructions will be deleted + /// eventually when the BoUpSLP is descructed. This is required to ensure + /// that there are no incorrect collisions in the AliasCache, which can happen + /// if a new instruction is allocated at the same address as a previously + /// deleted instruction. + SmallVector, 8> DeletedInstructions; }; #ifndef NDEBUG @@ -1069,7 +1116,7 @@ } BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, AA)) { + if (!BS.tryScheduleBundle(VL, this)) { DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); BS.cancelScheduling(VL); newTreeEntry(VL, false); @@ -2360,7 +2407,9 @@ Scalar->replaceAllUsesWith(Undef); } DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); - cast(Scalar)->eraseFromParent(); + Instruction *toDelete = cast(Scalar); + toDelete->removeFromParent(); + DeletedInstructions.push_back(std::unique_ptr(toDelete)); } } @@ -2442,7 +2491,8 @@ if (In->isIdenticalTo(*v) && DT->dominates((*v)->getParent(), In->getParent())) { In->replaceAllUsesWith(*v); - In->eraseFromParent(); + In->removeFromParent(); + DeletedInstructions.push_back(std::unique_ptr(In)); In = nullptr; break; } @@ -2460,7 +2510,7 @@ // Groups the instructions to a bundle (which is then a single scheduling entity) // and schedules instructions until the bundle gets ready. bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, - AliasAnalysis *AA) { + BoUpSLP *SLP) { if (isa(VL[0])) return true; @@ -2517,7 +2567,7 @@ DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " << BB->getName() << "\n"); - calculateDependencies(Bundle, true, AA); + calculateDependencies(Bundle, true, SLP); // Now try to schedule the new bundle. As soon as the bundle is "ready" it // means that there are no cyclic dependencies and we can schedule it. @@ -2648,18 +2698,9 @@ } } -/// \returns the AA location that is being access by the instruction. -static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) { - if (StoreInst *SI = dyn_cast(I)) - return AA->getLocation(SI); - if (LoadInst *LI = dyn_cast(I)) - return AA->getLocation(LI); - return AliasAnalysis::Location(); -} - void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, bool InsertInReadyList, - AliasAnalysis *AA) { + BoUpSLP *SLP) { assert(SD->isSchedulingEntity()); SmallVector WorkList; @@ -2704,14 +2745,14 @@ // Handle the memory dependencies. ScheduleData *DepDest = BundleMember->NextLoadStore; if (DepDest) { - AliasAnalysis::Location SrcLoc = getLocation(BundleMember->Inst, AA); + Instruction *SrcInst = BundleMember->Inst; + AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA); bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory(); while (DepDest) { assert(isInSchedulingRegion(DepDest)); if (SrcMayWrite || DepDest->Inst->mayWriteToMemory()) { - AliasAnalysis::Location DstLoc = getLocation(DepDest->Inst, AA); - if (!SrcLoc.Ptr || !DstLoc.Ptr || AA->alias(SrcLoc, DstLoc)) { + if (SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)) { DepDest->MemoryDependencies.push_back(BundleMember); BundleMember->Dependencies++; ScheduleData *DestBundle = DepDest->FirstInBundle; @@ -2779,7 +2820,7 @@ "scheduler and vectorizer have different opinion on what is a bundle"); SD->FirstInBundle->SchedulingPriority = Idx++; if (SD->isSchedulingEntity()) { - BS->calculateDependencies(SD, false, AA); + BS->calculateDependencies(SD, false, this); NumToSchedule++; } }