Index: llvm/include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -49,6 +49,50 @@
   static unsigned RuntimeMemoryCheckThreshold;
 };
 
+struct MemAccessInfo {
+  PointerIntPair<Value *, 1, bool> ValueAndBool;
+  MemAccessInfo(Value *V, bool B, const SCEV *PtrExpr)
+      : ValueAndBool(V, B), PtrExpr(PtrExpr) {}
+  MemAccessInfo() : ValueAndBool(nullptr) {}
+  MemAccessInfo(PointerIntPair<Value *, 1, bool> V) : ValueAndBool(V) {}
+
+  const SCEV *PtrExpr = nullptr;
+
+  Value *getPointer() { return ValueAndBool.getPointer(); }
+  bool getInt() { return ValueAndBool.getInt(); }
+  Value *getPointer() const { return ValueAndBool.getPointer(); }
+  bool getInt() const { return ValueAndBool.getInt(); }
+  const SCEV *getPtrExpr() const { return PtrExpr; }
+
+  bool operator<(const MemAccessInfo &RHS) const {
+    return ValueAndBool < RHS.ValueAndBool;
+  }
+  bool operator==(const MemAccessInfo &RHS) const {
+    return ValueAndBool == RHS.ValueAndBool && PtrExpr == RHS.PtrExpr;
+  }
+};
+
+template <> struct DenseMapInfo<MemAccessInfo> {
+  using Ty = DenseMapInfo<PointerIntPair<Value *, 1, bool>>;
+
+  static MemAccessInfo getEmptyKey() {
+    return MemAccessInfo(Ty::getEmptyKey());
+  }
+
+  static MemAccessInfo getTombstoneKey() {
+    return MemAccessInfo(Ty::getTombstoneKey());
+  }
+
+  static unsigned getHashValue(MemAccessInfo V) {
+    uintptr_t IV =
+        reinterpret_cast<uintptr_t>(V.ValueAndBool.getOpaqueValue());
+    return unsigned(IV) ^ unsigned(IV >> 9);
+  }
+
+  static bool isEqual(const MemAccessInfo &LHS, const MemAccessInfo &RHS) {
+    return LHS == RHS;
+  }
+};
+
 /// Checks memory dependences among accesses to the same underlying
 /// object to determine whether there vectorization is legal or not (and at
 /// which vectorization factor).
@@ -85,7 +129,6 @@
 ///
 class MemoryDepChecker {
 public:
-  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
   typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
   /// Set of potential dependent memory accesses.
   typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
@@ -173,11 +216,13 @@
   /// Register the location (instructions are given increasing numbers)
   /// of a write access.
-  void addAccess(StoreInst *SI);
+  void addAccess(StoreInst *SI,
+                 const DenseMap<Value *, const SCEV *> &SymbolicStrides);
 
   /// Register the location (instructions are given increasing numbers)
   /// of a write access.
-  void addAccess(LoadInst *LI);
+  void addAccess(LoadInst *LI,
+                 const DenseMap<Value *, const SCEV *> &SymbolicStrides);
 
   /// Check whether the dependencies between the accesses are safe.
   ///
@@ -243,9 +288,11 @@
   /// Return the program order indices for the access location (Ptr, IsWrite).
   /// Returns an empty ArrayRef if there are no accesses for the location.
   ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const {
-    auto I = Accesses.find({Ptr, IsWrite});
-    if (I != Accesses.end())
-      return I->second;
+    for (auto &KV : Accesses) {
+      if (KV.first.getPointer() == Ptr && KV.first.getInt() == IsWrite) {
+        return KV.second;
+      }
+    }
     return {};
   }
@@ -399,17 +446,20 @@
     unsigned DependencySetId;
     /// Holds the id of the disjoint alias set to which this pointer belongs.
     unsigned AliasSetId;
-    /// SCEV for the access.
+    /// SCEV for the computable bounds access.
     const SCEV *Expr;
+    /// SCEV for the access.
+    const SCEV *PtrExpr;
     /// True if the pointer expressions needs to be frozen after expansion.
     bool NeedsFreeze;
 
     PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
                 bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
-                const SCEV *Expr, bool NeedsFreeze)
+                const SCEV *Expr, const SCEV *PtrExpr, bool NeedsFreeze)
         : PointerValue(PointerValue), Start(Start), End(End),
           IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
-          AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
+          AliasSetId(AliasSetId), Expr(Expr), PtrExpr(PtrExpr),
+          NeedsFreeze(NeedsFreeze) {}
   };
 
   RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
@@ -427,8 +477,8 @@
   /// according to the assumptions that we've made during the analysis.
   /// The method might also version the pointer stride according to \p Strides,
   /// and add new predicates to \p PSE.
-  void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
-              bool WritePtr, unsigned DepSetId, unsigned ASId,
+  void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, const SCEV *Sc,
+              Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId,
               PredicatedScalarEvolution &PSE, bool NeedsFreeze);
 
   /// No run-time memory checking is necessary.
@@ -724,9 +774,16 @@
 /// memory access was defined. If that access was dead, or UB, then the
 /// result of this function is undefined.
 std::optional<int64_t>
+getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+             const SCEV *PtrScev, const Loop *Lp,
+             const DenseMap<Value *, const SCEV *> &StridesMap =
+                 DenseMap<Value *, const SCEV *>(),
+             bool Assume = false, bool ShouldCheckWrap = true);
+std::optional<int64_t>
 getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
              const Loop *Lp,
-             const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(),
+             const DenseMap<Value *, const SCEV *> &StridesMap =
+                 DenseMap<Value *, const SCEV *>(),
             bool Assume = false, bool ShouldCheckWrap = true);
 
 /// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -197,8 +197,9 @@
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
-                                    Type *AccessTy, bool WritePtr,
-                                    unsigned DepSetId, unsigned ASId,
+                                    const SCEV *Sc, Type *AccessTy,
+                                    bool WritePtr, unsigned DepSetId,
+                                    unsigned ASId,
                                     PredicatedScalarEvolution &PSE,
                                     bool NeedsFreeze) {
   ScalarEvolution *SE = PSE.getSE();
@@ -206,10 +207,10 @@
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
-  if (SE->isLoopInvariant(PtrExpr, Lp)) {
-    ScStart = ScEnd = PtrExpr;
+  if (SE->isLoopInvariant(Sc, Lp)) {
+    ScStart = ScEnd = Sc;
   } else {
-    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr);
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
     assert(AR && "Invalid addrec expression");
     const SCEV *Ex = PSE.getBackedgeTakenCount();
@@ -231,7 +232,7 @@
     }
   }
   assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
-  assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant");
+  assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
 
   // Add the size of the pointed element to ScEnd.
   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
@@ -239,8 +240,8 @@
   const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
-  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
-                        NeedsFreeze);
+  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc,
+                        PtrExpr, NeedsFreeze);
 }
 
 void RuntimePointerChecking::tryToCreateDiffCheck(
@@ -299,6 +300,7 @@
     CanUseDiffCheck = false;
     return;
   }
+
   const DataLayout &DL =
       SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
   unsigned AllocSize =
@@ -492,8 +494,8 @@
     if (Seen.count(I))
       continue;
 
-    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
-                                           Pointers[I].IsWritePtr);
+    MemAccessInfo Access(Pointers[I].PointerValue, Pointers[I].IsWritePtr,
+                         Pointers[I].PtrExpr);
 
     SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
     auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
@@ -618,7 +620,6 @@
 class AccessAnalysis {
 public:
   /// Read or write access location.
-  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
   typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
 
   AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
@@ -630,19 +631,20 @@
   }
 
   /// Register a load and whether it is only read from.
-  void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
-    Value *Ptr = const_cast<Value *>(Loc.Ptr);
+  void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly,
+               const SCEV *PtrScev) {
+    Value *Ptr = const_cast<Value *>(Loc.Ptr);
     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
-    Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
+    Accesses[MemAccessInfo(Ptr, false, PtrScev)].insert(AccessTy);
     if (IsReadOnly)
       ReadOnlyPtr.insert(Ptr);
   }
 
   /// Register a store.
-  void addStore(MemoryLocation &Loc, Type *AccessTy) {
-    Value *Ptr = const_cast<Value *>(Loc.Ptr);
+  void addStore(MemoryLocation &Loc, Type *AccessTy, const SCEV *PtrScev) {
+    Value *Ptr = const_cast<Value *>(Loc.Ptr);
     AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
-    Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
+    Accesses[MemAccessInfo(Ptr, true, PtrScev)].insert(AccessTy);
   }
 
   /// Check if we can emit a run-time no-alias check for \p Access.
@@ -655,7 +657,7 @@
   bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                             MemAccessInfo Access, Type *AccessTy,
                             const DenseMap<Value *, const SCEV *> &Strides,
-                            DenseMap<Value *, unsigned> &DepSetId,
+                            DenseMap<const SCEV *, unsigned> &DepSetId,
                             Loop *TheLoop, unsigned &RunningDepId,
                             unsigned ASId, bool ShouldCheckStride, bool Assume);
@@ -670,8 +672,8 @@
   /// Goes over all memory accesses, checks whether a RT check is needed
   /// and builds sets of dependent accesses.
-  void buildDependenceSets() {
-    processMemAccesses();
+  void buildDependenceSets(DenseMap<Value *, const SCEV *> &SymbolicStrides) {
+    processMemAccesses(SymbolicStrides);
   }
 
   /// Initial processing of memory accesses determined that we need to
@@ -694,7 +696,7 @@
   /// Go over all memory access and check whether runtime pointer checks
   /// are needed and build sets of dependency check candidates.
-  void processMemAccesses();
+  void processMemAccesses(DenseMap<Value *, const SCEV *> &SymbolicStrides);
 
   /// Map of all accesses. Values are the types used to access memory pointed to
   /// by the pointer.
@@ -741,40 +743,43 @@
 /// Check whether a pointer can participate in a runtime bounds check.
 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
 /// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
-                                const SCEV *PtrScev, Loop *L, bool Assume) {
+static const SCEV *hasComputableBounds(PredicatedScalarEvolution &PSE,
+                                       Value *Ptr, const SCEV *PtrScev,
+                                       Loop *L, bool Assume) {
   // The bounds for loop-invariant pointer is trivial.
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
-    return true;
+    return PtrScev;
 
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
 
   if (!AR && Assume)
     AR = PSE.getAsAddRec(Ptr);
 
-  if (!AR)
-    return false;
+  if (AR && AR->isAffine())
+    return AR;
 
-  return AR->isAffine();
+  return nullptr;
 }
 
 /// Check whether a pointer address cannot wrap.
-static bool isNoWrap(PredicatedScalarEvolution &PSE,
-                     const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
-                     Type *AccessTy, Loop *L) {
-  const SCEV *PtrScev = PSE.getSCEV(Ptr);
+static bool isNoWrap(PredicatedScalarEvolution &PSE, Value *Ptr,
+                     const SCEV *PtrScev, Type *AccessTy, Loop *L) {
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
 
-  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides).value_or(0);
+  int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, PtrScev, L).value_or(0);
   if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
     return true;
 
   return false;
 }
 
-static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
-                          function_ref<void(Value *)> AddPointer) {
+static void
+visitPointers(Value *StartPtr, const Loop &InnermostLoop,
+              PredicatedScalarEvolution &PSE,
+              const DenseMap<Value *, const SCEV *> &SymbolicStrides,
+              function_ref<void(Value *, const SCEV *)> AddPointer) {
   SmallPtrSet<Value *, 8> Visited;
   SmallVector<Value *, 8> WorkList;
   WorkList.push_back(StartPtr);
@@ -792,7 +797,7 @@
       for (const Use &Inc : PN->incoming_values())
         WorkList.push_back(Inc);
     } else
-      AddPointer(Ptr);
+      AddPointer(Ptr, replaceSymbolicStrideSCEV(PSE, SymbolicStrides, Ptr));
   }
 }
@@ -976,21 +981,20 @@
   return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
 }
 
-bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
-                                          MemAccessInfo Access, Type *AccessTy,
-                                          const DenseMap<Value *, const SCEV *> &StridesMap,
-                                          DenseMap<Value *, unsigned> &DepSetId,
-                                          Loop *TheLoop, unsigned &RunningDepId,
-                                          unsigned ASId, bool ShouldCheckWrap,
-                                          bool Assume) {
+bool AccessAnalysis::createCheckForAccess(
+    RuntimePointerChecking &RtCheck, MemAccessInfo Access, Type *AccessTy,
+    const DenseMap<Value *, const SCEV *> &StridesMap,
+    DenseMap<const SCEV *, unsigned> &DepSetId, Loop *TheLoop,
+    unsigned &RunningDepId, unsigned ASId, bool ShouldCheckWrap, bool Assume) {
   Value *Ptr = Access.getPointer();
 
   SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
       findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
 
-  for (auto &P : TranslatedPtrs) {
-    const SCEV *PtrExpr = get<0>(P);
-    if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
+  for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
+    const SCEV *PtrScev =
+        hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume);
+    if (!PtrScev)
       return false;
 
     // When we run after a failing dependency check we have to make sure
@@ -1000,9 +1004,9 @@
       if (TranslatedPtrs.size() > 1)
        return false;
 
-      if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
-        auto *Expr = PSE.getSCEV(Ptr);
-        if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+      if (!isNoWrap(PSE, Ptr, PtrScev, AccessTy, TheLoop)) {
+        if (!Assume || !isa<SCEVAddRecExpr>(PtrScev))
           return false;
         PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
       }
@@ -1012,14 +1016,12 @@
     if (TranslatedPtrs.size() == 1)
       TranslatedPtrs[0] = {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr),
                            false};
-  }
 
-  for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
     // The id of the dependence set.
     unsigned DepId;
 
     if (isDependencyCheckNeeded()) {
-      Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+      const SCEV *Leader = DepCands.getLeaderValue(Access).getPtrExpr();
       unsigned &LeaderId = DepSetId[Leader];
       if (!LeaderId)
         LeaderId = RunningDepId++;
@@ -1029,8 +1031,8 @@
       DepId = RunningDepId++;
 
     bool IsWrite = Access.getInt();
-    RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
-                   NeedsFreeze);
+    RtCheck.insert(TheLoop, Ptr, PtrExpr, PtrScev, AccessTy, IsWrite, DepId,
+                   ASId, PSE, NeedsFreeze);
     LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
   }
 
@@ -1062,7 +1064,7 @@
   // We assign consecutive id to access from different dependence sets.
   // Accesses within the same set don't need a runtime check.
   unsigned RunningDepId = 1;
-  DenseMap<Value *, unsigned> DepSetId;
+  DenseMap<const SCEV *, unsigned> DepSetId;
 
   SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;
 
@@ -1071,13 +1073,14 @@
     SmallVector<MemAccessInfo, 4> AccessInfos;
     for (const auto &A : AS) {
       Value *Ptr = A.getValue();
-      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
+      const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true, PtrScev));
 
       if (IsWrite)
         ++NumWritePtrChecks;
       else
         ++NumReadPtrChecks;
-      AccessInfos.emplace_back(Ptr, IsWrite);
+      AccessInfos.emplace_back(Ptr, IsWrite, PtrScev);
     }
 
     // We do not need runtime checks for this alias set, if there are no writes
@@ -1086,8 +1089,11 @@
         (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
       assert((AS.size() <= 1 ||
               all_of(AS,
-                     [this](auto AC) {
-                       MemAccessInfo AccessWrite(AC.getValue(), true);
+                     [this, &StridesMap](auto AC) {
+                       MemAccessInfo AccessWrite(
+                           AC.getValue(), true,
+                           replaceSymbolicStrideSCEV(PSE, StridesMap,
+                                                     AC.getValue()));
                        return DepCands.findValue(AccessWrite) == DepCands.end();
                      })) &&
              "Can only skip updating CanDoRT below, if all entries in AS "
@@ -1191,7 +1197,8 @@
   return CanDoRTIfNeeded;
 }
 
-void AccessAnalysis::processMemAccesses() {
+void AccessAnalysis::processMemAccesses(
+    DenseMap<Value *, const SCEV *> &SymbolicStrides) {
   // We process the set twice: first we process read-write pointers, last we
   // process read-only pointers. This allows us to skip dependence tests for
   // read-only pointers.
@@ -1249,13 +1256,16 @@
         bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
         if (UseDeferred && !IsReadOnlyPtr)
           continue;
+
+        const SCEV *PtrExpr =
+            replaceSymbolicStrideSCEV(PSE, SymbolicStrides, Ptr);
         // Otherwise, the pointer must be in the PtrAccessSet, either as a
         // read or a write.
         assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
-                S.count(MemAccessInfo(Ptr, false))) &&
+                S.count(MemAccessInfo(Ptr, false, PtrExpr))) &&
                "Alias-set pointer not in the access set?");
 
-        MemAccessInfo Access(Ptr, IsWrite);
+        MemAccessInfo Access(Ptr, IsWrite, PtrExpr);
         DepCands.insert(Access);
 
         // Memorize read-only pointers for later processing and skip them in
@@ -1366,11 +1376,11 @@
 }
 
 /// Check whether the access through \p Ptr has a constant stride.
-std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
-                                          Type *AccessTy, Value *Ptr,
-                                          const Loop *Lp,
-                                          const DenseMap<Value *, const SCEV *> &StridesMap,
-                                          bool Assume, bool ShouldCheckWrap) {
+std::optional<int64_t>
+llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+                   const SCEV *PtrScev, const Loop *Lp,
+                   const DenseMap<Value *, const SCEV *> &StridesMap,
+                   bool Assume, bool ShouldCheckWrap) {
   Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -1380,8 +1390,6 @@
     return std::nullopt;
   }
 
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
-
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (Assume && !AR)
     AR = PSE.getAsAddRec(Ptr);
@@ -1465,6 +1473,20 @@
   return std::nullopt;
 }
 
+/// Check whether the access through \p Ptr has a constant stride.
+std::optional<int64_t>
+llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+                   const Loop *Lp,
+                   const DenseMap<Value *, const SCEV *> &StridesMap,
+                   bool Assume, bool ShouldCheckWrap) {
+  Type *Ty = Ptr->getType();
+  assert(Ty->isPointerTy() && "Unexpected non-ptr");
+
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+  return ::getPtrStride(PSE, AccessTy, Ptr, PtrScev, Lp, StridesMap, Assume,
+                        ShouldCheckWrap);
+}
+
 std::optional<int64_t> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
                                              Type *ElemTyB, Value *PtrB,
                                              const DataLayout &DL,
@@ -1587,19 +1609,23 @@
   return Diff && *Diff == 1;
 }
 
-void MemoryDepChecker::addAccess(StoreInst *SI) {
-  visitPointers(SI->getPointerOperand(), *InnermostLoop,
-                [this, SI](Value *Ptr) {
-                  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
+void MemoryDepChecker::addAccess(
+    StoreInst *SI, const DenseMap<Value *, const SCEV *> &SymbolicStrides) {
+  visitPointers(SI->getPointerOperand(), *InnermostLoop, PSE, SymbolicStrides,
+                [this, SI](Value *Ptr, const SCEV *PtrScev) {
+                  Accesses[MemAccessInfo(Ptr, true, PtrScev)].push_back(
+                      AccessIdx);
                   InstMap.push_back(SI);
                   ++AccessIdx;
                 });
 }
 
-void MemoryDepChecker::addAccess(LoadInst *LI) {
-  visitPointers(LI->getPointerOperand(), *InnermostLoop,
-                [this, LI](Value *Ptr) {
-                  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
+void MemoryDepChecker::addAccess(
+    LoadInst *LI, const DenseMap<Value *, const SCEV *> &SymbolicStrides) {
+  visitPointers(LI->getPointerOperand(), *InnermostLoop, PSE, SymbolicStrides,
+                [this, LI](Value *Ptr, const SCEV *PtrScev) {
+                  Accesses[MemAccessInfo(Ptr, false, PtrScev)].push_back(
+                      AccessIdx);
                   InstMap.push_back(LI);
                   ++AccessIdx;
                 });
 }
@@ -1817,10 +1843,14 @@
     const DenseMap<Value *, const SCEV *> &Strides) {
   assert(AIdx < BIdx && "Must pass arguments in program order");
 
-  auto [APtr, AIsWrite] = A;
-  auto [BPtr, BIsWrite] = B;
+  auto [AAccess, APtrExpr] = A;
+  auto [BAccess, BPtrExpr] = B;
   Type *ATy = getLoadStoreType(InstMap[AIdx]);
   Type *BTy = getLoadStoreType(InstMap[BIdx]);
+  auto APtr = AAccess.getPointer();
+  auto BPtr = BAccess.getPointer();
+  auto AIsWrite = AAccess.getInt();
+  auto BIsWrite = BAccess.getInt();
 
   // Two reads are independent.
   if (!AIsWrite && !BIsWrite)
@@ -2023,11 +2053,11 @@
 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                    MemAccessInfoList &CheckDeps,
                                    const DenseMap<Value *, const SCEV *> &Strides) {
-
   MinDepDistBytes = -1;
-  SmallPtrSet<MemAccessInfo, 8> Visited;
+  SmallPtrSet<const MemAccessInfo *, 8> Visited;
+
   for (MemAccessInfo CurAccess : CheckDeps) {
-    if (Visited.count(CurAccess))
+    if (Visited.count(&CurAccess))
       continue;
 
     // Get the relevant memory access set.
@@ -2042,7 +2072,7 @@
     // Check every access pair.
     while (AI != AE) {
-      Visited.insert(*AI);
+      Visited.insert(&*AI);
       bool AIIsWrite = AI->getInt();
       // Check loads only against next equivalent class, but stores also against
       // other stores in the same equivalence class - to the same address.
@@ -2099,13 +2129,15 @@
 SmallVector<Instruction *, 4>
 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
-  MemAccessInfo Access(Ptr, isWrite);
-  auto &IndexVector = Accesses.find(Access)->second;
-
   SmallVector<Instruction *, 4> Insts;
-  transform(IndexVector,
-            std::back_inserter(Insts),
-            [&](unsigned Idx) { return this->InstMap[Idx]; });
+  for (auto &KV : Accesses) {
+    if (KV.first.getPointer() == Ptr) {
+      auto &IndexVector = KV.second;
+
+      transform(IndexVector, std::back_inserter(Insts),
+                [&](unsigned Idx) { return this->InstMap[Idx]; });
+    }
+  }
   return Insts;
 }
@@ -2237,9 +2269,10 @@
       }
       NumLoads++;
       Loads.push_back(Ld);
-      DepChecker->addAccess(Ld);
       if (EnableMemAccessVersioningOfLoop)
         collectStridedAccess(Ld);
+
+      DepChecker->addAccess(Ld, SymbolicStrides);
       continue;
     }
@@ -2261,9 +2294,11 @@
       }
      NumStores++;
      Stores.push_back(St);
-      DepChecker->addAccess(St);
+
      if (EnableMemAccessVersioningOfLoop)
        collectStridedAccess(St);
+
+      DepChecker->addAccess(St, SymbolicStrides);
     }
   } // Next instr.
 } // Next block.
@@ -2321,11 +2356,12 @@
     if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
       Loc.AATags.TBAA = nullptr;
 
-    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
-                  [&Accesses, AccessTy, Loc](Value *Ptr) {
-                    MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
-                    Accesses.addStore(NewLoc, AccessTy);
-                  });
+    visitPointers(
+        const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE, SymbolicStrides,
+        [&Accesses, AccessTy, Loc](Value *Ptr, const SCEV *PtrScev) {
+          MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
+          Accesses.addStore(NewLoc, AccessTy, PtrScev);
+        });
   }
 }
 
@@ -2370,10 +2406,10 @@
     if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
       Loc.AATags.TBAA = nullptr;
 
-    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
-                  [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
+    visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, *PSE, SymbolicStrides,
+                  [&Accesses, AccessTy, Loc,
+                   IsReadOnlyPtr](Value *Ptr, const SCEV *PtrScev) {
                     MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
-                    Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
+                    Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr, PtrScev);
                   });
   }
 
@@ -2387,7 +2423,7 @@
   // Build dependence sets and check whether we need a runtime pointer bounds
   // check.
-  Accesses.buildDependenceSets();
+  Accesses.buildDependenceSets(SymbolicStrides);
 
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
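The functional core of the C++ changes above is that access bookkeeping is now keyed by (pointer value, is-write, stride-rewritten SCEV) instead of (pointer value, is-write) alone, which is also why DepSetId changes from DenseMap<Value *, unsigned> to DenseMap<const SCEV *, unsigned>. Below is a minimal standalone sketch of that keying change in plain C++; Value, Expr, AccessKey and the hash are illustrative stand-ins, not LLVM's types (they model PointerIntPair<Value *, 1, bool> plus the extra SCEV field):

  #include <cstddef>
  #include <cstdint>
  #include <iostream>
  #include <unordered_map>

  struct Value {}; // stand-in for llvm::Value
  struct Expr {};  // stand-in for llvm::SCEV

  // Models the patched MemAccessInfo: the old (pointer, is-write) key
  // widened with the canonical address expression.
  struct AccessKey {
    Value *Ptr;
    bool IsWrite;
    const Expr *PtrExpr;
    bool operator==(const AccessKey &O) const {
      return Ptr == O.Ptr && IsWrite == O.IsWrite && PtrExpr == O.PtrExpr;
    }
  };

  struct AccessKeyHash {
    std::size_t operator()(const AccessKey &K) const {
      std::uintptr_t H = reinterpret_cast<std::uintptr_t>(K.Ptr) ^
                         (reinterpret_cast<std::uintptr_t>(K.PtrExpr) >> 4);
      return std::size_t(H) ^ (K.IsWrite ? 0x9e3779b9u : 0u);
    }
  };

  int main() {
    Value P;     // one IR pointer value
    Expr E1, E2; // two different stride-rewritten address expressions

    std::unordered_map<AccessKey, unsigned, AccessKeyHash> DepSetId;
    unsigned RunningDepId = 1;

    // Same pointer and is-write bit, different SCEVs: under the old key
    // both lookups would hit one entry; under the widened key each
    // address expression gets its own dependence-set id.
    for (const Expr *E : {&E1, &E2}) {
      auto [It, Inserted] = DepSetId.try_emplace({&P, true, E}, RunningDepId);
      if (Inserted)
        ++RunningDepId;
      std::cout << "dep set id: " << It->second << '\n'; // prints 1, then 2
    }
  }

The same idea appears in the real patch where createCheckForAccess keys DepSetId by DepCands.getLeaderValue(Access).getPtrExpr() instead of by .getPointer().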
Index: llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
===================================================================
--- llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
+++ llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -23,6 +23,42 @@
 ; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
 ; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
 ; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Grouped accesses:
@@ -139,6 +175,26 @@
 ; CHECK-NEXT:         store double %val, ptr %gep.iv.101.i64, align 8 ->
 ; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
 ; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %ld.i64 = load i64, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizableButPreventsForwarding:
+; CHECK-NEXT:         %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store double %val, ptr %gep.iv.101.i64, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       ForwardButPreventsForwarding:
+; CHECK-NEXT:         store double %val, ptr %gep.iv.101.i64, align 8 ->
+; CHECK-NEXT:         %ld.i64 = load i64, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         store double %val, ptr %gep.iv.101.i64, align 8 ->
+; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Grouped accesses:
Index: llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
===================================================================
--- llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
+++ llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types_opaque_ptr.ll
@@ -22,6 +22,42 @@
 ; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
 ; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
 ; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Forward:
+; CHECK-NEXT:         %ld.f32 = load float, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store float %val, ptr %gep.iv.min.100, align 8 ->
+; CHECK-NEXT:         store i32 %indvars.iv.i32, ptr %gep.iv, align 8
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Grouped accesses:
@@ -138,6 +174,26 @@
 ; CHECK-NEXT:         store double %val, ptr %gep.iv.101, align 8 ->
 ; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
 ; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %ld.i64 = load i64, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizableButPreventsForwarding:
+; CHECK-NEXT:         %ld.f64 = load double, ptr %gep.iv, align 8 ->
+; CHECK-NEXT:         store double %val, ptr %gep.iv.101, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       ForwardButPreventsForwarding:
+; CHECK-NEXT:         store double %val, ptr %gep.iv.101, align 8 ->
+; CHECK-NEXT:         %ld.i64 = load i64, ptr %gep.iv, align 8
+; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         store double %val, ptr %gep.iv.101, align 8 ->
+; CHECK-NEXT:         store i32 %ld.i64.i32, ptr %gep.iv.n, align 8
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Grouped accesses:
Index: llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll
===================================================================
--- llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll
+++ llvm/test/Analysis/LoopAccessAnalysis/max_safe_dep_dist_non_unit_stride.ll
@@ -16,6 +16,14 @@
 ; CHECK-NEXT:         store i32 %0, ptr %arrayidx2, align 4 ->
 ; CHECK-NEXT:         %1 = load i32, ptr %arrayidx5, align 4
 ; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store i32 %0, ptr %arrayidx2, align 4 ->
+; CHECK-NEXT:         %1 = load i32, ptr %arrayidx5, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT:       BackwardVectorizable:
+; CHECK-NEXT:         store i32 %0, ptr %arrayidx2, align 4 ->
+; CHECK-NEXT:         %1 = load i32, ptr %arrayidx5, align 4
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Grouped accesses:
 ; CHECK-EMPTY:
Index: llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll
===================================================================
--- llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll
+++ llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll
@@ -209,6 +209,10 @@
 ; CHECK-NEXT:         %v8 = load double, ptr %arrayidx, align 8 ->
 ; CHECK-NEXT:         store double %mul16, ptr %ptr.2, align 8
 ; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %v8 = load double, ptr %arrayidx, align 8 ->
+; CHECK-NEXT:         store double %mul16, ptr %ptr.2, align 8
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Check 0:
 ; CHECK-NEXT:       Comparing group ([[GROUP_C:.+]]):
@@ -286,6 +290,10 @@
 ; CHECK-NEXT:         %v8 = load double, ptr %arrayidx, align 8 ->
 ; CHECK-NEXT:         store double %mul16, ptr %ptr.3, align 8
 ; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %v8 = load double, ptr %arrayidx, align 8 ->
+; CHECK-NEXT:         store double %mul16, ptr %ptr.3, align 8
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Check 0:
 ; CHECK-NEXT:       Comparing group ([[GROUP_C:.+]]):
@@ -436,6 +444,14 @@
 ; CHECK-NEXT:         store i16 %lv, ptr %A, align 1 ->
 ; CHECK-NEXT:         %lv2 = load i16, ptr %A, align 1
 ; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %lv3 = load i16, ptr %c.sink, align 2 ->
+; CHECK-NEXT:         store i16 %add, ptr %c.sink, align 1
+; CHECK-EMPTY:
+; CHECK-NEXT:       Unknown:
+; CHECK-NEXT:         %lv3 = load i16, ptr %c.sink, align 2 ->
+; CHECK-NEXT:         store i16 %add, ptr %c.sink, align 1
+; CHECK-EMPTY:
 ; CHECK-NEXT:     Run-time memory checks:
 ; CHECK-NEXT:     Check 0:
 ; CHECK-NEXT:       Comparing group ([[GROUP_A:.+]]):
Index: llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
===================================================================
--- llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -110,8 +110,8 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:     SCEV assumptions:
-; CHECK-NEXT:     Equal predicate: %stride.2 == 1
 ; CHECK-NEXT:     Equal predicate: %stride.1 == 1
+; CHECK-NEXT:     Equal predicate: %stride.2 == 1
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Expressions re-written:
 ; CHECK-NEXT:     [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
Index: llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll
===================================================================
--- llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll
+++ llvm/test/Transforms/LoopLoadElim/symbolic-stride.ll
@@ -320,8 +320,8 @@
 ;
 ; DEFAULT-LABEL: @two_strides(
 ; DEFAULT-NEXT:  for.body.lver.check:
-; DEFAULT-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE_2:%.*]], 1
-; DEFAULT-NEXT:    [[IDENT_CHECK1:%.*]] = icmp ne i64 [[STRIDE_1:%.*]], 1
+; DEFAULT-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE_1:%.*]], 1
+; DEFAULT-NEXT:    [[IDENT_CHECK1:%.*]] = icmp ne i64 [[STRIDE_2:%.*]], 1
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = or i1 [[IDENT_CHECK]], [[IDENT_CHECK1]]
 ; DEFAULT-NEXT:    br i1 [[TMP0]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; DEFAULT:       for.body.ph.lver.orig:
Index: llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll
+++ llvm/test/Transforms/LoopVectorize/runtime-check-pointer-element-type.ll
@@ -9,12 +9,8 @@
 ; addition of the size of the element type (a pointer) for the end bound.
 
 define void @test(i64 %arg, i32 %arg1, ptr %base) {
-; CHECK: LAA: Adding RT check for range:
-; CHECK-NEXT: Start: ((8 * (zext i32 (-1 + %arg1) to i64)) + (8 * (1 smin %arg)) + (-8 * %arg) + %base)
-; CHECK-SAME: End: (8 + (8 * (zext i32 (-1 + %arg1) to i64)) + %base)
-; CHECK-NEXT: LAA: Adding RT check for range:
-; CHECK-NEXT: Start: ((8 * (1 smin %arg)) + %base)
-; CHECK-SAME: End: (8 + (8 * %arg) + %base)
+; CHECK: LV: Interleaving to reduce branch cost.
+; CHECK-NEXT: Calculating cost of runtime checks:
 
 ; CHECK: vector.body
Index: llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll
+++ llvm/test/Transforms/LoopVectorize/strided-accesses-interleave-only.ll
@@ -5,15 +5,18 @@
 ; CHECK-LABEL: define void @test_variable_stride
 ; CHECK-SAME: (ptr [[DST:%.*]], i32 [[SCALE:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[SCALE]], 1
+; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP0]], [[SCALE]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP1]], [[SCALE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP1]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP3]]
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP4]], align 2
@@ -25,7 +28,7 @@
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 1000, 1000
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
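One caveat the duplicated Forward:/BackwardVectorizable:/Unknown: records in the updated tests appear to capture: in areDepsSafe, CurAccess is a by-value loop variable, so Visited.count(&CurAccess) compares the address of a local copy against the element addresses inserted via Visited.insert(&*AI) and can never match, meaning the visited set no longer suppresses repeat visits. A standalone sketch of why pointer-identity deduplication fails for loop-local copies (plain C++; the names are illustrative, not the patch's code):

  #include <iostream>
  #include <unordered_set>
  #include <vector>

  int main() {
    // Two logically identical entries that dedup should visit once.
    std::vector<int> CheckDeps{10, 10, 20};
    std::unordered_set<const int *> Visited;

    for (int Cur : CheckDeps) {      // Cur is a stack copy of the element.
      if (Visited.count(&Cur))       // Never true: &Cur is the copy's
        continue;                    // address, not a stored element's.
      Visited.insert(&CheckDeps[0]); // Mirrors Visited.insert(&*AI): the
                                     // address of a container element.
      std::cout << "processed " << Cur << '\n'; // prints all three entries
    }
  }

If value-based deduplication is the intent, a set keyed by the MemAccessInfo value itself, i.e. its (pointer, is-write, SCEV) triple, would restore the old skip behavior.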