diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -78,21 +78,59 @@
 ///
 /// See docs/AliasAnalysis.html for more information on the specific meanings
 /// of these values.
-enum AliasResult : uint8_t {
-  /// The two locations do not alias at all.
-  ///
-  /// This value is arranged to convert to false, while all other values
-  /// convert to true. This allows a boolean context to convert the result to
-  /// a binary flag indicating whether there is the possibility of aliasing.
-  NoAlias = 0,
-  /// The two locations may or may not alias. This is the least precise result.
-  MayAlias,
-  /// The two locations alias, but only due to a partial overlap.
-  PartialAlias,
-  /// The two locations precisely alias each other.
-  MustAlias,
+class AliasResult {
+private:
+  static const int OffsetBits = 28;
+  static const int AliasBits = 3;
+  static_assert(AliasBits + 1 + OffsetBits <= 32,
+                "AliasResult size is intended to be 4 bytes!");
+
+  unsigned int Alias : AliasBits;
+  unsigned int HasOffset : 1;
+  signed int Offset : OffsetBits;
+
+public:
+  enum Result : uint8_t {
+    /// The two locations do not alias at all.
+    ///
+    /// This value is arranged to convert to false, while all other values
+    /// convert to true. This allows a boolean context to convert the result to
+    /// a binary flag indicating whether there is the possibility of aliasing.
+    NoAlias = 0,
+    /// The two locations may or may not alias. This is the least precise
+    /// result.
+    MayAlias,
+    /// The two locations alias, but only due to a partial overlap.
+    PartialAlias,
+    /// The two locations precisely alias each other.
+    MustAlias,
+  };
+  static_assert(MustAlias < (1 << AliasBits),
+                "Not enough bit field size for the enum!");
+
+  AliasResult() : HasOffset(false) {}
+  constexpr AliasResult(const Result &Alias)
+      : Alias(Alias), HasOffset(false), Offset(-1) {}
+
+  operator Result() const { return static_cast<Result>(Alias); }
+
+  constexpr bool hasOffset() const { return HasOffset; }
+  constexpr int32_t getOffset() const {
+    assert(HasOffset && "No offset!");
+    return Offset;
+  }
+  void setOffset(int32_t NewOffset) {
+    if (-(1 << (OffsetBits - 1)) <= NewOffset &&
+        NewOffset < (1 << (OffsetBits - 1))) {
+      HasOffset = true;
+      Offset = NewOffset;
+    }
+  }
 };
+static_assert(sizeof(AliasResult) == 4,
+              "AliasResult size is intended to be 4 bytes!");
+
 /// << operator for AliasResult.
 raw_ostream &operator<<(raw_ostream &OS, AliasResult AR);
@@ -344,16 +382,6 @@
 /// The information stored in an `AAQueryInfo` is currently limitted to the
 /// caches used by BasicAA, but can further be extended to fit other AA needs.
 class AAQueryInfo {
-  /// Storage for estimated relative offsets between two partially aliased
-  /// values. Used to optimize out redundant parts of loads/stores (in GVN/DSE).
-  /// These users cannot process quite complicated addresses (e.g. GEPs with
-  /// non-constant offsets). Used by BatchAAResults only.
-  bool CacheOffsets = false;
-  SmallDenseMap<std::pair<std::pair<const Value *, const Value *>,
-                          std::pair<uint64_t, uint64_t>>,
-                int64_t, 4>
-      ClobberOffsets;
-
 public:
   using LocPair = std::pair<MemoryLocation, MemoryLocation>;
   struct CacheEntry {
@@ -381,9 +409,7 @@
   /// assumption is disproven.
   SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
 
-  AAQueryInfo(bool CacheOffsets = false)
-      : CacheOffsets(CacheOffsets), ClobberOffsets(), AliasCache(),
-        IsCapturedCache() {}
+  AAQueryInfo() : AliasCache(), IsCapturedCache() {}
 
   /// Create a new AAQueryInfo based on this one, but with the cache cleared.
   /// This is used for recursive queries across phis, where cache results may
@@ -393,33 +419,6 @@
     NewAAQI.Depth = Depth;
     return NewAAQI;
   }
-
-  Optional<int64_t> getClobberOffset(const Value *Ptr1, const Value *Ptr2,
-                                     uint64_t Size1, uint64_t Size2) const {
-    assert(CacheOffsets && "Clobber offset cached in batch mode only!");
-    const bool Swapped = Ptr1 > Ptr2;
-    if (Swapped) {
-      std::swap(Ptr1, Ptr2);
-      std::swap(Size1, Size2);
-    }
-    const auto IOff = ClobberOffsets.find({{Ptr1, Ptr2}, {Size1, Size2}});
-    if (IOff != ClobberOffsets.end())
-      return Swapped ? -IOff->second : IOff->second;
-    return None;
-  }
-
-  void setClobberOffset(const Value *Ptr1, const Value *Ptr2, uint64_t Size1,
-                        uint64_t Size2, int64_t Offset) {
-    // Cache offset for batch mode only.
-    if (!CacheOffsets)
-      return;
-    if (Ptr1 > Ptr2) {
-      std::swap(Ptr1, Ptr2);
-      std::swap(Size1, Size2);
-      Offset = -Offset;
-    }
-    ClobberOffsets[{{Ptr1, Ptr2}, {Size1, Size2}}] = Offset;
-  }
 };
 
 class BatchAAResults;
@@ -862,8 +861,7 @@
   AAQueryInfo AAQI;
 
 public:
-  BatchAAResults(AAResults &AAR, bool CacheOffsets = false)
-      : AA(AAR), AAQI(CacheOffsets) {}
+  BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
   AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
     return AA.alias(LocA, LocB, AAQI);
   }
@@ -897,8 +895,6 @@
                  MemoryLocation(V2, LocationSize::precise(1))) ==
            AliasResult::MustAlias;
   }
-  Optional<int64_t> getClobberOffset(const MemoryLocation &LocA,
-                                     const MemoryLocation &LocB) const;
 };
 
 /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
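
Note on the new layout: AliasResult now packs the alias kind (AliasBits), a
has-offset flag, and a signed 28-bit relative offset into a single 4-byte
value, so it stays as cheap to copy and cache as the old enum. The standalone
sketch below is an illustration only, not the header above; PackedAliasResult
is a hypothetical mock that mirrors the packing so the observable behavior can
be tried outside LLVM. Note how setOffset quietly drops offsets that do not
fit in 28 signed bits, leaving hasOffset() false.

// Standalone mock of the packed representation (illustration only; the
// real class lives in AliasAnalysis.h above).
#include <cassert>
#include <cstdint>
#include <iostream>

class PackedAliasResult {
  static const int OffsetBits = 28;
  static const int AliasBits = 3;
  unsigned int Alias : AliasBits; // one of the Result enumerators
  unsigned int HasOffset : 1;     // set once a representable offset is stored
  signed int Offset : OffsetBits; // signed 28-bit relative offset

public:
  enum Result : uint8_t { NoAlias = 0, MayAlias, PartialAlias, MustAlias };

  PackedAliasResult(Result R) : Alias(R), HasOffset(false), Offset(-1) {}
  operator Result() const { return static_cast<Result>(Alias); }

  bool hasOffset() const { return HasOffset; }
  int32_t getOffset() const {
    assert(HasOffset && "No offset!");
    return Offset;
  }
  void setOffset(int32_t NewOffset) {
    // Record only offsets representable in 28 signed bits; ignore the rest.
    if (-(1 << (OffsetBits - 1)) <= NewOffset &&
        NewOffset < (1 << (OffsetBits - 1))) {
      HasOffset = true;
      Offset = NewOffset;
    }
  }
};

int main() {
  static_assert(sizeof(PackedAliasResult) == 4, "fits in four bytes");

  PackedAliasResult AR(PackedAliasResult::PartialAlias);
  AR.setOffset(-4); // representable: recorded
  std::cout << AR.hasOffset() << ' ' << AR.getOffset() << '\n'; // 1 -4

  PackedAliasResult Big(PackedAliasResult::PartialAlias);
  Big.setOffset(1 << 30); // outside the 28-bit range: silently ignored
  std::cout << Big.hasOffset() << '\n'; // 0
}

Dropping oversized offsets instead of asserting keeps setOffset safe to call
on values derived from arbitrary pointer arithmetic; callers then decide via
hasOffset() whether an offset was actually recorded.
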
diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h
--- a/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -35,18 +35,17 @@
 namespace llvm {
 
 class AAResults;
+class AliasResult;
 class AliasSetTracker;
-class BasicBlock;
-class LoadInst;
 class AnyMemSetInst;
 class AnyMemTransferInst;
+class BasicBlock;
+class LoadInst;
 class raw_ostream;
 class StoreInst;
 class VAArgInst;
 class Value;
 
-enum AliasResult : uint8_t;
-
 class AliasSet : public ilist_node<AliasSet> {
   friend class AliasSetTracker;
diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -299,8 +299,9 @@
     OptimizedAccessAlias = AR;
   }
 
-  void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false,
-                         Optional<AliasResult> AR = AliasResult::MayAlias) {
+  void setDefiningAccess(
+      MemoryAccess *DMA, bool Optimized = false,
+      Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias)) {
     if (!Optimized) {
       setOperand(0, DMA);
       return;
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -958,17 +958,6 @@
   return AAR;
 }
 
-Optional<int64_t>
-BatchAAResults::getClobberOffset(const MemoryLocation &LocA,
-                                 const MemoryLocation &LocB) const {
-  if (!LocA.Size.hasValue() || !LocB.Size.hasValue())
-    return None;
-  const Value *V1 = LocA.Ptr->stripPointerCastsForAliasAnalysis();
-  const Value *V2 = LocB.Ptr->stripPointerCastsForAliasAnalysis();
-  return AAQI.getClobberOffset(V1, V2, LocA.Size.getValue(),
-                               LocB.Size.getValue());
-}
-
 bool llvm::isNoAliasCall(const Value *V) {
   if (const auto *Call = dyn_cast<CallBase>(V))
    return Call->hasRetAttr(Attribute::NoAlias);
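
A side note on the MemorySSA.h hunk above: the default argument now spells out
AliasResult(AliasResult::MayAlias), presumably because an implicit conversion
sequence may contain at most one user-defined conversion. With AliasResult as
a class, going from the enumerator (type AliasResult::Result) to
Optional<AliasResult> would take two steps. The minimal sketch below
reproduces the constraint with hypothetical stand-ins (Opt for llvm::Optional,
Wrapper for AliasResult, Kind for AliasResult::Result):

#include <iostream>

enum Kind : unsigned char { MayAlias };

struct Wrapper {            // stands in for the new AliasResult class
  Kind K;
  Wrapper(Kind K) : K(K) {} // user-defined conversion #1: Kind -> Wrapper
};

template <typename T> struct Opt { // stands in for llvm::Optional, whose
  T V;                             // constructor takes T itself (it has no
  Opt(const T &V) : V(V) {}        // std::optional-style converting ctor)
};

// void broken(Opt<Wrapper> W = MayAlias) {}  // error: Kind -> Wrapper ->
//                                            // Opt<Wrapper> is two steps
void works(Opt<Wrapper> W = Wrapper(MayAlias)) { // one user-defined step:
  std::cout << (W.V.K == MayAlias) << '\n';      // Wrapper -> Opt<Wrapper>
}

int main() { works(); }
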
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1116,8 +1116,9 @@
     const Value *RightPtr = GEP1;
     LocationSize VLeftSize = V2Size;
     LocationSize VRightSize = V1Size;
+    const bool Swapped = Off.isNegative();
 
-    if (Off.isNegative()) {
+    if (Swapped) {
       // Swap if we have the situation where:
       // +                +
       // | BaseOffset     |
@@ -1134,16 +1135,16 @@
     if (Off.ult(LSize)) {
       // Conservatively drop processing if a phi was visited and/or offset is
       // too big.
+      AliasResult AR = AliasResult::PartialAlias;
       if (VisitedPhiBBs.empty() && VRightSize.hasValue() &&
-          Off.ule(INT64_MAX)) {
+          Off.ule(INT32_MAX) && (Off + VRightSize.getValue()).ule(LSize)) {
         // Memory referenced by right pointer is nested. Save the offset in
-        // cache.
-        const uint64_t RSize = VRightSize.getValue();
-        if ((Off + RSize).ule(LSize))
-          AAQI.setClobberOffset(LeftPtr, RightPtr, LSize, RSize,
-                                Off.getSExtValue());
+        // cache. Note that the offset is originally estimated as GEP1 - V2,
+        // but AliasResult stores the shift such that GEP1 + Offset = V2.
+        const int32_t OffsetVal = Off.getSExtValue();
+        AR.setOffset(Swapped ? OffsetVal : -OffsetVal);
       }
-      return AliasResult::PartialAlias;
+      return AR;
     }
     return AliasResult::NoAlias;
   }
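
To make the sign convention concrete: when a PartialAlias of LocA against LocB
carries an offset, the relationship is LocA.Ptr + getOffset() == LocB.Ptr, and
BasicAA only records it when, among other guards (no visited phis, known
sizes, offset at most INT32_MAX), the right-hand access nests entirely inside
the left-hand one. The sketch below models the byte ranges exercised by the
updated AliasAnalysisTest.cpp test further down (an 8-byte load at base+0 and
a 4-byte load at base+4); the variable names are illustrative only:

#include <cstdint>
#include <iostream>

int main() {
  // Byte-range model of the unit test below: Loc1 is an 8-byte access at
  // base+0, Loc2 a 4-byte access at base+4.
  const int64_t Loc1Start = 0, Loc1Size = 8;
  const int64_t Loc2Start = 4, Loc2Size = 4;

  // What alias(Loc1, Loc2).getOffset() reports: Loc1.Ptr + 4 == Loc2.Ptr.
  const int64_t Offset = Loc2Start - Loc1Start;

  // The nesting check BasicAA performs before recording the offset,
  // mirroring (Off + VRightSize.getValue()).ule(LSize) above.
  const bool Nested = Offset >= 0 && Offset + Loc2Size <= Loc1Size;

  std::cout << "offset=" << Offset << " nested=" << Nested << '\n';
  // Prints: offset=4 nested=1
}
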
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -286,7 +286,7 @@
   case Intrinsic::invariant_end:
   case Intrinsic::assume:
   case Intrinsic::experimental_noalias_scope_decl:
-    return {false, AliasResult::NoAlias};
+    return {false, AliasResult(AliasResult::NoAlias)};
   case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare:
   case Intrinsic::dbg_label:
@@ -305,7 +305,8 @@
 
   if (auto *DefLoad = dyn_cast<LoadInst>(DefInst))
     if (auto *UseLoad = dyn_cast_or_null<LoadInst>(UseInst))
-      return {!areLoadsReorderable(UseLoad, DefLoad), AliasResult::MayAlias};
+      return {!areLoadsReorderable(UseLoad, DefLoad),
+              AliasResult(AliasResult::MayAlias)};
 
   ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc);
   AR = isMustSet(I) ? AliasResult::MustAlias : AliasResult::MayAlias;
@@ -344,7 +345,7 @@
   const Instruction *Inst = nullptr;
   // The MemoryAccess we actually got called with, used to test local domination
   const MemoryAccess *OriginalAccess = nullptr;
-  Optional<AliasResult> AR = AliasResult::MayAlias;
+  Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias);
   bool SkipSelfAccess = false;
 
   UpwardsMemoryQuery() = default;
@@ -570,14 +571,14 @@
     for (MemoryAccess *Current : def_chain(Desc.Last)) {
       Desc.Last = Current;
       if (Current == StopAt || Current == SkipStopAt)
-        return {Current, false, AliasResult::MayAlias};
+        return {Current, false, AliasResult(AliasResult::MayAlias)};
 
       if (auto *MD = dyn_cast<MemoryDef>(Current)) {
         if (MSSA.isLiveOnEntryDef(MD))
-          return {MD, true, AliasResult::MustAlias};
+          return {MD, true, AliasResult(AliasResult::MustAlias)};
 
         if (!--*UpwardWalkLimit)
-          return {Current, true, AliasResult::MayAlias};
+          return {Current, true, AliasResult(AliasResult::MayAlias)};
 
         ClobberAlias CA =
             instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA);
@@ -591,7 +592,7 @@
 
     assert(isa<MemoryPhi>(Desc.Last) &&
            "Ended at a non-clobber that's not a phi?");
-    return {Desc.Last, false, AliasResult::MayAlias};
+    return {Desc.Last, false, AliasResult(AliasResult::MayAlias)};
   }
 
   void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
@@ -2490,8 +2491,9 @@
       StartingAccess->setOptimized(OptimizedAccess);
       if (MSSA->isLiveOnEntryDef(OptimizedAccess))
         StartingAccess->setOptimizedAccessType(None);
-      else if (Q.AR == AliasResult::MustAlias)
-        StartingAccess->setOptimizedAccessType(AliasResult::MustAlias);
+      else if (Q.AR && *Q.AR == AliasResult::MustAlias)
+        StartingAccess->setOptimizedAccessType(
+            AliasResult(AliasResult::MustAlias));
     } else
       OptimizedAccess = StartingAccess->getOptimized();
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -402,9 +402,9 @@
   }
 
   // If we hit a partial alias we may have a full overwrite
-  if (AAR == AliasResult::PartialAlias) {
-    int64_t Off = AA.getClobberOffset(Later, Earlier).getValueOr(0);
-    if (Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
+    int32_t Off = AAR.getOffset();
+    if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
       return OW_Complete;
   }
 
@@ -996,8 +996,8 @@
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
-      : F(F), AA(AA), BatchAA(AA, /*CacheOffsets =*/true), MSSA(MSSA), DT(DT),
-        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()) {}
+      : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
+        DL(F.getParent()->getDataLayout()) {}
 
   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                       DominatorTree &DT, PostDominatorTree &PDT,
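
The DSE hunk above now reads the offset straight off the AliasResult instead
of issuing a separate getClobberOffset query. Note the comparison also changed
from Off > 0 to Off >= 0: with hasOffset() as an explicit guard, a zero offset
is a genuine answer rather than the old getValueOr(0) "no data" default. A
self-contained sketch of the check follows; isCompleteOverwrite and its size
parameters are hypothetical stand-ins for the logic inside DSE's isOverwrite,
not the real signature:

#include <cstdint>
#include <iostream>

enum Result { NoAlias, MayAlias, PartialAlias, MustAlias };

struct MiniAliasResult { // minimal stand-in for the new AliasResult
  Result Kind;
  bool HasOffset;
  int32_t Offset;
};

// The earlier store is completely overwritten when it starts at a
// non-negative offset inside the later store and also ends within it.
bool isCompleteOverwrite(MiniAliasResult AAR, uint64_t EarlierSize,
                         uint64_t LaterSize) {
  if (AAR.Kind == PartialAlias && AAR.HasOffset) {
    int32_t Off = AAR.Offset;
    if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
      return true;
  }
  return false;
}

int main() {
  // A 4-byte earlier store at offset 4 inside an 8-byte later store: dead.
  std::cout << isCompleteOverwrite({PartialAlias, true, 4}, 4, 8) << '\n'; // 1
  // At offset 6 it would spill past the later store: kept.
  std::cout << isCompleteOverwrite({PartialAlias, true, 6}, 4, 8) << '\n'; // 0
}
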
diff --git a/llvm/unittests/Analysis/AliasAnalysisTest.cpp b/llvm/unittests/Analysis/AliasAnalysisTest.cpp
--- a/llvm/unittests/Analysis/AliasAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/AliasAnalysisTest.cpp
@@ -309,11 +309,11 @@
       %i2 = zext i32 %i to i64
       %i3 = getelementptr inbounds float, float* %arg, i64 %i2
       %i4 = bitcast float* %i3 to <2 x float>*
-      %L2 = load <2 x float>, <2 x float>* %i4, align 16
+      %L1 = load <2 x float>, <2 x float>* %i4, align 16
      %i7 = add nuw nsw i32 %i, 1
      %i8 = zext i32 %i7 to i64
      %i9 = getelementptr inbounds float, float* %arg, i64 %i8
-      %L1 = load float, float* %i9, align 4
+      %L2 = load float, float* %i9, align 4
      ret void
    }
  )",
@@ -324,18 +324,13 @@
 
   Function *F = M->getFunction("foo");
   const auto Loc1 = MemoryLocation::get(getInstructionByName(*F, "L1"));
-  auto Loc2 = MemoryLocation::get(getInstructionByName(*F, "L2"));
+  const auto Loc2 = MemoryLocation::get(getInstructionByName(*F, "L2"));
   auto &AA = getAAResults(*F);
 
-  BatchAAResults BatchAA(AA, /*CacheOffsets =*/true);
-  EXPECT_EQ(AliasResult::PartialAlias, BatchAA.alias(Loc1, Loc2));
-  EXPECT_EQ(-4, BatchAA.getClobberOffset(Loc1, Loc2).getValueOr(0));
-  EXPECT_EQ(4, BatchAA.getClobberOffset(Loc2, Loc1).getValueOr(0));
-
-  // Check that no offset returned for different size.
-  Loc2.Size = LocationSize::precise(42);
-  EXPECT_EQ(0, BatchAA.getClobberOffset(Loc1, Loc2).getValueOr(0));
+  const auto AR = AA.alias(Loc1, Loc2);
+  EXPECT_EQ(AR, AliasResult::PartialAlias);
+  EXPECT_EQ(4, AR.getOffset());
 }
 
 class AAPassInfraTest : public testing::Test {
diff --git a/llvm/unittests/Analysis/MemorySSATest.cpp b/llvm/unittests/Analysis/MemorySSATest.cpp
--- a/llvm/unittests/Analysis/MemorySSATest.cpp
+++ b/llvm/unittests/Analysis/MemorySSATest.cpp
@@ -1036,7 +1036,8 @@
   }
   for (LoadInst *V : {LA3, LA4}) {
     MemoryUse *MemUse = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(V));
-    EXPECT_EQ(MemUse->getOptimizedAccessType(), AliasResult::MustAlias)
+    EXPECT_EQ(MemUse->getOptimizedAccessType().getValue(),
+              AliasResult::MustAlias)
        << "Load " << I << " doesn't have the correct alias information";
    // EXPECT_EQ expands such that if we increment I above, it won't get
    // incremented except when we try to print the error message.
@@ -1085,7 +1086,8 @@
      EXPECT_EQ(MemDef->getOptimizedAccessType(), None)
          << "Store " << I << " doesn't have the correct alias information";
    else
-      EXPECT_EQ(MemDef->getOptimizedAccessType(), AliasResult::MustAlias)
+      EXPECT_EQ(MemDef->getOptimizedAccessType().getValue(),
+                AliasResult::MustAlias)
          << "Store " << I << " doesn't have the correct alias information";
    // EXPECT_EQ expands such that if we increment I above, it won't get
    // incremented except when we try to print the error message.
@@ -1119,7 +1121,8 @@
  unsigned I = 0;
  for (LoadInst *V : {LA1, LB1}) {
    MemoryUse *MemUse = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(V));
-    EXPECT_EQ(MemUse->getOptimizedAccessType(), AliasResult::MayAlias)
+    EXPECT_EQ(MemUse->getOptimizedAccessType().getValue(),
+              AliasResult::MayAlias)
        << "Load " << I << " doesn't have the correct alias information";
    // EXPECT_EQ expands such that if we increment I above, it won't get
    // incremented except when we try to print the error message.
@@ -1127,7 +1130,8 @@
  }
  for (LoadInst *V : {LA2, LB2}) {
    MemoryUse *MemUse = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(V));
-    EXPECT_EQ(MemUse->getOptimizedAccessType(), AliasResult::MustAlias)
+    EXPECT_EQ(MemUse->getOptimizedAccessType().getValue(),
+              AliasResult::MustAlias)
        << "Load " << I << " doesn't have the correct alias information";
    // EXPECT_EQ expands such that if we increment I above, it won't get
    // incremented except when we try to print the error message.
@@ -1187,13 +1191,15 @@
    EXPECT_EQ(MemDef->isOptimized(), true)
        << "Store " << I << " was not optimized";
    if (I == 1 || I == 3 || I == 4)
-      EXPECT_EQ(MemDef->getOptimizedAccessType(), AliasResult::MayAlias)
+      EXPECT_EQ(MemDef->getOptimizedAccessType().getValue(),
+                AliasResult::MayAlias)
          << "Store " << I << " doesn't have the correct alias information";
    else if (I == 0 || I == 2)
      EXPECT_EQ(MemDef->getOptimizedAccessType(), None)
          << "Store " << I << " doesn't have the correct alias information";
    else
-      EXPECT_EQ(MemDef->getOptimizedAccessType(), AliasResult::MustAlias)
+      EXPECT_EQ(MemDef->getOptimizedAccessType().getValue(),
+                AliasResult::MustAlias)
          << "Store " << I << " doesn't have the correct alias information";
    // EXPECT_EQ expands such that if we increment I above, it won't get
    // incremented except when we try to print the error message.
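
Why the MemorySSA tests switched to getValue(): llvm::Optional's comparison
operators deduce the same type T from both operands, so with the old enum,
Optional<AliasResult> == AliasResult::MustAlias deduced T = AliasResult on
both sides. Now the left side deduces the class while the right side yields
its nested Result enum, the deductions conflict, and the overload drops out,
so the tests unwrap the Optional first. A standalone sketch of the failure
mode with stand-in types (Opt for llvm::Optional, W for AliasResult, Kind for
AliasResult::Result):

template <typename T> struct Opt { // stand-in for llvm::Optional
  T V;
};

// Mirrors llvm::Optional's comparison: T is deduced from both operands.
template <typename T>
bool operator==(const Opt<T> &X, const T &Y) { return X.V == Y; }

enum Kind { MustAlias };

struct W { // stand-in for the new AliasResult class
  Kind K;
  W(Kind K) : K(K) {}
  operator Kind() const { return K; }
  bool operator==(const W &O) const { return K == O.K; }
};

int main() {
  Opt<W> O{W(MustAlias)};
  // bool B = (O == MustAlias); // error: T deduced as both W and Kind
  bool B = (O.V == W(MustAlias)); // fine once the Optional is unwrapped
  return B ? 0 : 1;
}
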