diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -344,6 +344,16 @@
 /// The information stored in an `AAQueryInfo` is currently limitted to the
 /// caches used by BasicAA, but can further be extended to fit other AA needs.
 class AAQueryInfo {
+  /// Storage for estimated relative offsets between two partially aliased
+  /// values. Used to optimize out redundant parts of loads/stores (in GVN/DSE).
+  /// These users cannot process quite complicated addresses (e.g. GEPs with
+  /// non-constant offsets). Used by BatchAAResults only.
+  bool CacheOffsets = false;
+  SmallDenseMap<std::pair<std::pair<const Value *, const Value *>,
+                          std::pair<uint64_t, uint64_t>>,
+                int64_t, 4>
+      ClobberOffsets;
+
 public:
   using LocPair = std::pair<MemoryLocation, MemoryLocation>;
   struct CacheEntry {
@@ -371,7 +381,9 @@
   /// assumption is disproven.
   SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
 
-  AAQueryInfo() : AliasCache(), IsCapturedCache() {}
+  AAQueryInfo(bool CacheOffsets = false)
+      : CacheOffsets(CacheOffsets), ClobberOffsets(), AliasCache(),
+        IsCapturedCache() {}
 
   /// Create a new AAQueryInfo based on this one, but with the cache cleared.
   /// This is used for recursive queries across phis, where cache results may
@@ -381,6 +393,33 @@
     NewAAQI.Depth = Depth;
     return NewAAQI;
   }
+
+  Optional<int64_t> getClobberOffset(const Value *Ptr1, const Value *Ptr2,
+                                     uint64_t Size1, uint64_t Size2) const {
+    assert(CacheOffsets && "Clobber offset cached in batch mode only!");
+    const bool Swapped = Ptr1 > Ptr2;
+    if (Swapped) {
+      std::swap(Ptr1, Ptr2);
+      std::swap(Size1, Size2);
+    }
+    const auto IOff = ClobberOffsets.find({{Ptr1, Ptr2}, {Size1, Size2}});
+    if (IOff != ClobberOffsets.end())
+      return Swapped ? -IOff->second : IOff->second;
+    return None;
+  }
+
+  void setClobberOffset(const Value *Ptr1, const Value *Ptr2, uint64_t Size1,
+                        uint64_t Size2, int64_t Offset) {
+    // Cache offset for batch mode only.
+    if (!CacheOffsets)
+      return;
+    if (Ptr1 > Ptr2) {
+      std::swap(Ptr1, Ptr2);
+      std::swap(Size1, Size2);
+      Offset = -Offset;
+    }
+    ClobberOffsets[{{Ptr1, Ptr2}, {Size1, Size2}}] = Offset;
+  }
 };
 
 class BatchAAResults;
@@ -823,7 +862,8 @@
   AAQueryInfo AAQI;
 
 public:
-  BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
+  BatchAAResults(AAResults &AAR, bool CacheOffsets = false)
+      : AA(AAR), AAQI(CacheOffsets) {}
   AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
     return AA.alias(LocA, LocB, AAQI);
   }
@@ -856,6 +896,8 @@
     return alias(MemoryLocation(V1, LocationSize::precise(1)),
                  MemoryLocation(V2, LocationSize::precise(1))) == MustAlias;
   }
+  Optional<int64_t> getClobberOffset(const MemoryLocation &LocA,
+                                     const MemoryLocation &LocB) const;
 };
 
 /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -958,6 +958,17 @@
   return AAR;
 }
 
+Optional<int64_t>
+BatchAAResults::getClobberOffset(const MemoryLocation &LocA,
+                                 const MemoryLocation &LocB) const {
+  if (!LocA.Size.hasValue() || !LocB.Size.hasValue())
+    return None;
+  const Value *V1 = LocA.Ptr->stripPointerCastsForAliasAnalysis();
+  const Value *V2 = LocB.Ptr->stripPointerCastsForAliasAnalysis();
+  return AAQI.getClobberOffset(V1, V2, LocA.Size.getValue(),
+                               LocB.Size.getValue());
+}
+
 bool llvm::isNoAliasCall(const Value *V) {
   if (const auto *Call = dyn_cast<CallBase>(V))
     return Call->hasRetAttr(Attribute::NoAlias);
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1144,24 +1144,43 @@
   // that the objects are partially overlapping.  If the difference is
   // greater, we know they do not overlap.
   if (DecompGEP1.Offset != 0 && DecompGEP1.VarIndices.empty()) {
-    if (DecompGEP1.Offset.sge(0)) {
-      if (V2Size.hasValue()) {
-        if (DecompGEP1.Offset.ult(V2Size.getValue()))
-          return PartialAlias;
-        return NoAlias;
-      }
-    } else {
-      // We have the situation where:
+    APInt &Off = DecompGEP1.Offset;
+
+    // Initialize for Off >= 0 (V2 <= GEP1) case.
+    const Value *LeftPtr = V2;
+    const Value *RightPtr = GEP1;
+    LocationSize VLeftSize = V2Size;
+    LocationSize VRightSize = V1Size;
+
+    if (Off.isNegative()) {
+      // Swap if we have the situation where:
       // +                +
       // | BaseOffset     |
       // ---------------->|
       // |-->V1Size       |-------> V2Size
       // GEP1             V2
-      if (V1Size.hasValue()) {
-        if ((-DecompGEP1.Offset).ult(V1Size.getValue()))
-          return PartialAlias;
-        return NoAlias;
+      std::swap(LeftPtr, RightPtr);
+      std::swap(VLeftSize, VRightSize);
+      Off = -Off;
+    }
+
+    if (VLeftSize.hasValue()) {
+      const uint64_t LSize = VLeftSize.getValue();
+      if (Off.ult(LSize)) {
+        // Conservatively drop processing if a phi was visited and/or offset is
+        // too big.
+        if (VisitedPhiBBs.empty() && VRightSize.hasValue() &&
+            Off.ule(INT64_MAX)) {
+          // Memory referenced by right pointer is nested. Save the offset in
+          // cache.
+          const uint64_t RSize = VRightSize.getValue();
+          if ((Off + RSize).ule(LSize))
+            AAQI.setClobberOffset(LeftPtr, RightPtr, LSize, RSize,
+                                  Off.getSExtValue());
+        }
+        return PartialAlias;
       }
+      return NoAlias;
     }
   }
 
diff --git a/llvm/unittests/Analysis/AliasAnalysisTest.cpp b/llvm/unittests/Analysis/AliasAnalysisTest.cpp
--- a/llvm/unittests/Analysis/AliasAnalysisTest.cpp
+++ b/llvm/unittests/Analysis/AliasAnalysisTest.cpp
@@ -298,6 +298,46 @@
   EXPECT_EQ(MayAlias, BatchAA.alias(ANextLoc, BNextLoc));
 }
 
+// Check that two aliased GEPs with non-constant offsets are correctly
+// analyzed and their relative offset can be requested from AA.
+TEST_F(AliasAnalysisTest, PartialAliasOffset) {
+  LLVMContext C;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M = parseAssemblyString(R"(
+    define void @foo(float* %arg, i32 %i) {
+    bb:
+      %i2 = zext i32 %i to i64
+      %i3 = getelementptr inbounds float, float* %arg, i64 %i2
+      %i4 = bitcast float* %i3 to <2 x float>*
+      %L2 = load <2 x float>, <2 x float>* %i4, align 16
+      %i7 = add nuw nsw i32 %i, 1
+      %i8 = zext i32 %i7 to i64
+      %i9 = getelementptr inbounds float, float* %arg, i64 %i8
+      %L1 = load float, float* %i9, align 4
+      ret void
+    }
+  )",
+                                                  Err, C);
+
+  if (!M)
+    Err.print("PartialAliasOffset", errs());
+
+  Function *F = M->getFunction("foo");
+  const auto Loc1 = MemoryLocation::get(getInstructionByName(*F, "L1"));
+  auto Loc2 = MemoryLocation::get(getInstructionByName(*F, "L2"));
+
+  auto &AA = getAAResults(*F);
+
+  BatchAAResults BatchAA(AA, /*CacheOffsets =*/true);
+  EXPECT_EQ(PartialAlias, BatchAA.alias(Loc1, Loc2));
+  EXPECT_EQ(-4, BatchAA.getClobberOffset(Loc1, Loc2).getValueOr(0));
+  EXPECT_EQ(4, BatchAA.getClobberOffset(Loc2, Loc1).getValueOr(0));
+
+  // Check that no offset is returned for a different access size.
+  Loc2.Size = LocationSize::precise(42);
+  EXPECT_EQ(0, BatchAA.getClobberOffset(Loc1, Loc2).getValueOr(0));
+}
+
 class AAPassInfraTest : public testing::Test {
 protected:
   LLVMContext C;
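
Note (illustrative, not part of the patch): the sketch below shows how a client pass such as GVN or DSE could consume the new BatchAAResults::getClobberOffset() API added above. The helper getLoadOffsetInClobber() and the surrounding usage are hypothetical names invented for this example; only BatchAAResults, its CacheOffsets constructor argument, and getClobberOffset() come from the patch.

// Illustrative sketch only -- assumes the patch above is applied.
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: if the bytes read by \p Load are fully covered by the
// bytes written by \p Store, return the byte offset of the load within the
// stored value; otherwise return None.
static Optional<int64_t> getLoadOffsetInClobber(BatchAAResults &BatchAA,
                                                LoadInst *Load,
                                                StoreInst *Store) {
  const MemoryLocation LoadLoc = MemoryLocation::get(Load);
  const MemoryLocation StoreLoc = MemoryLocation::get(Store);

  // BasicAA records an offset only for PartialAlias results, and only when
  // the BatchAAResults was constructed with CacheOffsets == true.
  if (BatchAA.alias(LoadLoc, StoreLoc) != PartialAlias)
    return None;

  // An offset is cached only when one access is nested inside the other.
  // Queried as (StoreLoc, LoadLoc), a non-negative value means the load
  // starts that many bytes into the stored bytes; a negative value would
  // mean the store is nested inside the load instead.
  Optional<int64_t> Off = BatchAA.getClobberOffset(StoreLoc, LoadLoc);
  if (Off && *Off >= 0)
    return Off;
  return None;
}

// Typical use inside a pass, with AA being the function's AAResults and
// DepStore a previously identified clobbering store of Load:
//
//   BatchAAResults BatchAA(AA, /*CacheOffsets=*/true);
//   if (Optional<int64_t> Off = getLoadOffsetInClobber(BatchAA, Load, DepStore))
//     ...forward the stored value to the load, taking the bytes at *Off...

For comparison with the unit test above: getClobberOffset(Loc2, Loc1) == 4 because the scalar load L1 reads bytes [4, 8) of the 8-byte region read by the vector load L2.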