diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -266,6 +266,13 @@
     return *this;
   }

+  /// Comparison for sorting ranges by offset.
+  ///
+  /// Returns true if the offset of \p L is less than that of \p R.
+  inline static bool OffsetLessThan(const RangeTy &L, const RangeTy &R) {
+    return L.Offset < R.Offset;
+  }
+
   /// Constants used to represent special offsets or sizes.
   /// - This assumes that Offset and Size are non-negative.
   /// - The constants should not clash with DenseMapInfo, such as EmptyKey
@@ -274,6 +281,11 @@
   static constexpr int64_t Unknown = -2;
 };

+inline raw_ostream &operator<<(raw_ostream &OS, const RangeTy &R) {
+  OS << "[" << R.Offset << ", " << R.Size << "]";
+  return OS;
+}
+
 inline bool operator==(const RangeTy &A, const RangeTy &B) {
   return A.Offset == B.Offset && A.Size == B.Size;
 }
@@ -5009,29 +5021,187 @@
     AK_MUST_READ_WRITE = AK_MUST | AK_R | AK_W,
   };

+  /// A container for a list of ranges.
+  struct RangeList {
+    // The set of ranges rarely contains more than one element, and is unlikely
+    // to contain more than say four elements. So we find the middle-ground with
+    // a sorted vector. This avoids hard-coding a rarely used number like "four"
+    // into every instance of a SmallSet.
+    using RangeTy = AA::RangeTy;
+    using VecTy = SmallVector<RangeTy>;
+    using iterator = VecTy::iterator;
+    using const_iterator = VecTy::const_iterator;
+    VecTy Ranges;
+
+    RangeList(const RangeTy &R) { Ranges.push_back(R); }
+    RangeList(ArrayRef<int64_t> Offsets, int64_t Size) {
+      Ranges.reserve(Offsets.size());
+      for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+        assert(((i + 1 == e) || Offsets[i] < Offsets[i + 1]) &&
+               "Expected strictly ascending offsets.");
+        Ranges.emplace_back(Offsets[i], Size);
+      }
+    }
+    RangeList() = default;
+
+    iterator begin() { return Ranges.begin(); }
+    iterator end() { return Ranges.end(); }
+    const_iterator begin() const { return Ranges.begin(); }
+    const_iterator end() const { return Ranges.end(); }
+
+    // Helpers required for std::set_difference
+    using value_type = RangeTy;
+    void push_back(const RangeTy &R) {
+      assert((Ranges.empty() || RangeTy::OffsetLessThan(Ranges.back(), R)) &&
+             "Ensure the last element is the greatest.");
+      Ranges.push_back(R);
+    }
+
+    /// Copy ranges from \p L that are not in \p R, into \p D.
+    static void set_difference(const RangeList &L, const RangeList &R,
+                               RangeList &D) {
+      std::set_difference(L.begin(), L.end(), R.begin(), R.end(),
+                          std::back_inserter(D), RangeTy::OffsetLessThan);
+    }
+
+    unsigned size() const { return Ranges.size(); }
+
+    bool operator==(const RangeList &OI) const { return Ranges == OI.Ranges; }
+
+    /// Merge the ranges in \p RHS into the current ranges.
+    /// - Merging a list of unknown ranges makes the current list unknown.
+    /// - Ranges with the same offset are merged according to RangeTy::operator&
+    /// \return true if the current RangeList changed.
+    bool merge(const RangeList &RHS) {
+      if (isUnknown())
+        return false;
+      if (RHS.isUnknown()) {
+        setUnknown();
+        return true;
+      }
+
+      if (Ranges.empty()) {
+        Ranges = RHS.Ranges;
+        return true;
+      }
+
+      bool Changed = false;
+      auto LPos = Ranges.begin();
+      for (auto &R : RHS.Ranges) {
+        auto Result = insert(LPos, R);
+        if (isUnknown())
+          return true;
+        LPos = Result.first;
+        Changed |= Result.second;
+      }
+      return Changed;
+    }
+
+    /// Insert \p R at the given iterator \p Pos, and merge if necessary.
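+    ///
+    /// For example (hypothetical values), inserting {4, 2} into the list
+    /// {{0, 2}, {8, 2}} adds a new entry in the middle, while inserting
+    /// {8, 2} again merges with the existing entry via RangeTy::operator&
+    /// and reports no change.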
+    ///
+    /// This assumes that all ranges before \p Pos are OffsetLessThan \p R, and
+    /// then maintains the sorted order for the suffix list.
+    ///
+    /// \return The place of insertion and true iff anything changed.
+    std::pair<iterator, bool> insert(iterator Pos, const RangeTy &R) {
+      if (isUnknown())
+        return std::make_pair(Ranges.begin(), false);
+      if (R.offsetOrSizeAreUnknown()) {
+        return std::make_pair(setUnknown(), true);
+      }
+
+      // Maintain this as a sorted vector of unique entries.
+      auto LB = std::lower_bound(Pos, Ranges.end(), R, RangeTy::OffsetLessThan);
+      if (LB == Ranges.end() || LB->Offset != R.Offset)
+        return std::make_pair(Ranges.insert(LB, R), true);
+      bool Changed = *LB != R;
+      *LB &= R;
+      if (LB->offsetOrSizeAreUnknown())
+        return std::make_pair(setUnknown(), true);
+      return std::make_pair(LB, Changed);
+    }
+
+    /// Insert the given range \p R, maintaining sorted order.
+    ///
+    /// \return The place of insertion and true iff anything changed.
+    std::pair<iterator, bool> insert(const RangeTy &R) {
+      return insert(Ranges.begin(), R);
+    }
+
+    /// Add the increment \p Inc to the offset of every range.
+    void addToAllOffsets(int64_t Inc) {
+      assert(!isUnassigned() &&
+             "Cannot increment if the offset is not yet computed!");
+      if (isUnknown())
+        return;
+      for (auto &R : Ranges) {
+        R.Offset += Inc;
+      }
+    }
+
+    /// Return true iff there is exactly one range and it is known.
+    bool isUnique() const {
+      return Ranges.size() == 1 && !Ranges.front().offsetOrSizeAreUnknown();
+    }
+
+    /// Return the unique range, assuming it exists.
+    const RangeTy &getUnique() const {
+      assert(isUnique() && "No unique range to return!");
+      return Ranges.front();
+    }
+
+    /// Return true iff the list contains an unknown range.
+    bool isUnknown() const {
+      if (isUnassigned())
+        return false;
+      if (Ranges.front().offsetOrSizeAreUnknown()) {
+        assert(Ranges.size() == 1 && "Unknown is a singleton range.");
+        return true;
+      }
+      return false;
+    }
+
+    /// Discard all ranges and insert a single unknown range.
+    iterator setUnknown() {
+      Ranges.clear();
+      Ranges.push_back(RangeTy::getUnknown());
+      return Ranges.begin();
+    }
+
+    /// Return true if no ranges have been inserted.
+    bool isUnassigned() const { return Ranges.size() == 0; }
+  };
+
   /// An access description.
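+  /// An Access may now span more than one range; for example (hypothetical
+  /// IR), a load through a select of two GEPs is summarized as one Access
+  /// that carries both candidate offsets and is marked as a "may" access.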
  struct Access {
     Access(Instruction *I, int64_t Offset, int64_t Size,
            std::optional<Value *> Content, AccessKind Kind, Type *Ty)
-        : LocalI(I), RemoteI(I), Content(Content), Range(Offset, Size),
+        : LocalI(I), RemoteI(I), Content(Content), Ranges(Offset, Size),
           Kind(Kind), Ty(Ty) {
       verify();
     }
+    Access(Instruction *LocalI, Instruction *RemoteI, const RangeList &Ranges,
+           std::optional<Value *> Content, AccessKind K, Type *Ty)
+        : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Ranges(Ranges),
+          Kind(K), Ty(Ty) {
+      if (Ranges.size() > 1) {
+        Kind = AccessKind(Kind | AK_MAY);
+        Kind = AccessKind(Kind & ~AK_MUST);
+      }
+      verify();
+    }
     Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset,
            int64_t Size, std::optional<Value *> Content, AccessKind Kind,
            Type *Ty)
         : LocalI(LocalI), RemoteI(RemoteI), Content(Content),
-          Range(Offset, Size), Kind(Kind), Ty(Ty) {
+          Ranges(Offset, Size), Kind(Kind), Ty(Ty) {
       verify();
     }
     Access(const Access &Other) = default;
-    Access(const Access &&Other)
-        : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
-          Range(Other.Range), Kind(Other.Kind), Ty(Other.Ty) {}

     Access &operator=(const Access &Other) = default;
     bool operator==(const Access &R) const {
-      return LocalI == R.LocalI && RemoteI == R.RemoteI && Range == R.Range &&
+      return LocalI == R.LocalI && RemoteI == R.RemoteI && Ranges == R.Ranges &&
              Content == R.Content && Kind == R.Kind;
     }
     bool operator!=(const Access &R) const { return !(*this == R); }
@@ -5039,16 +5209,20 @@
     Access &operator&=(const Access &R) {
       assert(RemoteI == R.RemoteI && "Expected same instruction!");
       assert(LocalI == R.LocalI && "Expected same instruction!");
+
+      // Note that every Access object corresponds to a unique Value, and only
+      // accesses to the same Value are merged. Hence we assume that all ranges
+      // are the same size. If ranges can be different size, then the contents
+      // must be dropped.
+      Ranges.merge(R.Ranges);
+      Content =
+          AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
+
+      // Combine the access kind, which results in a bitwise union.
+      // - If MAY is present in the union, then any MUST needs to be removed.
+      // - If there is more than one range, then this must be a MAY.
       Kind = AccessKind(Kind | R.Kind);
-      auto Before = Range;
-      Range &= R.Range;
-      if (Before.isUnassigned() || Before == Range) {
-        Content =
-            AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
-      } else {
-        // Since the Range information changed, set a conservative state -- drop
-        // the contents, and assume MayAccess rather than MustAccess.
-        setWrittenValueUnknown();
+      if ((Kind & AK_MAY) || Ranges.size() > 1) {
         Kind = AccessKind(Kind | AK_MAY);
         Kind = AccessKind(Kind & ~AK_MUST);
       }
@@ -5061,6 +5235,8 @@
              "Expect must or may access, not both.");
       assert(isAssumption() + isWrite() <= 1 &&
              "Expect assumption access or write access, never both.");
+      assert((isMayAccess() || Ranges.size() == 1) &&
+             "Cannot be a must access if there are multiple ranges.");
     }

     /// Return the access kind.
@@ -5078,8 +5254,19 @@
     /// Return true if this is an assumption access.
    bool isAssumption() const { return Kind == AK_ASSUMPTION; }

-    bool isMustAccess() const { return Kind & AK_MUST; }
-    bool isMayAccess() const { return Kind & AK_MAY; }
+    bool isMustAccess() const {
+      bool MustAccess = Kind & AK_MUST;
+      assert((!MustAccess || Ranges.size() < 2) &&
+             "Cannot be a must access if there are multiple ranges.");
+      return MustAccess;
+    }
+
+    bool isMayAccess() const {
+      bool MayAccess = Kind & AK_MAY;
+      assert((MayAccess || Ranges.size() < 2) &&
+             "Cannot be a must access if there are multiple ranges.");
+      return MayAccess;
+    }

     /// Return the instruction that causes the access with respect to the local
     /// scope of the associated attribute.
@@ -5113,11 +5300,25 @@
     /// determined.
     std::optional<Value *> getContent() const { return Content; }

-    /// Return the offset for this access.
-    int64_t getOffset() const { return Range.Offset; }
+    bool hasUniqueRange() const { return Ranges.isUnique(); }
+    const AA::RangeTy &getUniqueRange() const { return Ranges.getUnique(); }
+
+    /// Add a range accessed by this Access.
+    ///
+    /// If there are multiple ranges, then this is a "may access".
+    void addRange(int64_t Offset, int64_t Size) {
+      Ranges.insert({Offset, Size});
+      if (!hasUniqueRange()) {
+        Kind = AccessKind(Kind | AK_MAY);
+        Kind = AccessKind(Kind & ~AK_MUST);
+      }
+    }
+
+    const RangeList &getRanges() const { return Ranges; }

-    /// Return the size for this access.
-    int64_t getSize() const { return Range.Size; }
+    using const_iterator = RangeList::const_iterator;
+    const_iterator begin() const { return Ranges.begin(); }
+    const_iterator end() const { return Ranges.end(); }

   private:
     /// The instruction responsible for the access with respect to the local
@@ -5131,8 +5332,8 @@
     /// cannot be determined.
     std::optional<Value *> Content;

-    /// The object accessed, in terms of an offset and size in bytes.
-    AA::RangeTy Range;
+    /// Set of potential ranges accessed from the base pointer.
+    RangeList Ranges;

     /// The access kind, e.g., READ, as bitset (could be more than one).
     AccessKind Kind;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -759,7 +759,7 @@
   DenseSet<const char *> Allowed(
       {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
        &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
-       &AAPointerInfo::ID});
+       &AAPointerInfo::ID, &AAPotentialConstantValues::ID});

   AttributorConfig AC(CGUpdater);
   AC.Allowed = &Allowed;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -819,7 +819,7 @@
   /// OffsetBin to the bin for its new offset.
   ///
   /// \Returns CHANGED, if the state changed, UNCHANGED otherwise.
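+  /// For example (hypothetical values), adding a load with the range list
+  /// {{8, 4}, {24, 4}} records a single Access whose index appears in both
+  /// the [8, 4] and the [24, 4] offset bins.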
-  ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size,
+  ChangeStatus addAccess(Attributor &A, const AAPointerInfo::RangeList &Ranges,
                          Instruction &I, std::optional<Value *> Content,
                          AAPointerInfo::AccessKind Kind, Type *Ty,
                          Instruction *RemoteI = nullptr);
@@ -865,7 +865,7 @@
       bool IsExact = Range == ItRange && !Range.offsetOrSizeAreUnknown();
       for (auto Index : It.getSecond()) {
         auto &Access = AccessList[Index];
-        if (!CB(Access, IsExact))
+        if (!CB(Access, IsExact && Access.hasUniqueRange()))
           return false;
       }
     }
@@ -885,9 +885,12 @@
       return true;
     }

-    for (auto LI : LocalList->getSecond()) {
-      auto &Access = AccessList[LI];
-      Range &= {Access.getOffset(), Access.getSize()};
+    for (unsigned Index : LocalList->getSecond()) {
+      for (auto &R : AccessList[Index]) {
+        Range &= R;
+        if (Range.offsetOrSizeAreUnknown())
+          break;
+      }
     }
     return forallInterferingAccesses(Range, CB);
   }
@@ -897,13 +900,11 @@
   BooleanState BS;
 };

-ChangeStatus AA::PointerInfo::State::addAccess(Attributor &A, int64_t Offset,
-                                               int64_t Size, Instruction &I,
-                                               std::optional<Value *> Content,
-                                               AAPointerInfo::AccessKind Kind,
-                                               Type *Ty, Instruction *RemoteI) {
+ChangeStatus AA::PointerInfo::State::addAccess(
+    Attributor &A, const AAPointerInfo::RangeList &Ranges, Instruction &I,
+    std::optional<Value *> Content, AAPointerInfo::AccessKind Kind, Type *Ty,
+    Instruction *RemoteI) {
   RemoteI = RemoteI ? RemoteI : &I;
-  AAPointerInfo::Access Acc(&I, RemoteI, Offset, Size, Content, Kind, Ty);

   // Check if we have an access for this instruction, if not, simply add it.
   auto &LocalList = RemoteIMap[RemoteI];
@@ -917,48 +918,122 @@
       break;
     }
   }
+
+  auto AddToBins = [&](const AAPointerInfo::RangeList &ToAdd) {
+    LLVM_DEBUG(
+      if (ToAdd.size())
+        dbgs() << "[AAPointerInfo] Inserting access in new offset bins\n";
+    );
+
+    for (auto Key : ToAdd) {
+      LLVM_DEBUG(dbgs() << "    key " << Key << "\n");
+      OffsetBins[Key].insert(AccIndex);
+    }
+  };
+
   if (!AccExists) {
-    AccessList.push_back(Acc);
+    AccessList.emplace_back(&I, RemoteI, Ranges, Content, Kind, Ty);
+    assert((AccessList.size() == AccIndex + 1) &&
+           "New Access should have been at AccIndex");
     LocalList.push_back(AccIndex);
-  } else {
-    // The new one will be combined with the existing one.
-    auto &Current = AccessList[AccIndex];
-    auto Before = Current;
-    Current &= Acc;
-    if (Current == Before)
-      return ChangeStatus::UNCHANGED;
+    AddToBins(AccessList[AccIndex].getRanges());
+    return ChangeStatus::CHANGED;
+  }
+
+  // Combine the new Access with the existing Access, and then update the
+  // mapping in the offset bins.
+  AAPointerInfo::Access Acc(&I, RemoteI, Ranges, Content, Kind, Ty);
+  auto &Current = AccessList[AccIndex];
+  auto Before = Current;
+  Current &= Acc;
+  if (Current == Before)
+    return ChangeStatus::UNCHANGED;

-    Acc = Current;
-    AA::RangeTy Key{Before.getOffset(), Before.getSize()};
+  auto &ExistingRanges = Before.getRanges();
+  auto &NewRanges = Current.getRanges();
+
+  // Ranges that are in the old access but not the new access need to be removed
+  // from the offset bins.
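+  // (For example, when merging collapses the ranges to the unknown singleton,
+  // all previously recorded keys are removed and only the unknown key is
+  // re-inserted below.)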
+  AAPointerInfo::RangeList ToRemove;
+  AAPointerInfo::RangeList::set_difference(ExistingRanges, NewRanges, ToRemove);
+  LLVM_DEBUG(
+    if (ToRemove.size())
+      dbgs() << "[AAPointerInfo] Removing access from old offset bins\n";
+  );
+
+  for (auto Key : ToRemove) {
+    LLVM_DEBUG(dbgs() << "    key " << Key << "\n");
     assert(OffsetBins.count(Key) && "Existing Access must be in some bin.");
     auto &Bin = OffsetBins[Key];
     assert(Bin.count(AccIndex) &&
            "Expected bin to actually contain the Access.");
-    LLVM_DEBUG(dbgs() << "[AAPointerInfo] Removing Access "
-                      << AccessList[AccIndex] << " with key {" << Key.Offset
-                      << ',' << Key.Size << "}\n");
     Bin.erase(AccIndex);
   }

-  AA::RangeTy Key{Acc.getOffset(), Acc.getSize()};
-  LLVM_DEBUG(dbgs() << "[AAPointerInfo] Inserting Access " << Acc
-                    << " with key {" << Key.Offset << ',' << Key.Size << "}\n");
-  OffsetBins[Key].insert(AccIndex);
+  // Ranges that are in the new access but not the old access need to be added
+  // to the offset bins.
+  AAPointerInfo::RangeList ToAdd;
+  AAPointerInfo::RangeList::set_difference(NewRanges, ExistingRanges, ToAdd);
+  AddToBins(ToAdd);
   return ChangeStatus::CHANGED;
 }

 namespace {

-/// Helper struct, will support ranges eventually.
-///
-/// FIXME: Tracks a single Offset until we have proper support for a list of
-/// RangeTy objects.
+/// A helper containing a list of offsets computed for a Use. Ideally this
+/// list should be strictly ascending, but we ensure that only when we
+/// actually translate the list of offsets to a RangeList.
 struct OffsetInfo {
-  int64_t Offset = AA::RangeTy::Unassigned;
+  using VecTy = SmallVector<int64_t>;
+  using const_iterator = VecTy::const_iterator;
+  VecTy Offsets;

-  bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
+  const_iterator begin() const { return Offsets.begin(); }
+  const_iterator end() const { return Offsets.end(); }
+
+  bool operator==(const OffsetInfo &RHS) const {
+    return Offsets == RHS.Offsets;
+  }
+
+  void insert(int64_t Offset) { Offsets.push_back(Offset); }
+  bool isUnassigned() const { return Offsets.size() == 0; }
+
+  bool isUnknown() const {
+    if (isUnassigned())
+      return false;
+    if (Offsets.size() == 1)
+      return Offsets.front() == AA::RangeTy::Unknown;
+    return false;
+  }
+
+  void setUnknown() {
+    Offsets.clear();
+    Offsets.push_back(AA::RangeTy::Unknown);
+  }
+
+  void addToAll(int64_t Inc) {
+    for (auto &Offset : Offsets) {
+      Offset += Inc;
+    }
+  }
+
+  /// Copy offsets from \p R into the current list.
+  ///
+  /// Ideally all lists should be strictly ascending, but we defer that to the
+  /// actual use of the list. So we just blindly append here.
+  void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); }
 };

+static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) {
+  ListSeparator LS;
+  OS << "[";
+  for (auto Offset : OI) {
+    OS << LS << Offset;
+  }
+  OS << "]";
+  return OS;
+}
+
 struct AAPointerInfoImpl
     : public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
   using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1180,16 +1255,16 @@
             RAcc.getContent(), CB, *this, UsedAssumedInformation);
         AK = AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW));
         AK = AccessKind(AK | (RAcc.isMayAccess() ?
AK_MAY : AK_MUST));
-        Changed =
-            Changed | addAccess(A, It.first.Offset, It.first.Size, CB, Content,
-                                AK, RAcc.getType(), RAcc.getRemoteInst());
+
+        Changed |= addAccess(A, RAcc.getRanges(), CB, Content, AK,
+                             RAcc.getType(), RAcc.getRemoteInst());
       }
     }
     return Changed;
   }

   ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA,
-                                    int64_t Offset, CallBase &CB) {
+                                    const OffsetInfo &Offsets, CallBase &CB) {
     using namespace AA::PointerInfo;
     if (!OtherAA.getState().isValidState() || !isValidState())
       return indicatePessimisticFixpoint();
@@ -1200,17 +1275,19 @@
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     const auto &State = OtherAAImpl.getState();
     for (const auto &It : State) {
-      AA::RangeTy Range = AA::RangeTy::getUnknown();
-      if (Offset != AA::RangeTy::Unknown &&
-          !It.first.offsetOrSizeAreUnknown()) {
-        Range = AA::RangeTy(It.first.Offset + Offset, It.first.Size);
-      }
       for (auto Index : It.getSecond()) {
         const auto &RAcc = State.getAccess(Index);
-        AccessKind AK = RAcc.getKind();
-        Changed = Changed | addAccess(A, Range.Offset, Range.Size, CB,
-                                      RAcc.getContent(), AK, RAcc.getType(),
-                                      RAcc.getRemoteInst());
+        for (auto Offset : Offsets) {
+          auto NewRanges = Offset == AA::RangeTy::Unknown
+                               ? AA::RangeTy::getUnknown()
+                               : RAcc.getRanges();
+          if (!NewRanges.isUnknown()) {
+            NewRanges.addToAllOffsets(Offset);
+          }
+          Changed |=
+              addAccess(A, NewRanges, CB, RAcc.getContent(), RAcc.getKind(),
+                        RAcc.getType(), RAcc.getRemoteInst());
+        }
       }
     }
     return Changed;
   }
@@ -1250,38 +1327,102 @@
   /// Deal with an access and signal if it was handled successfully.
   bool handleAccess(Attributor &A, Instruction &I,
                     std::optional<Value *> Content, AccessKind Kind,
-                    int64_t Offset, ChangeStatus &Changed, Type &Ty) {
+                    SmallVectorImpl<int64_t> &Offsets, ChangeStatus &Changed,
+                    Type &Ty) {
     using namespace AA::PointerInfo;
     auto Size = AA::RangeTy::Unknown;
     const DataLayout &DL = A.getDataLayout();
     TypeSize AccessSize = DL.getTypeStoreSize(&Ty);
     if (!AccessSize.isScalable())
       Size = AccessSize.getFixedSize();
-    Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, &Ty);
+
+    // Make a strictly ascending list of offsets as required by addAccess()
+    llvm::sort(Offsets);
+    auto Last = std::unique(Offsets.begin(), Offsets.end());
+    Offsets.erase(Last, Offsets.end());
+
+    Changed = Changed | addAccess(A, {Offsets, Size}, I, Content, Kind, &Ty);
     return true;
   };

   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override;

+  void collectConstantsForGEP(Attributor &A, const DataLayout &DL,
+                              OffsetInfo &UsrOI, const OffsetInfo &PtrOI,
+                              const GEPOperator *GEP, bool &Follow);
+
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {
     AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
   }
 };

+/// If the indices to the GEP can be traced to constants, incorporate all
+/// of these into UsrOI.
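+///
+/// For example (hypothetical values): with incoming offsets {0, 16}, a
+/// constant GEP offset of 4, and one variable index with potential constants
+/// {1, 3} at scale 8, the result is the union of {12, 28} and {28, 44};
+/// duplicates are only removed later, when handleAccess() sorts the list.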
+void AAPointerInfoFloating::collectConstantsForGEP(
+    Attributor &A, const DataLayout &DL, OffsetInfo &UsrOI,
+    const OffsetInfo &PtrOI, const GEPOperator *GEP, bool &Follow) {
+  unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
+  MapVector<Value *, APInt> VariableOffsets;
+  APInt ConstantOffset(BitWidth, 0);
+
+  Follow = true;
+
+  if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
+    UsrOI.setUnknown();
+    return;
+  }
+
+  UsrOI = PtrOI;
+  if (VariableOffsets.empty()) {
+    LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
+                      << "\n");
+    UsrOI.addToAll(ConstantOffset.getSExtValue());
+    return;
+  }
+
+  auto Union = UsrOI;
+  Union.addToAll(ConstantOffset.getSExtValue());
+
+  // Each VI in VariableOffsets has a set of potential constant values. Every
+  // combination of elements, picked one each from these sets, is separately
+  // added to the original set of offsets, thus resulting in more offsets.
+  for (const auto &VI : VariableOffsets) {
+    auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
+        *this, IRPosition::value(*VI.first), DepClassTy::OPTIONAL);
+    if (!PotentialConstantsAA.isValidState()) {
+      LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is not constant " << *GEP
+                        << "\n");
+      UsrOI.setUnknown();
+      return;
+    }
+    OffsetInfo NewUnion;
+    for (const auto &ConstOffset : PotentialConstantsAA.getAssumedSet()) {
+      auto CopyPerOffset = Union;
+      CopyPerOffset.addToAll(ConstOffset.getSExtValue() *
+                             VI.second.getZExtValue());
+      NewUnion.merge(CopyPerOffset);
+    }
+    Union = NewUnion;
+  }
+
+  UsrOI = std::move(Union);
+  return;
+}
+
 ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
   using namespace AA::PointerInfo;
   ChangeStatus Changed = ChangeStatus::UNCHANGED;
+  const DataLayout &DL = A.getDataLayout();
   Value &AssociatedValue = getAssociatedValue();
-  const DataLayout &DL = A.getDataLayout();

   DenseMap<Value *, OffsetInfo> OffsetInfoMap;
-  OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
+  OffsetInfoMap[&AssociatedValue].insert(0);

   auto HandlePassthroughUser = [&](Value *Usr, const OffsetInfo &PtrOI,
                                    bool &Follow) {
-    assert(PtrOI.Offset != AA::RangeTy::Unassigned &&
+    assert(!PtrOI.isUnassigned() &&
            "Cannot pass through if the input Ptr was not visited!");
     OffsetInfoMap[Usr] = PtrOI;
     Follow = true;
@@ -1316,23 +1457,14 @@
       // already in it though.
       auto &UsrOI = OffsetInfoMap[Usr];
       auto &PtrOI = OffsetInfoMap[CurPtr];
-      UsrOI = PtrOI;
+      Follow = true;

-      // TODO: Use range information.
-      APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
-      if (PtrOI.Offset == AA::RangeTy::Unknown ||
-          !GEP->accumulateConstantOffset(DL, GEPOffset)) {
-        LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset not constant " << *GEP
-                          << "\n");
-        UsrOI.Offset = AA::RangeTy::Unknown;
-        Follow = true;
+      if (PtrOI.isUnknown()) {
+        UsrOI.setUnknown();
         return true;
       }

-      LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
-                        << "\n");
-      UsrOI.Offset = PtrOI.Offset + GEPOffset.getZExtValue();
-      Follow = true;
+      collectConstantsForGEP(A, DL, UsrOI, PtrOI, GEP, Follow);
       return true;
     }
     if (isa<PtrToIntInst>(Usr))
      return false;
@@ -1352,17 +1484,17 @@

      // Check if the PHI operand has already an unknown offset as we can't
      // improve on that anymore.
-      if (PtrOI.Offset == AA::RangeTy::Unknown) {
+      if (PtrOI.isUnknown()) {
        LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown "
                          << *CurPtr << " in " << *Usr << "\n");
-        Follow = UsrOI.Offset != AA::RangeTy::Unknown;
-        UsrOI = PtrOI;
+        Follow = !UsrOI.isUnknown();
+        UsrOI.setUnknown();
        return true;
      }

      // Check if the PHI is invariant (so far).
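+      // (i.e., every incoming operand visited so far produced the same list
+      // of offsets as the PHI itself)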
      if (UsrOI == PtrOI) {
-        assert(PtrOI.Offset != AA::RangeTy::Unassigned &&
+        assert(!PtrOI.isUnassigned() &&
                "Cannot assign if the current Ptr was not visited!");
        LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)");
        return true;
      }
@@ -1376,9 +1508,13 @@
          DL, Offset, /* AllowNonInbounds */ true);
      auto It = OffsetInfoMap.find(CurPtrBase);
      if (It != OffsetInfoMap.end()) {
-        Offset += It->getSecond().Offset;
-        if (IsFirstPHIUser || Offset == UsrOI.Offset)
+        auto BaseOI = It->getSecond();
+        BaseOI.addToAll(Offset.getZExtValue());
+        if (IsFirstPHIUser || BaseOI == UsrOI) {
+          LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant " << *CurPtr
+                            << " in " << *Usr << "\n");
          return HandlePassthroughUser(Usr, PtrOI, Follow);
+        }
        LLVM_DEBUG(
            dbgs() << "[AAPointerInfo] PHI operand pointer offset mismatch "
                   << *CurPtr << " in " << *Usr << "\n");
@@ -1388,8 +1524,7 @@
      }

      // TODO: Approximate in case we know the direction of the recurrence.
-      UsrOI = PtrOI;
-      UsrOI.Offset = AA::RangeTy::Unknown;
+      UsrOI.setUnknown();
      Follow = true;
      return true;
    }
@@ -1403,7 +1538,7 @@
      else
        AK = AccessKind(AK | AccessKind::AK_MAY);
      if (!handleAccess(A, *LoadI, /* Content */ nullptr, AK,
-                        OffsetInfoMap[CurPtr].Offset, Changed,
+                        OffsetInfoMap[CurPtr].Offsets, Changed,
                        *LoadI->getType()))
        return false;
@@ -1483,7 +1618,7 @@
      return handleAccess(
          A, *Assumption.second, Assumption.first, AccessKind::AK_ASSUMPTION,
-          OffsetInfoMap[CurPtr].Offset, Changed, *LoadI->getType());
+          OffsetInfoMap[CurPtr].Offsets, Changed, *LoadI->getType());
    }

    auto HandleStoreLike = [&](Instruction &I, Value *ValueOp, Type &ValueTy,
@@ -1509,7 +1644,7 @@
      if (ValueOp)
        Content = A.getAssumedSimplified(
            *ValueOp, *this, UsedAssumedInformation, AA::Interprocedural);
-      return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offset,
+      return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offsets,
                          Changed, ValueTy);
    };

@@ -1536,8 +1671,7 @@
      const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_argument(*CB, ArgNo), DepClassTy::REQUIRED);
-      Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr].Offset,
-                                     *CB) |
+      Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr], *CB) |
                Changed;
      return isValidState();
    }
@@ -1556,8 +1690,8 @@
    LLVM_DEBUG({
      if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) {
        dbgs() << "[AAPointerInfo] Equivalent use callback failed: "
-               << OffsetInfoMap[NewU].Offset << " vs "
-               << OffsetInfoMap[OldU].Offset << "\n";
+               << OffsetInfoMap[NewU] << " vs " << OffsetInfoMap[OldU]
+               << "\n";
      }
    });
    return OffsetInfoMap[NewU] == OffsetInfoMap[OldU];
@@ -1637,7 +1771,7 @@
    auto Kind =
        ArgNo == 0 ? AccessKind::AK_MUST_WRITE : AccessKind::AK_MUST_READ;
    Changed =
-        Changed | addAccess(A, 0, LengthVal, *MI, nullptr, Kind, nullptr);
+        Changed | addAccess(A, {0, LengthVal}, *MI, nullptr, Kind, nullptr);
  }
  LLVM_DEBUG({
    dbgs() << "Accesses by bin after update:\n";
@@ -1675,8 +1809,8 @@
    bool ReadOnly = AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown);
    auto Kind = ReadOnly ?
AccessKind::AK_MAY_READ : AccessKind::AK_MAY_READ_WRITE;
-    return addAccess(A, AA::RangeTy::Unknown, AA::RangeTy::Unknown, *getCtxI(),
-                     nullptr, Kind, nullptr);
+    return addAccess(A, AA::RangeTy::getUnknown(), *getCtxI(), nullptr, Kind,
+                     nullptr);
  }

  /// See AbstractAttribute::trackStatistics()
diff --git a/llvm/test/CodeGen/AMDGPU/attributor.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
rename from llvm/test/CodeGen/AMDGPU/attributor.ll
rename to llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
--- a/llvm/test/CodeGen/AMDGPU/attributor.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
@@ -6,10 +6,12 @@
 ; offsets of the phi cannot be determined, and hence the attributor assumes that
 ; hostcall is in use.

+; CHECK-LABEL: amdhsa.kernels:
 ; CHECK: .value_kind: hidden_hostcall_buffer
 ; CHECK: .value_kind: hidden_multigrid_sync_arg
+; CHECK-LABEL: .name: kernel_1

-define amdgpu_kernel void @the_kernel(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
+define amdgpu_kernel void @kernel_1(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
 entry:
   %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   br i1 %cond, label %old, label %new
@@ -35,6 +37,32 @@
   ret void
 }

+; The call to the intrinsic implicitarg_ptr is combined with an offset produced
+; by selecting between two constants, before it is eventually used in a GEP to
+; form the address of a load. This test ensures that AAPointerInfo can look
+; through the select to maintain a set of indices, so that it can precisely
+; determine that hostcall and other expensive implicit args are not in use.

+; CHECK-NOT: hidden_hostcall_buffer
+; CHECK-NOT: hidden_multigrid_sync_arg
+; CHECK-LABEL: .name: kernel_2

+define amdgpu_kernel void @kernel_2(i32 addrspace(1)* %a, i1 %cond) {
+entry:
+  %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %tmp5 = select i1 %cond, i64 12, i64 18
+  %tmp6 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %tmp5
+  %tmp8 = bitcast i8 addrspace(4)* %tmp6 to i16 addrspace(4)*

+  ;;; THIS USE is where multiple offsets are possible, relative to implicitarg_ptr
+  %tmp9 = load i16, i16 addrspace(4)* %tmp8, align 2

+  %idx.ext = sext i16 %tmp9 to i64
+  %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
+  %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
+  ret void
+}

 declare i32 @llvm.amdgcn.workitem.id.x()

 declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -17,18 +17,12 @@
 }

 define internal i8 @read_arg_index(i8* %p, i64 %index) {
-; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
-; TUNIT-LABEL: define {{[^@]+}}@read_arg_index
-; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] {
-; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[L:%.*]] = load i8, i8* [[P]], align 2
-; TUNIT-NEXT:    ret i8 [[L]]
-;
 ; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
 ; CGSCC-LABEL: define {{[^@]+}}@read_arg_index
-; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR0]] {
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull
readonly align 16 dereferenceable(1024) [[P:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; CGSCC-NEXT: [[G:%.*]] = getelementptr inbounds i8, i8* [[P]], i64 2 +; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[G]], align 1 ; CGSCC-NEXT: ret i8 [[L]] ; entry: @@ -40,7 +34,7 @@ define i8 @call_simplifiable_1() { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1 -; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { +; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 @@ -53,7 +47,7 @@ ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4:[0-9]+]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -83,8 +77,8 @@ ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] { -; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4:[0-9]+]] -; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]] +; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR5:[0-9]+]] +; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR5]] ; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] ; CGSCC-NEXT: ret i8 [[Z]] ; @@ -97,7 +91,7 @@ define i8 @call_simplifiable_2() { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2 -; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 @@ -113,7 +107,7 @@ ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 ; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 ; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 -; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -126,32 +120,32 @@ ret i8 %r } -define i8 @call_not_simplifiable_1() { +define i8 @call_simplifiable_3() { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) -; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_1 -; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_3 +; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: ; 
TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; TUNIT-NEXT: store i8 2, i8* [[I0]], align 2 -; TUNIT-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]] -; TUNIT-NEXT: ret i8 [[R]] +; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: ret i8 2 ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_1 +; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_3 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] +; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 +; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I2]], align 2 +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 16 dereferenceable(1024) [[I0]]) #[[ATTR4]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: %Bytes = alloca [1024 x i8], align 16 - %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 - store i8 2, i8* %i0, align 1 - %r = call i8 @read_arg_index(i8* %i0, i64 0) + %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 + %i2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i2, align 1 + %r = call i8 @read_arg_index(i8* %i0, i64 2) ret i8 %r } @@ -160,7 +154,7 @@ define internal i8 @read_arg_2(i8* %p) { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@read_arg_2 -; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[P:%.*]]) #[[ATTR0]] { +; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; TUNIT-NEXT: ret i8 [[L]] @@ -180,17 +174,17 @@ define internal i8 @sum_two_different_loads(i8* %p, i8* %q) { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads -; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR0]] { -; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P]]) #[[ATTR2]] -; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR2]] +; TUNIT-SAME: (i8* nocapture nofree nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree nonnull readonly dereferenceable(972) [[P]]) #[[ATTR3:[0-9]+]] +; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* 
nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR3]] ; TUNIT-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] ; TUNIT-NEXT: ret i8 [[Z]] ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR2]] { -; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]] -; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR4]] +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P]]) #[[ATTR5]] +; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR5]] ; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] ; CGSCC-NEXT: ret i8 [[Z]] ; @@ -200,52 +194,104 @@ ret i8 %z } -define i8 @call_not_simplifiable_2() { +define i8 @call_partially_simplifiable_1() { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) -; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_2 -; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1 +; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; TUNIT-NEXT: store i8 2, i8* [[I0]], align 2 -; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; TUNIT-NEXT: store i8 3, i8* [[I1]], align 1 -; TUNIT-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR2]] +; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: store i8 2, i8* [[I2]], align 2 +; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; TUNIT-NEXT: store i8 3, i8* [[I3]], align 1 +; TUNIT-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4 +; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR3]] ; TUNIT-NEXT: ret i8 [[R]] ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_2 +; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_1 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x 
i8]* [[BYTES]], i64 0, i64 3 -; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 -; CGSCC-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR3]] +; CGSCC-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I2]], align 2 +; CGSCC-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; CGSCC-NEXT: store i8 3, i8* [[I3]], align 1 +; CGSCC-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4 +; CGSCC-NEXT: store i8 4, i8* [[I4]], align 4 +; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR4]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: %Bytes = alloca [1024 x i8], align 16 - %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 - store i8 2, i8* %i0 - %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 - store i8 3, i8* %i1 - %base = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 - %r = call i8 @sum_two_different_loads(i8* %i0, i8* %i1) + %i2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i2 + %i3 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 + store i8 3, i8* %i3 + %i4 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 4 + ;;; This store is redundant, hence removed. 
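+  ;;; (the callee reads only offsets 2 and 3 through %i2 and %i3, so the
+  ;;; byte at offset 4 is never read)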
+ store i8 4, i8* %i4 + %r = call i8 @sum_two_different_loads(i8* %i2, i8* %i3) + ret i8 %r +} + +define i8 @call_partially_simplifiable_2(i1 %cond) { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_2 +; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51 +; TUNIT-NEXT: [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52 +; TUNIT-NEXT: store i8 2, i8* [[I52]], align 4 +; TUNIT-NEXT: [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53 +; TUNIT-NEXT: store i8 3, i8* [[I53]], align 1 +; TUNIT-NEXT: [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54 +; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]] +; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR3]] +; TUNIT-NEXT: ret i8 [[R]] +; +; CGSCC: Function Attrs: nofree nosync nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_2 +; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51 +; CGSCC-NEXT: [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52 +; CGSCC-NEXT: store i8 2, i8* [[I52]], align 4 +; CGSCC-NEXT: [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53 +; CGSCC-NEXT: store i8 3, i8* [[I53]], align 1 +; CGSCC-NEXT: [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54 +; CGSCC-NEXT: store i8 4, i8* [[I54]], align 2 +; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR4]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i51 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 51 + %i52 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 52 + store i8 2, i8* %i52 + %i53 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 53 + store i8 3, i8* %i53 + %i54 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 54 + ;;; This store is redundant, hence removed. Not affected by the select. + store i8 4, i8* %i54 + %sel = select i1 %cond, i8* %i51, i8 *%i52 + %r = call i8 @sum_two_different_loads(i8* %sel, i8* %i53) ret i8 %r } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } ;. ; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(argmem: read) } -; CGSCC: attributes #[[ATTR3]] = { willreturn } -; CGSCC: attributes #[[ATTR4]] = { willreturn memory(read) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR4]] = { willreturn } +; CGSCC: attributes #[[ATTR5]] = { willreturn memory(read) } ;. diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll @@ -0,0 +1,342 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC + +%struct.T = type { i32, [10 x [20 x i8]] } + +define i8 @select_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: store i8 23, i8* [[GEP23]], align 4 +; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29 +; CHECK-NEXT: store i8 29, i8* [[GEP29]], align 4 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: store i8 7, i8* [[GEP7]], align 4 +; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + store i8 23, i8* %gep23, align 4 + %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* 
%Bytes, i64 0, i64 29 + store i8 29, i8* %gep29, align 4 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + store i8 7, i8* %gep7, align 4 + + ;; This store is redundant, hence removed. + %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31 + store i8 42, i8* %gep31, align 4 + + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %i = load i8, i8* %gep.sel, align 4 + ret i8 %i +} + +define i8 @select_offsets_simplifiable_2(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_2 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: store i8 23, i8* [[GEP23]], align 4 +; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29 +; CHECK-NEXT: store i8 29, i8* [[GEP29]], align 4 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: store i8 7, i8* [[GEP7]], align 4 +; CHECK-NEXT: [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 20, i64 26 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 4 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + store i8 23, i8* %gep23, align 4 + %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29 + store i8 29, i8* %gep29, align 4 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + store i8 7, i8* %gep7, align 4 + + ;; This store is redundant, hence removed. 
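+  ;; (the offsets selected below are {20, 26, 4}, and the extra GEP adds 3,
+  ;; so the loaded offsets are {23, 29, 7}; offset 31 is never read)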
+ %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31 + store i8 42, i8* %gep31, align 4 + + ;; Adjust the offsets so that they match the stores after adding 3 + %sel0 = select i1 %cnd1, i64 20, i64 26 + %sel1 = select i1 %cnd2, i64 %sel0, i64 4 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3 + %i = load i8, i8* %gep.plus, align 4 + ret i8 %i +} + +define i8 @select_offsets_simplifiable_3(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@select_offsets_simplifiable_3 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BUNDLE:%.*]] = alloca [[STRUCT_T:%.*]], align 64 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND1]], i64 1, i64 3 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CND2]], i64 5, i64 11 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 [[SEL1]], i64 [[SEL2]] +; CHECK-NEXT: ret i8 100 +; +entry: + %bundle = alloca %struct.T, align 64 + %gep.fixed = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 1, i64 1 + store i8 100, i8* %gep.fixed, align 4 + %sel1 = select i1 %cnd1, i64 1, i64 3 + %sel2 = select i1 %cnd2, i64 5, i64 11 + %gep.sel = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 %sel1, i64 %sel2 + store i8 42, i8* %gep.sel, align 4 + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: store i8 100, i8* [[GEP23]], align 4 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + store i8 100, i8* %gep23, align 4 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %i = load i8, i8* %gep.sel, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_2(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_2 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32 +; CHECK-NEXT: store i8 100, i8* [[GEP32]], align 16 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds 
[1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32 + store i8 100, i8* %gep32, align 4 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3 + %i = load i8, i8* %gep.plus, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_3 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: store i8 100, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP29]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + store i8 100, i8* %gep.sel, align 4 + %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29 + %i = load i8, i8* %gep29, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@select_offsets_not_simplifiable_4 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]] +; CHECK-NEXT: [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3 +; CHECK-NEXT: store i8 100, i8* [[GEP_PLUS]], align 4 +; CHECK-NEXT: [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP32]], align 16 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %sel0 = select i1 %cnd1, i64 23, i64 29 + %sel1 = select i1 %cnd2, i64 %sel0, i64 7 + %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1 + %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3 + store i8 100, i8* %gep.plus, align 4 + %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32 + %i = load i8, i8* %gep32, align 4 + ret i8 %i +} + +define i8 @select_offsets_not_simplifiable_5(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define 
{{[^@]+}}@select_offsets_not_simplifiable_5 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BUNDLE:%.*]] = alloca [[STRUCT_T:%.*]], align 64 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 3, i64 5 +; CHECK-NEXT: store i8 100, i8* [[GEP_FIXED]], align 4 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CND1]], i64 1, i64 3 +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CND2]], i64 5, i64 11 +; CHECK-NEXT: [[GEP_SEL:%.*]] = getelementptr inbounds [[STRUCT_T]], %struct.T* [[BUNDLE]], i64 0, i32 1, i64 [[SEL1]], i64 [[SEL2]] +; CHECK-NEXT: store i8 42, i8* [[GEP_SEL]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %bundle = alloca %struct.T, align 64 + %gep.fixed = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 3, i64 5 + store i8 100, i8* %gep.fixed, align 4 + %sel1 = select i1 %cnd1, i64 1, i64 3 + %sel2 = select i1 %cnd2, i64 5, i64 11 + %gep.sel = getelementptr inbounds %struct.T, %struct.T* %bundle, i64 0, i32 1, i64 %sel1, i64 %sel2 + + ;; This store prevents the constant 100 from being propagated to ret + store i8 42, i8* %gep.sel, align 4 + + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} + +define i8 @select_gep_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) +; CHECK-LABEL: define {{[^@]+}}@select_gep_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] +; CHECK-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 +; CHECK-NEXT: ret i8 21 +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %gep3 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 + store i8 21, i8* %gep3, align 4 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 + store i8 42, i8* %sel.ptr, align 4 + %i = load i8, i8* %gep3, align 4 + ret i8 %i +} + +define i8 @select_gep_not_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn +; CHECK-LABEL: define {{[^@]+}}@select_gep_not_simplifiable_1 +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CHECK-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CHECK-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] +; CHECK-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 + 
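;; The store below may write through either %gep7 or %gep23, so the
+ ;; following load from %gep7 does not always observe the 42 and cannot
+ ;; be folded to a constant.
+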
store i8 42, i8* %sel.ptr, align 4 + %i = load i8, i8* %gep7, align 4 + ret i8 %i +} + +; FIXME: This should be simplifiable. See comment inside. + +define i8 @phi_offsets_fixme(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@phi_offsets_fixme +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 +; CHECK-NEXT: store i8 100, i8* [[GEP_FIXED]], align 16 +; CHECK-NEXT: br i1 [[CND1]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 3, [[THEN]] ], [ 11, [[ELSE]] ] +; CHECK-NEXT: [[GEP_PHI:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[PHI]] +; CHECK-NEXT: store i8 42, i8* [[GEP_PHI]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 16 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 + store i8 100, i8* %gep.fixed, align 4 + br i1 %cnd1, label %then, label %else + +then: + br label %join + +else: + br label %join + +join: + ; FIXME: AAPotentialConstantValues does not detect the constant values for the + ; PHI below. It needs to rely on AAPotentialValues. + %phi = phi i64 [ 3, %then ], [ 11, %else ] + %gep.phi = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %phi + store i8 42, i8* %gep.phi, align 4 + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} + +;. +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn } +;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -3107,42 +3107,55 @@ ret void } -define i8 @multiple_offsets_not_simplifiable_1(i1 %cnd1, i1 %cnd2) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn -; TUNIT-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_1 -; TUNIT-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 -; TUNIT-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 -; TUNIT-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] -; TUNIT-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 -; TUNIT-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4 -; TUNIT-NEXT: ret i8 [[I]] -; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn -; CGSCC-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_1 -; CGSCC-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR5]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; CGSCC-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 -; CGSCC-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 -; CGSCC-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] -; CGSCC-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 -; CGSCC-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4 -; CGSCC-NEXT: ret i8 [[I]] +define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12 +; CHECK-NEXT: ret i8 100 ; entry: %Bytes = alloca [1024 x i8], align 16 - %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 - %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 - ; %phi.ptr = phi i8* [ %gep7, %then ], [ %gep23, %else ] - %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 - store i8 42, i8* %sel.ptr, align 4 - %i = load i8, i8* %gep7, align 4 + %offset = add i64 5, 7 + %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 12 + %gep.sum = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %offset + store i8 100, i8* %gep.fixed, align 4 + %i = load i8, i8* %gep.sum, align 4 ret i8 %i } +; FIXME: This should be simplifiable. See comment inside. 
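+; The offset fed to the second GEP is loaded from memory, but the only value
+; ever stored to %addr is the constant 12, so the store below must alias
+; %gep.fixed and the load should fold to the constant 100.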
+ +define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory +; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12 +; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 12 +; CHECK-NEXT: store i8 100, i8* [[GEP_LOADED]], align 4 +; CHECK-NEXT: [[I:%.*]] = load i8, i8* [[GEP_FIXED]], align 4 +; CHECK-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %addr = alloca i64, align 16 + %gep.addr = getelementptr inbounds i64, i64* %addr, i64 0 + store i64 12, i64* %gep.addr, align 8 + %offset = load i64, i64* %gep.addr, align 8 + %gep.fixed = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 12 + + ; FIXME: AAPotentialConstantValues does not detect the constant offset being + ; passed to this GEP. It needs to rely on AAPotentialValues. + %gep.loaded = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %offset + store i8 100, i8* %gep.loaded, align 4 + + %i = load i8, i8* %gep.fixed, align 4 + ret i8 %i +} !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2}