diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -265,6 +265,13 @@
     return *this;
   }
 
+  /// Comparison for sorting ranges by offset.
+  ///
+  /// Returns true if the offset \p L is less than that of \p R.
+  inline static bool OffsetLessThan(const RangeTy &L, const RangeTy &R) {
+    return L.Offset < R.Offset;
+  }
+
   /// Constants used to represent special offsets or sizes.
   /// - This assumes that Offset and Size are non-negative.
   /// - The constants should not clash with DenseMapInfo, such as EmptyKey
@@ -273,6 +280,11 @@
   static constexpr int64_t Unknown = -2;
 };
 
+inline raw_ostream &operator<<(raw_ostream &OS, const RangeTy &R) {
+  OS << "[" << R.Offset << ", " << R.Size << "]";
+  return OS;
+}
+
 inline bool operator==(const RangeTy &A, const RangeTy &B) {
   return A.Offset == B.Offset && A.Size == B.Size;
 }
@@ -5003,28 +5015,185 @@
     AK_MUST_READ_WRITE = AK_MUST | AK_R | AK_W,
   };
 
+  /// A container for a list of ranges.
+  struct RangeList {
+    // The set of ranges rarely contains more than one element, and is unlikely
+    // to contain more than say four elements. So we find the middle-ground
+    // with a sorted vector. This avoids hard-coding a rarely used number like
+    // "four" into every instance of a SmallSet.
+    using RangeTy = AA::RangeTy;
+    using VecTy = SmallVector<RangeTy>;
+    using iterator = VecTy::iterator;
+    using const_iterator = VecTy::const_iterator;
+    VecTy Ranges;
+
+    RangeList(const RangeTy &R) { Ranges.push_back(R); }
+    RangeList(ArrayRef<int64_t> Offsets, int64_t Size) {
+      Ranges.reserve(Offsets.size());
+      for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+        assert(((i + 1 == e) || Offsets[i] < Offsets[i + 1]) &&
+               "Expected strictly ascending offsets.");
+        Ranges.emplace_back(Offsets[i], Size);
+      }
+    }
+    RangeList() = default;
+
+    iterator begin() { return Ranges.begin(); }
+    iterator end() { return Ranges.end(); }
+    const_iterator begin() const { return Ranges.begin(); }
+    const_iterator end() const { return Ranges.end(); }
+
+    // Helpers required for std::set_difference
+    using value_type = RangeTy;
+    void push_back(const RangeTy &R) {
+      assert((Ranges.empty() || RangeTy::OffsetLessThan(Ranges.back(), R)) &&
+             "Ensure the last element is the greatest.");
+      Ranges.push_back(R);
+    }
+
+    /// Copy ranges from \p L that are not in \p R, into \p D.
+    static void set_difference(const RangeList &L, const RangeList &R,
+                               RangeList &D) {
+      std::set_difference(L.begin(), L.end(), R.begin(), R.end(),
+                          std::back_inserter(D), RangeTy::OffsetLessThan);
+    }
+
+    unsigned size() const { return Ranges.size(); }
+
+    bool operator==(const RangeList &OI) const { return Ranges == OI.Ranges; }
+
+    /// Merge the ranges in \p RHS into the current ranges.
+    /// - Merging a list of unknown ranges makes the current list unknown.
+    /// - Ranges with the same offset are merged according to RangeTy::operator&
+    /// \return true if the current RangeList changed.
+    bool merge(const RangeList &RHS) {
+      if (isUnknown())
+        return false;
+      if (RHS.isUnknown()) {
+        setUnknown();
+        return true;
+      }
+
+      if (Ranges.empty()) {
+        Ranges = RHS.Ranges;
+        return true;
+      }
+
+      bool Changed = false;
+      auto LPos = Ranges.begin();
+      for (auto &R : RHS.Ranges) {
+        auto Result = insert(LPos, R);
+        if (isUnknown())
+          return true;
+        LPos = Result.first;
+        Changed |= Result.second;
+      }
+      return Changed;
+    }
+    /// Insert \p R at the given iterator \p Pos, and merge if necessary.
+    ///
+    /// This assumes that all ranges before \p Pos are OffsetLessThan \p R, and
+    /// then maintains the sorted order for the suffix list.
+    ///
+    /// \return The place of insertion and true iff anything changed.
+    std::pair<iterator, bool> insert(iterator Pos, const RangeTy &R) {
+      if (isUnknown())
+        return std::make_pair(Ranges.begin(), false);
+      if (R.offsetOrSizeAreUnknown()) {
+        return std::make_pair(setUnknown(), true);
+      }
+
+      // Maintain this as a sorted vector of unique entries.
+      auto LB = std::lower_bound(Pos, Ranges.end(), R, RangeTy::OffsetLessThan);
+      if (LB == Ranges.end() || LB->Offset != R.Offset)
+        return std::make_pair(Ranges.insert(LB, R), true);
+      bool Changed = *LB != R;
+      *LB &= R;
+      if (LB->offsetOrSizeAreUnknown())
+        return std::make_pair(setUnknown(), true);
+      return std::make_pair(LB, Changed);
+    }
+
+    /// Insert the given range \p R, maintaining sorted order.
+    ///
+    /// \return The place of insertion and true iff anything changed.
+    std::pair<iterator, bool> insert(const RangeTy &R) {
+      return insert(Ranges.begin(), R);
+    }
+
+    /// Add the increment \p Inc to the offset of every range.
+    void addToAllOffsets(int64_t Inc) {
+      assert(!isUnassigned());
+      if (isUnknown())
+        return;
+      for (auto &R : Ranges) {
+        R.Offset += Inc;
+      }
+    }
+
+    /// Return true iff there is exactly one range and it is known.
+    bool isUnique() const {
+      return Ranges.size() == 1 && !Ranges.front().offsetOrSizeAreUnknown();
+    }
+
+    /// Return the unique range, assuming it exists.
+    const RangeTy &getUnique() const {
+      assert(isUnique());
+      return Ranges.front();
+    }
+
+    /// Return true iff the list contains an unknown range.
+    bool isUnknown() const {
+      if (isUnassigned())
+        return false;
+      if (Ranges.front().offsetOrSizeAreUnknown()) {
+        assert(Ranges.size() == 1);
+        return true;
+      }
+      return false;
+    }
+
+    /// Discard all ranges and insert a single unknown range.
+    iterator setUnknown() {
+      Ranges.clear();
+      Ranges.push_back(RangeTy::getUnknown());
+      return Ranges.begin();
+    }
+
+    /// Return true if no ranges have been inserted.
+    bool isUnassigned() const { return Ranges.size() == 0; }
+  };
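
Reviewer annotation (not part of the patch): RangeList is a sorted vector with set-like insertion, where inserting a second size at an existing offset degrades that entry via RangeTy::operator&. The following standalone C++ sketch mirrors those semantics using only standard-library types; Range, InsertRange and the -2 "unknown" marker are invented here for illustration.

// Standalone illustration of RangeList's sorted-vector semantics.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct Range {
  int64_t Offset, Size;
};

int main() {
  std::vector<Range> Ranges; // sorted by Offset, at most one entry per Offset
  auto OffsetLessThan = [](const Range &L, const Range &R) {
    return L.Offset < R.Offset;
  };

  auto InsertRange = [&](Range R) {
    auto LB = std::lower_bound(Ranges.begin(), Ranges.end(), R, OffsetLessThan);
    if (LB == Ranges.end() || LB->Offset != R.Offset)
      Ranges.insert(LB, R); // new offset: insert, keeping sorted order
    else if (LB->Size != R.Size)
      LB->Size = -2; // same offset, conflicting size: degrade to "unknown"
  };

  InsertRange({23, 1});
  InsertRange({7, 1});
  InsertRange({29, 1});
  InsertRange({23, 1}); // duplicate, no effect

  for (const Range &R : Ranges)
    std::cout << "[" << R.Offset << ", " << R.Size << "] "; // [7,1] [23,1] [29,1]
  std::cout << "\n";
}
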
+
   /// An access description.
   struct Access {
     Access(Instruction *I, int64_t Offset, int64_t Size,
            Optional<Value *> Content, AccessKind Kind, Type *Ty)
-        : LocalI(I), RemoteI(I), Content(Content), Range(Offset, Size),
+        : LocalI(I), RemoteI(I), Content(Content), Ranges(Offset, Size),
           Kind(Kind), Ty(Ty) {
       verify();
     }
+    Access(Instruction *LocalI, Instruction *RemoteI, const RangeList &Ranges,
+           Optional<Value *> Content, AccessKind K, Type *Ty)
+        : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Ranges(Ranges),
+          Kind(K), Ty(Ty) {
+      if (Ranges.size() > 1) {
+        Kind = AccessKind(Kind | AK_MAY);
+        Kind = AccessKind(Kind & ~AK_MUST);
+      }
+      verify();
+    }
     Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset,
            int64_t Size, Optional<Value *> Content, AccessKind Kind, Type *Ty)
         : LocalI(LocalI), RemoteI(RemoteI), Content(Content),
-          Range(Offset, Size), Kind(Kind), Ty(Ty) {
+          Ranges(Offset, Size), Kind(Kind), Ty(Ty) {
       verify();
     }
     Access(const Access &Other) = default;
-    Access(const Access &&Other)
-        : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
-          Range(Other.Range), Kind(Other.Kind), Ty(Other.Ty) {}
 
     Access &operator=(const Access &Other) = default;
     bool operator==(const Access &R) const {
-      return LocalI == R.LocalI && RemoteI == R.RemoteI && Range == R.Range &&
+      return LocalI == R.LocalI && RemoteI == R.RemoteI && Ranges == R.Ranges &&
              Content == R.Content && Kind == R.Kind;
     }
     bool operator!=(const Access &R) const { return !(*this == R); }
@@ -5033,9 +5202,8 @@
       assert(RemoteI == R.RemoteI && "Expected same instruction!");
       assert(LocalI == R.LocalI && "Expected same instruction!");
       Kind = AccessKind(Kind | R.Kind);
-      auto Before = Range;
-      Range &= R.Range;
-      if (Before.isUnassigned() || Before == Range) {
+      bool Changed = Ranges.merge(R.Ranges);
+      if (!Changed) {
         Content =
             AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
       } else {
@@ -5052,6 +5220,8 @@
     void verify() {
       assert(isMustAccess() + isMayAccess() == 1 &&
              "Expect must or may access, not both.");
+      assert((isMayAccess() || Ranges.size() == 1) &&
+             "Cannot be a must access if there are multiple ranges.");
     }
 
     /// Return the access kind.
@@ -5063,8 +5233,19 @@
     /// Return true if this is a write access.
     bool isWrite() const { return Kind & AK_W; }
 
-    bool isMustAccess() const { return Kind & AK_MUST; }
-    bool isMayAccess() const { return Kind & AK_MAY; }
+    bool isMustAccess() const {
+      bool MustAccess = Kind & AK_MUST;
+      assert((!MustAccess || Ranges.size() == 1) &&
+             "Cannot be a must access if there are multiple ranges.");
+      return MustAccess;
+    }
+
+    bool isMayAccess() const {
+      bool MayAccess = Kind & AK_MAY;
+      assert((MayAccess || Ranges.size() == 1) &&
+             "Cannot be a must access if there are multiple ranges.");
+      return MayAccess;
+    }
 
     /// Return the instruction that causes the access with respect to the local
     /// scope of the associated attribute.
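
Annotation: the invariant enforced by verify() and the new constructor is that an access over more than one range can only be a may-access. A tiny standalone sketch of that bit manipulation (the AK_* values below are stand-ins, not this header's enum):

// Standalone sketch of the must-to-may demotion.
#include <cassert>

enum AccessKind { AK_R = 1 << 0, AK_MAY = 1 << 2, AK_MUST = 1 << 3 };

// An access over more than one range can only be a may-access.
AccessKind demoteIfMultipleRanges(AccessKind Kind, unsigned NumRanges) {
  if (NumRanges > 1) {
    Kind = AccessKind(Kind | AK_MAY);
    Kind = AccessKind(Kind & ~AK_MUST);
  }
  return Kind;
}

int main() {
  AccessKind K = AccessKind(AK_MUST | AK_R);
  K = demoteIfMultipleRanges(K, 3);
  assert((K & AK_MAY) && !(K & AK_MUST)); // exactly the verify() invariant
  return 0;
}
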
@@ -5098,11 +5279,25 @@
     /// determined.
     Optional<Value *> getContent() const { return Content; }
 
-    /// Return the offset for this access.
-    int64_t getOffset() const { return Range.Offset; }
+    bool hasUniqueRange() const { return Ranges.isUnique(); }
+    const AA::RangeTy &getUniqueRange() const { return Ranges.getUnique(); }
+
+    /// Add a range accessed by this Access.
+    ///
+    /// If there are multiple ranges, then this is a "may access".
+    void addRange(int64_t Offset, int64_t Size) {
+      Ranges.insert({Offset, Size});
+      if (!hasUniqueRange()) {
+        Kind = AccessKind(Kind | AK_MAY);
+        Kind = AccessKind(Kind & ~AK_MUST);
+      }
+    }
+
+    const RangeList &getRanges() const { return Ranges; }
 
-    /// Return the size for this access.
-    int64_t getSize() const { return Range.Size; }
+    using const_iterator = RangeList::const_iterator;
+    const_iterator begin() const { return Ranges.begin(); }
+    const_iterator end() const { return Ranges.end(); }
 
   private:
     /// The instruction responsible for the access with respect to the local
@@ -5116,8 +5311,8 @@
     /// cannot be determined.
     Optional<Value *> Content;
 
-    /// The object accessed, in terms of an offset and size in bytes.
-    AA::RangeTy Range;
+    /// Set of potential ranges accessed from the base pointer.
+    RangeList Ranges;
 
     /// The access kind, e.g., READ, as bitset (could be more than one).
     AccessKind Kind;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -818,7 +818,7 @@
   /// OffsetBin to the bin for its new offset.
   ///
   /// \Returns CHANGED, if the state changed, UNCHANGED otherwise.
-  ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size,
+  ChangeStatus addAccess(Attributor &A, const AAPointerInfo::RangeList &Ranges,
                          Instruction &I, Optional<Value *> Content,
                          AAPointerInfo::AccessKind Kind, Type *Ty,
                          Instruction *RemoteI = nullptr);
@@ -864,7 +864,7 @@
       bool IsExact = Range == ItRange && !Range.offsetOrSizeAreUnknown();
       for (auto Index : It.getSecond()) {
         auto &Access = AccessList[Index];
-        if (!CB(Access, IsExact))
+        if (!CB(Access, IsExact && Access.hasUniqueRange()))
           return false;
       }
     }
@@ -884,9 +884,12 @@
       return true;
     }
 
-    for (auto LI : LocalList->getSecond()) {
-      auto &Access = AccessList[LI];
-      Range &= {Access.getOffset(), Access.getSize()};
+    for (unsigned Index : LocalList->getSecond()) {
+      for (auto &R : AccessList[Index]) {
+        Range &= R;
+        if (Range.offsetOrSizeAreUnknown())
+          break;
+      }
     }
     return forallInterferingAccesses(Range, CB);
   }
@@ -896,13 +899,11 @@
   BooleanState BS;
 };
 
-ChangeStatus AA::PointerInfo::State::addAccess(Attributor &A, int64_t Offset,
-                                               int64_t Size, Instruction &I,
-                                               Optional<Value *> Content,
-                                               AAPointerInfo::AccessKind Kind,
-                                               Type *Ty, Instruction *RemoteI) {
+ChangeStatus AA::PointerInfo::State::addAccess(
+    Attributor &A, const AAPointerInfo::RangeList &Ranges, Instruction &I,
+    Optional<Value *> Content, AAPointerInfo::AccessKind Kind, Type *Ty,
+    Instruction *RemoteI) {
   RemoteI = RemoteI ? RemoteI : &I;
-  AAPointerInfo::Access Acc(&I, RemoteI, Offset, Size, Content, Kind, Ty);
 
   // Check if we have an access for this instruction, if not, simply add it.
   auto &LocalList = RemoteIMap[RemoteI];
@@ -916,48 +917,122 @@
       break;
     }
   }
+
+  auto AddToBins = [&](const AAPointerInfo::RangeList &ToAdd) {
+    LLVM_DEBUG(
+      if (ToAdd.size())
+        dbgs() << "[AAPointerInfo] Inserting access in new offset bins\n";
+    );
+
+    for (auto Key : ToAdd) {
+      LLVM_DEBUG(dbgs() << "    key " << Key << "\n");
+      OffsetBins[Key].insert(AccIndex);
+    }
+  };
+
   if (!AccExists) {
-    AccessList.push_back(Acc);
+    AccessList.emplace_back(&I, RemoteI, Ranges, Content, Kind, Ty);
+    assert((AccessList.size() == AccIndex + 1) &&
+           "New Access should have been at AccIndex");
     LocalList.push_back(AccIndex);
-  } else {
-    // The new one will be combined with the existing one.
-    auto &Current = AccessList[AccIndex];
-    auto Before = Current;
-    Current &= Acc;
-    if (Current == Before)
-      return ChangeStatus::UNCHANGED;
+    AddToBins(AccessList[AccIndex].getRanges());
+    return ChangeStatus::CHANGED;
+  }
+
+  // Combine the new Access with the existing Access, and then update the
+  // mapping in the offset bins.
+  AAPointerInfo::Access Acc(&I, RemoteI, Ranges, Content, Kind, Ty);
+  auto &Current = AccessList[AccIndex];
+  auto Before = Current;
+  Current &= Acc;
+  if (Current == Before)
+    return ChangeStatus::UNCHANGED;
+
+  auto &ExistingRanges = Before.getRanges();
+  auto &NewRanges = Current.getRanges();
+
+  // Ranges that are in the old access but not the new access need to be
+  // removed from the offset bins.
+  AAPointerInfo::RangeList ToRemove;
+  AAPointerInfo::RangeList::set_difference(ExistingRanges, NewRanges, ToRemove);
+  LLVM_DEBUG(
+    if (ToRemove.size())
+      dbgs() << "[AAPointerInfo] Removing access from old offset bins\n";
+  );
 
-    Acc = Current;
-    AA::RangeTy Key{Before.getOffset(), Before.getSize()};
+  for (auto Key : ToRemove) {
+    LLVM_DEBUG(dbgs() << "    key " << Key << "\n");
     assert(OffsetBins.count(Key) && "Existing Access must be in some bin.");
     auto &Bin = OffsetBins[Key];
     assert(Bin.count(AccIndex) &&
            "Expected bin to actually contain the Access.");
-    LLVM_DEBUG(dbgs() << "[AAPointerInfo] Removing Access "
-                      << AccessList[AccIndex] << " with key {" << Key.Offset
-                      << ',' << Key.Size << "}\n");
     Bin.erase(AccIndex);
   }
 
-  AA::RangeTy Key{Acc.getOffset(), Acc.getSize()};
-  LLVM_DEBUG(dbgs() << "[AAPointerInfo] Inserting Access " << Acc
-                    << " with key {" << Key.Offset << ',' << Key.Size << "}\n");
-  OffsetBins[Key].insert(AccIndex);
+  // Ranges that are in the new access but not the old access need to be added
+  // to the offset bins.
+  AAPointerInfo::RangeList ToAdd;
+  AAPointerInfo::RangeList::set_difference(NewRanges, ExistingRanges, ToAdd);
+  AddToBins(ToAdd);
   return ChangeStatus::CHANGED;
 }
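
Annotation: the two set_difference calls above are the whole bin-maintenance story, since bin keys are compared by offset only. A standalone sketch over std::vector showing which keys get removed from and added to OffsetBins when an access's range list changes:

// Standalone illustration of the set_difference bookkeeping.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <vector>

struct Range {
  int64_t Offset, Size;
};
static bool OffsetLessThan(const Range &L, const Range &R) {
  return L.Offset < R.Offset;
}

int main() {
  // Ranges of the access before and after merging, both sorted by offset.
  std::vector<Range> Before = {{8, 4}, {16, 4}};
  std::vector<Range> Current = {{8, 4}, {16, 4}, {24, 4}};

  std::vector<Range> ToRemove, ToAdd;
  // Keys present before but not anymore: remove from the bins.
  std::set_difference(Before.begin(), Before.end(), Current.begin(),
                      Current.end(), std::back_inserter(ToRemove),
                      OffsetLessThan);
  // Keys that are new: add to the bins.
  std::set_difference(Current.begin(), Current.end(), Before.begin(),
                      Before.end(), std::back_inserter(ToAdd), OffsetLessThan);

  std::cout << "remove " << ToRemove.size() << ", add " << ToAdd.size()
            << "\n"; // remove 0, add 1: only [24, 4] is a new bin key
}
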
 
 namespace {
 
-/// Helper struct, will support ranges eventually.
-///
-/// FIXME: Tracks a single Offset until we have proper support for a list of
-/// RangeTy objects.
+/// A helper containing a list of offsets computed for a Use. Ideally this
+/// list should be strictly ascending, but we ensure that only when we
+/// actually translate the list of offsets to a RangeList.
 struct OffsetInfo {
-  int64_t Offset = AA::RangeTy::Unassigned;
+  using VecTy = SmallVector<int64_t>;
+  using const_iterator = VecTy::const_iterator;
+  VecTy Offsets;
+
+  const_iterator begin() const { return Offsets.begin(); }
+  const_iterator end() const { return Offsets.end(); }
+
+  bool operator==(const OffsetInfo &RHS) const {
+    return Offsets == RHS.Offsets;
+  }
+
+  void insert(int64_t Offset) { Offsets.push_back(Offset); }
+  bool isUnassigned() const { return Offsets.size() == 0; }
 
-  bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
+  bool isUnknown() const {
+    if (isUnassigned())
+      return false;
+    if (Offsets.size() == 1)
+      return Offsets.front() == AA::RangeTy::Unknown;
+    return false;
+  }
+
+  void setUnknown() {
+    Offsets.clear();
+    Offsets.push_back(AA::RangeTy::Unknown);
+  }
+
+  void addToAll(int64_t Inc) {
+    for (auto &Offset : Offsets) {
+      Offset += Inc;
+    }
+  }
+
+  /// Copy offsets from \p R into the current list.
+  ///
+  /// Ideally all lists should be strictly ascending, but we defer that to the
+  /// actual use of the list. So we just blindly append here.
+  void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); }
 };
 
+static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) {
+  ListSeparator LS;
+  OS << "[";
+  for (auto Offset : OI) {
+    OS << LS << Offset;
+  }
+  OS << "]";
+  return OS;
+}
+
 struct AAPointerInfoImpl
     : public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
   using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1175,21 +1250,20 @@
           continue;
         bool UsedAssumedInformation = false;
         AccessKind AK = RAcc.getKind();
-        Optional<Value *> Content = RAcc.getContent();
-        Content = A.translateArgumentToCallSiteContent(
+        Optional<Value *> Content = A.translateArgumentToCallSiteContent(
             RAcc.getContent(), CB, *this, UsedAssumedInformation);
         AK = AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW));
         AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST));
-        Changed =
-            Changed | addAccess(A, It.first.Offset, It.first.Size, CB, Content,
-                                AK, RAcc.getType(), RAcc.getRemoteInst());
+
+        Changed |= addAccess(A, RAcc.getRanges(), CB, Content, AK,
+                             RAcc.getType(), RAcc.getRemoteInst());
       }
     }
     return Changed;
   }
 
   ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA,
-                                    int64_t Offset, CallBase &CB) {
+                                    const OffsetInfo &Offsets, CallBase &CB) {
     using namespace AA::PointerInfo;
     if (!OtherAA.getState().isValidState() || !isValidState())
       return indicatePessimisticFixpoint();
@@ -1200,17 +1274,19 @@
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     const auto &State = OtherAAImpl.getState();
     for (const auto &It : State) {
-      AA::RangeTy Range = AA::RangeTy::getUnknown();
-      if (Offset != AA::RangeTy::Unknown &&
-          !It.first.offsetOrSizeAreUnknown()) {
-        Range = AA::RangeTy(It.first.Offset + Offset, It.first.Size);
-      }
       for (auto Index : It.getSecond()) {
         const auto &RAcc = State.getAccess(Index);
-        AccessKind AK = RAcc.getKind();
-        Optional<Value *> Content = RAcc.getContent();
-        Changed = Changed | addAccess(A, Range.Offset, Range.Size, CB, Content,
-                                      AK, RAcc.getType(), RAcc.getRemoteInst());
+        for (auto Offset : Offsets) {
+          auto NewRanges = Offset == AA::RangeTy::Unknown
+                               ? AA::RangeTy::getUnknown()
+                               : RAcc.getRanges();
+          if (!NewRanges.isUnknown()) {
+            NewRanges.addToAllOffsets(Offset);
+          }
+          Changed |=
+              addAccess(A, NewRanges, CB, RAcc.getContent(), RAcc.getKind(),
+                        RAcc.getType(), RAcc.getRemoteInst());
+        }
       }
     }
     return Changed;
@@ -1249,15 +1325,21 @@
   /// Deal with an access and signal if it was handled successfully.
   bool handleAccess(Attributor &A, Instruction &I, Optional<Value *> Content,
-                    AccessKind Kind, int64_t Offset, ChangeStatus &Changed,
-                    Type *Ty) {
+                    AccessKind Kind, SmallVectorImpl<int64_t> &Offsets,
+                    ChangeStatus &Changed, Type *Ty) {
     using namespace AA::PointerInfo;
     auto Size = AA::RangeTy::Unknown;
     const DataLayout &DL = A.getDataLayout();
     TypeSize AccessSize = DL.getTypeStoreSize(Ty);
     if (!AccessSize.isScalable())
       Size = AccessSize.getFixedSize();
-    Changed = Changed | addAccess(A, Offset, Size, I, Content, Kind, Ty);
+
+    // Make a strictly ascending list of offsets as required by addAccess()
+    llvm::sort(Offsets);
+    auto Last = std::unique(Offsets.begin(), Offsets.end());
+    Offsets.erase(Last, Offsets.end());
+
+    Changed = Changed | addAccess(A, {Offsets, Size}, I, Content, Kind, Ty);
     return true;
   };
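
Annotation: OffsetInfo::merge appends blindly, so lists may be unsorted and contain duplicates until handleAccess normalizes them. A minimal standalone illustration of that deferred sort-then-unique step:

// Standalone illustration of the normalization done in handleAccess.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Blind appends (as in OffsetInfo::merge) can leave duplicates, unsorted.
  std::vector<int64_t> Offsets = {23, 29};
  std::vector<int64_t> Other = {7, 23};
  Offsets.insert(Offsets.end(), Other.begin(), Other.end());

  // addAccess requires a strictly ascending, duplicate-free list.
  std::sort(Offsets.begin(), Offsets.end());
  Offsets.erase(std::unique(Offsets.begin(), Offsets.end()), Offsets.end());

  for (int64_t O : Offsets)
    std::cout << O << " "; // 7 23 29
  std::cout << "\n";
}
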
@@ -1270,18 +1352,87 @@
   }
 };
 
+/// If \p Select represents a tree of SelectInst with Constant leaves, insert
+/// all those constants into \p Offsets.
+///
+/// \return true iff \p Select really is such a tree.
+static bool collectConstantOffsets(SmallVectorImpl<uint64_t> &Offsets,
+                                   SelectInst *Select) {
+  SmallVector<SelectInst *> Stack;
+  SmallSet<SelectInst *, 4> Visited;
+  Stack.push_back(Select);
+  Visited.insert(Select);
+
+  while (!Stack.empty()) {
+    auto *Select = Stack.pop_back_val();
+    Visited.insert(Select);
+
+    // Operands [1] and [2] are the True and False inputs.
+    for (int i = 1; i != 3; ++i) {
+      auto *Opnd = Select->getOperand(i);
+      if (auto *Sel = dyn_cast<SelectInst>(Opnd)) {
+        if (Visited.insert(Sel).second)
+          Stack.push_back(Sel);
+        continue;
+      }
+      if (auto *ConstantOp = dyn_cast<ConstantInt>(Opnd)) {
+        Offsets.push_back(ConstantOp->getZExtValue());
+        continue;
+      }
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/// If \p VariableOffsets is a forest of SelectInst with Constant arguments,
+/// incorporate all of these into \p UsrOI, adding \p ConstantOffset to each.
+///
+/// Each Constant leaf in the forest produces an incremented copy of the
+/// original \p UsrOI, and the final state of \p UsrOI is a union of all these
+/// copies.
+///
+/// \return true iff \p VariableOffsets really is such a forest.
+static bool
+handleConstantSelect(OffsetInfo &UsrOI, unsigned ConstantOffset,
+                     const MapVector<Value *, APInt> &VariableOffsets) {
+  auto NewOI = UsrOI;
+  NewOI.addToAll(ConstantOffset);
+
+  for (const auto &VI : VariableOffsets) {
+    auto *Select = dyn_cast<SelectInst>(VI.first);
+    if (!Select)
+      return false;
+    SmallVector<uint64_t> Offsets;
+    if (!collectConstantOffsets(Offsets, Select))
+      return false;
+
+    OffsetInfo UnionOfAllCopies;
+    for (auto ConstOffset : Offsets) {
+      auto CopyPerOffset = NewOI;
+      CopyPerOffset.addToAll(ConstOffset);
+      UnionOfAllCopies.merge(CopyPerOffset);
+    }
+    NewOI = UnionOfAllCopies;
+  }
+
+  UsrOI = std::move(NewOI);
+  return true;
+}
+
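Annotation: handleConstantSelect computes a cross product over the select forest; each constant leaf shifts a copy of the incoming offset list, and the copies are unioned. A standalone sketch with invented numbers (two selects with leaves {23, 29} and {0, 3} applied to a base offset list {5}):

// Standalone illustration of the cross product over select leaves.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> Offsets = {5}; // incoming offsets plus GEP constant part

  // Constant leaves collected from each select in the GEP's variable indices.
  std::vector<std::vector<int64_t>> SelectLeaves = {{23, 29}, {0, 3}};

  for (const auto &Leaves : SelectLeaves) {
    std::vector<int64_t> Union;
    for (int64_t Leaf : Leaves)    // each leaf clones the current list...
      for (int64_t O : Offsets)
        Union.push_back(O + Leaf); // ...shifted by that leaf's value
    Offsets = Union;               // union of all shifted copies
  }

  for (int64_t O : Offsets)
    std::cout << O << " "; // 28 34 31 37
  std::cout << "\n";
}
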
 ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
   using namespace AA::PointerInfo;
   ChangeStatus Changed = ChangeStatus::UNCHANGED;
+  const DataLayout &DL = A.getDataLayout();
   Value &AssociatedValue = getAssociatedValue();
-  const DataLayout &DL = A.getDataLayout();
   DenseMap<Value *, OffsetInfo> OffsetInfoMap;
-  OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
+  OffsetInfoMap[&AssociatedValue].insert(0);
 
   auto HandlePassthroughUser = [&](Value *Usr, const OffsetInfo &PtrOI,
                                    bool &Follow) {
-    assert(PtrOI.Offset != AA::RangeTy::Unassigned &&
+    assert(!PtrOI.isUnassigned() &&
            "Cannot pass through if the input Ptr was not visited!");
     OffsetInfoMap[Usr] = PtrOI;
     Follow = true;
@@ -1316,23 +1467,35 @@
       // already in it though.
       auto &UsrOI = OffsetInfoMap[Usr];
       auto &PtrOI = OffsetInfoMap[CurPtr];
-      UsrOI = PtrOI;
+      Follow = true;
 
-      // TODO: Use range information.
-      APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
-      if (PtrOI.Offset == AA::RangeTy::Unknown ||
-          !GEP->accumulateConstantOffset(DL, GEPOffset)) {
-        LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset not constant " << *GEP
-                          << "\n");
-        UsrOI.Offset = AA::RangeTy::Unknown;
-        Follow = true;
+      if (PtrOI.isUnknown()) {
+        UsrOI.setUnknown();
         return true;
       }
 
-      LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
-                        << "\n");
-      UsrOI.Offset = PtrOI.Offset + GEPOffset.getZExtValue();
-      Follow = true;
+      // If the indices to the GEP can be traced to constants, incorporate all
+      // of these into UsrOI.
+      unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
+      MapVector<Value *, APInt> VariableOffsets;
+      APInt ConstantOffset(BitWidth, 0);
+      if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
+        UsrOI.setUnknown();
+        return true;
+      }
+
+      UsrOI = PtrOI;
+      if (VariableOffsets.empty()) {
+        LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
+                          << "\n");
+        UsrOI.addToAll(ConstantOffset.getZExtValue());
+      } else if (!handleConstantSelect(UsrOI, ConstantOffset.getZExtValue(),
                                        VariableOffsets)) {
+        LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is not constant "
+                          << *GEP << "\n");
+        UsrOI.setUnknown();
+      }
+
       return true;
     }
     if (isa<PtrToIntInst>(Usr))
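
Annotation: the GEP case now relies on GEPOperator::collectOffset to split the offset into a constant part plus per-variable-index scales, and only then either shifts every known offset or falls back. A rough standalone model of that decision (Unknown, GEPDecomposable and HasVariableIndices are invented stand-ins; the real code first tries handleConstantSelect before giving up on variable indices):

// Standalone model of the GEP offset handling.
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for AA::RangeTy::Unknown.
constexpr int64_t Unknown = -2;

int main() {
  std::vector<int64_t> UsrOffsets = {2, 3}; // offsets known for the base pointer

  bool GEPDecomposable = true; // would come from GEPOperator::collectOffset
  int64_t ConstantOffset = 4;  // constant part of the GEP
  bool HasVariableIndices = false;

  if (!GEPDecomposable || HasVariableIndices) {
    UsrOffsets.assign(1, Unknown); // give up: a single "unknown" offset
  } else {
    for (int64_t &O : UsrOffsets)  // constant GEP: shift every known offset
      O += ConstantOffset;
  }

  for (int64_t O : UsrOffsets)
    std::cout << O << " "; // 6 7
  std::cout << "\n";
}
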
@@ -1352,17 +1515,17 @@
 
       // Check if the PHI operand has already an unknown offset as we can't
       // improve on that anymore.
-      if (PtrOI.Offset == AA::RangeTy::Unknown) {
+      if (PtrOI.isUnknown()) {
         LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown "
                           << *CurPtr << " in " << *Usr << "\n");
-        Follow = UsrOI.Offset != AA::RangeTy::Unknown;
-        UsrOI = PtrOI;
+        Follow = !UsrOI.isUnknown();
+        UsrOI.setUnknown();
         return true;
       }
 
       // Check if the PHI is invariant (so far).
       if (UsrOI == PtrOI) {
-        assert(PtrOI.Offset != AA::RangeTy::Unassigned &&
+        assert(!PtrOI.isUnassigned() &&
                "Cannot assign if the current Ptr was not visited!");
         LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)");
         return true;
@@ -1376,9 +1539,13 @@
           DL, Offset, /* AllowNonInbounds */ true);
       auto It = OffsetInfoMap.find(CurPtrBase);
       if (It != OffsetInfoMap.end()) {
-        Offset += It->getSecond().Offset;
-        if (IsFirstPHIUser || Offset == UsrOI.Offset)
+        auto BaseOI = It->getSecond();
+        BaseOI.addToAll(Offset.getZExtValue());
+        if (IsFirstPHIUser || BaseOI == UsrOI) {
+          LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant " << *CurPtr
+                            << " in " << *Usr << "\n");
           return HandlePassthroughUser(Usr, PtrOI, Follow);
+        }
         LLVM_DEBUG(
             dbgs() << "[AAPointerInfo] PHI operand pointer offset mismatch "
                    << *CurPtr << " in " << *Usr << "\n");
@@ -1388,8 +1555,7 @@
       }
 
       // TODO: Approximate in case we know the direction of the recurrence.
-      UsrOI = PtrOI;
-      UsrOI.Offset = AA::RangeTy::Unknown;
+      UsrOI.setUnknown();
       Follow = true;
       return true;
     }
@@ -1402,9 +1568,11 @@
         AK = AccessKind(AK | AccessKind::AK_MUST);
       else
         AK = AccessKind(AK | AccessKind::AK_MAY);
-      return handleAccess(A, *LoadI, /* Content */ nullptr, AK,
-                          OffsetInfoMap[CurPtr].Offset, Changed,
-                          LoadI->getType());
+      if (!handleAccess(A, *LoadI, /* Content */ nullptr, AK,
+                        OffsetInfoMap[CurPtr].Offsets, Changed,
+                        LoadI->getType()))
+        return false;
+      return true;
     }
 
     auto HandleStoreLike = [&](Instruction &I, Value *ValueOp, Type &ValueTy,
@@ -1430,8 +1598,10 @@
       if (ValueOp)
         Content = A.getAssumedSimplified(
             *ValueOp, *this, UsedAssumedInformation, AA::Interprocedural);
-      return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offset,
-                          Changed, &ValueTy);
+      if (!handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offsets,
+                        Changed, &ValueTy))
+        return false;
+      return true;
     };
 
     if (auto *StoreI = dyn_cast<StoreInst>(Usr))
@@ -1457,8 +1627,7 @@
       const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
           *this, IRPosition::callsite_argument(*CB, ArgNo),
           DepClassTy::REQUIRED);
-      Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr].Offset,
-                                     *CB) |
+      Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr], *CB) |
                 Changed;
       return isValidState();
     }
@@ -1476,8 +1645,8 @@
     LLVM_DEBUG({
       if (!(OffsetInfoMap[NewU] == OffsetInfoMap[OldU])) {
         dbgs() << "[AAPointerInfo] Equivalent use callback failed: "
-               << OffsetInfoMap[NewU].Offset << " vs "
-               << OffsetInfoMap[OldU].Offset << "\n";
+               << OffsetInfoMap[NewU] << " vs " << OffsetInfoMap[OldU]
+               << "\n";
       }
     });
     return OffsetInfoMap[NewU] == OffsetInfoMap[OldU];
@@ -1557,7 +1726,7 @@
     auto Kind =
         ArgNo == 0 ? AccessKind::AK_MUST_WRITE : AccessKind::AK_MUST_READ;
     Changed =
-        Changed | addAccess(A, 0, LengthVal, *MI, nullptr, Kind, nullptr);
+        Changed | addAccess(A, {0, LengthVal}, *MI, nullptr, Kind, nullptr);
   }
   LLVM_DEBUG({
     dbgs() << "Accesses by bin after update:\n";
@@ -1595,8 +1764,8 @@
     bool ReadOnly = AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown);
     auto Kind =
         ReadOnly ? AccessKind::AK_MAY_READ : AccessKind::AK_MAY_READ_WRITE;
-    return addAccess(A, AA::RangeTy::Unknown, AA::RangeTy::Unknown, *getCtxI(),
-                     nullptr, Kind, nullptr);
+    return addAccess(A, AA::RangeTy::getUnknown(), *getCtxI(), nullptr, Kind,
+                     nullptr);
  }
 
   /// See AbstractAttribute::trackStatistics()
diff --git a/llvm/test/CodeGen/AMDGPU/attributor.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
rename from llvm/test/CodeGen/AMDGPU/attributor.ll
rename to llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
--- a/llvm/test/CodeGen/AMDGPU/attributor.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
@@ -6,10 +6,12 @@
 ; offsets of the phi cannot be determined, and hence the attributor assumes that
 ; hostcall is in use.
 
+; CHECK-LABEL: amdhsa.kernels:
 ; CHECK: .value_kind: hidden_hostcall_buffer
 ; CHECK: .value_kind: hidden_multigrid_sync_arg
+; CHECK-LABEL: .name: kernel_1
 
-define amdgpu_kernel void @the_kernel(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
+define amdgpu_kernel void @kernel_1(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
 entry:
   %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   br i1 %cond, label %old, label %new
@@ -35,6 +37,32 @@
   ret void
 }
 
+; The call to intrinsic implicitarg_ptr is combined with an offset produced by
+; select'ing between two constants, before it is eventually used in a GEP to
+; form the address of a load. This test ensures that AAPointerInfo can look
+; through the select to maintain a set of indices, so that it can precisely
+; determine that hostcall and other expensive implicit args are not in use.
+
+; CHECK-NOT: .value_kind: hidden_hostcall_buffer
+; CHECK-NOT: .value_kind: hidden_multigrid_sync_arg
+; CHECK-LABEL: .name: kernel_2
+
+define amdgpu_kernel void @kernel_2(i32 addrspace(1)* %a, i1 %cond) {
+entry:
+  %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %tmp5 = select i1 %cond, i64 12, i64 18
+  %tmp6 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %tmp5
+  %tmp8 = bitcast i8 addrspace(4)* %tmp6 to i16 addrspace(4)*
+
+  ;;; THIS USE is where multiple offsets are possible, relative to implicitarg_ptr
+  %tmp9 = load i16, i16 addrspace(4)* %tmp8, align 2
+
+  %idx.ext = sext i16 %tmp9 to i64
+  %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
+  %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()
 declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -53,7 +53,7 @@
 ; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CGSCC-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
 ; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3:[0-9]+]]
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4:[0-9]+]]
 ; CGSCC-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -83,8 +83,8 @@
 ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read)
 ; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads
 ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] {
-; CGSCC-NEXT:    [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4:[0-9]+]]
-; CGSCC-NEXT:    [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]]
+; CGSCC-NEXT:    [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR5:[0-9]+]]
+; CGSCC-NEXT:    [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR5]]
 ; CGSCC-NEXT:    [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
 ; CGSCC-NEXT:    ret i8 [[Z]]
 ;
@@ -113,7 +113,7 @@
 ; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
 ; CGSCC-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
 ; CGSCC-NEXT:    store i8 3, i8* [[I1]], align 1
-; CGSCC-NEXT:    [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]]
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4]]
 ; CGSCC-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -134,7 +134,7 @@
 ; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
 ; TUNIT-NEXT:    store i8 2, i8* [[I0]], align 2
-; TUNIT-NEXT:    [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]]
+; TUNIT-NEXT:    [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3:[0-9]+]]
 ; TUNIT-NEXT:    ret i8 [[R]]
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none)
@@ -144,7 +144,7 @@
 ; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CGSCC-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
 ; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]]
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR4]]
 ; CGSCC-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -160,7 +160,7 @@
 define internal i8 @read_arg_2(i8* %p) {
 ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
 ; TUNIT-LABEL: define {{[^@]+}}@read_arg_2
-; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[P:%.*]]) #[[ATTR0]] {
+; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[P:%.*]]) #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[L:%.*]] = load i8, i8* [[P]], align 1
 ; TUNIT-NEXT:    ret i8 [[L]]
@@ -180,17 +180,17 @@
 define internal i8 @sum_two_different_loads(i8* %p, i8* %q) {
 ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read)
 ; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads
-; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR0]] {
-; TUNIT-NEXT:    [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P]]) #[[ATTR2]]
-; TUNIT-NEXT:    [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR2]]
+; TUNIT-SAME: (i8* nocapture nofree nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR0]] {
+; TUNIT-NEXT:    [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree nonnull readonly dereferenceable(972) [[P]]) #[[ATTR3]]
+; TUNIT-NEXT:    [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR3]]
 ; TUNIT-NEXT:    [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
 ; TUNIT-NEXT:    ret i8 [[Z]]
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read)
 ; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads
-; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR2]] {
-; CGSCC-NEXT:    [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]]
-; CGSCC-NEXT:    [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR4]]
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q:%.*]]) #[[ATTR2]] {
+; CGSCC-NEXT:    [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[P]]) #[[ATTR5]]
+; CGSCC-NEXT:    [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[Q]]) #[[ATTR5]]
 ; CGSCC-NEXT:    [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
 ; CGSCC-NEXT:    ret i8 [[Z]]
 ;
@@ -200,52 +200,104 @@
   ret i8 %z
 }
 
-define i8 @call_not_simplifiable_2() {
+define i8 @call_partially_simplifiable_1() {
 ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_2
+; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1
 ; TUNIT-SAME: () #[[ATTR1]] {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT:    store i8 2, i8* [[I0]], align 2
-; TUNIT-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
-; TUNIT-NEXT:    store i8 3, i8* [[I1]], align 1
-; TUNIT-NEXT:    [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
-; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR2]]
+; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT:    store i8 2, i8* [[I2]], align 2
+; TUNIT-NEXT:    [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; TUNIT-NEXT:    store i8 3, i8* [[I3]], align 1
+; TUNIT-NEXT:    [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4
+; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR3]]
 ; TUNIT-NEXT:    ret i8 [[R]]
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_2
+; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_1
 ; CGSCC-SAME: () #[[ATTR1]] {
 ; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CGSCC-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
-; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
-; CGSCC-NEXT:    store i8 3, i8* [[I1]], align 1
-; CGSCC-NEXT:    [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
-; CGSCC-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR3]]
+; CGSCC-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; CGSCC-NEXT:    store i8 2, i8* [[I2]], align 2
+; CGSCC-NEXT:    [[I3:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; CGSCC-NEXT:    store i8 3, i8* [[I3]], align 1
+; CGSCC-NEXT:    [[I4:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 4
+; CGSCC-NEXT:    store i8 4, i8* [[I4]], align 4
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR4]]
 ; CGSCC-NEXT:    ret i8 [[R]]
 ;
 entry:
   %Bytes = alloca [1024 x i8], align 16
-  %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
-  store i8 2, i8* %i0
-  %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3
-  store i8 3, i8* %i1
-  %base = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0
-  %r = call i8 @sum_two_different_loads(i8* %i0, i8* %i1)
+  %i2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
+  store i8 2, i8* %i2
+  %i3 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3
+  store i8 3, i8* %i3
+  %i4 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 4
+  ;;; This store is redundant, hence removed.
+  store i8 4, i8* %i4
+  %r = call i8 @sum_two_different_loads(i8* %i2, i8* %i3)
+  ret i8 %r
+}
+
+define i8 @call_partially_simplifiable_2(i1 %cond) {
+; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn
+; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_2
+; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2:[0-9]+]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; TUNIT-NEXT:    [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51
+; TUNIT-NEXT:    [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52
+; TUNIT-NEXT:    store i8 2, i8* [[I52]], align 4
+; TUNIT-NEXT:    [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53
+; TUNIT-NEXT:    store i8 3, i8* [[I53]], align 1
+; TUNIT-NEXT:    [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54
+; TUNIT-NEXT:    [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]]
+; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR3]]
+; TUNIT-NEXT:    ret i8 [[R]]
+;
+; CGSCC: Function Attrs: nofree nosync nounwind willreturn
+; CGSCC-LABEL: define {{[^@]+}}@call_partially_simplifiable_2
+; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CGSCC-NEXT:    [[I51:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 51
+; CGSCC-NEXT:    [[I52:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 52
+; CGSCC-NEXT:    store i8 2, i8* [[I52]], align 4
+; CGSCC-NEXT:    [[I53:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 53
+; CGSCC-NEXT:    store i8 3, i8* [[I53]], align 1
+; CGSCC-NEXT:    [[I54:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 54
+; CGSCC-NEXT:    store i8 4, i8* [[I54]], align 2
+; CGSCC-NEXT:    [[SEL:%.*]] = select i1 [[COND]], i8* [[I51]], i8* [[I52]]
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly dereferenceable(972) [[SEL]], i8* nocapture nofree noundef nonnull readonly dereferenceable(971) [[I53]]) #[[ATTR4]]
+; CGSCC-NEXT:    ret i8 [[R]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+  %i51 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 51
+  %i52 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 52
+  store i8 2, i8* %i52
+  %i53 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 53
+  store i8 3, i8* %i53
+  %i54 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 54
+  ;;; This store is redundant, hence removed. Not affected by the select.
+  store i8 4, i8* %i54
+  %sel = select i1 %cond, i8* %i51, i8* %i52
+  %r = call i8 @sum_two_different_loads(i8* %sel, i8* %i53)
   ret i8 %r
 }
 
 ;.
 ; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) }
 ; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) }
-; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn }
 ;.
 ; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) }
 ; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) }
 ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(argmem: read) }
-; CGSCC: attributes #[[ATTR3]] = { willreturn }
-; CGSCC: attributes #[[ATTR4]] = { willreturn memory(read) }
+; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn }
+; CGSCC: attributes #[[ATTR4]] = { willreturn }
+; CGSCC: attributes #[[ATTR5]] = { willreturn memory(read) }
 ;.
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -3107,6 +3107,89 @@
   ret void
 }
 
+define i8 @multiple_offsets_simplifiable_1(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@multiple_offsets_simplifiable_1
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT:    store i8 23, i8* [[GEP23]], align 4
+; CHECK-NEXT:    [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29
+; CHECK-NEXT:    store i8 29, i8* [[GEP29]], align 4
+; CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
+; CHECK-NEXT:    store i8 7, i8* [[GEP7]], align 4
+; CHECK-NEXT:    [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4
+; CHECK-NEXT:    ret i8 [[I]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+
+  %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+  store i8 23, i8* %gep23, align 4
+  %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+  store i8 29, i8* %gep29, align 4
+  %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7
+  store i8 7, i8* %gep7, align 4
+
+  ;; This store is redundant, hence removed.
+  %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+  store i8 42, i8* %gep31, align 4
+
+  %sel0 = select i1 %cnd1, i64 23, i64 29
+  %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+  %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+  %i = load i8, i8* %gep.sel, align 4
+  ret i8 %i
+}
+
+define i8 @multiple_offsets_simplifiable_2(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@multiple_offsets_simplifiable_2
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT:    store i8 23, i8* [[GEP23]], align 4
+; CHECK-NEXT:    [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29
+; CHECK-NEXT:    store i8 29, i8* [[GEP29]], align 4
+; CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7
+; CHECK-NEXT:    store i8 7, i8* [[GEP7]], align 4
+; CHECK-NEXT:    [[GEP31:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 31
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 20, i64 26
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 4
+; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT:    [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3
+; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4
+; CHECK-NEXT:    ret i8 [[I]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+
+  %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+  store i8 23, i8* %gep23, align 4
+  %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+  store i8 29, i8* %gep29, align 4
+  %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7
+  store i8 7, i8* %gep7, align 4
+
+  ;; This store is redundant, hence removed.
+  %gep31 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 31
+  store i8 42, i8* %gep31, align 4
+
+  ;; Adjust the offsets so that they match the stores after adding 3
+  %sel0 = select i1 %cnd1, i64 20, i64 26
+  %sel1 = select i1 %cnd2, i64 %sel0, i64 4
+  %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+  %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3
+  %i = load i8, i8* %gep.plus, align 4
+  ret i8 %i
+}
+
 define i8 @multiple_offsets_not_simplifiable_1(i1 %cnd1, i1 %cnd2) {
 ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; TUNIT-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_1
@@ -3136,13 +3219,115 @@
   %Bytes = alloca [1024 x i8], align 16
   %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7
   %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
-  ; %phi.ptr = phi i8* [ %gep7, %then ], [ %gep23, %else ]
   %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23
   store i8 42, i8* %sel.ptr, align 4
   %i = load i8, i8* %gep7, align 4
   ret i8 %i
 }
 
+define i8 @multiple_offsets_not_simplifiable_2(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_2
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT:    [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23
+; CHECK-NEXT:    store i8 100, i8* [[GEP23]], align 4
+; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[GEP_SEL]], align 4
+; CHECK-NEXT:    ret i8 [[I]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+  %sel0 = select i1 %cnd1, i64 23, i64 29
+  %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+  %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23
+  store i8 100, i8* %gep23, align 4
+  %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+  %i = load i8, i8* %gep.sel, align 4
+  ret i8 %i
+}
+
+define i8 @multiple_offsets_not_simplifiable_3(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_3
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT:    [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32
+; CHECK-NEXT:    store i8 100, i8* [[GEP32]], align 16
+; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT:    [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3
+; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[GEP_PLUS]], align 4
+; CHECK-NEXT:    ret i8 [[I]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+  %sel0 = select i1 %cnd1, i64 23, i64 29
+  %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+  %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32
+  store i8 100, i8* %gep32, align 4
+  %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+  %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3
+  %i = load i8, i8* %gep.plus, align 4
+  ret i8 %i
+}
+
+define i8 @multiple_offsets_not_simplifiable_4(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_4
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT:    store i8 100, i8* [[GEP_SEL]], align 4
+; CHECK-NEXT:    [[GEP29:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 29
+; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[GEP29]], align 4
+; CHECK-NEXT:    ret i8 [[I]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+  %sel0 = select i1 %cnd1, i64 23, i64 29
+  %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+  %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+  store i8 100, i8* %gep.sel, align 4
+  %gep29 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 29
+  %i = load i8, i8* %gep29, align 4
+  ret i8 %i
+}
+
+define i8 @multiple_offsets_not_simplifiable_5(i1 %cnd1, i1 %cnd2) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_5
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[SEL0:%.*]] = select i1 [[CND1]], i64 23, i64 29
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CND2]], i64 [[SEL0]], i64 7
+; CHECK-NEXT:    [[GEP_SEL:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[SEL1]]
+; CHECK-NEXT:    [[GEP_PLUS:%.*]] = getelementptr inbounds i8, i8* [[GEP_SEL]], i64 3
+; CHECK-NEXT:    store i8 100, i8* [[GEP_PLUS]], align 4
+; CHECK-NEXT:    [[GEP32:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 32
+; CHECK-NEXT:    [[I:%.*]] = load i8, i8* [[GEP32]], align 16
+; CHECK-NEXT:    ret i8 [[I]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+  %sel0 = select i1 %cnd1, i64 23, i64 29
+  %sel1 = select i1 %cnd2, i64 %sel0, i64 7
+  %gep.sel = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %sel1
+  %gep.plus = getelementptr inbounds i8, i8* %gep.sel, i64 3
+  store i8 100, i8* %gep.plus, align 4
+  %gep32 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 32
+  %i = load i8, i8* %gep32, align 4
+  ret i8 %i
+}
 
 !llvm.module.flags = !{!0, !1}
 !llvm.ident = !{!2}