diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -250,6 +250,22 @@ return OAS.Offset + OAS.Size > Offset && OAS.Offset < Offset + Size; } + OffsetAndSize &operator&=(const OffsetAndSize &R) { + if (Offset == Unassigned) + Offset = R.Offset; + else if (R.Offset != Unassigned && R.Offset != Offset) + Offset = Unknown; + + if (Size == Unassigned) + Size = R.Size; + else if (Size == Unknown || R.Size == Unknown) + Size = Unknown; + else if (R.Size != Unassigned) + Size = std::max(Size, R.Size); + + return *this; + } + /// Constants used to represent special offsets or sizes. /// - This assumes that Offset and Size are non-negative. /// - The constants should not clash with DenseMapInfo, such as EmptyKey @@ -4992,33 +5008,47 @@ /// An access description. struct Access { - Access(Instruction *I, Optional Content, AccessKind Kind, Type *Ty) - : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) { + Access(Instruction *I, int64_t Offset, int64_t Size, + Optional Content, AccessKind Kind, Type *Ty) + : LocalI(I), RemoteI(I), Content(Content), OAS(Offset, Size), + Kind(Kind), Ty(Ty) { verify(); } - Access(Instruction *LocalI, Instruction *RemoteI, Optional Content, - AccessKind Kind, Type *Ty) - : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), - Ty(Ty) { + Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset, + int64_t Size, Optional Content, AccessKind Kind, Type *Ty) + : LocalI(LocalI), RemoteI(RemoteI), Content(Content), OAS(Offset, Size), + Kind(Kind), Ty(Ty) { verify(); } Access(const Access &Other) = default; Access(const Access &&Other) : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), - Kind(Other.Kind), Ty(Other.Ty) {} + OAS(Other.OAS), Kind(Other.Kind), Ty(Other.Ty) {} Access &operator=(const Access &Other) = default; bool operator==(const 
Access &R) const { - return LocalI == R.LocalI && RemoteI == R.RemoteI && + return LocalI == R.LocalI && RemoteI == R.RemoteI && OAS == R.OAS && Content == R.Content && Kind == R.Kind; } bool operator!=(const Access &R) const { return !(*this == R); } Access &operator&=(const Access &R) { assert(RemoteI == R.RemoteI && "Expected same instruction!"); - Content = - AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); + assert(LocalI == R.LocalI && "Expected same instruction!"); Kind = AccessKind(Kind | R.Kind); + auto Before = OAS; + OAS &= R.OAS; + if (Before.isUnassigned() || Before == OAS) { + Content = + AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); + } else { + // Since the OAS information changed, set a conservative state -- drop + // the contents, and assume MayAccess rather than MustAccess. + setWrittenValueUnknown(); + Kind = AccessKind(Kind | AK_MAY); + Kind = AccessKind(Kind & ~AK_MUST); + } + verify(); return *this; } @@ -5054,18 +5084,29 @@ return Content.has_value() && !*Content; } + /// Set the value written to nullptr, i.e., unknown. + void setWrittenValueUnknown() { Content = nullptr; } + /// Return the type associated with the access, if known. Type *getType() const { return Ty; } - /// Return the value writen, if any. As long as - /// isWrittenValueYetUndetermined return true this function shall not be - /// called. - Value *getWrittenValue() const { return *Content; } + /// Return the value written, if any. + Value *getWrittenValue() const { + assert(!isWrittenValueYetUndetermined() && + "Value needs to be determined before accessing it."); + return *Content; + } /// Return the written value which can be `llvm::null` if it is not yet /// determined. Optional getContent() const { return Content; } + /// Return the offset for this access. + int64_t getOffset() const { return OAS.Offset; } + + /// Return the size for this access. 
+ int64_t getSize() const { return OAS.Size; } + private: /// The instruction responsible for the access with respect to the local /// scope of the associated attribute. @@ -5078,6 +5119,9 @@ /// cannot be determined. Optional Content; + /// The object accessed, in terms of an offset and size in bytes. + AA::OffsetAndSize OAS; + /// The access kind, e.g., READ, as bitset (could be more than one). AccessKind Kind; @@ -5113,7 +5157,7 @@ virtual bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref CB, bool &HasBeenWrittenTo, - AA::OffsetAndSize *OASPtr = nullptr) const = 0; + AA::OffsetAndSize &OAS) const = 0; /// This function should return true if the type of the \p AA is AAPointerInfo static bool classof(const AbstractAttribute *AA) { diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -457,7 +457,7 @@ auto &PI = A.getAAFor(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess, - HasBeenWrittenTo, &OAS)) { + HasBeenWrittenTo, OAS)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -757,13 +757,6 @@ /// A type to track pointer/struct usage and accesses for AAPointerInfo. struct AA::PointerInfo::State : public AbstractState { - - ~State() { - // We do not delete the Accesses objects but need to destroy them still. - for (auto &It : AccessBins) - It.second->~Accesses(); - } - /// Return the best possible representable state. 
static State getBestState(const State &SIS) { return State(); } @@ -775,9 +768,7 @@ } State() = default; - State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) { - SIS.AccessBins.clear(); - } + State(State &&SIS) = default; const State &getAssumed() const { return *this; } @@ -803,7 +794,9 @@ if (this == &R) return *this; BS = R.BS; - AccessBins = R.AccessBins; + AccessList = R.AccessList; + OffsetBins = R.OffsetBins; + RemoteIMap = R.RemoteIMap; return *this; } @@ -811,99 +804,52 @@ if (this == &R) return *this; std::swap(BS, R.BS); - std::swap(AccessBins, R.AccessBins); + std::swap(AccessList, R.AccessList); + std::swap(OffsetBins, R.OffsetBins); + std::swap(RemoteIMap, R.RemoteIMap); return *this; } - bool operator==(const State &R) const { - if (BS != R.BS) - return false; - if (AccessBins.size() != R.AccessBins.size()) - return false; - auto It = begin(), RIt = R.begin(), E = end(); - while (It != E) { - if (It->getFirst() != RIt->getFirst()) - return false; - auto &Accs = It->getSecond(); - auto &RAccs = RIt->getSecond(); - if (Accs->size() != RAccs->size()) - return false; - for (const auto &ZipIt : llvm::zip(*Accs, *RAccs)) - if (std::get<0>(ZipIt) != std::get<1>(ZipIt)) - return false; - ++It; - ++RIt; - } - return true; - } - bool operator!=(const State &R) const { return !(*this == R); } - - /// We store accesses in a set with the instruction as key. 
- struct Accesses { - SmallVector Accesses; - DenseMap Map; - - unsigned size() const { return Accesses.size(); } - - using vec_iterator = decltype(Accesses)::iterator; - vec_iterator begin() { return Accesses.begin(); } - vec_iterator end() { return Accesses.end(); } - - using iterator = decltype(Map)::const_iterator; - iterator find(AAPointerInfo::Access &Acc) { - return Map.find(Acc.getRemoteInst()); - } - iterator find_end() { return Map.end(); } - - AAPointerInfo::Access &get(iterator &It) { - return Accesses[It->getSecond()]; - } - - void insert(AAPointerInfo::Access &Acc) { - Map[Acc.getRemoteInst()] = Accesses.size(); - Accesses.push_back(Acc); - } - }; - - /// We store all accesses in bins denoted by their offset and size. - using AccessBinsTy = DenseMap; - - AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); } - AccessBinsTy::const_iterator end() const { return AccessBins.end(); } - -protected: - /// The bins with all the accesses for the associated pointer. - AccessBinsTy AccessBins; - - /// Add a new access to the state at offset \p Offset and with size \p Size. + /// Add a new Access to the state at offset \p Offset and with size \p Size. /// The access is associated with \p I, writes \p Content (if anything), and - /// is of kind \p Kind. + /// is of kind \p Kind. If an Access already exists for the same \p I and same + /// \p RemoteI, the two are combined, potentially losing information about + /// offset and size. The resulting access must now be moved from its original + /// OffsetBin to the bin for its new offset. + /// /// \Returns CHANGED, if the state changed, UNCHANGED otherwise. ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size, Instruction &I, Optional Content, AAPointerInfo::AccessKind Kind, Type *Ty, - Instruction *RemoteI = nullptr, - Accesses *BinPtr = nullptr) { - AA::OffsetAndSize Key{Offset, Size}; - Accesses *&Bin = BinPtr ? 
BinPtr : AccessBins[Key]; - if (!Bin) - Bin = new (A.Allocator) Accesses; - AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty); - // Check if we have an access for this instruction in this bin, if not, - // simply add it. - auto It = Bin->find(Acc); - if (It == Bin->find_end()) { - Bin->insert(Acc); - return ChangeStatus::CHANGED; - } - // If the existing access is the same as then new one, nothing changed. - AAPointerInfo::Access &Current = Bin->get(It); - AAPointerInfo::Access Before = Current; - // The new one will be combined with the existing one. - Current &= Acc; - return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + Instruction *RemoteI = nullptr); + + using OffsetBinsTy = DenseMap>; + + using const_bin_iterator = OffsetBinsTy::const_iterator; + const_bin_iterator begin() const { return OffsetBins.begin(); } + const_bin_iterator end() const { return OffsetBins.end(); } + + const AAPointerInfo::Access &getAccess(unsigned Index) const { + return AccessList[Index]; } +protected: + // Every memory instruction results in an Access object. We maintain a list of + // all Access objects that we own, along with the following maps: + // + // - OffsetBins: OffsetAndSize -> { Access } + // - RemoteIMap: RemoteI x LocalI -> Access + // + // A RemoteI is any instruction that accesses memory. RemoteI is different + // from LocalI if and only if LocalI is a call; then RemoteI is some + // instruction in the callgraph starting from LocalI. Multiple paths in the + // callgraph from LocalI to RemoteI may produce multiple accesses, but these + // are all combined into a single Access object. This may result in loss of + // information in OffsetAndSize in the Access object. + SmallVector AccessList; + OffsetBinsTy OffsetBins; + DenseMap> RemoteIMap; + /// See AAPointerInfo::forallInterferingAccesses. 
bool forallInterferingAccesses( AA::OffsetAndSize OAS, @@ -911,14 +857,16 @@ if (!isValidState()) return false; - for (const auto &It : AccessBins) { + for (const auto &It : OffsetBins) { AA::OffsetAndSize ItOAS = It.getFirst(); if (!OAS.mayOverlap(ItOAS)) continue; bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); - for (auto &Access : *It.getSecond()) + for (auto Index : It.getSecond()) { + auto &Access = AccessList[Index]; if (!CB(Access, IsExact)) return false; + } } return true; } @@ -927,32 +875,19 @@ bool forallInterferingAccesses( Instruction &I, function_ref CB, - AA::OffsetAndSize *OASPtr) const { + AA::OffsetAndSize &OAS) const { if (!isValidState()) return false; - // First find the offset and size of I. - AA::OffsetAndSize OAS; - for (const auto &It : AccessBins) { - for (auto &Access : *It.getSecond()) { - if (Access.getRemoteInst() == &I) { - OAS = It.getFirst(); - break; - } - } - if (OAS.Size != AA::OffsetAndSize::Unassigned) - break; - } - - if (OASPtr) - *OASPtr = OAS; - - // No access for I was found, we are done. - if (OAS.Size == AA::OffsetAndSize::Unassigned) + auto LocalList = RemoteIMap.find(&I); + if (LocalList == RemoteIMap.end()) { return true; + } - // Now that we have an offset and size, find all overlapping ones and use - // the callback on the accesses. + for (auto LI : LocalList->getSecond()) { + auto &Access = AccessList[LI]; + OAS &= {Access.getOffset(), Access.getSize()}; + } return forallInterferingAccesses(OAS, CB); } @@ -961,6 +896,56 @@ BooleanState BS; }; +ChangeStatus AA::PointerInfo::State::addAccess(Attributor &A, int64_t Offset, + int64_t Size, Instruction &I, + Optional Content, + AAPointerInfo::AccessKind Kind, + Type *Ty, Instruction *RemoteI) { + RemoteI = RemoteI ? RemoteI : &I; + AAPointerInfo::Access Acc(&I, RemoteI, Offset, Size, Content, Kind, Ty); + + // Check if we have an access for this instruction, if not, simply add it. 
+ auto &LocalList = RemoteIMap[RemoteI]; + bool AccExists = false; + unsigned AccIndex = AccessList.size(); + for (auto Index : LocalList) { + auto &A = AccessList[Index]; + if (A.getLocalInst() == &I) { + AccExists = true; + AccIndex = Index; + break; + } + } + if (!AccExists) { + AccessList.push_back(Acc); + LocalList.push_back(AccIndex); + } else { + // The new one will be combined with the existing one. + auto &Current = AccessList[AccIndex]; + auto Before = Current; + Current &= Acc; + if (Current == Before) + return ChangeStatus::UNCHANGED; + + Acc = Current; + AA::OffsetAndSize Key{Before.getOffset(), Before.getSize()}; + assert(OffsetBins.count(Key) && "Existing Access must be in some bin."); + auto &Bin = OffsetBins[Key]; + assert(Bin.count(AccIndex) && + "Expected bin to actually contain the Access."); + LLVM_DEBUG(dbgs() << "[AAPointerInfo] Removing Access " + << AccessList[AccIndex] << " with key {" << Key.Offset + << ',' << Key.Size << "}\n"); + Bin.erase(AccIndex); + } + + AA::OffsetAndSize Key{Acc.getOffset(), Acc.getSize()}; + LLVM_DEBUG(dbgs() << "[AAPointerInfo] Inserting Access " << Acc + << " with key {" << Key.Offset << ',' << Key.Size << "}\n"); + OffsetBins[Key].insert(AccIndex); + return ChangeStatus::CHANGED; +} + namespace { struct AAPointerInfoImpl : public StateWrapper { @@ -971,7 +956,7 @@ const std::string getAsStr() const override { return std::string("PointerInfo ") + (isValidState() ? 
(std::string("#") + - std::to_string(AccessBins.size()) + " bins") + std::to_string(OffsetBins.size()) + " bins") : ""); } @@ -990,7 +975,7 @@ bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref UserCB, bool &HasBeenWrittenTo, - AA::OffsetAndSize *OASPtr = nullptr) const override { + AA::OffsetAndSize &OAS) const override { HasBeenWrittenTo = false; SmallPtrSet DominatingWrites; @@ -1105,7 +1090,7 @@ InterferingAccesses.push_back({&Acc, Exact}); return true; }; - if (!State::forallInterferingAccesses(I, AccessCB, OASPtr)) + if (!State::forallInterferingAccesses(I, AccessCB, OAS)) return false; if (HasBeenWrittenTo) { @@ -1172,14 +1157,15 @@ // Combine the accesses bin by bin. ChangeStatus Changed = ChangeStatus::UNCHANGED; - for (const auto &It : OtherAAImpl.getState()) { + const auto &State = OtherAAImpl.getState(); + for (const auto &It : State) { AA::OffsetAndSize OAS = AA::OffsetAndSize::getUnknown(); if (Offset != AA::OffsetAndSize::Unknown && !It.first.offsetOrSizeAreUnknown()) { OAS = AA::OffsetAndSize(It.first.Offset + Offset, It.first.Size); } - Accesses *Bin = AccessBins.lookup(OAS); - for (const AAPointerInfo::Access &RAcc : *It.second) { + for (auto Index : It.getSecond()) { + const auto &RAcc = State.getAccess(Index); if (IsByval && !RAcc.isRead()) continue; bool UsedAssumedInformation = false; @@ -1192,9 +1178,8 @@ AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); } - Changed = - Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK, - RAcc.getType(), RAcc.getRemoteInst(), Bin); + Changed = Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK, + RAcc.getType(), RAcc.getRemoteInst()); } } return Changed; @@ -1206,10 +1191,11 @@ /// Dump the state into \p O. 
void dumpState(raw_ostream &O) { - for (auto &It : AccessBins) { + for (auto &It : OffsetBins) { O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size - << "] : " << It.getSecond()->size() << "\n"; - for (auto &Acc : *It.getSecond()) { + << "] : " << It.getSecond().size() << "\n"; + for (auto AccIndex : It.getSecond()) { + auto &Acc = AccessList[AccIndex]; O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; if (Acc.getLocalInst() != Acc.getRemoteInst()) O << " --> " << *Acc.getRemoteInst() diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll --- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CGSCC ; @@ -53,7 +53,7 @@ ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture 
nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3:[0-9]+]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -64,6 +64,68 @@ ret i8 %r } +;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. + +define internal i8 @read_arg_1(i8* %p) { +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) +; CGSCC-LABEL: define {{[^@]+}}@read_arg_1 +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; CGSCC-NEXT: ret i8 [[L]] +; +entry: + %l = load i8, i8* %p, align 1 + ret i8 %l +} + +define internal i8 @sum_two_same_loads(i8* %p) { +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) +; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] { +; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR4]] +; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] +; CGSCC-NEXT: ret i8 [[Z]] +; + %x = call i8 @read_arg_1(i8* %p) + %y = call i8 @read_arg_1(i8* %p) + %z = add nsw i8 %x, %y + ret i8 %z +} + +define i8 @call_simplifiable_2() { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2 +; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; TUNIT-NEXT: ret i8 4 +; +; CGSCC: Function Attrs: nofree 
nosync nounwind willreturn memory(none) +; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_2 +; CGSCC-SAME: () #[[ATTR1]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 +; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 +; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i0 + %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 + store i8 3, i8* %i1 + %r = call i8 @sum_two_same_loads(i8* %i0) + ret i8 %r +} + define i8 @call_not_simplifiable_1() { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_1 @@ -82,7 +144,7 @@ ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -93,6 +155,89 @@ ret i8 %r } +;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. 
+ +define internal i8 @read_arg_2(i8* %p) { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) +; TUNIT-LABEL: define {{[^@]+}}@read_arg_2 +; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[P:%.*]]) #[[ATTR0]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; TUNIT-NEXT: ret i8 [[L]] +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) +; CGSCC-LABEL: define {{[^@]+}}@read_arg_2 +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 +; CGSCC-NEXT: ret i8 [[L]] +; +entry: + %l = load i8, i8* %p, align 1 + ret i8 %l +} + +define internal i8 @sum_two_different_loads(i8* %p, i8* %q) { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) +; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads +; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR0]] { +; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P]]) #[[ATTR2]] +; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR2]] +; TUNIT-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] +; TUNIT-NEXT: ret i8 [[Z]] +; +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) +; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads +; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly 
dereferenceable(1022) [[P]]) #[[ATTR4]] +; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR4]] +; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] +; CGSCC-NEXT: ret i8 [[Z]] +; + %x = call i8 @read_arg_2(i8* %p) + %y = call i8 @read_arg_2(i8* %q) + %z = add nsw i8 %x, %y + ret i8 %z +} + +define i8 @call_not_simplifiable_2() { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_2 +; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; TUNIT-NEXT: store i8 2, i8* [[I0]], align 2 +; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; TUNIT-NEXT: store i8 3, i8* [[I1]], align 1 +; TUNIT-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 +; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR2]] +; TUNIT-NEXT: ret i8 [[R]] +; +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) +; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_2 +; CGSCC-SAME: () #[[ATTR1]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 +; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 +; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 +; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 +; CGSCC-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 +; CGSCC-NEXT: [[R:%.*]] = call i8 
@sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR3]] +; CGSCC-NEXT: ret i8 [[R]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 + store i8 2, i8* %i0 + %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 + store i8 3, i8* %i1 + %base = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 + %r = call i8 @sum_two_different_loads(i8* %i0, i8* %i1) + ret i8 %r +} + ;. ; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } @@ -100,5 +245,7 @@ ;. ; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } -; CGSCC: attributes #[[ATTR2]] = { willreturn } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR3]] = { willreturn } +; CGSCC: attributes #[[ATTR4]] = { willreturn memory(read) } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -2223,6 +2223,7 @@ ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2241,6 +2242,7 @@ ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2281,6 +2283,7 @@ ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2302,6 +2305,7 @@ ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] +; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -3103,6 
+3107,43 @@ ret void } +define i8 @multiple_offsets_not_simplifiable_1(i1 %cnd1, i1 %cnd2) { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_1 +; TUNIT-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; TUNIT-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; TUNIT-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; TUNIT-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] +; TUNIT-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 +; TUNIT-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4 +; TUNIT-NEXT: ret i8 [[I]] +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@multiple_offsets_not_simplifiable_1 +; CGSCC-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR5]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 +; CGSCC-NEXT: [[GEP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 7 +; CGSCC-NEXT: [[GEP23:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 23 +; CGSCC-NEXT: [[SEL_PTR:%.*]] = select i1 [[CND1]], i8* [[GEP7]], i8* [[GEP23]] +; CGSCC-NEXT: store i8 42, i8* [[SEL_PTR]], align 4 +; CGSCC-NEXT: [[I:%.*]] = load i8, i8* [[GEP7]], align 4 +; CGSCC-NEXT: ret i8 [[I]] +; +entry: + %Bytes = alloca [1024 x i8], align 16 + %gep7 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 7 + %gep23 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 23 + ; %phi.ptr = phi i8* [ %gep7, %then ], [ %gep23, %else ] + %sel.ptr = select i1 %cnd1, i8* %gep7, i8* %gep23 + store i8 42, i8* %sel.ptr, align 4 + %i = load i8, i8* %gep7, align 4 + ret i8 %i +} + + !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2}