diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -250,6 +250,22 @@
     return OAS.Offset + OAS.Size > Offset && OAS.Offset < Offset + Size;
   }
 
+  /// Merge \p R into this object; an Unassigned field simply takes R's value.
+  OffsetAndSize &operator&=(const OffsetAndSize &R) {
+    if (Offset == Unassigned)
+      Offset = R.Offset;
+    else if (R.Offset != Unassigned && R.Offset != Offset)
+      Offset = Unknown;
+    // Sizes: Unknown is absorbing; two assigned sizes merge to the larger one.
+    if (Size == Unassigned)
+      Size = R.Size;
+    else if (Size == Unknown || R.Size == Unknown)
+      Size = Unknown;
+    else if (R.Size != Unassigned)
+      Size = std::max(Size, R.Size);
+    return *this;
+  }
+
   /// Constants used to represent special offsets or sizes.
   /// - This assumes that Offset and Size are non-negative.
   /// - The constants should not clash with DenseMapInfo, such as EmptyKey
@@ -4992,33 +5008,47 @@
 
   /// An access description.
   struct Access {
-    Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty)
-        : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {
+    Access(Instruction *I, int64_t Offset, int64_t Size,
+           Optional<Value *> Content, AccessKind Kind, Type *Ty)
+        : LocalI(I), RemoteI(I), Content(Content), OAS(Offset, Size),
+          Kind(Kind), Ty(Ty) {
       verify();
     }
-    Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content,
-           AccessKind Kind, Type *Ty)
-        : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
-          Ty(Ty) {
+    Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset,
+           int64_t Size, Optional<Value *> Content, AccessKind Kind, Type *Ty)
+        : LocalI(LocalI), RemoteI(RemoteI), Content(Content), OAS(Offset, Size),
+          Kind(Kind), Ty(Ty) {
       verify();
     }
     Access(const Access &Other) = default;
     Access(const Access &&Other)
         : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
-          Kind(Other.Kind), Ty(Other.Ty) {}
+          OAS(Other.OAS), Kind(Other.Kind), Ty(Other.Ty) {}
 
     Access &operator=(const Access &Other) = default;
     bool operator==(const Access &R) const {
-      return LocalI == R.LocalI && RemoteI == R.RemoteI &&
+      return LocalI == R.LocalI && RemoteI == R.RemoteI && OAS == R.OAS &&
              Content == R.Content && Kind == R.Kind;
     }
     bool operator!=(const Access &R) const { return !(*this == R); }
 
     Access &operator&=(const Access &R) {
       assert(RemoteI == R.RemoteI && "Expected same instruction!");
-      Content =
-          AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
+      assert(LocalI == R.LocalI && "Expected same instruction!");
       Kind = AccessKind(Kind | R.Kind);
+      auto Before = OAS;
+      OAS &= R.OAS;
+      if (Before.isUnassigned() || Before == OAS) {
+        Content =
+            AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
+      } else {
+        // Since the OAS information changed, set a conservative state -- drop
+        // the contents, and assume MayAccess rather than MustAccess.
+        Content.reset();
+        Kind = AccessKind(Kind | AK_MAY);
+        Kind = AccessKind(Kind & ~AK_MUST);
+      }
+      verify();
       return *this;
     }
 
@@ -5066,6 +5096,12 @@
     /// determined.
     Optional<Value *> getContent() const { return Content; }
 
+    /// Return the byte offset of this access; may be a special OAS constant.
+    int64_t getOffset() const { return OAS.Offset; }
+
+    /// Return the size in bytes of this access; may be a special OAS constant.
+    int64_t getSize() const { return OAS.Size; }
+
   private:
     /// The instruction responsible for the access with respect to the local
     /// scope of the associated attribute.
@@ -5078,6 +5114,9 @@
     /// cannot be determined.
     Optional<Value *> Content;
 
+    /// The object accessed, in terms of an offset and size in bytes.
+    AA::OffsetAndSize OAS;
+
     /// The access kind, e.g., READ, as bitset (could be more than one).
     AccessKind Kind;
 
@@ -5113,7 +5152,7 @@
   virtual bool forallInterferingAccesses(
       Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
       function_ref<bool(const Access &, bool)> CB, bool &HasBeenWrittenTo,
-      AA::OffsetAndSize *OASPtr = nullptr) const = 0;
+      AA::OffsetAndSize &OAS) const = 0;
 
   /// This function should return true if the type of the \p AA is AAPointerInfo
   static bool classof(const AbstractAttribute *AA) {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -456,7 +456,7 @@
     auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj),
                                          DepClassTy::NONE);
     if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess,
-                                      HasBeenWrittenTo, &OAS)) {
+                                      HasBeenWrittenTo, OAS)) {
       LLVM_DEBUG(
           dbgs()
           << "Failed to verify all interfering accesses for underlying object: "
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -757,13 +757,6 @@
 
 /// A type to track pointer/struct usage and accesses for AAPointerInfo.
 struct AA::PointerInfo::State : public AbstractState {
-
-  ~State() {
-    // We do not delete the Accesses objects but need to destroy them still.
-    for (auto &It : AccessBins)
-      It.second->~Accesses();
-  }
-
   /// Return the best possible representable state.
   static State getBestState(const State &SIS) { return State(); }
 
@@ -775,9 +768,7 @@
   }
 
   State() = default;
-  State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {
-    SIS.AccessBins.clear();
-  }
+  State(State &&SIS) = default;
 
   const State &getAssumed() const { return *this; }
 
@@ -803,7 +794,9 @@
     if (this == &R)
       return *this;
     BS = R.BS;
-    AccessBins = R.AccessBins;
+    AccessList = R.AccessList;
+    OffsetBins = R.OffsetBins;
+    RemoteIMap = R.RemoteIMap;
     return *this;
   }
 
@@ -811,99 +804,52 @@
     if (this == &R)
       return *this;
     std::swap(BS, R.BS);
-    std::swap(AccessBins, R.AccessBins);
+    std::swap(AccessList, R.AccessList);
+    std::swap(OffsetBins, R.OffsetBins);
+    std::swap(RemoteIMap, R.RemoteIMap);
     return *this;
   }
 
-  bool operator==(const State &R) const {
-    if (BS != R.BS)
-      return false;
-    if (AccessBins.size() != R.AccessBins.size())
-      return false;
-    auto It = begin(), RIt = R.begin(), E = end();
-    while (It != E) {
-      if (It->getFirst() != RIt->getFirst())
-        return false;
-      auto &Accs = It->getSecond();
-      auto &RAccs = RIt->getSecond();
-      if (Accs->size() != RAccs->size())
-        return false;
-      for (const auto &ZipIt : llvm::zip(*Accs, *RAccs))
-        if (std::get<0>(ZipIt) != std::get<1>(ZipIt))
-          return false;
-      ++It;
-      ++RIt;
-    }
-    return true;
-  }
-  bool operator!=(const State &R) const { return !(*this == R); }
-
-  /// We store accesses in a set with the instruction as key.
-  struct Accesses {
-    SmallVector<AAPointerInfo::Access, 4> Accesses;
-    DenseMap<const Instruction *, unsigned> Map;
-
-    unsigned size() const { return Accesses.size(); }
-
-    using vec_iterator = decltype(Accesses)::iterator;
-    vec_iterator begin() { return Accesses.begin(); }
-    vec_iterator end() { return Accesses.end(); }
-
-    using iterator = decltype(Map)::const_iterator;
-    iterator find(AAPointerInfo::Access &Acc) {
-      return Map.find(Acc.getRemoteInst());
-    }
-    iterator find_end() { return Map.end(); }
-
-    AAPointerInfo::Access &get(iterator &It) {
-      return Accesses[It->getSecond()];
-    }
-
-    void insert(AAPointerInfo::Access &Acc) {
-      Map[Acc.getRemoteInst()] = Accesses.size();
-      Accesses.push_back(Acc);
-    }
-  };
-
-  /// We store all accesses in bins denoted by their offset and size.
-  using AccessBinsTy = DenseMap<AA::OffsetAndSize, Accesses *>;
-
-  AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); }
-  AccessBinsTy::const_iterator end() const { return AccessBins.end(); }
-
-protected:
-  /// The bins with all the accesses for the associated pointer.
-  AccessBinsTy AccessBins;
-
-  /// Add a new access to the state at offset \p Offset and with size \p Size.
+  /// Add a new Access to the state at offset \p Offset and with size \p Size.
   /// The access is associated with \p I, writes \p Content (if anything), and
-  /// is of kind \p Kind.
+  /// is of kind \p Kind. If an Access already exists for the same \p I and same
+  /// \p RemoteI, the two are combined, potentially losing information about
+  /// offset and size. The resulting access must now be moved from its original
+  /// OffsetBin to the bin for its new offset.
+  ///
   /// \Returns CHANGED, if the state changed, UNCHANGED otherwise.
   ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size,
                          Instruction &I, Optional<Value *> Content,
                          AAPointerInfo::AccessKind Kind, Type *Ty,
-                         Instruction *RemoteI = nullptr,
-                         Accesses *BinPtr = nullptr) {
-    AA::OffsetAndSize Key{Offset, Size};
-    Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key];
-    if (!Bin)
-      Bin = new (A.Allocator) Accesses;
-    AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty);
-    // Check if we have an access for this instruction in this bin, if not,
-    // simply add it.
-    auto It = Bin->find(Acc);
-    if (It == Bin->find_end()) {
-      Bin->insert(Acc);
-      return ChangeStatus::CHANGED;
-    }
-    // If the existing access is the same as then new one, nothing changed.
-    AAPointerInfo::Access &Current = Bin->get(It);
-    AAPointerInfo::Access Before = Current;
-    // The new one will be combined with the existing one.
-    Current &= Acc;
-    return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+                         Instruction *RemoteI = nullptr);
+
+  using OffsetBinsTy = DenseMap<OffsetAndSize, SmallSet<unsigned, 4>>;
+
+  using const_bin_iterator = OffsetBinsTy::const_iterator;
+  const_bin_iterator begin() const { return OffsetBins.begin(); }
+  const_bin_iterator end() const { return OffsetBins.end(); }
+
+  const AAPointerInfo::Access &getAccess(unsigned Index) const {
+    return AccessList[Index];
   }
 
+protected:
+  // Every memory instruction results in an Access object. We maintain a list of
+  // all Access objects that we own, along with the following maps:
+  //
+  // - OffsetBins: OffsetAndSize -> { Access }
+  // - RemoteIMap: RemoteI x LocalI -> Access
+  //
+  // A RemoteI is any instruction that accesses memory. RemoteI is different
+  // from LocalI if and only if LocalI is a call; then RemoteI is some
+  // instruction in the callgraph starting from LocalI. Multiple paths in the
+  // callgraph from LocalI to RemoteI may produce multiple accesses, but these
+  // are all combined into a single Access object. This may result in loss of
+  // information in OffsetAndSize in the Access object.
+  SmallVector<AAPointerInfo::Access> AccessList;
+  OffsetBinsTy OffsetBins;
+  DenseMap<const Instruction *, SmallVector<unsigned>> RemoteIMap;
+
   /// See AAPointerInfo::forallInterferingAccesses.
   bool forallInterferingAccesses(
       AA::OffsetAndSize OAS,
@@ -911,14 +857,16 @@
     if (!isValidState())
       return false;
 
-    for (const auto &It : AccessBins) {
+    for (const auto &It : OffsetBins) {
       AA::OffsetAndSize ItOAS = It.getFirst();
       if (!OAS.mayOverlap(ItOAS))
         continue;
       bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown();
-      for (auto &Access : *It.getSecond())
+      for (auto Index : It.getSecond()) {
+        auto &Access = AccessList[Index];
         if (!CB(Access, IsExact))
           return false;
+      }
     }
     return true;
   }
@@ -927,32 +875,19 @@
   bool forallInterferingAccesses(
       Instruction &I,
       function_ref<bool(const AAPointerInfo::Access &, bool)> CB,
-      AA::OffsetAndSize *OASPtr) const {
+      AA::OffsetAndSize &OAS) const {
     if (!isValidState())
       return false;
 
-    // First find the offset and size of I.
-    AA::OffsetAndSize OAS;
-    for (const auto &It : AccessBins) {
-      for (auto &Access : *It.getSecond()) {
-        if (Access.getRemoteInst() == &I) {
-          OAS = It.getFirst();
-          break;
-        }
-      }
-      if (OAS.Size != AA::OffsetAndSize::Unassigned)
-        break;
-    }
-
-    if (OASPtr)
-      *OASPtr = OAS;
-
-    // No access for I was found, we are done.
-    if (OAS.Size == AA::OffsetAndSize::Unassigned)
+    auto LocalList = RemoteIMap.find(&I);
+    if (LocalList == RemoteIMap.end()) {
       return true;
+    }
 
-    // Now that we have an offset and size, find all overlapping ones and use
-    // the callback on the accesses.
+    for (auto LI : LocalList->getSecond()) {
+      auto &Access = AccessList[LI];
+      OAS &= {Access.getOffset(), Access.getSize()};
+    }
     return forallInterferingAccesses(OAS, CB);
   }
 
@@ -961,6 +896,56 @@
   BooleanState BS;
 };
 
+/// Add an Access for local instruction \p I and remote instruction \p RemoteI
+/// (\p I itself if null), merging into the one already stored for that pair.
+ChangeStatus AA::PointerInfo::State::addAccess(
+    Attributor &A, int64_t Offset, int64_t Size, Instruction &I,
+    Optional<Value *> Content, AAPointerInfo::AccessKind Kind, Type *Ty,
+    Instruction *RemoteI) {
+  RemoteI = RemoteI ? RemoteI : &I;
+  AAPointerInfo::Access Acc(&I, RemoteI, Offset, Size, Content, Kind, Ty);
+  // Look for an Access already recorded for this (RemoteI, LocalI) pair.
+  auto &LocalList = RemoteIMap[RemoteI];
+  bool AccExists = false;
+  unsigned AccIndex = AccessList.size();
+  for (auto Index : LocalList) {
+    auto &Existing = AccessList[Index];
+    if (Existing.getLocalInst() == &I) {
+      AccExists = true;
+      AccIndex = Index;
+      break;
+    }
+  }
+  if (!AccExists) {
+    AccessList.push_back(Acc);
+    LocalList.push_back(AccIndex);
+  } else {
+    // The new one will be combined with the existing one.
+    auto &Current = AccessList[AccIndex];
+    auto Before = Current;
+    Current &= Acc;
+    if (Current == Before)
+      return ChangeStatus::UNCHANGED;
+    // The Access is still filed under its pre-merge key; take it out of there.
+    Acc = Current;
+    AA::OffsetAndSize Key{Before.getOffset(), Before.getSize()};
+    assert(OffsetBins.count(Key) && "Existing Access must be in some bin.");
+    auto &Bin = OffsetBins[Key];
+    assert(Bin.count(AccIndex) &&
+           "Expected bin to actually contain the Access.");
+    LLVM_DEBUG(dbgs() << "[AAPointerInfo] Removing Access "
+                      << AccessList[AccIndex] << " with key {" << Key.Offset
+                      << ',' << Key.Size << "}\n");
+    Bin.erase(AccIndex);
+  }
+
+  AA::OffsetAndSize Key{Acc.getOffset(), Acc.getSize()};
+  LLVM_DEBUG(dbgs() << "[AAPointerInfo] Inserting Access " << Acc
+                    << " with key {" << Key.Offset << ',' << Key.Size << "}\n");
+  OffsetBins[Key].insert(AccIndex);
+  return ChangeStatus::CHANGED;
+}
+
 namespace {
 struct AAPointerInfoImpl
     : public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
@@ -971,7 +956,7 @@
   const std::string getAsStr() const override {
     return std::string("PointerInfo ") +
            (isValidState() ? (std::string("#") +
-                              std::to_string(AccessBins.size()) + " bins")
+                              std::to_string(OffsetBins.size()) + " bins")
                            : "<invalid>");
   }
 
@@ -990,7 +975,7 @@
   bool forallInterferingAccesses(
       Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
       function_ref<bool(const Access &, bool)> UserCB, bool &HasBeenWrittenTo,
-      AA::OffsetAndSize *OASPtr = nullptr) const override {
+      AA::OffsetAndSize &OAS) const override {
     HasBeenWrittenTo = false;
 
     SmallPtrSet<const Access *, 8> DominatingWrites;
@@ -1105,7 +1090,7 @@
       InterferingAccesses.push_back({&Acc, Exact});
       return true;
     };
-    if (!State::forallInterferingAccesses(I, AccessCB, OASPtr))
+    if (!State::forallInterferingAccesses(I, AccessCB, OAS))
       return false;
 
     if (HasBeenWrittenTo) {
@@ -1172,14 +1157,15 @@
 
     // Combine the accesses bin by bin.
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
-    for (const auto &It : OtherAAImpl.getState()) {
+    const auto &State = OtherAAImpl.getState();
+    for (const auto &It : State) {
       AA::OffsetAndSize OAS = AA::OffsetAndSize::getUnknown();
       if (Offset != AA::OffsetAndSize::Unknown &&
           !It.first.offsetOrSizeAreUnknown()) {
         OAS = AA::OffsetAndSize(It.first.Offset + Offset, It.first.Size);
       }
-      Accesses *Bin = AccessBins.lookup(OAS);
-      for (const AAPointerInfo::Access &RAcc : *It.second) {
+      for (auto Index : It.getSecond()) {
+        const auto &RAcc = State.getAccess(Index);
         if (IsByval && !RAcc.isRead())
           continue;
         bool UsedAssumedInformation = false;
@@ -1192,9 +1178,8 @@
               AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW));
           AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST));
         }
-        Changed =
-            Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK,
-                                RAcc.getType(), RAcc.getRemoteInst(), Bin);
+        Changed = Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK,
+                                      RAcc.getType(), RAcc.getRemoteInst());
       }
     }
     return Changed;
@@ -1206,10 +1191,11 @@
 
   /// Dump the state into \p O.
   void dumpState(raw_ostream &O) {
-    for (auto &It : AccessBins) {
+    for (auto &It : OffsetBins) {
       O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size
-        << "] : " << It.getSecond()->size() << "\n";
-      for (auto &Acc : *It.getSecond()) {
+        << "] : " << It.getSecond().size() << "\n";
+      for (auto AccIndex : It.getSecond()) {
+        auto &Acc = AccessList[AccIndex];
         O << "     - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n";
         if (Acc.getLocalInst() != Acc.getRemoteInst())
           O << "     -->                         " << *Acc.getRemoteInst()
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CGSCC
 ;
 
@@ -53,7 +53,7 @@
 ; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CGSCC-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
 ; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]]
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3:[0-9]+]]
 ; CGSCC-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -64,6 +64,68 @@
   ret i8 %r
 }
 
+;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph.
+
+define internal i8 @read_arg_1(i8* %p) {
+; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; CGSCC-LABEL: define {{[^@]+}}@read_arg_1
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[L:%.*]] = load i8, i8* [[P]], align 1
+; CGSCC-NEXT:    ret i8 [[L]]
+;
+entry:
+  %l = load i8, i8* %p, align 1
+  ret i8 %l
+}
+
+define internal i8 @sum_two_same_loads(i8* %p) {
+; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
+; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] {
+; CGSCC-NEXT:    [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]]
+; CGSCC-NEXT:    [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]]
+; CGSCC-NEXT:    [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
+; CGSCC-NEXT:    ret i8 [[Z]]
+;
+  %x = call i8 @read_arg_1(i8* %p)
+  %y = call i8 @read_arg_1(i8* %p)
+  %z = add nsw i8 %x, %y
+  ret i8 %z
+}
+
+define i8 @call_simplifiable_2() {
+; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2
+; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; TUNIT-NEXT:    ret i8 4
+;
+; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn
+; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_2
+; CGSCC-SAME: () #[[ATTR1]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CGSCC-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
+; CGSCC-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; CGSCC-NEXT:    store i8 3, i8* [[I1]], align 1
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]]
+; CGSCC-NEXT:    ret i8 [[R]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+  %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
+  store i8 2, i8* %i0
+  %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3
+  store i8 3, i8* %i1
+  %r = call i8 @sum_two_same_loads(i8* %i0)
+  ret i8 %r
+}
+
 define i8 @call_not_simplifiable_1() {
 ; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_1
@@ -82,7 +144,7 @@
 ; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CGSCC-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
 ; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
-; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2]]
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]]
 ; CGSCC-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -93,6 +155,89 @@
   ret i8 %r
 }
 
+;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph.
+
+define internal i8 @read_arg_2(i8* %p) {
+; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; TUNIT-LABEL: define {{[^@]+}}@read_arg_2
+; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[P:%.*]]) #[[ATTR0]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[L:%.*]] = load i8, i8* [[P]], align 1
+; TUNIT-NEXT:    ret i8 [[L]]
+;
+; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; CGSCC-LABEL: define {{[^@]+}}@read_arg_2
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[L:%.*]] = load i8, i8* [[P]], align 1
+; CGSCC-NEXT:    ret i8 [[L]]
+;
+entry:
+  %l = load i8, i8* %p, align 1
+  ret i8 %l
+}
+
+define internal i8 @sum_two_different_loads(i8* %p, i8* %q) {
+; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads
+; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR0]] {
+; TUNIT-NEXT:    [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P]]) #[[ATTR2]]
+; TUNIT-NEXT:    [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR2]]
+; TUNIT-NEXT:    [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
+; TUNIT-NEXT:    ret i8 [[Z]]
+;
+; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
+; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads
+; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR2]] {
+; CGSCC-NEXT:    [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]]
+; CGSCC-NEXT:    [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR3]]
+; CGSCC-NEXT:    [[Z:%.*]] = add nsw i8 [[X]], [[Y]]
+; CGSCC-NEXT:    ret i8 [[Z]]
+;
+  %x = call i8 @read_arg_2(i8* %p)
+  %y = call i8 @read_arg_2(i8* %q)
+  %z = add nsw i8 %x, %y
+  ret i8 %z
+}
+
+define i8 @call_not_simplifiable_2() {
+; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_2
+; TUNIT-SAME: () #[[ATTR1]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT:    store i8 2, i8* [[I0]], align 2
+; TUNIT-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; TUNIT-NEXT:    store i8 3, i8* [[I1]], align 1
+; TUNIT-NEXT:    [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
+; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR2]]
+; TUNIT-NEXT:    ret i8 [[R]]
+;
+; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn
+; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_2
+; CGSCC-SAME: () #[[ATTR1]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CGSCC-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2
+; CGSCC-NEXT:    store i8 2, i8* [[I0]], align 2
+; CGSCC-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3
+; CGSCC-NEXT:    store i8 3, i8* [[I1]], align 1
+; CGSCC-NEXT:    [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
+; CGSCC-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR3]]
+; CGSCC-NEXT:    ret i8 [[R]]
+;
+entry:
+  %Bytes = alloca [1024 x i8], align 16
+  %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2
+  store i8 2, i8* %i0
+  %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3
+  store i8 3, i8* %i1
+  %base = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0
+  %r = call i8 @sum_two_different_loads(i8* %i0, i8* %i1)
+  ret i8 %r
+}
+
 ;.
 ; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
@@ -100,5 +245,6 @@
 ;.
 ; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; CGSCC: attributes #[[ATTR2]] = { readonly willreturn }
+; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind readonly willreturn }
+; CGSCC: attributes #[[ATTR3]] = { readonly willreturn }
 ;.
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -2223,6 +2223,7 @@
 ; TUNIT:       loop:
 ; TUNIT-NEXT:    [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
 ; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; TUNIT-NEXT:    store i8 1, i8* [[P]], align 2
 ; TUNIT-NEXT:    [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2
 ; TUNIT-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; TUNIT-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
@@ -2241,6 +2242,7 @@
 ; CGSCC:       loop:
 ; CGSCC-NEXT:    [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
 ; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; CGSCC-NEXT:    store i8 1, i8* [[P]], align 2
 ; CGSCC-NEXT:    [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2
 ; CGSCC-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; CGSCC-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
@@ -2281,6 +2283,7 @@
 ; TUNIT:       loop:
 ; TUNIT-NEXT:    [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
 ; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; TUNIT-NEXT:    store i8 1, i8* [[P]], align 2
 ; TUNIT-NEXT:    [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2
 ; TUNIT-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; TUNIT-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
@@ -2302,6 +2305,7 @@
 ; CGSCC:       loop:
 ; CGSCC-NEXT:    [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
 ; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
+; CGSCC-NEXT:    store i8 1, i8* [[P]], align 2
 ; CGSCC-NEXT:    [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2
 ; CGSCC-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; CGSCC-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7