diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h --- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h +++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h @@ -14,6 +14,7 @@ #include "llvm/IR/PassManager.h" namespace llvm { +class TargetTransformInfo; /// Argument promotion pass. /// @@ -26,6 +27,17 @@ public: ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {} + /// Check if callers and the callee \p F agree how promoted arguments would be + /// passed. The ones that they do not agree on are eliminated from the sets but + /// the return value has to be observed as well. + static bool areFunctionArgsABICompatible( + const Function &F, const TargetTransformInfo &TTI, + SmallPtrSetImpl &ArgsToPromote, + SmallPtrSetImpl &ByValArgsToTransform); + + /// Checks if a type could have padding bytes. + static bool isDenselyPacked(Type *type, const DataLayout &DL); + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); }; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -104,6 +104,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" @@ -2198,6 +2199,45 @@ static const char ID; }; +/// An abstract interface for privatizability. +/// +/// A pointer is privatizable if it can be replaced by a new, private one. +/// Privatizing pointer reduces the use count, interaction between unrelated +/// code parts. 
+///
+/// In order for a pointer to be privatizable its value cannot be observed
+/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
+/// what values are necessary to make the private copy look like the original
+/// one, and the values we need can be loaded (=dereferenceable).
+struct AAPrivatizablePtr : public StateWrapper<BooleanState, AbstractAttribute>,
+                           public IRPosition {
+  AAPrivatizablePtr(const IRPosition &IRP) : IRPosition(IRP) {}
+
+  /// Returns true if pointer privatization is assumed to be possible.
+  bool isAssumedPrivatizablePtr() const { return getAssumed(); }
+
+  /// Returns true if pointer privatization is known to be possible.
+  bool isKnownPrivatizablePtr() const { return getKnown(); }
+
+  /// Return the type we can choose for a private copy of the underlying
+  /// value. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> getPrivatizableType() const = 0;
+
+  /// Return an IR position, see struct IRPosition.
+  ///
+  ///{
+  IRPosition &getIRPosition() { return *this; }
+  const IRPosition &getIRPosition() const { return *this; }
+  ///}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
+                                              Attributor &A);
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 /// An abstract interface for all memory related attributes.
 struct AAMemoryBehavior
     : public IRAttribute<
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -774,8 +774,7 @@
   return true;
 }
 
-/// Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
   // There is no size information, so be conservative.
if (!type->isSized()) return false; @@ -844,12 +843,14 @@ return false; } -static bool areFunctionArgsABICompatible( +bool ArgumentPromotionPass::areFunctionArgsABICompatible( const Function &F, const TargetTransformInfo &TTI, SmallPtrSetImpl &ArgsToPromote, SmallPtrSetImpl &ByValArgsToTransform) { for (const Use &U : F.uses()) { CallSite CS(U.getUser()); + if (!CS) + return false; const Function *Caller = CS.getCaller(); const Function *Callee = CS.getCalledFunction(); if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) || @@ -951,9 +952,9 @@ // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe, if the passed value is densely // packed or if we can prove the padding bytes are never accessed. - bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); + bool isSafeToPromote = PtrArg->hasByValAttr() && + (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || + !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast(AgTy)) { if (MaxElements > 0 && STy->getNumElements() > MaxElements) { @@ -1011,8 +1012,8 @@ if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote, - ByValArgsToTransform)) + if (!ArgumentPromotionPass::areFunctionArgsABICompatible( + *F, TTI, ArgsToPromote, ByValArgsToTransform)) return nullptr; return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -29,12 +29,15 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/InitializePasses.h" +#include 
"llvm/IR/NoFolder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -197,6 +200,75 @@ U->set(&New); } +/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and +/// advanced by \p Offset bytes. To aid later analysis the method tries to build +/// getelement pointer instructions that traverse the natural type of \p Ptr if +/// possible. If that fails, the remaining offset is adjusted byte-wise, hence +/// through a cast to i8*. +/// +/// TODO: This could probably live somewhere more prominantly if it doesn't +/// already exist. +static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset, + IRBuilder &IRB, const DataLayout &DL) { + assert(Offset >= 0 && "Negative offset not supported yet!"); + LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset + << "-bytes as " << *ResTy << "\n"); + + // The initial type we are trying to traverse to get nice GEPs. + Type *Ty = Ptr->getType(); + + SmallVector Indices; + std::string GEPName = Ptr->getName(); + while (Offset) { + uint64_t Idx, Rem; + + if (auto *STy = dyn_cast(Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + if (int64_t(SL->getSizeInBytes()) < Offset) + break; + Idx = SL->getElementContainingOffset(Offset); + assert(Idx < STy->getNumElements() && "Offset calculation error!"); + Rem = Offset - SL->getElementOffset(Idx); + Ty = STy->getElementType(Idx); + } else if (auto *PTy = dyn_cast(Ty)) { + Ty = PTy->getElementType(); + if (!Ty->isSized()) + break; + uint64_t ElementSize = DL.getTypeAllocSize(Ty); + assert(ElementSize && "Expected type with size!"); + Idx = Offset / ElementSize; + Rem = Offset % ElementSize; + } else { + // Non-aggregate type, we cast and make byte-wise progress now. 
+ break; + } + + LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset + << " Idx: " << Idx << " Rem: " << Rem << "\n"); + + GEPName += "." + std::to_string(Idx); + Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx)); + Offset = Rem; + } + + // Create a GEP if we collected indices above. + if (Indices.size()) + Ptr = IRB.CreateGEP(Ptr, Indices, GEPName); + + // If an offset is left we use byte-wise adjustment. + if (Offset) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy()); + Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset), + GEPName + ".b" + Twine(Offset)); + } + + // Ensure the result has the requested type. + Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast"); + + LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n"); + return Ptr; +} + /// Recursively visit all values that might become \p IRP at some point. This /// will be done by looking through cast instructions, selects, phis, and calls /// with the "returned" attribute. Once we cannot look through the value any @@ -4392,6 +4464,434 @@ } }; +/// ----------------------- Privatizable Pointers ------------------------------ +struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { + AAPrivatizablePtrImpl(const IRPosition &IRP) + : AAPrivatizablePtr(IRP), PrivatizableType(llvm::None) {} + + ChangeStatus indicatePessimisticFixpoint() override { + AAPrivatizablePtr::indicatePessimisticFixpoint(); + PrivatizableType = nullptr; + return ChangeStatus::CHANGED; + } + + /// Identify the type we can chose for a private copy of the underlying + /// argument. None means it is not clear yet, nullptr means there is none. + virtual Optional identifyPrivatizableType(Attributor &A) = 0; + + /// Return a privatizable type that encloses both T0 and T1. + /// TODO: This is merely a stub for now as we should manage a mapping as well. 
+ Optional combineTypes(Optional T0, Optional T1) { + if (!T0.hasValue()) + return T1; + if (!T1.hasValue()) + return T0; + if (T0 == T1) + return T0; + return nullptr; + } + + Optional getPrivatizableType() const override { + return PrivatizableType; + } + + const std::string getAsStr() const override { + return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]"; + } + +protected: + Optional PrivatizableType; +}; + +// TODO: Do this for call site arguments (probably also other values) as well. + +struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { + AAPrivatizablePtrArgument(const IRPosition &IRP) + : AAPrivatizablePtrImpl(IRP) {} + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) + Optional identifyPrivatizableType(Attributor &A) override { + // If this is a byval argument and we know all the call sites (so we can + // rewrite them), there is no need to check them explicitly. + if (getIRPosition().hasAttr(Attribute::ByVal) && + A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this, + true)) + return getAssociatedValue().getType()->getPointerElementType(); + + Optional Ty; + unsigned ArgNo = getIRPosition().getArgNo(); + + // Make sure the associated call site argument has the same type at all call + // sites and it is an allocation we know is safe to privatize, for now that + // means we only allow alloca instructions. + // TODO: We can additionally analyze the accesses in the callee to create + // the type from that information instead. That is a little more + // involved and will be done in a follow up patch. + auto CallSiteCheck = [&](AbstractCallSite ACS) { + IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a coresponding argument was found or if it is one not + // associated (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + // Check that all call sites agree on a type. 
+      auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
+      Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
+
+      LLVM_DEBUG({
+        dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
+        if (CSTy.hasValue() && CSTy.getValue())
+          CSTy.getValue()->print(dbgs());
+        else if (CSTy.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+      });
+
+      Ty = combineTypes(Ty, CSTy);
+
+      LLVM_DEBUG({
+        dbgs() << " : New Type: ";
+        if (Ty.hasValue() && Ty.getValue())
+          Ty.getValue()->print(dbgs());
+        else if (Ty.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+        dbgs() << "\n";
+      });
+
+      return !Ty.hasValue() || Ty.getValue();
+    };
+
+    if (!A.checkForAllCallSites(CallSiteCheck, *this, true))
+      return nullptr;
+    return Ty;
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    // Avoid arguments with padding for now.
+    if (!ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
+                                                A.getInfoCache().getDL())) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    // Verify callee and caller agree on how the promoted argument would be
+    // passed.
+    // TODO: The use of the ArgumentPromotion interface here is ugly, we need a
+    // specialized form of TargetTransformInfo::areFunctionArgsABICompatible
+    // which doesn't require the arguments ArgumentPromotion wanted to pass.
+ Function &Fn = *getIRPosition().getAnchorScope(); + SmallPtrSet ArgsToPromote, Dummy; + ArgsToPromote.insert(getAssociatedArgument()); + const auto *TTI = + A.getInfoCache().getAnalysisResultForFunction(Fn); + if (!TTI || + !ArgumentPromotionPass::areFunctionArgsABICompatible( + Fn, *TTI, ArgsToPromote, Dummy) || + ArgsToPromote.empty()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected\n"); + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// Given a type to private \p PrivType, collect the constituates (which are + /// used) in \p ReplacementTypes. + static void + identifyReplacementTypes(Type *PrivType, + SmallVectorImpl &ReplacementTypes) { + // TODO: For now we expand the privatization type to the fullest which can + // lead to dead arguments that need to be removed later. + assert(PrivType && "Expected privatizable type!"); + + // Traverse the type, extract constituate types on the outermost level. + if (auto *PrivStructType = dyn_cast(PrivType)) { + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) + ReplacementTypes.push_back(PrivStructType->getElementType(u)); + } else if (auto *PrivArrayType = dyn_cast(PrivType)) { + ReplacementTypes.append(PrivArrayType->getNumElements(), + PrivArrayType->getElementType()); + } else { + ReplacementTypes.push_back(PrivType); + } + } + + /// Initialize \p Base according to the type \p PrivType at position \p IP. + /// The values needed are taken from the arguments of \p F starting at + /// position \p ArgNo. + static void createInitialization(Type *PrivType, Value &Base, Function &F, + unsigned ArgNo, Instruction &IP) { + assert(PrivType && "Expected privatizable type!"); + + IRBuilder IRB(&IP); + const DataLayout &DL = F.getParent()->getDataLayout(); + + // Traverse the type, build GEPs and stores. 
+ if (auto *PrivStructType = dyn_cast(PrivType)) { + const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { + Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo(); + Value *Ptr = constructPointer( + PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL); + new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); + } + } else if (auto *PrivArrayType = dyn_cast(PrivType)) { + Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo(); + uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy); + for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { + Value *Ptr = + constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL); + new StoreInst(F.getArg(ArgNo + u), Ptr, &IP); + } + } else { + new StoreInst(F.getArg(ArgNo), &Base, &IP); + } + } + + /// Extract values from \p Base according to the type \p PrivType at the + /// call position \p ACS. The values are appended to \p ReplacementValues. + void createReplacementValues(Type *PrivType, AbstractCallSite ACS, + Value *Base, + SmallVectorImpl &ReplacementValues) { + assert(Base && "Expected base value!"); + assert(PrivType && "Expected privatizable type!"); + Instruction *IP = ACS.getInstruction(); + + IRBuilder IRB(IP); + const DataLayout &DL = IP->getModule()->getDataLayout(); + + if (Base->getType()->getPointerElementType() != PrivType) + Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(), + "", ACS.getInstruction()); + + // Traverse the type, build GEPs and loads. 
+ if (auto *PrivStructType = dyn_cast(PrivType)) { + const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType); + for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) { + Type *PointeeTy = PrivStructType->getElementType(u); + Value *Ptr = + constructPointer(PointeeTy->getPointerTo(), Base, + PrivStructLayout->getElementOffset(u), IRB, DL); + ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP)); + } + } else if (auto *PrivArrayType = dyn_cast(PrivType)) { + Type *PointeeTy = PrivArrayType->getElementType(); + uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy); + Type *PointeePtrTy = PointeeTy->getPointerTo(); + for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) { + Value *Ptr = + constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL); + ReplacementValues.push_back(new LoadInst(PointeePtrTy, Ptr, "", IP)); + } + } else { + ReplacementValues.push_back(new LoadInst(PrivType, Base, "", IP)); + } + } + + /// See AbstractAttribute::manifest(...) + ChangeStatus manifest(Attributor &A) override { + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + assert(PrivatizableType.getValue() && "Expected privatizable type!"); + + // Collect all tail calls in the function as we cannot allow new allocas to + // escape into tail recursion. + // TODO: Be smarter about new allocas escaping into tail calls. + SmallVector TailCalls; + if (!A.checkForAllInstructions( + [&](Instruction &I) { + CallInst &CI = cast(I); + if (CI.isTailCall()) + TailCalls.push_back(&CI); + return true; + }, + *this, {Instruction::Call})) + return ChangeStatus::UNCHANGED; + + Argument *Arg = getAssociatedArgument(); + + // Callback to repair the associated function. A new alloca is placed at the + // beginning and initialized with the values passed through arguments. The + // new alloca replaces the use of the old pointer argument. 
+ Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB = + [=](const Attributor::ArgumentReplacementInfo &ARI, + Function &ReplacementFn, Function::arg_iterator ArgIt) { + BasicBlock &EntryBB = ReplacementFn.getEntryBlock(); + Instruction *IP = &*EntryBB.getFirstInsertionPt(); + auto *AI = new AllocaInst(PrivatizableType.getValue(), 0, + Arg->getName() + ".priv", IP); + createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn, + ArgIt->getArgNo(), *IP); + Arg->replaceAllUsesWith(AI); + + for (CallInst *CI : TailCalls) + CI->setTailCall(false); + }; + + // Callback to repair a call site of the associated function. The elements + // of the privatizable type are loaded prior to the call and passed to the + // new function version. + Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB = + [=](const Attributor::ArgumentReplacementInfo &ARI, + AbstractCallSite ACS, SmallVectorImpl &NewArgOperands) { + createReplacementValues( + PrivatizableType.getValue(), ACS, + ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()), + NewArgOperands); + }; + + // Collect the types that will replace the privatizable type in the function + // signature. + SmallVector ReplacementTypes; + identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + + // Register a rewrite of the argument. + if (A.registerFunctionSignatureRewrite( + *Arg, ReplacementTypes, std::move(FnRepairCB), std::move(ACSRepairCB))) + return ChangeStatus::CHANGED; + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl { + AAPrivatizablePtrFloating(const IRPosition &IRP) + : AAPrivatizablePtrImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + virtual void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. 
+ indicatePessimisticFixpoint(); + } + + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::" + "updateImpl will not be called"); + } + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) + Optional identifyPrivatizableType(Attributor &A) override { + Value *Obj = + GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL()); + if (!Obj) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n"); + return nullptr; + } + + if (auto *AI = dyn_cast(Obj)) + if (auto *CI = dyn_cast(AI->getArraySize())) + if (CI->isOne()) + return Obj->getType()->getPointerElementType(); + if (auto *Arg = dyn_cast(Obj)) { + auto &PrivArgAA = + A.getAAFor(*this, IRPosition::argument(*Arg)); + if (PrivArgAA.isAssumedPrivatizablePtr()) + return Obj->getType()->getPointerElementType(); + } + + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid " + "alloca nor privatizable argument: " + << *Obj << "!\n"); + return nullptr; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteArgument final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override {} + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + PrivatizableType = identifyPrivatizableType(A); + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + if (!PrivatizableType.getValue()) + return indicatePessimisticFixpoint(); + + const IRPosition &IRP = getIRPosition(); + auto &NoCaptureAA = A.getAAFor(*this, IRP); + if (!NoCaptureAA.isAssumedNoCapture()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n"); + return indicatePessimisticFixpoint(); + } + + auto &NoAliasAA = A.getAAFor(*this, IRP); + if (!NoAliasAA.isAssumedNoAlias()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n"); + return indicatePessimisticFixpoint(); + } + + const auto &MemBehaviorAA = A.getAAFor(*this, IRP); + if (!MemBehaviorAA.isAssumedReadOnly()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n"); + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteReturned final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating { + AAPrivatizablePtrReturned(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. 
+ indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr); + } +}; + /// -------------------- Memory Behavior Attributes ---------------------------- /// Includes read-none, read-only, and write-only. /// ---------------------------------------------------------------------------- @@ -5864,6 +6364,9 @@ // Every argument with pointer type might be marked nofree. getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be privatizable (or promotable) + getOrCreateAAFor(ArgPos); } } @@ -6095,6 +6598,7 @@ const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; +const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; // Macro magic to create the static generator function for attributes that @@ -6198,6 +6702,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -3,9 +3,11 @@ define internal i32 @deref(i32* %x) nounwind { ; CHECK-LABEL: define {{[^@]+}}@deref -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* 
[[X_PRIV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_PRIV]], align 4 ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -19,7 +21,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32 ; CHECK-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[X_ADDR]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[X_ADDR]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]]) ; CHECK-NEXT: ret i32 [[TMP1]] ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -27,7 +27,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP]]) +; CHECK-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -45,9 +45,11 @@ define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; CHECK-LABEL: define {{[^@]+}}@promote_avx2 -; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture 
nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64> +; CHECK-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -65,7 +67,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(32) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -8,9 +8,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -28,7 +30,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -47,9 +50,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -67,7 +72,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -86,9 +92,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -106,7 +114,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -125,9 +134,11 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; CHECK-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -145,7 +156,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -184,7 +196,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) 
[[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -223,7 +235,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -242,9 +254,11 @@ ; This should promote define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; CHECK-SAME: 
(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -262,7 +276,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void @@ -281,9 +296,11 @@ ; This should promote define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; CHECK-SAME: (<8 x i64>* noalias nocapture nofree 
nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; CHECK-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 ; CHECK-NEXT: ret void ; @@ -301,7 +318,8 @@ ; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) -; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull align 32 dereferenceable(64) [[TMP]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 ; CHECK-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -6,13 +6,20 @@ ; Don't drop 'byval' on %X here. 
define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]], i32 [[I:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[I:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 -; CHECK-NEXT: store i32 0, i32* [[X]] +; CHECK-NEXT: store i32 0, i32* [[X_PRIV]] ; CHECK-NEXT: ret void ; entry: @@ -36,7 +43,12 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]], i32 zeroext 0) +; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; CHECK-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[X]] +; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 zeroext 0) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -4,9 +4,13 @@ define internal i32 @test(i32* %X, i32* %Y) { ; CHECK-LABEL: define {{[^@]+}}@test -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) -; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 -; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[Y]], align 4 +; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: [[Y_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[Y_PRIV]] +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]] +; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4 +; CHECK-NEXT: [[B:%.*]] = load i32, i32* [[Y_PRIV]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -18,10 +22,14 @@ define internal i32 @caller(i32* %B) { ; CHECK-LABEL: define {{[^@]+}}@caller -; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV]] ; CHECK-NEXT: [[A:%.*]] = alloca i32 ; CHECK-NEXT: store i32 1, i32* [[A]], align 4 -; CHECK-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_PRIV]] +; 
CHECK-NEXT: [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]]) ; CHECK-NEXT: ret i32 [[C]] ; %A = alloca i32 @@ -34,7 +42,8 @@ ; CHECK-LABEL: define {{[^@]+}}@callercaller() ; CHECK-NEXT: [[B:%.*]] = alloca i32 ; CHECK-NEXT: store i32 2, i32* [[B]], align 4 -; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]] +; CHECK-NEXT: [[X:%.*]] = call i32 @caller(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[X]] ; %B = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -5,13 +5,20 @@ define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[X_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 -; 
CHECK-NEXT: store i32 0, i32* [[X]] +; CHECK-NEXT: store i32 0, i32* [[X_PRIV]] ; CHECK-NEXT: ret void ; entry: @@ -33,7 +40,12 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]]) +; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]] +; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -7,9 +7,14 @@ define internal void @f(%struct.ss* byval %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; 
CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 @@ -26,9 +31,14 @@ define internal void @g(%struct.ss* byval align 32 %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@g -; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; CHECK-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; CHECK-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; CHECK-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; CHECK-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 32 @@ -51,8 +61,16 @@ ; CHECK-NEXT: store i32 1, i32* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TMP4]], align 4 -; CHECK-NEXT: call void @f(%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[S]]) -; CHECK-NEXT: call void @g(%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[S]]) +; CHECK-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]] +; CHECK-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]] +; CHECK-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]]) +; CHECK-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* 
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]] +; CHECK-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]] +; CHECK-NEXT: call void @g(i32 [[TMP2]], i64 [[TMP3]]) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -5,12 +5,14 @@ define internal i32 @callee(i1 %C, i32* %P) { ; CHECK-LABEL: define {{[^@]+}}@callee -; CHECK-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) +; CHECK-SAME: (i1 [[C:%.*]], i32 [[TMP0:%.*]]) +; CHECK-NEXT: [[P_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[P_PRIV]] ; CHECK-NEXT: br i1 false, label [[T:%.*]], label [[F:%.*]] ; CHECK: T: ; CHECK-NEXT: unreachable ; CHECK: F: -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]], align 4 +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P_PRIV]], align 4 ; CHECK-NEXT: ret i32 [[X]] ; br i1 %C, label %T, label %F @@ -27,7 +29,8 @@ ; CHECK-LABEL: define {{[^@]+}}@foo() ; CHECK-NEXT: [[A:%.*]] = alloca i32 ; CHECK-NEXT: store i32 17, i32* [[A]], align 4 -; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]] +; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[X]] ; %A = alloca i32 ; [#uses=2] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll @@ -28,8 +28,13 @@ define internal i32 
@test_byval_used(%struct.pair* byval %P) { ; CHECK-LABEL: define {{[^@]+}}@test_byval_used -; CHECK-SAME: (%struct.pair* nocapture nofree nonnull readonly byval align 8 dereferenceable(4) [[P:%.*]]) -; CHECK-NEXT: [[C:%.*]] = bitcast %struct.pair* [[P]] to i32* +; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: [[P_PRIV:%.*]] = alloca [[STRUCT_PAIR:%.*]] +; CHECK-NEXT: [[P_PRIV_CAST:%.*]] = bitcast %struct.pair* [[P_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[P_PRIV_CAST]] +; CHECK-NEXT: [[P_PRIV_0_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[P_PRIV_0_1]] +; CHECK-NEXT: [[C:%.*]] = bitcast %struct.pair* [[P_PRIV]] to i32* ; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[C]], align 8 ; CHECK-NEXT: ret i32 [[V]] ; @@ -43,7 +48,11 @@ ; CHECK-SAME: (i32** nocapture readonly [[Y:%.*]], %struct.pair* nocapture nofree readonly [[P:%.*]]) ; CHECK-NEXT: call void @test(i32** nocapture readonly [[Y]]), !dbg !4 ; CHECK-NEXT: call void @test_byval(), !dbg !5 -; CHECK-NEXT: [[V:%.*]] = call i32 @test_byval_used(%struct.pair* nocapture nofree readonly [[P]]), !dbg !5 +; CHECK-NEXT: [[P_CAST:%.*]] = bitcast %struct.pair* [[P]] to i32*, !dbg !5 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_CAST]] +; CHECK-NEXT: [[P_0_1:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[P_0_1]] +; CHECK-NEXT: [[V:%.*]] = call i32 @test_byval_used(i32 [[TMP1]], i32 [[TMP2]]), !dbg !5 ; CHECK-NEXT: ret i32 [[V]] ; call void @test(i32** %Y), !dbg !1 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -19,10 +19,15 @@ ; ATTRIBUTOR-NEXT: ret i32 [[R]] ; ; GLOBALOPT_ATTRIBUTOR-LABEL: define 
{{[^@]+}}@f -; GLOBALOPT_ATTRIBUTOR-SAME: (%struct.ss* noalias nocapture nofree nonnull readonly align 4 dereferenceable(8) [[S:%.*]]) unnamed_addr +; GLOBALOPT_ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) unnamed_addr ; GLOBALOPT_ATTRIBUTOR-NEXT: entry: -; GLOBALOPT_ATTRIBUTOR-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0 -; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV_CAST:%.*]] = bitcast %struct.ss* [[S_PRIV]] to i32* +; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[S_PRIV_CAST]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1 +; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 [[TMP1]], i32* [[S_PRIV_0_1]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 0 +; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[F0]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[F1]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = add i32 [[A]], [[B]] @@ -55,7 +60,11 @@ ; GLOBALOPT_ATTRIBUTOR-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 1, i32* [[F0]], align 4 ; GLOBALOPT_ATTRIBUTOR-NEXT: store i32 2, i32* [[F1]], align 4 -; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = call fastcc i32 @f(%struct.ss* noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; GLOBALOPT_ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; GLOBALOPT_ATTRIBUTOR-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[S_0_1]] +; GLOBALOPT_ATTRIBUTOR-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 [[TMP0]], i32 [[TMP1]]) ; GLOBALOPT_ATTRIBUTOR-NEXT: ret i32 [[R]] ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -8,7 +8,8 @@ ; CHECK-LABEL: define {{[^@]+}}@caller() ; CHECK-NEXT: [[X:%.*]] = alloca i32 ; CHECK-NEXT: store i32 42, i32* [[X]], align 4 -; CHECK-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nonnull align 4 dereferenceable(4) [[X]]), !prof !0 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]] +; CHECK-NEXT: call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0 ; CHECK-NEXT: ret void ; %x = alloca i32 @@ -19,8 +20,10 @@ define internal void @promote_i32_ptr(i32* %xp) { ; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]]) -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4 +; CHECK-SAME: (i32 [[TMP0:%.*]]) +; CHECK-NEXT: [[XP_PRIV:%.*]] = alloca i32 +; CHECK-NEXT: store i32 [[TMP0]], i32* [[XP_PRIV]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[XP_PRIV]], align 4 ; CHECK-NEXT: call void @use_i32(i32 [[X]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll @@ -28,7 +28,7 @@ ; CHECK-LABEL: define {{[^@]+}}@f() ; CHECK-NEXT: [[R:%.*]] = alloca i32 ; CHECK-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } -; CHECK-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret align 4 dereferenceable(4) [[R]]) +; 
CHECK-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]]) ; CHECK-NEXT: ret void ; %r = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll @@ -10,8 +10,13 @@ define internal void @bar(%pair* byval %Data) { ; CHECK-LABEL: define {{[^@]+}}@bar -; CHECK-SAME: (%pair* byval [[DATA:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @foo(%pair* [[DATA]]) +; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; CHECK-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]] +; CHECK-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32* +; CHECK-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]] +; CHECK-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]]) ; CHECK-NEXT: ret void ; tail call i8* @foo(%pair* %Data) @@ -21,7 +26,11 @@ define void @zed(%pair* byval %Data) { ; CHECK-LABEL: define {{[^@]+}}@zed ; CHECK-SAME: (%pair* nocapture readonly byval [[DATA:%.*]]) -; CHECK-NEXT: call void @bar(%pair* nocapture readonly byval [[DATA]]) +; CHECK-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]] +; CHECK-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]] +; CHECK-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]]) ; CHECK-NEXT: ret void ; call void @bar(%pair* byval %Data) diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll --- 
a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes ; FIXME: Add -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations below. ; This flag was removed because max iterations is 2 in most cases, but in windows it is 1. ; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-annotate-decl-cs < %s | FileCheck %s @@ -16,7 +16,8 @@ ; FIXME: The callee -> call site direction is not working yet. define void @t0_caller(i32* %a) { -; CHECK-LABEL: @t0_caller( +; CHECK-LABEL: define {{[^@]+}}@t0_caller +; CHECK-SAME: (i32* [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 ; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 @@ -24,7 +25,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -41,12 +42,13 @@ ; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! ; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { -; CHECK-LABEL: @t0_callback_callee( +; CHECK-LABEL: define {{[^@]+}}@t0_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 8 -; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C:%.*]], align 64 -; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A:%.*]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t0_check(i32* align 256 [[A]], i64 99, i32* [[TMP0]]) ; CHECK-NEXT: ret void ; entry: @@ -61,5 +63,112 @@ declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...) +; Test 1 +; +; Similar to test 0 but with some additional annotations (noalias/nocapture) to make sure +; we deduce and propagate noalias and others properly.
+ +define void @t1_caller(i32* noalias %a) { +; CHECK-LABEL: define {{[^@]+}}@t1_caller +; CHECK-SAME: (i32* noalias [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. 
+define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t1_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t1_check(i32* nocapture align 256, i64, i32* nocapture) nosync + +declare !callback !0 void @t1_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) + +; Test 2 +; +; Similar to test 1 but checking that the noalias is only placed if potential synchronization through @t2_check is preserved. + +define void @t2_caller(i32* noalias %a) { +; CHECK-LABEL: define {{[^@]+}}@t2_caller +; CHECK-SAME: (i32* noalias [[A:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32 +; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64 +; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* +; CHECK-NEXT: store i32 42, i32* [[B]], align 32 +; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: ret void +; +entry: + %b = alloca i32, align 32 + %c = alloca i32*, align 64 + %ptr = alloca i32, align 128 + %0 = bitcast i32* %b to i8* + store i32 42, i32* %b, align 4 + store i32* %b, i32** %c, align 8 + call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c) + ret void +} + +; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below! +; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call. +; +; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls. 
+define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { +; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee +; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 +; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64 +; CHECK-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]]) +; CHECK-NEXT: ret void +; +entry: + %ptr_val = load i32, i32* %ptr, align 8 + store i32 %ptr_val, i32* %is_not_null + %0 = load i32*, i32** %c, align 8 + tail call void @t2_check(i32* %a, i64 %b, i32* %0) + ret void +} + +declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture) + +declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...) + !0 = !{!1} !1 = !{i64 2, i64 -1, i64 -1, i1 true}