diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -14,6 +14,7 @@
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
+class TargetTransformInfo;
 
 /// Argument promotion pass.
 ///
@@ -26,6 +27,17 @@ public:
   ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
 
+  /// Check if callers and the callee \p F agree on how promoted arguments
+  /// would be passed. The ones they do not agree on are eliminated from the
+  /// sets, but the return value has to be observed as well.
+  static bool areFunctionArgsABICompatible(
+      const Function &F, const TargetTransformInfo &TTI,
+      SmallPtrSetImpl<Argument *> &ArgsToPromote,
+      SmallPtrSetImpl<Argument *> &ByValArgsToTransform);
+
+  /// Checks if a type could have padding bytes.
+  static bool isDenselyPacked(Type *type, const DataLayout &DL);
+
   PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                         LazyCallGraph &CG, CGSCCUpdateResult &UR);
 };
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -104,6 +104,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/PassManager.h"
 
@@ -275,17 +276,7 @@
   }
 
   /// Return the associated argument, if any.
-  Argument *getAssociatedArgument() const {
-    if (auto *Arg = dyn_cast<Argument>(&getAnchorValue()))
-      return Arg;
-    int ArgNo = getArgNo();
-    if (ArgNo < 0)
-      return nullptr;
-    Function *AssociatedFn = getAssociatedFunction();
-    if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo))
-      return nullptr;
-    return AssociatedFn->arg_begin() + ArgNo;
-  }
+  Argument *getAssociatedArgument() const;
 
   /// Return true if the position refers to a function interface, that is the
   /// function scope, the function return, or an argumnt.
@@ -602,6 +593,10 @@
   TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) {
     return AG.getAnalysis<TargetLibraryAnalysis>(F);
   }
+  const TargetTransformInfo *
+  getTargetTransformInfoForFunction(const Function &F) {
+    return AG.getAnalysis<TargetIRAnalysis>(F);
+  }
 
   /// Return AliasAnalysis Result for function \p F.
   AAResults *getAAResultsForFunction(const Function &F) {
@@ -2085,6 +2080,40 @@
   static const char ID;
 };
 
+/// An abstract interface for privatizability.
+///
+/// A pointer is privatizable if it can be replaced by a new, private one.
+/// Privatizing a pointer reduces the use count and the interaction between
+/// unrelated code parts.
+struct AAPrivatizablePtr : public StateWrapper<BooleanState, AbstractAttribute>,
+                           public IRPosition {
+  AAPrivatizablePtr(const IRPosition &IRP) : IRPosition(IRP) {}
+
+  /// Returns true if pointer privatization is assumed to be possible.
+  bool isAssumedPrivatizablePtr() const { return getAssumed(); }
+
+  /// Returns true if pointer privatization is known to be possible.
+  bool isKnownPrivatizablePtr() const { return getKnown(); }
+
+  /// Return the type we can choose for a private copy of the underlying
+  /// value. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> getPrivatizableType() const = 0;
+
+  /// Return an IR position, see struct IRPosition.
+  ///
+  ///{
+  IRPosition &getIRPosition() { return *this; }
+  const IRPosition &getIRPosition() const { return *this; }
+  ///}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
+                                              Attributor &A);
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 /// An abstract interface for all memory related attributes.
 struct AAMemoryBehavior
     : public IRAttribute<
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -773,8 +773,7 @@
   return true;
 }
 
-/// Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
   // There is no size information, so be conservative.
   if (!type->isSized())
     return false;
@@ -843,12 +842,14 @@
   return false;
 }
 
-static bool areFunctionArgsABICompatible(
+bool ArgumentPromotionPass::areFunctionArgsABICompatible(
     const Function &F, const TargetTransformInfo &TTI,
     SmallPtrSetImpl<Argument *> &ArgsToPromote,
     SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
   for (const Use &U : F.uses()) {
     CallSite CS(U.getUser());
+    if (!CS)
+      return false;
     const Function *Caller = CS.getCaller();
     const Function *Callee = CS.getCalledFunction();
     if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
@@ -950,9 +951,9 @@
     // If this is a byval argument, and if the aggregate type is small, just
     // pass the elements, which is always safe, if the passed value is densely
     // packed or if we can prove the padding bytes are never accessed.
-    bool isSafeToPromote =
-        PtrArg->hasByValAttr() &&
-        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+    bool isSafeToPromote = PtrArg->hasByValAttr() &&
+                           (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
+                            !canPaddingBeAccessed(PtrArg));
     if (isSafeToPromote) {
       if (StructType *STy = dyn_cast<StructType>(AgTy)) {
         if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
@@ -1010,8 +1011,8 @@
   if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
     return nullptr;
 
-  if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
-                                    ByValArgsToTransform))
+  if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+          *F, TTI, ArgsToPromote, ByValArgsToTransform))
     return nullptr;
 
   return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -29,11 +29,14 @@
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/NoFolder.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 
@@ -147,6 +150,92 @@
 }
 ///}
 
+Argument *IRPosition::getAssociatedArgument() const {
+  if (getPositionKind() == IRP_ARGUMENT)
+    return cast<Argument>(&getAnchorValue());
+
+  // Not an Argument and no argument number means this is not a call site
+  // argument, thus we cannot find a callback argument to return.
+  int ArgNo = getArgNo();
+  if (ArgNo < 0)
+    return nullptr;
+
+  const Function *Callee = getAssociatedFunction();
+  if (Callee && Callee->arg_size() > unsigned(ArgNo))
+    return Callee->getArg(ArgNo);
+
+  return nullptr;
+}
+
+/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and
+/// advanced by \p Offset bytes. To aid later analysis the method tries to build
+/// getelementptr instructions that traverse the natural type of \p Ptr if
+/// possible. If that fails, the remaining offset is adjusted byte-wise, hence
+/// through a cast to i8*.
+///
+/// TODO: This could probably live somewhere more prominently if it doesn't
+///       already exist.
+static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset,
+                               IRBuilder<NoFolder> &IRB, const DataLayout &DL) {
+  assert(Offset >= 0 && "Negative offset not supported yet!");
+  LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
+                    << "-bytes as " << *ResTy << "\n");
+
+  // The initial type we are trying to traverse to get nice GEPs.
+  Type *Ty = Ptr->getType();
+
+  SmallVector<Value *, 4> Indices;
+  std::string GEPName = Ptr->getName();
+  while (Offset) {
+    uint64_t Idx, Rem;
+
+    if (auto *STy = dyn_cast<StructType>(Ty)) {
+      const StructLayout *SL = DL.getStructLayout(STy);
+      if (int64_t(SL->getSizeInBytes()) < Offset)
+        break;
+      Idx = SL->getElementContainingOffset(Offset);
+      assert(Idx < STy->getNumElements() && "Offset calculation error!");
+      Rem = Offset - SL->getElementOffset(Idx);
+      Ty = STy->getElementType(Idx);
+    } else if (auto *PTy = dyn_cast<PointerType>(Ty)) {
+      Ty = PTy->getElementType();
+      if (!Ty->isSized())
+        break;
+      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      assert(ElementSize && "Expected type with size!");
+      Idx = Offset / ElementSize;
+      Rem = Offset % ElementSize;
+    } else {
+      // Non-aggregate type, we cast and make byte-wise progress now.
+      break;
+    }
+
+    LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset
+                      << " Idx: " << Idx << " Rem: " << Rem << "\n");
+
+    GEPName += "." + std::to_string(Idx);
+    Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
+    Offset = Rem;
+  }
+
+  // Create a GEP if we collected indices above.
+  if (Indices.size())
+    Ptr = IRB.CreateGEP(Ptr, Indices, GEPName);
+
+  // If an offset is left we use byte-wise adjustment.
+  if (Offset) {
+    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
+    Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset),
+                        GEPName + ".b" + Twine(Offset));
+  }
+
+  // Ensure the result has the requested type.
+  Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");
+
+  LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
+  return Ptr;
+}
+
 /// Recursively visit all values that might become \p IRP at some point. This
 /// will be done by looking through cast instructions, selects, phis, and calls
 /// with the "returned" attribute. Once we cannot look through the value any
@@ -4045,6 +4134,432 @@
   }
 };
 
+/// ----------------------- Privatizable Pointers ------------------------------
+struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
+  AAPrivatizablePtrImpl(const IRPosition &IRP)
+      : AAPrivatizablePtr(IRP), PrivatizableType(llvm::None) {}
+
+  ChangeStatus indicatePessimisticFixpoint() override {
+    AAPrivatizablePtr::indicatePessimisticFixpoint();
+    PrivatizableType = nullptr;
+    return ChangeStatus::CHANGED;
+  }
+
+  /// Identify the type we can choose for a private copy of the underlying
+  /// argument. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> identifyPrivatizableType(Attributor &A) = 0;
+
+  /// Return a privatizable type that encloses both T0 and T1.
+  /// TODO: This is merely a stub for now as we should manage a mapping as well.
+  Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
+    if (!T0.hasValue())
+      return T1;
+    if (!T1.hasValue())
+      return T0;
+    if (T0 == T1)
+      return T0;
+    return nullptr;
+  }
+
+  Optional<Type *> getPrivatizableType() const override {
+    return PrivatizableType;
+  }
+
+  const std::string getAsStr() const override {
+    return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]";
+  }
+
+protected:
+  Optional<Type *> PrivatizableType;
+};
+
+// TODO: Do this for call site arguments (probably also other values) as well.
+
+struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
+  AAPrivatizablePtrArgument(const IRPosition &IRP)
+      : AAPrivatizablePtrImpl(IRP) {}
+
+  /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+  Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+    // If this is a byval argument and we know all the call sites (so we can
+    // rewrite them), there is no need to check them explicitly.
+    if (getIRPosition().hasAttr(Attribute::ByVal) &&
+        A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
+                               true))
+      return getAssociatedValue().getType()->getPointerElementType();
+
+    Optional<Type *> Ty;
+    unsigned ArgNo = getIRPosition().getArgNo();
+
+    // Make sure the associated call site argument has the same type at all call
+    // sites and it is an allocation we know is safe to privatize, for now that
+    // means we only allow alloca instructions.
+    // TODO: We can additionally analyze the accesses in the callee to create
+    //       the type from that information instead. That is a little more
+    //       involved and will be done in a follow up patch.
+    auto CallSiteCheck = [&](AbstractCallSite ACS) {
+      IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+      // Check if a corresponding argument was found or if it is one not
+      // associated (which can happen for callback calls).
+      if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+        return false;
+
+      // Check that all call sites agree on a type.
+      auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
+      Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
+
+      LLVM_DEBUG({
+        dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
+        if (CSTy.hasValue() && CSTy.getValue())
+          CSTy.getValue()->print(dbgs());
+        else if (CSTy.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+      });
+
+      Ty = combineTypes(Ty, CSTy);
+
+      LLVM_DEBUG({
+        dbgs() << " : New Type: ";
+        if (Ty.hasValue() && Ty.getValue())
+          Ty.getValue()->print(dbgs());
+        else if (Ty.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+        dbgs() << "\n";
+      });
+
+      return !Ty.hasValue() || Ty.getValue();
+    };
+
+    if (!A.checkForAllCallSites(CallSiteCheck, *this, true))
+      return nullptr;
+    return Ty;
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    // Avoid arguments with padding for now.
+    if (!ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
+                                                A.getInfoCache().getDL())) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    // Verify callee and caller agree on how the promoted argument would be
+    // passed.
+    // TODO: We should re-implement this and not reuse the ArgumentPromotion
+    //       version of this.
+    Function &Fn = *getIRPosition().getAnchorScope();
+    SmallPtrSet<Argument *, 1> ArgsToPromote, Dummy;
+    ArgsToPromote.insert(getAssociatedArgument());
+    const auto *TTI = A.getInfoCache().getTargetTransformInfoForFunction(Fn);
+    if (!TTI ||
+        !ArgumentPromotionPass::areFunctionArgsABICompatible(
+            Fn, *TTI, ArgsToPromote, Dummy) ||
+        ArgsToPromote.empty()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// Given a type to privatize \p PrivType, collect the constituents (which are
+  /// used) in \p ReplacementTypes.
+  static void
+  identifyReplacementTypes(Type *PrivType,
+                           SmallVectorImpl<Type *> &ReplacementTypes) {
+    // TODO: For now we expand the privatization type to the fullest which can
+    //       lead to dead arguments that need to be removed later.
+    assert(PrivType && "Expected privatizable type!");
+
+    // Traverse the type, extract constituent types on the outermost level.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++)
+        ReplacementTypes.push_back(PrivStructType->getElementType(u));
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      ReplacementTypes.append(PrivArrayType->getNumElements(),
+                              PrivArrayType->getElementType());
+    } else {
+      ReplacementTypes.push_back(PrivType);
+    }
+  }
+
+  /// Initialize \p Base according to the type \p PrivType at position \p IP.
+  /// The values needed are taken from the arguments of \p F starting at
+  /// position \p ArgNo.
+  static void createInitialization(Type *PrivType, Value &Base, Function &F,
+                                   unsigned ArgNo, Instruction &IP) {
+    assert(PrivType && "Expected privatizable type!");
+
+    IRBuilder<NoFolder> IRB(&IP);
+    const DataLayout &DL = F.getParent()->getDataLayout();
+
+    // Traverse the type, build GEPs and stores.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo();
+        Value *Ptr = constructPointer(
+            PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy);
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr =
+            constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else {
+      new StoreInst(F.getArg(ArgNo), &Base, &IP);
+    }
+  }
+
+  /// Extract values from \p Base according to the type \p PrivType at the
+  /// call position \p ACS. The values are appended to \p ReplacementValues.
+  void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
+                               Value *Base,
+                               SmallVectorImpl<Value *> &ReplacementValues) {
+    assert(Base && "Expected base value!");
+    assert(PrivType && "Expected privatizable type!");
+    Instruction *IP = ACS.getInstruction();
+
+    IRBuilder<NoFolder> IRB(IP);
+    const DataLayout &DL = IP->getModule()->getDataLayout();
+
+    if (Base->getType()->getPointerElementType() != PrivType)
+      Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
+                                                 "", ACS.getInstruction());
+
+    // Traverse the type, build GEPs and loads.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        Type *PointeeTy = PrivStructType->getElementType(u);
+        Value *Ptr =
+            constructPointer(PointeeTy->getPointerTo(), Base,
+                             PrivStructLayout->getElementOffset(u), IRB, DL);
+        ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP));
+      }
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      Type *PointeeTy = PrivArrayType->getElementType();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+      Type *PointeePtrTy = PointeeTy->getPointerTo();
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr =
+            constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
+        ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP));
+      }
+    } else {
+      ReplacementValues.push_back(new LoadInst(PrivType, Base, "", IP));
+    }
+  }
+
+  /// See AbstractAttribute::manifest(...)
+  ChangeStatus manifest(Attributor &A) override {
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    assert(PrivatizableType.getValue() && "Expected privatizable type!");
+
+    // Collect all tail calls in the function as we cannot allow new allocas to
+    // escape into tail recursion.
+    // TODO: Be smarter about new allocas escaping into tail calls.
+    SmallVector<CallInst *, 16> TailCalls;
+    if (!A.checkForAllInstructions(
+            [&](Instruction &I) {
+              CallInst &CI = cast<CallInst>(I);
+              if (CI.isTailCall())
+                TailCalls.push_back(&CI);
+              return true;
+            },
+            *this, {Instruction::Call}))
+      return ChangeStatus::UNCHANGED;
+
+    Argument *Arg = getAssociatedArgument();
+
+    // Callback to repair the associated function. A new alloca is placed at the
+    // beginning and initialized with the values passed through arguments. The
+    // new alloca replaces the use of the old pointer argument.
+    Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB =
+        [=](const Attributor::ArgumentReplacementInfo &ARI,
+            Function &ReplacementFn, Function::arg_iterator ArgIt) {
+          BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
+          Instruction *IP = &*EntryBB.getFirstInsertionPt();
+          auto *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+                                    Arg->getName() + ".priv", IP);
+          createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
+                               ArgIt->getArgNo(), *IP);
+          Arg->replaceAllUsesWith(AI);
+
+          for (CallInst *CI : TailCalls)
+            CI->setTailCall(false);
+        };
+
+    // Callback to repair a call site of the associated function. The elements
+    // of the privatizable type are loaded prior to the call and passed to the
+    // new function version.
+    Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
+        [=](const Attributor::ArgumentReplacementInfo &ARI,
+            AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
+          createReplacementValues(
+              PrivatizableType.getValue(), ACS,
+              ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
+              NewArgOperands);
+        };
+
+    // Collect the types that will replace the privatizable type in the function
+    // signature.
+    SmallVector<Type *, 16> ReplacementTypes;
+    identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+
+    // Register a rewrite of the argument.
+    if (A.registerFunctionSignatureRewrite(
+            *Arg, ReplacementTypes, std::move(FnRepairCB), std::move(ACSRepairCB)))
+      return ChangeStatus::CHANGED;
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
+  AAPrivatizablePtrFloating(const IRPosition &IRP)
+      : AAPrivatizablePtrImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  virtual void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::"
+                     "updateImpl will not be called");
+  }
+
+  /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+  Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+    Value *Obj =
+        GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL());
+    if (!Obj) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
+      return nullptr;
+    }
+
+    if (auto *AI = dyn_cast<AllocaInst>(Obj))
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+        if (CI->isOne())
+          return Obj->getType()->getPointerElementType();
+    if (auto *Arg = dyn_cast<Argument>(Obj)) {
+      auto &PrivArgAA =
+          A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(*Arg));
+      if (PrivArgAA.isAssumedPrivatizablePtr())
+        return Obj->getType()->getPointerElementType();
+    }
+
+    LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
+                         "alloca nor privatizable argument: "
+                      << *Obj << "!\n");
+    return nullptr;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrCallSiteArgument final
+    : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    const IRPosition &IRP = getIRPosition();
+    auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+    if (!NoCaptureAA.isAssumedNoCapture()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+    if (!NoAliasAA.isAssumedNoAlias()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, IRP);
+    if (!MemBehaviorAA.isAssumedReadOnly()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrCallSiteReturned final
+    : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrReturned(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
+  }
+};
+
 /// -------------------- Memory Behavior Attributes ----------------------------
 /// Includes read-none, read-only, and write-only.
 /// ----------------------------------------------------------------------------
@@ -5445,6 +5960,9 @@
       // Every argument with pointer type might be marked
       // "readnone/readonly/writeonly/..."
getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be privatizable (or promotable) + getOrCreateAAFor(ArgPos); } } @@ -5666,6 +6184,7 @@ const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; +const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; // Macro magic to create the static generator function for attributes that @@ -5770,6 +6289,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR define internal i32 @deref(i32* %x) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@deref -; ARGPROMOTION-SAME: (i32 [[X_VAL:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: ret i32 [[X_VAL]] +; ALL-LABEL: define {{[^@]+}}@deref +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: ret i32 [[TMP0]] ; entry: %tmp2 = load i32, i32* %x, align 4 @@ -13,14 +14,11 @@ } define i32 @f(i32 %x) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[X:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[X_ADDR:%.*]] = alloca i32 -; ARGPROMOTION-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; ARGPROMOTION-NEXT: [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]]) -; ARGPROMOTION-NEXT: ret i32 [[TMP1]] +; ALL-LABEL: define {{[^@]+}}@f +; ALL-SAME: (i32 [[X:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X]]) +; ALL-NEXT: ret i32 [[TMP1]] ; entry: %x_addr = alloca i32 diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR2498 ; This test tries to convince argpromotion about promoting the load from %A + 2, @@ -17,6 +18,18 @@ ; ARGPROMOTION-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] ; ARGPROMOTION-NEXT: ret i32 [[R]] ; +; 
ATTRIBUTOR-LABEL: define {{[^@]+}}@callee +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[A:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_0:%.*]] = load i32, i32* [[A]] +; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; ATTRIBUTOR: T: +; ATTRIBUTOR-NEXT: ret i32 [[A_0]] +; ATTRIBUTOR: F: +; ATTRIBUTOR-NEXT: [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2 +; ATTRIBUTOR-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] +; ATTRIBUTOR-NEXT: ret i32 [[R]] +; entry: ; Unconditonally load the element at %A %A.0 = load i32, i32* %A @@ -32,12 +45,18 @@ ret i32 %R } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* null) +define i32 @foo(i1 %c, i32* %A) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@foo +; ARGPROMOTION-SAME: (i1 [[C:%.*]], i32* [[A:%.*]]) +; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* [[A]]) ; ARGPROMOTION-NEXT: ret i32 [[X]] ; - %X = call i32 @callee(i1 false, i32* null) ; [#uses=1] +; ATTRIBUTOR-LABEL: define {{[^@]+}}@foo +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* nocapture readonly [[A]]) +; ATTRIBUTOR-NEXT: ret i32 [[X]] +; + %X = call i32 @callee(i1 %c, i32* %A) ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -inline -argpromotion -disable-output +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -argpromotion -disable-output +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -S -argpromotion -mem2reg < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='argpromotion,function(mem2reg)' < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false 
-attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Test that we only promote arguments when the caller/callee have compatible ; function attrubtes. @@ -14,6 +15,13 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@no_promote_avx2 +; ATTRIBUTOR-SAME: (<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <4 x i64>, <4 x i64>* %arg1 store <4 x i64> %tmp, <4 x i64>* %arg @@ -33,6 +41,18 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@no_promote +; ATTRIBUTOR-SAME: (<4 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <4 x i64>, align 32 %tmp2 = alloca <4 x i64>, align 32 @@ -51,6 +71,12 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@promote_avx2 +; ATTRIBUTOR-SAME: (<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <4 x i64>, <4 x i64>* %arg1 store <4 x i64> %tmp, <4 x i64>* %arg @@ -71,6 +97,19 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@promote +; ATTRIBUTOR-SAME: (<4 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <4 x i64>, align 32 %tmp2 = alloca <4 x i64>, align 32 diff --git 
a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Test that we only promote arguments when the caller/callee have compatible ; function attrubtes. @@ -14,6 +15,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -34,6 +44,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -53,6 +76,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* 
[[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -73,6 +105,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -92,6 +137,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -112,6 +166,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -131,6 +198,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -151,6 +227,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -171,6 +260,13 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -190,6 +286,18 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* 
[[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -210,6 +318,13 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -229,6 +344,18 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -248,6 +375,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -268,6 +404,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void 
@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -287,6 +436,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -307,6 +465,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll @@ -4,8 +4,9 @@ ; we don't do that anymore. It also verifies that the combination of ; globalopt and argpromotion is able to optimize the call safely. 
; -; RUN: opt -S -argpromotion %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -globalopt -argpromotion %s | FileCheck %s --check-prefixes=GLOBALOPT_ARGPROMOTION,ALL +; RUN: opt -S -argpromotion %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -globalopt -argpromotion %s | FileCheck %s --check-prefixes=ALL,GLOBALOPT_ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i386-pc-windows-msvc19.11.0" @@ -33,6 +34,16 @@ ; GLOBALOPT_ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) ; GLOBALOPT_ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@internalfun +; ATTRIBUTOR-SAME: (%struct.a* nocapture readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca nonnull align 4 dereferenceable(1) [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* nonnull align 4 dereferenceable(1) [[A]]) +; ATTRIBUTOR-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; ATTRIBUTOR-NEXT: ret void +; entry: %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 %argmem = alloca inalloca <{ %struct.a }>, align 4 @@ -59,6 +70,14 @@ ; GLOBALOPT_ARGPROMOTION-NEXT: call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]]) ; GLOBALOPT_ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) ; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@exportedfun +; ATTRIBUTOR-SAME: (%struct.a* nocapture readnone [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; ATTRIBUTOR-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; ATTRIBUTOR-NEXT: call x86_thiscallcc void @internalfun(%struct.a* nocapture readnone undef, <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; ATTRIBUTOR-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; ATTRIBUTOR-NEXT: ret void ; %inalloca.save = tail call i8* @llvm.stacksave() %argmem = alloca inalloca <{ %struct.a }>, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll --- a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll +++ b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify 
-attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR %T = type { i32, i32, i32, i32 } @G = constant %T { i32 0, i32 0, i32 17, i32 25 } @@ -12,6 +13,16 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]] ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (%T* nocapture nonnull readonly align 8 dereferenceable(16) [[P:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 3 +; ATTRIBUTOR-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 2 +; ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; ATTRIBUTOR-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 @@ -31,6 +42,11 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = call i32 @test(%T* nonnull align 8 dereferenceable(16) @G) +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = call i32 @test(%T* @G) ret i32 %v diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll @@ -23,13 +23,20 @@ ; ARGPROMOTION-NEXT: ret void ; ; ATTRIBUTOR-LABEL: define {{[^@]+}}@f -; ATTRIBUTOR-SAME: (%struct.ss* noalias nocapture nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* nocapture nonnull writeonly byval dereferenceable(4) [[X:%.*]]) +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) ; ATTRIBUTOR-NEXT: entry: -; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[X_PRIV:%.*]] = alloca i32 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 -; ATTRIBUTOR-NEXT: store i32 0, i32* [[X]] +; ATTRIBUTOR-NEXT: store i32 0, i32* [[X_PRIV]] ; ATTRIBUTOR-NEXT: ret void ; entry: @@ -68,7 +75,12 @@ ; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 8 ; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; ATTRIBUTOR-NEXT: store i64 2, i64* [[TMP4]], align 4 -; ATTRIBUTOR-NEXT: call void @f(%struct.ss* noalias nocapture nonnull byval align 8 dereferenceable(12) [[S]], i32* nocapture writeonly byval [[X]]) +; ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; 
ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]] +; ATTRIBUTOR-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) ; ATTRIBUTOR-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/basictest.ll --- a/llvm/test/Transforms/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/ArgumentPromotion/basictest.ll @@ -1,12 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=7 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @test(i32* %X, i32* %Y) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@test -; ARGPROMOTION-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) -; ARGPROMOTION-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] -; ARGPROMOTION-NEXT: ret i32 [[C]] +; ALL-LABEL: define {{[^@]+}}@test +; ALL-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; ALL-NEXT: [[C:%.*]] = add i32 [[TMP0]], [[TMP1]] +; ALL-NEXT: ret i32 [[C]] ; %A = load i32, i32* %X %B = load i32, i32* %Y @@ -15,10 +16,10 @@ } define internal i32 @caller(i32* %B) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@caller -; ARGPROMOTION-SAME: (i32 [[B_VAL1:%.*]]) -; ARGPROMOTION-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]]) -; ARGPROMOTION-NEXT: ret i32 [[C]] +; ALL-LABEL: define {{[^@]+}}@caller +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[TMP0]]) +; ALL-NEXT: ret i32 [[C]] ; %A = alloca i32 store i32 1, i32* %A @@ -27,9 +28,9 @@ } define i32 @callercaller() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callercaller() -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @caller(i32 2) -; ARGPROMOTION-NEXT: ret i32 [[X]] +; ALL-LABEL: define {{[^@]+}}@callercaller() +; ALL-NEXT: [[X:%.*]] = call i32 @caller(i32 2) +; ALL-NEXT: ret i32 [[X]] ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll @@ -1,9 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Arg promotion eliminates the struct argument. ; FIXME: Should it eliminate the i32* argument? +; The attributor eliminates the i32*. 
%struct.ss = type { i32, i64 } @@ -23,6 +25,23 @@ ; ARGPROMOTION-NEXT: store i32 0, i32* [[X]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@f +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[X_PRIV:%.*]] = alloca i32 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: store i32 0, i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: ret void +; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 %tmp1 = load i32, i32* %tmp, align 4 @@ -49,6 +68,22 @@ ; ARGPROMOTION-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]]) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (i32* nocapture writeonly [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 8 +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 2, i64* [[TMP4]], align 4 +; ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]] +; ATTRIBUTOR-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %S = alloca %struct.ss %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll @@ -1,25 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes='argpromotion,function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval %b) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; 
ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]] -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 [[B_0]], i32* [[DOT0]] -; ARGPROMOTION-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 [[B_1]], i64* [[DOT1]] -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; ARGPROMOTION-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@f +; ALL-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 +; ALL-NEXT: ret void ; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -31,19 +24,11 @@ define internal void @g(%struct.ss* byval align 32 %b) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@g -; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 [[B_0]], i32* [[DOT0]] -; ARGPROMOTION-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 [[B_1]], i64* [[DOT1]] -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; ARGPROMOTION-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@g +; ALL-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 +; ALL-NEXT: ret void ; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -55,24 +40,11 @@ define i32 @main() nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@main() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 1, i32* [[TMP1]], align 8 -; ARGPROMOTION-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 2, i64* [[TMP4]], align 4 -; ARGPROMOTION-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]] -; ARGPROMOTION-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]] -; ARGPROMOTION-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) -; ARGPROMOTION-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]] -; ARGPROMOTION-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]] -; ARGPROMOTION-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) -; ARGPROMOTION-NEXT: ret i32 0 +; ALL-LABEL: define {{[^@]+}}@main() +; ALL-NEXT: entry: +; ALL-NEXT: call 
void @f(i32 1, i64 2) +; ALL-NEXT: call void @g(i32 1, i64 2) +; ALL-NEXT: ret i32 0 ; entry: %S = alloca %struct.ss diff --git a/llvm/test/Transforms/ArgumentPromotion/chained.ll b/llvm/test/Transforms/ArgumentPromotion/chained.ll --- a/llvm/test/Transforms/ArgumentPromotion/chained.ll +++ b/llvm/test/Transforms/ArgumentPromotion/chained.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR @G1 = constant i32 0 @G2 = constant i32* @G1 @@ -11,6 +12,13 @@ ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: ret i32 [[X_VAL_VAL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (i32** nocapture nonnull readonly align 8 dereferenceable(8) [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[Y:%.*]] = load i32*, i32** @G2, align 8 +; ATTRIBUTOR-NEXT: [[Z:%.*]] = load i32, i32* [[Y]] +; ATTRIBUTOR-NEXT: ret i32 [[Z]] +; entry: %y = load i32*, i32** %x %z = load i32, i32* %y @@ -25,6 +33,11 @@ ; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[X]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[X:%.*]] = call i32 @test(i32** nonnull align 8 dereferenceable(8) @G2) +; ATTRIBUTOR-NEXT: ret i32 [[X]] +; entry: %x = call i32 @test(i32** @G2) ret i32 %x diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll --- a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll +++ b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Don't promote around control flow. 
-define internal i32 @callee(i1 %C, i32* %P) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee -; ARGPROMOTION-SAME: (i1 [[C:%.*]], i32* [[P:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; ARGPROMOTION: T: -; ARGPROMOTION-NEXT: ret i32 17 -; ARGPROMOTION: F: -; ARGPROMOTION-NEXT: [[X:%.*]] = load i32, i32* [[P]] -; ARGPROMOTION-NEXT: ret i32 [[X]] +define internal i32 @callee(i1 %C, i32* nocapture readonly %P) { +; ALL-LABEL: define {{[^@]+}}@callee +; ALL-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[P:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; ALL: T: +; ALL-NEXT: ret i32 17 +; ALL: F: +; ALL-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; ALL-NEXT: ret i32 [[X]] ; entry: br i1 %C, label %T, label %F @@ -25,14 +26,15 @@ ret i32 %X } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 true, i32* null) -; ARGPROMOTION-NEXT: ret i32 [[X]] +define i32 @foo(i1 %C, i32* nocapture readonly %P) { +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[P:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* nocapture readonly [[P]]) +; ALL-NEXT: ret i32 [[X]] ; entry: - %X = call i32 @callee(i1 true, i32* null) + %X = call i32 @callee(i1 %C, i32* nocapture readonly %P) ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll --- a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes='argpromotion,function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @@ -12,6 +13,14 @@ ; ARGPROMOTION-NEXT: ret i32 17 ; ARGPROMOTION: F: ; ARGPROMOTION-NEXT: ret i32 [[P_VAL]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee +; ATTRIBUTOR-SAME: (i1 [[C:%.*]]) +; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; ATTRIBUTOR: T: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: F: +; ATTRIBUTOR-NEXT: unreachable ; br i1 %C, label %T, label %F @@ -23,17 +32,20 @@ ret i32 %X } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: [[A:%.*]] = alloca i32 -; ARGPROMOTION-NEXT: store i32 17, i32* [[A]] -; ARGPROMOTION-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]]) +define i32 @foo(i1 %C) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@foo +; ARGPROMOTION-SAME: (i1 [[C:%.*]]) +; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32 17) ; ARGPROMOTION-NEXT: ret i32 [[X]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@foo +; ATTRIBUTOR-SAME: (i1 [[C:%.*]]) +; ATTRIBUTOR-NEXT: [[X:%.*]] = call 
i32 @callee(i1 [[C]]) +; ATTRIBUTOR-NEXT: ret i32 [[X]] ; %A = alloca i32 ; [#uses=2] store i32 17, i32* %A - %X = call i32 @callee( i1 false, i32* %A ) ; [#uses=1] + %X = call i32 @callee( i1 %C, i32* %A ) ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/crash.ll b/llvm/test/Transforms/ArgumentPromotion/crash.ll --- a/llvm/test/Transforms/ArgumentPromotion/crash.ll +++ b/llvm/test/Transforms/ArgumentPromotion/crash.ll @@ -1,35 +1,49 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -inline -argpromotion | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM -; RUN: opt -S < %s -passes=inline,argpromotion | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM +; RUN: opt -S < %s -inline -argpromotion | FileCheck %s --check-prefixes=ALL,ARGPROMOTION_OLDPM +; RUN: opt -S < %s -passes=inline,argpromotion | FileCheck %s --check-prefixes=ALL,ARGPROMOTION_NEWPM +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR %S = type { %S* } ; Inlining should nuke the invoke (and any inlined calls) here even with ; argument promotion running along with it. define void @zot() personality i32 (...)* @wibble { -; ALL-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble -; ALL-NEXT: bb: -; ALL-NEXT: unreachable -; ALL: hoge.exit: -; ALL-NEXT: br label [[BB1:%.*]] -; ALL: bb1: -; ALL-NEXT: unreachable -; ALL: bb2: -; ALL-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } -; ALL-NEXT: cleanup -; ALL-NEXT: unreachable +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: unreachable +; ARGPROMOTION_OLDPM: hoge.exit: +; ARGPROMOTION_OLDPM-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION_OLDPM: bb1: +; ARGPROMOTION_OLDPM-NEXT: unreachable +; ARGPROMOTION_OLDPM: bb2: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION_OLDPM-NEXT: cleanup +; ARGPROMOTION_OLDPM-NEXT: unreachable ; -; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble -; ARGPROMOTION-NEXT: bb: -; ARGPROMOTION-NEXT: unreachable -; ARGPROMOTION: hoge.exit: -; ARGPROMOTION-NEXT: br label [[BB1:%.*]] -; ARGPROMOTION: bb1: -; ARGPROMOTION-NEXT: unreachable -; ARGPROMOTION: bb2: -; ARGPROMOTION-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } -; ARGPROMOTION-NEXT: cleanup -; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: unreachable +; ARGPROMOTION_NEWPM: hoge.exit: +; ARGPROMOTION_NEWPM-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION_NEWPM: bb1: +; ARGPROMOTION_NEWPM-NEXT: unreachable +; ARGPROMOTION_NEWPM: bb2: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION_NEWPM-NEXT: cleanup +; ARGPROMOTION_NEWPM-NEXT: unreachable +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@zot() #0 personality i32 (...)* @wibble +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: call void @hoge() +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb.split: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb1.i2c: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb1: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb2: +; ATTRIBUTOR-NEXT: unreachable ; bb: invoke void @hoge() @@ -45,6 +59,10 @@ } define internal void @hoge() { +; ATTRIBUTOR-LABEL: define {{[^@]+}}@hoge() +; ATTRIBUTOR-NEXT: bb: +; 
ATTRIBUTOR-NEXT: unreachable +; bb: %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney) @@ -61,10 +79,6 @@ ; ARGPROMOTION_NEWPM-NEXT: bb: ; ARGPROMOTION_NEWPM-NEXT: unreachable ; -; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs() -; ALL_NEWPM-NEXT: bb: -; ALL_NEWPM-NEXT: unreachable -; bb: %tmp = call zeroext i1 @barney(i8* %arg) unreachable @@ -76,21 +90,28 @@ } define i32 @test_inf_promote_caller(i32 %arg) { -; ALL-LABEL: define {{[^@]+}}@test_inf_promote_caller -; ALL-SAME: (i32 [[ARG:%.*]]) -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] -; ALL-NEXT: [[TMP1:%.*]] = alloca [[S]] -; ALL-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) -; ALL-NEXT: ret i32 0 +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION_OLDPM-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION_OLDPM-NEXT: ret i32 0 ; -; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller -; ARGPROMOTION-SAME: (i32 [[ARG:%.*]]) -; ARGPROMOTION-NEXT: bb: -; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = alloca [[S]] -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) -; ARGPROMOTION-NEXT: ret i32 0 +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION_NEWPM-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION_NEWPM-NEXT: ret i32 0 +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ATTRIBUTOR-SAME: (i32 [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb.split: +; ATTRIBUTOR-NEXT: unreachable ; bb: %tmp = alloca %S @@ -100,26 +121,26 @@ ret i32 0 } -define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { -; ALL-LABEL: define {{[^@]+}}@test_inf_promote_callee -; ALL-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 -; ALL-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] -; ALL-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 -; ALL-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] -; ALL-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) -; ALL-NEXT: ret i32 0 +define internal i32 @test_inf_promote_callee(%S* nocapture readonly %arg, %S* nocapture readonly %arg1) { +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@test_inf_promote_callee +; ARGPROMOTION_OLDPM-SAME: (%S* nocapture readonly [[ARG:%.*]], %S* nocapture readonly [[ARG1:%.*]]) +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 +; ARGPROMOTION_OLDPM-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 +; ARGPROMOTION_OLDPM-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) +; ARGPROMOTION_OLDPM-NEXT: unreachable ; -; 
ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee -; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) -; ARGPROMOTION-NEXT: bb: -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] -; ARGPROMOTION-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] -; ARGPROMOTION-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) -; ARGPROMOTION-NEXT: ret i32 0 +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@test_inf_promote_callee +; ARGPROMOTION_NEWPM-SAME: (%S* nocapture readonly [[ARG:%.*]], %S* nocapture readonly [[ARG1:%.*]]) +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 +; ARGPROMOTION_NEWPM-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 +; ARGPROMOTION_NEWPM-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) +; ARGPROMOTION_NEWPM-NEXT: unreachable ; bb: %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0 @@ -127,8 +148,7 @@ %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0 %tmp4 = load %S*, %S** %tmp3 %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) - - ret i32 0 + unreachable } declare i32 @wibble(...) diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll --- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll @@ -1,35 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %union.u = type { x86_fp80 } %struct.s = type { double, i16, i8, [5 x i8] } - -@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16 - %struct.Foo = type { i32, i64 } -@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 -define void @run() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@run() +define x86_fp80 @run(%struct.Foo* %a, %struct.s* %b, i8* %i8, i64* %i64a, i64* %i64b) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@run +; ARGPROMOTION-SAME: (%struct.Foo* [[A:%.*]], %struct.s* [[B:%.*]], i8* [[I8:%.*]], i64* [[I64A:%.*]], i64* [[I64B:%.*]]) ; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 -; ARGPROMOTION-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = 
call i64 @AccessPaddingOfStruct(%struct.Foo* @a) -; ARGPROMOTION-NEXT: [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a) -; ARGPROMOTION-NEXT: ret void +; ARGPROMOTION-NEXT: [[BC:%.*]] = bitcast %struct.s* [[B]] to %union.u* +; ARGPROMOTION-NEXT: [[V0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 [[BC]]) +; ARGPROMOTION-NEXT: store i8 [[V0]], i8* [[I8]] +; ARGPROMOTION-NEXT: [[BC_0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* [[BC]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[BC_0_VAL:%.*]] = load x86_fp80, x86_fp80* [[BC_0]] +; ARGPROMOTION-NEXT: [[V1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[BC_0_VAL]]) +; ARGPROMOTION-NEXT: [[V2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* [[A]]) +; ARGPROMOTION-NEXT: store i64 [[V2]], i64* [[I64A]] +; ARGPROMOTION-NEXT: [[V3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* [[A]]) +; ARGPROMOTION-NEXT: store i64 [[V3]], i64* [[I64B]] +; ARGPROMOTION-NEXT: ret x86_fp80 [[V1]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@run +; ATTRIBUTOR-SAME: (%struct.Foo* [[A:%.*]], %struct.s* nocapture readonly [[B:%.*]], i8* nocapture writeonly [[I8:%.*]], i64* nocapture writeonly [[I64A:%.*]], i64* nocapture readnone [[I64B:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[BC:%.*]] = bitcast %struct.s* [[B]] to %union.u* +; ATTRIBUTOR-NEXT: [[V0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* nocapture readonly byval align 16 [[BC]]) +; ATTRIBUTOR-NEXT: store i8 [[V0]], i8* [[I8]] +; ATTRIBUTOR-NEXT: [[V2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* nocapture [[A]]) +; ATTRIBUTOR-NEXT: store i64 [[V2]], i64* [[I64A]] +; ATTRIBUTOR-NEXT: [[V3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* [[A]]) +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: entry.split: +; ATTRIBUTOR-NEXT: unreachable ; entry: - tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) - tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) - call i64 @AccessPaddingOfStruct(%struct.Foo* @a) - call i64 @CaptureAStruct(%struct.Foo* @a) - ret void + %bc = bitcast %struct.s* %b to %union.u* + %v0 = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %bc) + store i8 %v0, i8* %i8 + %v1 = tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %bc) + %v2 = call i64 @AccessPaddingOfStruct(%struct.Foo* %a) + store i64 %v2, i64* %i64a + %v3 = call i64 @CaptureAStruct(%struct.Foo* %a) + store i64 %v3, i64* %i64b + ret x86_fp80 %v1 } define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { @@ -41,6 +60,14 @@ ; ARGPROMOTION-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] ; ARGPROMOTION-NEXT: ret i8 [[RESULT]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely +; ATTRIBUTOR-SAME: (%union.u* nocapture readonly byval align 16 [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s* +; ATTRIBUTOR-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2 +; ATTRIBUTOR-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] +; ATTRIBUTOR-NEXT: ret i8 [[RESULT]] +; entry: %bitcast = bitcast %union.u* %arg to %struct.s* %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 @@ -69,6 +96,12 @@ ; ARGPROMOTION-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* ; ARGPROMOTION-NEXT: [[V:%.*]] = load i64, i64* [[P]] ; ARGPROMOTION-NEXT: ret i64 [[V]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@AccessPaddingOfStruct +; 
ATTRIBUTOR-SAME: (%struct.Foo* nocapture readonly byval [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* +; ATTRIBUTOR-NEXT: [[V:%.*]] = load i64, i64* [[P]] +; ATTRIBUTOR-NEXT: ret i64 [[V]] ; %p = bitcast %struct.Foo* %a to i64* %v = load i64, i64* %p @@ -88,6 +121,18 @@ ; ARGPROMOTION-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 ; ARGPROMOTION-NEXT: br label [[LOOP]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@CaptureAStruct +; ATTRIBUTOR-SAME: (%struct.Foo* writeonly byval [[A:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* +; ATTRIBUTOR-NEXT: br label [[LOOP:%.*]] +; ATTRIBUTOR: loop: +; ATTRIBUTOR-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ] +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ] +; ATTRIBUTOR-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8 +; ATTRIBUTOR-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 +; ATTRIBUTOR-NEXT: br label [[LOOP]] +; entry: %a_ptr = alloca %struct.Foo* br label %loop diff --git a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll @@ -1,44 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.ss = type { i32, i32 } -; Argpromote + sroa should change this to passing the two integers by value. -define internal i32 @f(%struct.ss* inalloca %s) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]] -; ARGPROMOTION-NEXT: ret i32 [[R]] -; -entry: - %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 - %a = load i32, i32* %f0, align 4 - %b = load i32, i32* %f1, align 4 - %r = add i32 %a, %b - ret i32 %r -} - -define i32 @main() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@main() local_unnamed_addr -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2) -; ARGPROMOTION-NEXT: ret i32 [[R]] -; -entry: - %S = alloca inalloca %struct.ss - %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 - store i32 1, i32* %f0, align 4 - store i32 2, i32* %f1, align 4 - %r = call i32 @f(%struct.ss* inalloca %S) - ret i32 %r -} - ; Argpromote can't promote %a because of the icmp use. 
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { ; ARGPROMOTION-LABEL: define {{[^@]+}}@g @@ -59,6 +27,10 @@ ; ARGPROMOTION-NEXT: [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]]) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %S = alloca inalloca %struct.ss %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) diff --git a/llvm/test/Transforms/ArgumentPromotion/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/invalidation.ll --- a/llvm/test/Transforms/ArgumentPromotion/invalidation.ll +++ b/llvm/test/Transforms/ArgumentPromotion/invalidation.ll @@ -7,7 +7,8 @@ ; invalidation this will crash in the second printer as it tries to reuse ; now-invalid demanded bits. ; -; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=6 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR @G = constant i32 0 @@ -17,6 +18,12 @@ ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: ret i32 [[X_VAL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@a +; ATTRIBUTOR-SAME: (i32* nocapture nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = load i32, i32* @G, align 4 +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = load i32, i32* %x ret i32 %v @@ -29,6 +36,11 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = call i32 @a(i32 [[G_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@b() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = call i32 @a(i32* nonnull align 4 dereferenceable(4) @G) +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = call i32 @a(i32* @G) ret i32 %v @@ -43,6 +55,13 @@ ; ARGPROMOTION-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] ; ARGPROMOTION-NEXT: ret i32 [[RESULT]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@c() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V1:%.*]] = call i32 @a(i32* nonnull align 4 dereferenceable(4) @G) +; ATTRIBUTOR-NEXT: [[V2:%.*]] = call i32 @b() +; ATTRIBUTOR-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] +; ATTRIBUTOR-NEXT: ret i32 [[RESULT]] +; entry: %v1 = call i32 @a(i32* @G) %v2 = call i32 @b() diff --git a/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll --- a/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll +++ b/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Don't promote paramaters of/arguments to naked functions @@ -11,18 +12,23 @@ ; ARGPROMOTION-NEXT: [[CALL:%.*]] = call i32 @foo(i32* @g) ; ARGPROMOTION-NEXT: ret i32 [[CALL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call i32 @foo(i32* nonnull 
align 4 dereferenceable(4) @g) +; ATTRIBUTOR-NEXT: ret i32 [[CALL]] +; entry: %call = call i32 @foo(i32* @g) ret i32 %call } define internal i32 @foo(i32*) #0 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo -; ARGPROMOTION-SAME: (i32* [[TMP0:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; ARGPROMOTION-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() -; ARGPROMOTION-NEXT: unreachable +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i32* [[TMP0:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; ALL-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; ALL-NEXT: unreachable ; entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll --- a/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll +++ b/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; ArgumentPromotion should preserve the default function address space ; from the data layout. @@ -11,21 +12,27 @@ define i32 @bar() { ; ARGPROMOTION-LABEL: define {{[^@]+}}@bar() addrspace(1) ; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo() +; ARGPROMOTION-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo(i32* @g) ; ARGPROMOTION-NEXT: ret i32 [[CALL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar() addrspace(1) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo(i32* nonnull align 4 dereferenceable(4) @g) +; ATTRIBUTOR-NEXT: ret i32 [[CALL]] +; entry: %call = call i32 @foo(i32* @g) ret i32 %call } -define internal i32 @foo(i32*) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() addrspace(1) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; ARGPROMOTION-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() -; ARGPROMOTION-NEXT: unreachable +define internal i32 @foo(i32*) naked { +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i32* [[TMP0:%.*]]) addrspace(1) +; ALL-NEXT: entry: +; ALL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; ALL-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; ALL-NEXT: unreachable ; entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/pr27568.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr27568.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr27568.ll @@ -1,14 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false 
-attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; RUN: opt -S -debugify -o /dev/null < %s target triple = "x86_64-pc-windows-msvc" define internal void @callee(i8*) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: call void @thunk() -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@callee() +; ALL-NEXT: entry: +; ALL-NEXT: call void @thunk() +; ALL-NEXT: ret void ; entry: call void @thunk() @@ -16,16 +17,16 @@ } define void @test1() personality i32 (...)* @__CxxFrameHandler3 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: invoke void @thunk() -; ARGPROMOTION-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] -; ARGPROMOTION: out: -; ARGPROMOTION-NEXT: ret void -; ARGPROMOTION: cpad: -; ARGPROMOTION-NEXT: [[PAD:%.*]] = cleanuppad within none [] -; ARGPROMOTION-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] -; ARGPROMOTION-NEXT: cleanupret from [[PAD]] unwind to caller +; ALL-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 +; ALL-NEXT: entry: +; ALL-NEXT: invoke void @thunk() +; ALL-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] +; ALL: out: +; ALL-NEXT: ret void +; ALL: cpad: +; ALL-NEXT: [[PAD:%.*]] = cleanuppad within none [] +; ALL-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] +; ALL-NEXT: cleanupret from [[PAD]] unwind to caller ; entry: invoke void @thunk() diff --git a/llvm/test/Transforms/ArgumentPromotion/pr3085.ll b/llvm/test/Transforms/ArgumentPromotion/pr3085.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr3085.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr3085.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -disable-output -loop-extract-single -loop-rotate -loop-reduce -argpromotion +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s ; PR 3085 %struct.Lit = type { i8 } diff --git a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR 32917 @b = common local_unnamed_addr global i32 0, align 4 @@ -14,6 +15,13 @@ ; ARGPROMOTION-NEXT: [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4 ; ARGPROMOTION-NEXT: call fastcc void @fn1(i32 [[DOTIDX_VAL]]) ; ARGPROMOTION-NEXT: ret i32 undef +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; ATTRIBUTOR-NEXT: call fastcc void @fn1(i32* [[TMP3]]) +; ATTRIBUTOR-NEXT: ret i32 undef ; %1 = load i32, i32* @b, align 4 %2 = sext i32 %1 to i64 @@ -27,6 +35,13 @@ ; ARGPROMOTION-SAME: 
(i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr ; ARGPROMOTION-NEXT: store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4 ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn1 +; ATTRIBUTOR-SAME: (i32* nocapture readonly [[TMP0:%.*]]) unnamed_addr +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 -1 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; ATTRIBUTOR-NEXT: store i32 [[TMP3]], i32* @a, align 4 +; ATTRIBUTOR-NEXT: ret void ; %2 = getelementptr inbounds i32, i32* %0, i64 -1 %3 = load i32, i32* %2, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='function(dse),attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to ; dbg.value which still used the removed argument. @@ -7,12 +8,13 @@ ; The %p argument should be removed, and the use of it in dbg.value should be ; changed to undef. + %p_t = type i16* %fun_t = type void (%p_t)* define void @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@foo() +; ALL-NEXT: ret void ; %tmp = alloca %fun_t store %fun_t @bar, %fun_t* %tmp diff --git a/llvm/test/Transforms/ArgumentPromotion/profile.ll b/llvm/test/Transforms/ArgumentPromotion/profile.ll --- a/llvm/test/Transforms/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/ArgumentPromotion/profile.ll @@ -1,13 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -argpromotion -mem2reg -S < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -argpromotion -mem2reg -S < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Checks if !prof metadata is corret in deadargelim. 
define void @caller() #0 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@caller() -; ARGPROMOTION-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@caller() +; ALL-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 +; ALL-NEXT: ret void ; %x = alloca i32 store i32 42, i32* %x @@ -16,10 +17,10 @@ } define internal void @promote_i32_ptr(i32* %xp) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@promote_i32_ptr -; ARGPROMOTION-SAME: (i32 [[XP_VAL:%.*]]) -; ARGPROMOTION-NEXT: call void @use_i32(i32 [[XP_VAL]]) -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@promote_i32_ptr +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: call void @use_i32(i32 [[TMP0]]) +; ALL-NEXT: ret void ; %x = load i32, i32* %xp call void @use_i32(i32 %x) diff --git a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll --- a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll +++ b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll @@ -1,12 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR17906 ; When we promote two arguments in a single function with different types, ; before the fix, we used the same tag for the newly-created two loads. ; This testing case makes sure that we correctly transfer the tbaa tags from the ; original loads to the newly-created loads when promoting pointer arguments. +; +; TODO: This test doesn't work with the ATTRIBUTOR as it will not promote the arguments but propagate them. 
 @a = global i32* null, align 8
 @e = global i32** @a, align 8
@@ -23,6 +26,16 @@
 ; ARGPROMOTION-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !0
 ; ARGPROMOTION-NEXT: ret void
 ;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn
+; ATTRIBUTOR-SAME: (i32* nocapture nonnull readonly align 4 dereferenceable(4) [[P1:%.*]])
+; ATTRIBUTOR-NEXT: entry:
+; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i64, i64* @c, align 8, !tbaa !0
+; ATTRIBUTOR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
+; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* @g, align 4, !tbaa !4
+; ATTRIBUTOR-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP1]] to i8
+; ATTRIBUTOR-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !6
+; ATTRIBUTOR-NEXT: ret void
+;
 entry:
 %0 = load i64, i64* %p2, align 8, !tbaa !1
 %conv = trunc i64 %0 to i32
@@ -44,6 +57,15 @@
 ; ARGPROMOTION-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]])
 ; ARGPROMOTION-NEXT: ret i32 0
 ;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@main()
+; ATTRIBUTOR-NEXT: entry:
+; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !7
+; ATTRIBUTOR-NEXT: store i32* @g, i32** [[TMP0]], align 8, !tbaa !7
+; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !7
+; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 4, !tbaa !4
+; ATTRIBUTOR-NEXT: call fastcc void @fn(i32* nonnull align 4 dereferenceable(4) @g)
+; ATTRIBUTOR-NEXT: ret i32 0
+;
 entry:
 %0 = load i32**, i32*** @e, align 8, !tbaa !8
 store i32* @g, i32** %0, align 8, !tbaa !8
diff --git a/llvm/test/Transforms/ArgumentPromotion/sret.ll b/llvm/test/Transforms/ArgumentPromotion/sret.ll
--- a/llvm/test/Transforms/ArgumentPromotion/sret.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/sret.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
-; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
+; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR

 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc"
@@ -11,6 +12,16 @@
 ; ARGPROMOTION-NEXT: [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]]
 ; ARGPROMOTION-NEXT: store i32 [[AB]], i32* [[R]]
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@add
+; ATTRIBUTOR-SAME: ({ i32, i32 }* noalias nocapture nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
+; ATTRIBUTOR-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0
+; ATTRIBUTOR-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1
+; ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8
+; ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[BP]]
+; ATTRIBUTOR-NEXT: [[AB:%.*]] = add i32 [[A]], [[B]]
+; ATTRIBUTOR-NEXT: store i32 [[AB]], i32* [[R]], align 4
+; ATTRIBUTOR-NEXT: ret void
 ;
 %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0
 %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1
@@ -31,6 +42,12 @@
 ; ARGPROMOTION-NEXT: [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]]
 ; ARGPROMOTION-NEXT: call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]])
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@f()
+; ATTRIBUTOR-NEXT: [[R:%.*]] = alloca i32
+; ATTRIBUTOR-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }
+; ATTRIBUTOR-NEXT: call void @add({ i32, i32 }* noalias nocapture nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nonnull sret writeonly align 4 dereferenceable(4) [[R]])
+; ATTRIBUTOR-NEXT: ret void
 ;
 %r = alloca i32
 %pair = alloca {i32, i32}
diff --git a/llvm/test/Transforms/ArgumentPromotion/tail.ll b/llvm/test/Transforms/ArgumentPromotion/tail.ll
--- a/llvm/test/Transforms/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/tail.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -argpromotion -S -o - | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
-; RUN: opt %s -passes=argpromotion -S -o - | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
+; RUN: opt %s -argpromotion -sroa -S -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt %s -passes='argpromotion,function(sroa)' -S -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR
 ; PR14710

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -19,6 +20,16 @@
 ; ARGPROMOTION-NEXT: store i32 [[DATA_1]], i32* [[DOT1]]
 ; ARGPROMOTION-NEXT: [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]])
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar
+; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; ATTRIBUTOR-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
+; ATTRIBUTOR-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
+; ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]]
+; ATTRIBUTOR-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
+; ATTRIBUTOR-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]]
+; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* nonnull [[DATA_PRIV]])
+; ATTRIBUTOR-NEXT: ret void
 ;
 tail call i8* @foo(%pair* %Data)
 ret void
@@ -33,6 +44,15 @@
 ; ARGPROMOTION-NEXT: [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]]
 ; ARGPROMOTION-NEXT: call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]])
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@zed
+; ATTRIBUTOR-SAME: (%pair* nonnull byval [[DATA:%.*]])
+; ATTRIBUTOR-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32*
+; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]]
+; ATTRIBUTOR-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 1
+; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]]
+; ATTRIBUTOR-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]])
+; ATTRIBUTOR-NEXT: ret void
 ;
 call void @bar(%pair* byval %Data)
 ret void
diff --git a/llvm/test/Transforms/ArgumentPromotion/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/variadic.ll
--- a/llvm/test/Transforms/ArgumentPromotion/variadic.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/variadic.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
-; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
+; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR

 ; Unused arguments from variadic functions cannot be eliminated as that changes
 ; their classification according to the SysV amd64 ABI. Clang and other frontends
@@ -16,11 +17,11 @@
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
-; ARGPROMOTION-LABEL: define {{[^@]+}}@main
-; ARGPROMOTION-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]])
-; ARGPROMOTION-NEXT: entry:
-; ARGPROMOTION-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
-; ARGPROMOTION-NEXT: ret i32 0
+; ALL-LABEL: define {{[^@]+}}@main
+; ALL-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]])
+; ALL-NEXT: entry:
+; ALL-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+; ALL-NEXT: ret i32 0
 ;
 entry:
 tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
@@ -34,6 +35,11 @@
 ; ARGPROMOTION-NEXT: entry:
 ; ARGPROMOTION-NEXT: ret void
 ;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_t0f
+; ATTRIBUTOR-SAME: (i8* noalias nocapture nonnull readnone [[TP13:%.*]], i8* noalias nocapture nonnull readnone [[TP14:%.*]], i8* noalias nocapture nonnull readnone [[TP15:%.*]], i8* noalias nocapture nonnull readnone [[TP16:%.*]], i8* noalias nocapture nonnull readnone [[TP17:%.*]], ...)
+; ATTRIBUTOR-NEXT: entry:
+; ATTRIBUTOR-NEXT: ret void
+;
 entry:
 ret void
 }
diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/FunctionAttrs/callbacks.ll
--- a/llvm/test/Transforms/FunctionAttrs/callbacks.ll
+++ b/llvm/test/Transforms/FunctionAttrs/callbacks.ll
@@ -24,7 +24,7 @@
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK-NEXT: store i32 42, i32* [[B]], align 32
 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
-; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
 ; CHECK-NEXT: ret void
 ;
 entry:
@@ -61,5 +61,112 @@
 declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...)

+; Test 1
+;
+; Similar to test 0 but with some additional annotations (noalias/nocapture) to make sure
+; we deduce and propagate noalias and others properly.
+
+define void @t1_caller(i32* noalias %a) {
+; CHECK-LABEL: define {{[^@]+}}@t1_caller
+; CHECK-SAME: (i32* noalias nocapture [[A:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32
+; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: store i32 42, i32* [[B]], align 32
+; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %b = alloca i32, align 32
+  %c = alloca i32*, align 64
+  %ptr = alloca i32, align 128
+  %0 = bitcast i32* %b to i8*
+  store i32 42, i32* %b, align 4
+  store i32* %b, i32** %c, align 8
+  call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+  ret void
+}
+
+; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
+; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
+define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
+; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee
+; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
+; CHECK-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %ptr_val = load i32, i32* %ptr, align 8
+  store i32 %ptr_val, i32* %is_not_null
+  %0 = load i32*, i32** %c, align 8
+  tail call void @t1_check(i32* %a, i64 %b, i32* %0)
+  ret void
+}
+
+declare void @t1_check(i32* nocapture align 256, i64, i32* nocapture) nosync
+
+declare !callback !0 void @t1_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+
+; Test 2
+;
+; Similar to test 1 but checking that the noalias is only placed if potential synchronization through @t2_check is preserved.
+
+define void @t2_caller(i32* noalias %a) {
+; CHECK-LABEL: define {{[^@]+}}@t2_caller
+; CHECK-SAME: (i32* noalias nocapture [[A:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32
+; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: store i32 42, i32* [[B]], align 32
+; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %b = alloca i32, align 32
+  %c = alloca i32*, align 64
+  %ptr = alloca i32, align 128
+  %0 = bitcast i32* %b to i8*
+  store i32 42, i32* %b, align 4
+  store i32* %b, i32** %c, align 8
+  call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+  ret void
+}
+
+; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
+; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
+;
+; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls.
+define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
+; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee
+; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[PTR:%.*]], i32* nocapture [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
+; CHECK-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %ptr_val = load i32, i32* %ptr, align 8
+  store i32 %ptr_val, i32* %is_not_null
+  %0 = load i32*, i32** %c, align 8
+  tail call void @t2_check(i32* %a, i64 %b, i32* %0)
+  ret void
+}
+
+declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture)
+
+declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+
 !0 = !{!1}
 !1 = !{i64 2, i64 -1, i64 -1, i1 true}
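
; Reviewer note (not part of the patch): both callback tests rely on the
; !callback encoding !{i64 2, i64 -1, i64 -1, i1 true} at the end of the file.
; It states that argument 2 of a broker call site is the callback callee, that
; the callee's first two parameters are supplied by the broker itself (-1 marks
; them as unknown at the call site), and that the broker's trailing variadic
; arguments are forwarded as the remaining callee parameters. A minimal,
; self-contained sketch of that shape, using hypothetical @example_* names:
;
;   declare !callback !10 void @example_broker(i32*, i32*, void (i32*, i32*, ...)*, ...)
;
;   define internal void @example_callee(i32* %from_broker0, i32* %from_broker1, i32* %payload) {
;   entry:
;     ret void
;   }
;
;   define void @example_caller(i32* %payload) {
;   entry:
;     ; %payload is the only forwarded vararg; the broker provides the callee's
;     ; first two parameters itself.
;     call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @example_broker(i32* null, i32* null, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @example_callee to void (i32*, i32*, ...)*), i32* %payload)
;     ret void
;   }
;
;   !10 = !{!11}
;   !11 = !{i64 2, i64 -1, i64 -1, i1 true}
;
; Under this encoding the Attributor can propagate attributes from the forwarded
; call-site operands (here %a, 99, and %c) to the corresponding parameters of the
; callback callee, which is what the CHECK lines for @t1_callback_callee and
; @t2_callback_callee verify.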