diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1519,6 +1519,13 @@
                      bool CheckBBLivenessOnly = false,
                      DepClassTy DepClass = DepClassTy::OPTIONAL);
 
+  /// Return true if \p BB is assumed dead.
+  ///
+  /// If \p LivenessAA is not provided it is queried.
+  bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
+                     const AAIsDead *FnLivenessAA,
+                     DepClassTy DepClass = DepClassTy::OPTIONAL);
+
   /// Check \p Pred on all (transitive) uses of \p V.
   ///
   /// This method will evaluate \p Pred on all (transitive) uses of the
@@ -2371,7 +2378,8 @@
 /// IRAttribute::manifest is defined in the Attributor.cpp.
 struct IRAttributeManifest {
   static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
-                                    const ArrayRef<Attribute> &DeducedAttrs);
+                                    const ArrayRef<Attribute> &DeducedAttrs,
+                                    bool ForceReplace = false);
 };
 
 /// Helper to tie a abstract state implementation to an abstract attribute.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -96,6 +96,8 @@
 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
 
 Pass *createAMDGPUAnnotateKernelFeaturesPass();
+Pass *createAMDGPUAttributorPass();
+void initializeAMDGPUAttributorPass(PassRegistry &);
 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
 extern char &AMDGPUAnnotateKernelFeaturesID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -0,0 +1,538 @@
+//===- AMDGPUAttributor.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+
+#define DEBUG_TYPE "amdgpu-attributor"
+
+using namespace llvm;
+
+static constexpr StringLiteral ImplicitAttrNames[] = {
+    // X ids unnecessarily propagated to kernels.
+    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
+    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
+    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
+    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
+    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};
+
+// We do not need to note the x workitem or workgroup id because they are
+// always initialized.
+//
+// TODO: We should not add the attributes if the known compile time workgroup
+// size is 1 for y/z.
+static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
+                                     bool &IsQueuePtr) {
+  switch (ID) {
+  case Intrinsic::amdgcn_workitem_id_x:
+    NonKernelOnly = true;
+    return "amdgpu-work-item-id-x";
+  case Intrinsic::amdgcn_workgroup_id_x:
+    NonKernelOnly = true;
+    return "amdgpu-work-group-id-x";
+  case Intrinsic::amdgcn_workitem_id_y:
+  case Intrinsic::r600_read_tidig_y:
+    return "amdgpu-work-item-id-y";
+  case Intrinsic::amdgcn_workitem_id_z:
+  case Intrinsic::r600_read_tidig_z:
+    return "amdgpu-work-item-id-z";
+  case Intrinsic::amdgcn_workgroup_id_y:
+  case Intrinsic::r600_read_tgid_y:
+    return "amdgpu-work-group-id-y";
+  case Intrinsic::amdgcn_workgroup_id_z:
+  case Intrinsic::r600_read_tgid_z:
+    return "amdgpu-work-group-id-z";
+  case Intrinsic::amdgcn_dispatch_ptr:
+    return "amdgpu-dispatch-ptr";
+  case Intrinsic::amdgcn_dispatch_id:
+    return "amdgpu-dispatch-id";
+  case Intrinsic::amdgcn_kernarg_segment_ptr:
+    return "amdgpu-kernarg-segment-ptr";
+  case Intrinsic::amdgcn_implicitarg_ptr:
+    return "amdgpu-implicitarg-ptr";
+  case Intrinsic::amdgcn_queue_ptr:
+  case Intrinsic::amdgcn_is_shared:
+  case Intrinsic::amdgcn_is_private:
+    // TODO: Does not require queue ptr on gfx9+
+  case Intrinsic::trap:
+  case Intrinsic::debugtrap:
+    IsQueuePtr = true;
+    return "amdgpu-queue-ptr";
+  default:
+    return "";
+  }
+}
+
+static bool castRequiresQueuePtr(unsigned SrcAS) {
+  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
+}
+
+static bool isDSAddress(const Constant *C) {
+  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
+  if (!GV)
+    return false;
+  unsigned AS = GV->getAddressSpace();
+  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
+}
+
+class AMDGPUInformationCache : public InformationCache {
+public:
+  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
+                         BumpPtrAllocator &Allocator,
+                         SetVector<Function *> *CGSCC, TargetMachine &TM)
+      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
+  TargetMachine &TM;
+
+  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
+
+  bool hasApertureRegs(Function &F) {
+    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+    return ST.hasApertureRegs();
+  }
+
+private:
+  static bool visitConstExpr(const ConstantExpr *CE) {
+    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
+      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
+      return castRequiresQueuePtr(SrcAS);
+    }
+    return false;
+  }
+
+  uint8_t getConstantAccess(const Constant *C) {
+    auto It = ConstantStatus.find(C);
+    if (It != ConstantStatus.end())
+      return It->second;
+
+    uint8_t Result = 0;
+    if (isDSAddress(C))
+      Result = DS_GLOBAL;
+
+    if (const auto *CE = dyn_cast<ConstantExpr>(C))
+      if (visitConstExpr(CE))
+        Result |= ADDR_SPACE_CAST;
+
+    for (const Use &U : C->operands()) {
+      const auto *OpC = dyn_cast<Constant>(U);
+      if (!OpC)
+        continue;
+
+      Result |= getConstantAccess(OpC);
+    }
+    return Result;
+  }
+
+public:
+  bool needsQueuePtr(const Constant *C, Function &Fn) {
+    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
+    bool HasAperture = hasApertureRegs(Fn);
+
+    // No need to explore the constants.
+    if (!IsNonEntryFunc && HasAperture)
+      return false;
+
+    uint8_t Access = getConstantAccess(C);
+
+    // We need to trap on DS globals in non-entry functions.
+    if (IsNonEntryFunc && (Access & DS_GLOBAL))
+      return true;
+
+    return !HasAperture && (Access & ADDR_SPACE_CAST);
+  }
+
+private:
+  DenseMap<const Constant *, uint8_t> ConstantStatus;
+};
+
+struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
+                                            Attributor &A);
+
+  /// See AbstractAttribute::getName().
+  const std::string getName() const override { return "AAAMDAttributes"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDAttributes.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  virtual const DenseSet<StringRef> &getAttributes() const = 0;
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+const char AAAMDAttributes::ID = 0;
+
+struct AAAMDWorkGroupSize
+    : public StateWrapper<BooleanState, AbstractAttribute> {
+  using Base = StateWrapper<BooleanState, AbstractAttribute>;
+  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
+                                               Attributor &A);
+
+  /// See AbstractAttribute::getName().
+  const std::string getName() const override { return "AAAMDWorkGroupSize"; }
+
+  /// See AbstractAttribute::getIdAddr().
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is
+  /// AAAMDAttributes.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  virtual Optional<bool> getUniformWorkGroupSize() const = 0;
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+const char AAAMDWorkGroupSize::ID = 0;
+
+struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
+  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
+      : AAAMDWorkGroupSize(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    CallingConv::ID CC = F->getCallingConv();
+    if (F->hasFnAttribute("uniform-work-group-size"))
+      UniformWorkGroupSize = F->getFnAttribute("uniform-work-group-size")
+                                 .getValueAsString()
+                                 .equals("true");
+    else if (CC == CallingConv::AMDGPU_KERNEL)
+      UniformWorkGroupSize = false;
+
+    LLVM_DEBUG(dbgs() << "Initial value " << UniformWorkGroupSize << "\n");
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+    Optional<bool> NewUniformWorkGroupSize = UniformWorkGroupSize;
+    auto CheckCallSite = [&](AbstractCallSite CS) {
+      Function *Caller = CS.getInstruction()->getFunction();
+      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
+                        << "->" << F->getName() << "\n");
+
+      if (!F->hasExactDefinition()) {
+        LLVM_DEBUG(dbgs() << "[AMDWorkGroupSize] Giving up: " << F->getName()
+                          << "\n");
+        NewUniformWorkGroupSize = false;
+        return true;
+      }
+
+      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
+          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+      // Propagate it from Caller to Callee.
+      Optional<bool> CallerWorkGroupSize = CallerInfo.getUniformWorkGroupSize();
+      if (CallerWorkGroupSize.hasValue()) {
+        // Assume false if the callers have different values.
+        if (NewUniformWorkGroupSize.hasValue() &&
+            NewUniformWorkGroupSize != CallerWorkGroupSize) {
+          LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] WorkGroupSize conflict\n");
+          NewUniformWorkGroupSize = false;
+          return false;
+        }
+        NewUniformWorkGroupSize = CallerWorkGroupSize;
+      }
+
+      return true;
+    };
+
+    bool AllCallSitesKnown = true;
+    A.checkForAllCallSites(CheckCallSite, *this, false, AllCallSitesKnown);
+
+    if (NewUniformWorkGroupSize != UniformWorkGroupSize)
+      Change = ChangeStatus::CHANGED;
+    UniformWorkGroupSize = NewUniformWorkGroupSize;
+
+    return Change;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    SmallVector<Attribute, 8> AttrList;
+    LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+    if (!UniformWorkGroupSize.hasValue())
+      UniformWorkGroupSize = false;
+
+    AttrList.push_back(
+        Attribute::get(Ctx, "uniform-work-group-size",
+                       UniformWorkGroupSize.getValue() ? "true" : "false"));
+    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+                                              /* ForceReplace */ true);
+  }
+
+  const std::string getAsStr() const override {
+    std::string state = "none";
+    if (UniformWorkGroupSize.hasValue())
+      state = std::to_string(UniformWorkGroupSize.getValue());
+
+    return "AMDWorkGroupSize[" + state + "]";
+  }
+
+  virtual Optional<bool> getUniformWorkGroupSize() const override {
+    return UniformWorkGroupSize;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+
+private:
+  Optional<bool> UniformWorkGroupSize = llvm::None;
+};
+
+AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
+                                                          Attributor &A) {
+  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
+  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
+}
+
+struct AAAMDAttributesFunction : public AAAMDAttributes {
+  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
+      : AAAMDAttributes(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    CallingConv::ID CC = F->getCallingConv();
+    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+    // Ignore functions with graphics calling conventions, these are currently
+    // not allowed to have kernel arguments.
+    if (AMDGPU::isGraphics(F->getCallingConv())) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    for (StringRef Attr : ImplicitAttrNames) {
+      if (F->hasFnAttribute(Attr))
+        Attributes.insert(Attr);
+    }
+
+    if (CallingConvSupportsAllImplicits &&
+        F->hasAddressTaken(nullptr, true, true, true)) {
+      for (StringRef AttrName : ImplicitAttrNames) {
+        Attributes.insert(AttrName);
+      }
+    }
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    Function *F = getAssociatedFunction();
+    ChangeStatus Change = ChangeStatus::UNCHANGED;
+    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
+    CallingConv::ID CC = F->getCallingConv();
+    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+    auto AddAttribute = [&](StringRef AttrName) {
+      if (Attributes.insert(AttrName).second)
+        Change = ChangeStatus::CHANGED;
+    };
+
+    // Check for Intrinsics and propagate attributes.
+    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+        *this, this->getIRPosition(), DepClassTy::REQUIRED);
+
+    // We have to assume that we can reach a function with these attributes.
+    if (CallingConvSupportsAllImplicits && AAEdges.hasUnknownCallee()) {
+      for (StringRef AttrName : ImplicitAttrNames) {
+        AddAttribute(AttrName);
+      }
+    }
+
+    bool NeedsQueuePtr = false;
+    bool HasCall = false;
+    for (Function *Callee : AAEdges.getOptimisticEdges()) {
+      Intrinsic::ID IID = Callee->getIntrinsicID();
+      if (IID != Intrinsic::not_intrinsic) {
+        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
+          AddAttribute("amdgpu-kernarg-segment-ptr");
+          continue;
+        }
+
+        bool NonKernelOnly = false;
+        StringRef AttrName =
+            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);
+
+        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
+          AddAttribute(AttrName);
+
+        continue;
+      }
+
+      HasCall = true;
+      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
+          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
+      // Propagate implicit attributes from called function.
+      for (StringRef AttrName : ImplicitAttrNames)
+        if (CalleeAttributes.count(AttrName))
+          AddAttribute(AttrName);
+    }
+
+    HasCall |= AAEdges.hasUnknownCallee();
+    if (!IsNonEntryFunc && HasCall)
+      AddAttribute("amdgpu-calls");
+
+    // Check the function body.
+    auto CheckAlloca = [&](Instruction &I) {
+      AddAttribute("amdgpu-stack-objects");
+      return false;
+    };
+
+    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca});
+
+    auto CheckAddrSpaceCasts = [&](Instruction &I) {
+      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
+      if (castRequiresQueuePtr(SrcAS)) {
+        NeedsQueuePtr = true;
+        return false;
+      }
+      return true;
+    };
+
+    // If we found that we need amdgpu-queue-ptr, nothing else to do.
+    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
+      AddAttribute("amdgpu-queue-ptr");
+      return Change;
+    }
+
+    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
+
+    // `checkForAllInstructions` is much cheaper than walking every
+    // instruction by hand, so try it first.
+
+    // amdgpu-queue-ptr is not needed if aperture registers are present.
+    if (!HasApertureRegs)
+      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
+                                {Instruction::AddrSpaceCast});
+
+    // If we found that we need amdgpu-queue-ptr, nothing else to do.
+    if (NeedsQueuePtr) {
+      AddAttribute("amdgpu-queue-ptr");
+      return Change;
+    }
+
+    if (!IsNonEntryFunc && HasApertureRegs)
+      return Change;
+
+    for (BasicBlock &BB : *F) {
+      for (Instruction &I : BB) {
+        for (const Use &U : I.operands()) {
+          if (const auto *C = dyn_cast<Constant>(U)) {
+            if (InfoCache.needsQueuePtr(C, *F)) {
+              AddAttribute("amdgpu-queue-ptr");
+              return Change;
+            }
+          }
+        }
+      }
+    }
+
+    return Change;
+  }
+
+  ChangeStatus manifest(Attributor &A) override {
+    SmallVector<Attribute, 8> AttrList;
+    LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+    for (StringRef AttrName : Attributes)
+      AttrList.push_back(Attribute::get(Ctx, AttrName));
+
+    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+                                              /* ForceReplace */ true);
+  }
+
+  const std::string getAsStr() const override {
+    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
+  }
+
+  const DenseSet<StringRef> &getAttributes() const override {
+    return Attributes;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {}
+
+private:
+  DenseSet<StringRef> Attributes;
+};
+
+AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
+                                                    Attributor &A) {
+  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
+  llvm_unreachable("AAAMDAttributes is only valid for function position");
+}
+
+class AMDGPUAttributor : public ModulePass {
+public:
+  AMDGPUAttributor() : ModulePass(ID) {}
+
+  /// doInitialization - Virtual method overridden by subclasses to do
+  /// any necessary initialization before any pass is run.
+  bool doInitialization(Module &) override {
+    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+    if (!TPC)
+      report_fatal_error("TargetMachine is required");
+
+    TM = &TPC->getTM<TargetMachine>();
+    return false;
+  }
+
+  bool runOnModule(Module &M) override {
+    SetVector<Function *> Functions;
+    AnalysisGetter AG;
+    for (Function &F : M)
+      Functions.insert(&F);
+
+    CallGraphUpdater CGUpdater;
+    BumpPtrAllocator Allocator;
+    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
+    Attributor A(Functions, InfoCache, CGUpdater);
+
+    for (Function &F : M) {
+      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
+      A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
+    }
+
+    ChangeStatus Change = A.run();
+    return Change == ChangeStatus::CHANGED;
+  }
+
+  StringRef getPassName() const override { return "AMDGPU Attributor"; }
+  TargetMachine *TM;
+  static char ID;
+};
+
+char AMDGPUAttributor::ID = 0;
+
+Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
+INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -234,6 +234,7 @@
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUFixFunctionBitcastsPass(*PR);
   initializeAMDGPUAlwaysInlinePass(*PR);
+  initializeAMDGPUAttributorPass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPUArgumentUsageInfoPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -44,6 +44,7 @@
   AMDGPUAliasAnalysis.cpp
   AMDGPUAlwaysInlinePass.cpp
   AMDGPUAnnotateKernelFeatures.cpp
+  AMDGPUAttributor.cpp
   AMDGPUAnnotateUniformValues.cpp
   AMDGPUArgumentUsageInfo.cpp
   AMDGPUAsmPrinter.cpp
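As a brief, hedged aside (not part of the patch itself): the attributes deduced and manifested above are ordinary string attributes on the IR function, so later AMDGPU code can query them with existing Function/Attribute APIs. The two helper names below are hypothetical and only illustrate the query pattern.

    #include "llvm/IR/Function.h"

    // True if the Attributor concluded the function (possibly through its
    // callees) needs the workitem id Y implicit input.
    static bool usesWorkItemIDY(const llvm::Function &F) {
      return F.hasFnAttribute("amdgpu-work-item-id-y");
    }

    // AAAMDWorkGroupSize::manifest always writes "true" or "false", so a plain
    // string compare is enough once the pass has run.
    static bool hasUniformWorkGroupSize(const llvm::Function &F) {
      return F.getFnAttribute("uniform-work-group-size")
                 .getValueAsString()
                 .equals("true");
    }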
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -197,12 +197,14 @@
 /// attribute list \p Attrs. This is only the case if it was not already present
 /// in \p Attrs at the position describe by \p PK and \p AttrIdx.
 static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
-                             AttributeList &Attrs, int AttrIdx) {
+                             AttributeList &Attrs, int AttrIdx,
+                             bool ForceReplace = false) {
 
   if (Attr.isEnumAttribute()) {
     Attribute::AttrKind Kind = Attr.getKindAsEnum();
     if (Attrs.hasAttribute(AttrIdx, Kind))
-      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+      if (!ForceReplace &&
+          isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
         return false;
     Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
     return true;
@@ -210,7 +212,8 @@
   if (Attr.isStringAttribute()) {
     StringRef Kind = Attr.getKindAsString();
     if (Attrs.hasAttribute(AttrIdx, Kind))
-      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+      if (!ForceReplace &&
+          isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
         return false;
     Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
     return true;
@@ -218,7 +221,8 @@
   if (Attr.isIntAttribute()) {
     Attribute::AttrKind Kind = Attr.getKindAsEnum();
     if (Attrs.hasAttribute(AttrIdx, Kind))
-      if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+      if (!ForceReplace &&
+          isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
         return false;
     Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
     Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
@@ -299,7 +303,8 @@
 
 ChangeStatus
 IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
-                                   const ArrayRef<Attribute> &DeducedAttrs) {
+                                   const ArrayRef<Attribute> &DeducedAttrs,
+                                   bool ForceReplace) {
   Function *ScopeFn = IRP.getAnchorScope();
   IRPosition::Kind PK = IRP.getPositionKind();
 
@@ -327,7 +332,7 @@
   ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
   LLVMContext &Ctx = IRP.getAnchorValue().getContext();
   for (const Attribute &Attr : DeducedAttrs) {
-    if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx()))
+    if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx(), ForceReplace))
       continue;
 
     HasChanged = ChangeStatus::CHANGED;
@@ -760,6 +765,22 @@
   return false;
 }
 
+bool Attributor::isAssumedDead(const BasicBlock &BB,
+                               const AbstractAttribute *QueryingAA,
+                               const AAIsDead *FnLivenessAA,
+                               DepClassTy DepClass) {
+  if (!FnLivenessAA)
+    FnLivenessAA = lookupAAFor<AAIsDead>(IRPosition::function(*BB.getParent()),
+                                         QueryingAA, DepClassTy::NONE);
+  if (FnLivenessAA->isAssumedDead(&BB)) {
+    if (QueryingAA)
+      recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
+    return true;
+  }
+
+  return false;
+}
+
 bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
                                  const AbstractAttribute &QueryingAA,
                                  const Value &V, DepClassTy LivenessDepClass) {
@@ -2032,6 +2053,8 @@
     // The alignment of a pointer is interesting for loads.
   case Instruction::Store:
     // The alignment of a pointer is interesting for stores.
+ case Instruction::Alloca: + case Instruction::AddrSpaceCast: IsInterestingOpcode = true; } if (IsInterestingOpcode) { diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0 @@ -98,9 +99,15 @@ ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*) } -; HSA: attributes #0 = { argmemonly nofree nounwind willreturn } -; HSA: attributes #1 = { nounwind } -; HSA: attributes #2 = { nounwind "amdgpu-queue-ptr" } +; Attributor assumes for kernels uniform-work-group-size false if it is not present. + +; ATTRIBUTOR_HSA: attributes #0 = { argmemonly nofree nounwind willreturn "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #1 = { nounwind "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #2 = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" } + +; AKF_HSA: attributes #0 = { argmemonly nofree nounwind willreturn } +; AKF_HSA: attributes #1 = { nounwind } +; AKF_HSA: attributes #2 = { nounwind "amdgpu-queue-ptr" } attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -1,4 +1,6 @@ -; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s + declare i32 @llvm.amdgcn.workgroup.id.x() #0 declare i32 @llvm.amdgcn.workgroup.id.y() #0 @@ -170,7 +172,9 @@ ret void } -; HSA: define void @recursive_use_workitem_id_y() #2 { +; Attributor sets uniform-work-group-size +; ATTRIBUTOR_HSA: define void @recursive_use_workitem_id_y() #12 { +; AKF_HSA: define void @recursive_use_workitem_id_y() #2 { define void @recursive_use_workitem_id_y() #1 { %val = call i32 @llvm.amdgcn.workitem.id.y() store volatile i32 %val, i32 addrspace(1)* undef @@ -191,14 +195,15 @@ ret void } -; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #12 { + +; HSA: define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #[[SHIFTED1:[0-9]+]] { define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 { %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %stof ret void } -; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* 
%ptr) #13 { +; HSA: define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #[[SHIFTED2:[0-9]+]] { define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 { %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 0, i32 addrspace(4)* %stof @@ -212,7 +217,8 @@ ret void } -; HSA: define void @indirect_use_group_to_flat_addrspacecast_gfx9() #11 { +; AKF_HSA: define void @indirect_use_group_to_flat_addrspacecast_gfx9() #11 { +; ATTRIBUTOR_HSA: define void @indirect_use_group_to_flat_addrspacecast_gfx9() #15 { define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 { call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) ret void @@ -224,85 +230,96 @@ ret void } -; HSA: define void @use_kernarg_segment_ptr() #14 { +; HSA: define void @use_kernarg_segment_ptr() #[[SHIFTED3:[0-9]+]] { define void @use_kernarg_segment_ptr() #1 { %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef ret void } -; HSA: define void @func_indirect_use_kernarg_segment_ptr() #11 { +; AKF_HSA: define void @func_indirect_use_kernarg_segment_ptr() #11 { +; ATTRIBUTOR_HSA: define void @func_indirect_use_kernarg_segment_ptr() #15 { define void @func_indirect_use_kernarg_segment_ptr() #1 { call void @use_kernarg_segment_ptr() ret void } -; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 { + +; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #[[SHIFTED4:[0-9]+]] { define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 { %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef ret void } -; HSA: define void @use_implicitarg_ptr() #16 { + +; AKF_HSA: define void @use_implicitarg_ptr() #16 { +; ATTRIBUTOR_HSA: define void @use_implicitarg_ptr() #17 { define void @use_implicitarg_ptr() #1 { %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef ret void } -; HSA: define void @func_indirect_use_implicitarg_ptr() #16 { + +; Attributor sets uniform-workgroup-size attribute. +; AKF_HSA: define void @func_indirect_use_implicitarg_ptr() #16 { +; ATTRIBUTOR_HSA: define void @func_indirect_use_implicitarg_ptr() #17 { define void @func_indirect_use_implicitarg_ptr() #1 { call void @use_implicitarg_ptr() ret void } -; HSA: declare void @external.func() #17 +; AKF_HSA: declare void @external.func() #[[SHIFTED5:[0-9]+]] declare void @external.func() #3 -; HSA: define internal void @defined.func() #17 { +; This function gets deleted. 
+; AKF_HSA: define internal void @defined.func() #17 { define internal void @defined.func() #3 { ret void } -; HSA: define void @func_call_external() #17 { +; HSA: define void @func_call_external() #[[SHIFTED5:[0-9]+]] { define void @func_call_external() #3 { call void @external.func() ret void } -; HSA: define void @func_call_defined() #17 { +; HSA: define void @func_call_defined() #[[SHIFTED5]] { define void @func_call_defined() #3 { call void @defined.func() ret void } -; HSA: define void @func_call_asm() #18 { +;FIXME: Investigate +; AKF_HSA: define void @func_call_asm() #18 { +; ATTRIBUTOR_HSA: define void @func_call_asm() #19 { define void @func_call_asm() #3 { call void asm sideeffect "", ""() #3 ret void } -; HSA: define amdgpu_kernel void @kern_call_external() #19 { +; HSA: define amdgpu_kernel void @kern_call_external() #[[SHIFTED6:[0-9]+]] { define amdgpu_kernel void @kern_call_external() #3 { call void @external.func() ret void } -; HSA: define amdgpu_kernel void @func_kern_defined() #19 { +; AKF_HSA: define amdgpu_kernel void @func_kern_defined() #19 { +; ATTRIBUTOR_HSA: define amdgpu_kernel void @func_kern_defined() #18 { define amdgpu_kernel void @func_kern_defined() #3 { call void @defined.func() ret void } -; HSA: define i32 @use_dispatch_ptr_ret_type() #20 { +; HSA: define i32 @use_dispatch_ptr_ret_type() #[[SHIFTED7:[0-9]*]] { define i32 @use_dispatch_ptr_ret_type() #1 { %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef ret i32 0 } -; HSA: define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #20 { +; HSA: define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #[[SHIFTED7]] { define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 { %f = call float bitcast (i32()* @use_dispatch_ptr_ret_type to float()*)() %fadd = fadd float %f, 1.0 @@ -314,7 +331,9 @@ attributes #2 = { nounwind "target-cpu"="gfx900" } attributes #3 = { nounwind } -; HSA: attributes #0 = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #0 = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_HSA: attributes #0 = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" } + ; HSA: attributes #1 = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; HSA: attributes #2 = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; HSA: attributes #3 = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } @@ -324,14 +343,30 @@ ; HSA: attributes #7 = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; HSA: attributes #8 = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; HSA: attributes #9 = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" } -; HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" } -; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #15 = { nounwind 
"amdgpu-implicitarg-ptr" "target-cpu"="fiji" } -; HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } -; HSA: attributes #17 = { nounwind "uniform-work-group-size"="false" } -; HSA: attributes #18 = { nounwind } -; HSA: attributes #19 = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } -; HSA: attributes #20 = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } + +; AKF_HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" } +; ATTRIBUTOR_HSA: attributes #10 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } + +; AKF_HSA: attributes #11 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #11 = { noreturn nounwind readnone "target-cpu"="fiji" "uniform-work-group-size"="false" } +; Attribute numbers shifted from now on, because of this attribute. +; ATTRIBUTOR_HSA: attributes #12 = { noreturn nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } + +; HSA: attributes #[[SHIFTED1]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" } +; HSA: attributes #[[SHIFTED2]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" } + +; Attributes shift more +; ATTRIBUTOR_HSA: attributes #15 = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" } +; HSA: attributes #[[SHIFTED3]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } +; AKF_HSA: attributes #[[SHIFTED4]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" } +; ATTRIBUTOR_HSA: attributes #[[SHIFTED4]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } + +; This is the same as Attributor #17 +; AKF_HSA: attributes #16 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" } + +; HSA: attributes #[[SHIFTED5]] = { nounwind "uniform-work-group-size"="false" } +; HSA: attributes #[[SHIFTED6]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" } + +; AKF_HSA: attributes #[[SHIFTED7]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" } +; ATTRIBUTOR_AKF: attributes #[[SHIFTED7]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } + diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -1,4 +1,5 @@ -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck 
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
@@ -280,17 +281,34 @@
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
 
-; HSA: attributes #0 = { nounwind readnone speculatable willreturn }
-; HSA: attributes #1 = { nounwind }
-; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
-; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
-; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
-; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
-; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
-; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
-; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
-; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
-; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
-; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
-; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }
-; HSA: attributes #13 = { nounwind "amdgpu-stack-objects" }
+; AKF_HSA: attributes #0 = { nounwind readnone speculatable willreturn }
+; AKF_HSA: attributes #1 = { nounwind }
+; AKF_HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
+; AKF_HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
+; AKF_HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
+; AKF_HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
+; AKF_HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
+; AKF_HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
+; AKF_HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; AKF_HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
+; AKF_HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
+; AKF_HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" }
+; AKF_HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" }
+; AKF_HSA: attributes #13 = { nounwind "amdgpu-stack-objects" }
+
+
+; ATTRIBUTOR_HSA: attributes #0 = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #1 = { nounwind "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #9 = { nounwind
"amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #13 = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" } + diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: define internal void @indirect() #0 { define internal void @indirect() { diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll --- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: define internal void @indirect() #0 { define internal void @indirect() { diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Check that no attributes are added to graphics functions -; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN %s ; Check that it doesn't crash ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s @@ -9,8 +10,11 @@ target datalayout = "A5" + define amdgpu_cs void @test_simple_indirect_call() { -; GCN-LABEL: define amdgpu_cs void @test_simple_indirect_call() { +; AKF_GCN-LABEL: define amdgpu_cs void @test_simple_indirect_call() { +; Attributor adds work-group-size attribute. This should be ok. 
+; ATTRIBUTOR_GCN-LABEL: define amdgpu_cs void @test_simple_indirect_call() #0 { ; GFX9-LABEL: test_simple_indirect_call: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_getpc_b64 s[36:37] @@ -53,3 +57,7 @@ declare i64 @llvm.amdgcn.s.getpc() #0 attributes #0 = { nounwind readnone speculatable willreturn } + +; ATTRIBUTOR_GCN: attributes #0 = { "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN: attributes #1 = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" } + diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll --- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=GCN %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s @@ -47,5 +48,6 @@ ret void } +;FIXME: The AMDGPU Attributor does not deduce the uniform-group-size attribute. ; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll @@ -1,9 +1,14 @@ ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s ; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false -; CHECK: define void @foo() #[[FOO:[0-9]+]] { +; sink function is added to prevent attributor from deleting the functions. +declare void @sink() + +; CHECK: define void @foo() #[[FOO:[0-9]+]] define void @foo() #0 { + call void @sink() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll @@ -1,9 +1,15 @@ + ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s ; Test to verify if the attribute gets propagated across nested function calls -; CHECK: define void @func1() #[[FUNC:[0-9]+]] { +; Added to prevent Attributor from deleting calls. 
+declare void @sink() + +; CHECK: define void @func1() #[[FUNC:[0-9]+]] define void @func1() #0 { + call void @sink() ret void } @@ -20,6 +26,5 @@ } attributes #2 = { "uniform-work-group-size"="true" } - ; CHECK: attributes #[[FUNC]] = { "uniform-work-group-size"="true" } ; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll @@ -1,9 +1,14 @@ ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s + +; Function added to prevent attributor from deleting call sites. +declare void @sink() ; Two kernels with different values of the uniform-work-group-attribute call the same function ; CHECK: define void @func() #[[FUNC:[0-9]+]] { define void @func() #0 { + call void @sink() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll @@ -1,8 +1,13 @@ ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s + +; function added to prevent attributor from deleting calls. +declare void @sink() ; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it ; CHECK: define void @func() #[[FUNC:[0-9]+]] { define void @func() #0 { + call void @sink() ret void } @@ -15,6 +20,7 @@ ; External declaration of a function ; CHECK: define weak_odr void @weak_func() #[[FUNC]] { define weak_odr void @weak_func() #0 { + call void @sink() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -1,4 +1,6 @@ ; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s + ; Test to ensure recursive functions exhibit proper behaviour ; Test to generate fibonacci numbers @@ -32,7 +34,9 @@ ret void } +; nounwind and readnone are added to match attributor results. 
+attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } -; CHECK: attributes #[[FIB]] = { "uniform-work-group-size"="true" } +; CHECK: attributes #[[FIB]] = { nounwind readnone "uniform-work-group-size"="true" } ; CHECK: attributes #[[KERNEL]] = { "amdgpu-calls" "uniform-work-group-size"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll @@ -1,29 +1,27 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s -; CHECK: define void @func1() #[[FUNC:[0-9]+]] { -define void @func1() { - ret void -} -; CHECK: define void @func4() #[[FUNC]] { -define void @func4() { - ret void -} +; CHECK: declare void @func1() #[[FUNC0:[0-9]+]] +declare void @func1() + +; CHECK: declare void @func4() #[[FUNC0]] +declare void @func4() -; CHECK: define void @func2() #[[FUNC]] { +; CHECK: define void @func2() #[[FUNC0]] { define void @func2() #0 { call void @func4() call void @func1() ret void } -; CHECK: define void @func3() #[[FUNC]] { +; CHECK: define void @func3() #[[FUNC0]] { define void @func3() { call void @func1() ret void } -; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC:[0-9]+]] { +; CHECK: define amdgpu_kernel void @kernel3() #[[FUNC1:[0-9]+]] { define amdgpu_kernel void @kernel3() #0 { call void @func2() call void @func3() @@ -32,4 +30,5 @@ attributes #0 = { "uniform-work-group-size"="false" } -; CHECK: attributes #[[FUNC]] = { "amdgpu-calls" "uniform-work-group-size"="false" } +; CHECK: attributes #[[FUNC0]] = { "uniform-work-group-size"="false" } +; CHECK: attributes #[[FUNC1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
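A minimal, hedged sketch (not part of the patch) of the overwrite semantics the new ForceReplace flag provides: without it, addIfNotExistent keeps an already-present "uniform-work-group-size" value, while AAAMDWorkGroupSize must be able to turn a pre-existing "true" into a deduced "false". The helper name setUniformWorkGroupSize is hypothetical; removeFnAttr and addFnAttr are existing Function APIs, used here only to illustrate the intended effect.

    #include "llvm/IR/Function.h"

    static void setUniformWorkGroupSize(llvm::Function &F, bool Uniform) {
      // Drop any existing value first, then attach the newly deduced one, so a
      // stale "true" cannot survive a deduction of "false".  manifestAttrs with
      // ForceReplace = true achieves the same result through the Attributor's
      // attribute-list handling.
      F.removeFnAttr("uniform-work-group-size");
      F.addFnAttr("uniform-work-group-size", Uniform ? "true" : "false");
    }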