Index: llvm/include/llvm/Transforms/IPO/Attributor.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/Attributor.h
+++ llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1315,6 +1315,10 @@
     return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX();
   }
 
+  const SmallVectorImpl<Function *> &getIndirectlyCallableFunctions() const {
+    return IndirectlyCallableFunctions;
+  }
+
 private:
   struct FunctionInfo {
     ~FunctionInfo();
@@ -1347,6 +1351,10 @@
     return *FI;
   }
 
+  /// Vector of functions that might be callable indirectly, e.g., via a
+  /// function pointer.
+  SmallVector<Function *> IndirectlyCallableFunctions;
+
   /// Initialize the function information cache \p FI for the function \p F.
   ///
   /// This method needs to be called for all function that might be looked at
@@ -1412,6 +1420,10 @@
   /// Flag to determine if we should skip all liveness checks early on.
   bool UseLiveness = true;
 
+  /// Flag to indicate if the entire world is contained in this module, that
+  /// is, no outside functions exist.
+  bool IsClosedWorldModule = false;
+
   /// Callback function to be invoked on internal functions marked live.
   std::function<void(Attributor &A, const Function &F)> InitializationCallback =
       nullptr;
@@ -1687,6 +1699,10 @@
   /// Return true if this is a module pass, false otherwise.
   bool isModulePass() const { return Configuration.IsModulePass; }
 
+  /// Return true if the module contains the whole world, thus, no outside
+  /// functions exist.
+  bool isClosedWorldModule() const { return Configuration.IsClosedWorldModule; }
+
   /// Return true if we derive attributes for \p Fn
   bool isRunOn(Function &Fn) const { return isRunOn(&Fn); }
   bool isRunOn(Function *Fn) const {
Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -950,6 +950,7 @@
     AC.Allowed = &Allowed;
     AC.IsModulePass = true;
     AC.DefaultInitializeLiveInternals = false;
+    AC.IsClosedWorldModule = true;
     AC.IPOAmendableCB = [](const Function &F) {
       return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
     };
Index: llvm/lib/Transforms/IPO/Attributor.cpp
===================================================================
--- llvm/lib/Transforms/IPO/Attributor.cpp
+++ llvm/lib/Transforms/IPO/Attributor.cpp
@@ -3251,11 +3251,27 @@
   // determine if it is part of a must-tail call edge. This will influence what
   // attributes we can derive.
   InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F);
-  if (!isModulePass() && !FI.CalledViaMustTail) {
-    for (const Use &U : F.uses())
+  if (isClosedWorldModule() || (!isModulePass() && !FI.CalledViaMustTail)) {
+    bool IsIndirectlyCallable = !isClosedWorldModule() || !F.hasLocalLinkage();
+    // F may have many uses; record it at most once as a potential indirect
+    // callee.
+    // NOTE(review): functions with local linkage are never recorded, even if
+    // their address is taken; confirm this is intended.
+    bool ShouldRecord = isClosedWorldModule() && IsIndirectlyCallable;
+    for (const Use &U : F.uses()) {
       if (const auto *CB = dyn_cast<CallBase>(U.getUser()))
-        if (CB->isCallee(&U) && CB->isMustTailCall())
-          FI.CalledViaMustTail = true;
+        if (CB->isCallee(&U))
+          if (CB->isMustTailCall()) {
+            FI.CalledViaMustTail = true;
+            if (IsIndirectlyCallable)
+              break;
+            continue;
+          }
+      if (ShouldRecord) {
+        InfoCache.IndirectlyCallableFunctions.push_back(&F);
+        ShouldRecord = false;
+      }
+    }
   }
 
   IRPosition FPos = IRPosition::function(F);
Index: llvm/lib/Transforms/IPO/AttributorAttributes.cpp
===================================================================
--- llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -10401,18 +10401,58 @@
 struct AACallEdgesCallSite : public AACallEdgesImpl {
   AACallEdgesCallSite(const IRPosition &IRP, Attributor &A)
       : AACallEdgesImpl(IRP, A) {}
+
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
     ChangeStatus Change = ChangeStatus::UNCHANGED;
 
+    CallBase *CB = cast<CallBase>(getCtxI());
+
+    auto IsValidTypePun = [](Type &T1, Type &T2) {
+      if (&T1 == &T2)
+        return true;
+      if (T1.isIntOrPtrTy() && T2.isIntOrIntVectorTy())
+        return true;
+      if (T1.isFloatTy() && T2.isFloatTy())
+        return true;
+      if (T1.isDoubleTy() && T2.isDoubleTy())
+        return true;
+      return false;
+    };
+
     auto VisitValue = [&](Value &V, const Instruction *CtxI) -> bool {
       if (Function *Fn = dyn_cast<Function>(&V)) {
         addCalledFunction(Fn, Change);
-      } else {
-        LLVM_DEBUG(dbgs() << "[AACallEdges] Unrecognized value: " << V << "\n");
+        // Explore all values.
+        return true;
+      }
+      if (!A.isClosedWorldModule()) {
+        LLVM_DEBUG(if (!hasUnknownCallee()) dbgs()
+                   << "[AACallEdges] Assume unknown callee due to: " << V
+                   << "\n");
         setHasUnknownCallee(true, Change);
+        // Explore all values.
+        return true;
       }
 
+      unsigned NumArgs = CB->arg_size();
+      LLVM_DEBUG(dbgs() << "[AACallEdges] Unrecognized value: " << V
+                        << ", checking indirect callable functions:\n");
+      for (auto *Fn : A.getInfoCache().getIndirectlyCallableFunctions()) {
+        if (Fn->arg_size() != NumArgs)
+          continue;
+        if (!IsValidTypePun(*Fn->getReturnType(), *CB->getType()))
+          continue;
+        bool Valid = true;
+        for (unsigned ArgNo = 0; Valid && ArgNo < NumArgs; ++ArgNo)
+          Valid &= IsValidTypePun(*Fn->getArg(ArgNo)->getType(),
+                                  *CB->getArgOperand(ArgNo)->getType());
+        if (!Valid)
+          continue;
+        LLVM_DEBUG(dbgs() << "[AACallEdges] Add compatible callable function: "
+                          << Fn->getName() << "\n");
+        addCalledFunction(Fn, Change);
+      }
       // Explore all values.
       return true;
     };
@@ -10435,7 +10475,6 @@
         VisitValue(*VAC.getValue(), VAC.getCtxI());
     };
 
-    CallBase *CB = cast<CallBase>(getCtxI());
 
     if (auto *IA = dyn_cast<InlineAsm>(CB->getCalledOperand())) {
       if (IA->hasSideEffects() &&
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -4,51 +4,36 @@
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11
+  ; CHECK-NEXT:   liveins: $sgpr8, $vgpr0, $sgpr4_sgpr5
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16
-  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15
-  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
-  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr8
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
   ; CHECK-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
-  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(p4) = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(p4) = COPY [[COPY2]](p4)
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64)
-  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK-NEXT:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32)
-  ; CHECK-NEXT:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]]
-  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK-NEXT:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32)
-  ; CHECK-NEXT:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
-  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY4]], [[C]](s64)
+  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[DEF]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY3]](p4)
   ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
-  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY13]](s64)
-  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY14]](s32)
-  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY15]](s32)
-  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY16]](s32)
-  ; CHECK-NEXT:   $sgpr15 = COPY [[DEF]](s32)
-  ; CHECK-NEXT:   $vgpr31 = COPY [[OR1]](s32)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[DEF1]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY5]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[DEF2]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY6]](s32)
+  ; CHECK-NEXT:   $sgpr15 = COPY [[COPY7]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY8]](s32)
   ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK-NEXT:   S_ENDPGM 0
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -769,7 +769,7 @@
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
 ; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
@@ -806,7 +806,7 @@
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_null_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float null()
 ; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
Index: llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
+++ llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
@@ -51,7 +51,7 @@
 
 define amdgpu_kernel void @entry() {
 ; CHECK-LABEL: define {{[^@]+}}@entry
-; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
 ; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
 ; CHECK-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
@@ -63,5 +63,6 @@
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -11,7 +11,7 @@
 
 define internal void @direct() {
 ; CHECK-LABEL: define {{[^@]+}}@direct
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; CHECK-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
 ; CHECK-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
@@ -27,7 +27,7 @@
 
 define amdgpu_kernel void @test_direct_indirect_call() {
 ; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
-; CHECK-SAME: () #[[ATTR1]] {
+; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:    call void @direct()
 ; CHECK-NEXT:    ret void
 ;
@@ -36,5 +36,4 @@
 }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -43,5 +43,5 @@
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
 ;.
 ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -28,21 +28,21 @@
 ; GCN-NEXT:     enable_mem_ordered = 0
 ; GCN-NEXT:     enable_fwd_progress = 0
 ; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GCN-NEXT:     user_sgpr_count = 14
+; GCN-NEXT:     user_sgpr_count = 8
 ; GCN-NEXT:     enable_trap_handler = 0
 ; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GCN-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GCN-NEXT:     enable_sgpr_workgroup_id_z = 0
 ; GCN-NEXT:     enable_sgpr_workgroup_info = 0
-; GCN-NEXT:     enable_vgpr_workitem_id = 2
+; GCN-NEXT:     enable_vgpr_workitem_id = 0
 ; GCN-NEXT:     enable_exception_msb = 0
 ; GCN-NEXT:     granulated_lds_size = 0
 ; GCN-NEXT:     enable_exception = 0
 ; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GCN-NEXT:     enable_sgpr_queue_ptr = 1
+; GCN-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GCN-NEXT:     enable_sgpr_queue_ptr = 0
 ; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GCN-NEXT:     enable_sgpr_dispatch_id = 1
+; GCN-NEXT:     enable_sgpr_dispatch_id = 0
 ; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
 ; GCN-NEXT:     enable_sgpr_private_segment_size = 0
 ; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
@@ -58,7 +58,7 @@
 ; GCN-NEXT:     workitem_private_segment_byte_size = 16384
 ; GCN-NEXT:     workgroup_group_segment_byte_size = 0
 ; GCN-NEXT:     gds_segment_byte_size = 0
-; GCN-NEXT:     kernarg_segment_byte_size = 64
+; GCN-NEXT:     kernarg_segment_byte_size = 4
 ; GCN-NEXT:     workgroup_fbarrier_count = 0
 ; GCN-NEXT:     wavefront_sgpr_count = 68
 ; GCN-NEXT:     workitem_vgpr_count = 42
@@ -77,26 +77,21 @@
 ; GCN-NEXT:    .end_amd_kernel_code_t
 ; GCN-NEXT:  ; %bb.0:
 ; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
+; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s7
+; GCN-NEXT:    s_add_i32 s6, s6, s9
+; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
+; GCN-NEXT:    s_add_u32 s0, s0, s9
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
-; GCN-NEXT:    s_mov_b32 s14, s16
+; GCN-NEXT:    s_mov_b32 s12, s8
+; GCN-NEXT:    s_getpc_b64 s[6:7]
+; GCN-NEXT:    s_add_u32 s6, s6, gv.fptr0@rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s7, s7, gv.fptr0@rel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GCN-NEXT:    s_add_u32 s8, s4, 8
+; GCN-NEXT:    s_addc_u32 s9, s5, 0
+; GCN-NEXT:    v_mov_b32_e32 v31, v0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GCN-NEXT:    s_endpgm
 ;
 ; GISEL-LABEL: test_indirect_call_sgpr_ptr:
@@ -121,21 +116,21 @@
 ; GISEL-NEXT:     enable_mem_ordered = 0
 ; GISEL-NEXT:     enable_fwd_progress = 0
 ; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GISEL-NEXT:     user_sgpr_count = 14
+; GISEL-NEXT:     user_sgpr_count = 8
 ; GISEL-NEXT:     enable_trap_handler = 0
 ; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 0
 ; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
-; GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; GISEL-NEXT:     enable_vgpr_workitem_id = 0
 ; GISEL-NEXT:     enable_exception_msb = 0
 ; GISEL-NEXT:     granulated_lds_size = 0
 ; GISEL-NEXT:     enable_exception = 0
 ; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GISEL-NEXT:     enable_sgpr_queue_ptr = 0
 ; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; GISEL-NEXT:     enable_sgpr_dispatch_id = 0
 ; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
 ; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
 ; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
@@ -151,7 +146,7 @@
 ; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
 ; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
 ; GISEL-NEXT:     gds_segment_byte_size = 0
-; GISEL-NEXT:     kernarg_segment_byte_size = 64
+; GISEL-NEXT:     kernarg_segment_byte_size = 4
 ; GISEL-NEXT:     workgroup_fbarrier_count = 0
 ; GISEL-NEXT:     wavefront_sgpr_count = 68
 ; GISEL-NEXT:     workitem_vgpr_count = 42
@@ -170,26 +165,22 @@
 ; GISEL-NEXT:    .end_amd_kernel_code_t
 ; GISEL-NEXT:  ; %bb.0:
 ; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s7
+; GISEL-NEXT:    s_add_i32 s6, s6, s9
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s9
 ; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
-; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_getpc_b64 s[6:7]
+; GISEL-NEXT:    s_add_u32 s6, s6, gv.fptr0@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s7, s7, gv.fptr0@rel32@hi+12
+; GISEL-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GISEL-NEXT:    s_add_u32 s4, s4, 8
+; GISEL-NEXT:    s_addc_u32 s5, s5, 0
+; GISEL-NEXT:    s_mov_b32 s12, s8
+; GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
+; GISEL-NEXT:    v_mov_b32_e32 v31, v0
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GISEL-NEXT:    s_endpgm
   %fptr = load ptr, ptr addrspace(4) @gv.fptr0
   call void %fptr()
@@ -219,21 +210,21 @@
 ; GCN-NEXT:     enable_mem_ordered = 0
 ; GCN-NEXT:     enable_fwd_progress = 0
 ; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GCN-NEXT:     user_sgpr_count = 14
+; GCN-NEXT:     user_sgpr_count = 8
 ; GCN-NEXT:     enable_trap_handler = 0
 ; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GCN-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GCN-NEXT:     enable_sgpr_workgroup_id_z = 0
 ; GCN-NEXT:     enable_sgpr_workgroup_info = 0
-; GCN-NEXT:     enable_vgpr_workitem_id = 2
+; GCN-NEXT:     enable_vgpr_workitem_id = 0
 ; GCN-NEXT:     enable_exception_msb = 0
 ; GCN-NEXT:     granulated_lds_size = 0
 ; GCN-NEXT:     enable_exception = 0
 ; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GCN-NEXT:     enable_sgpr_queue_ptr = 1
+; GCN-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GCN-NEXT:     enable_sgpr_queue_ptr = 0
 ; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GCN-NEXT:     enable_sgpr_dispatch_id = 1
+; GCN-NEXT:     enable_sgpr_dispatch_id = 0
 ; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
 ; GCN-NEXT:     enable_sgpr_private_segment_size = 0
 ; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
@@ -249,7 +240,7 @@
 ; GCN-NEXT:     workitem_private_segment_byte_size = 16384
 ; GCN-NEXT:     workgroup_group_segment_byte_size = 0
 ; GCN-NEXT:     gds_segment_byte_size = 0
-; GCN-NEXT:     kernarg_segment_byte_size = 64
+; GCN-NEXT:     kernarg_segment_byte_size = 4
 ; GCN-NEXT:     workgroup_fbarrier_count = 0
 ; GCN-NEXT:     wavefront_sgpr_count = 68
 ; GCN-NEXT:     workitem_vgpr_count = 42
@@ -268,27 +259,22 @@
 ; GCN-NEXT:    .end_amd_kernel_code_t
 ; GCN-NEXT:  ; %bb.0:
 ; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
+; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s7
+; GCN-NEXT:    s_add_i32 s6, s6, s9
+; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
+; GCN-NEXT:    s_add_u32 s0, s0, s9
 ; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
+; GCN-NEXT:    s_mov_b32 s12, s8
+; GCN-NEXT:    v_mov_b32_e32 v31, v0
+; GCN-NEXT:    s_getpc_b64 s[6:7]
+; GCN-NEXT:    s_add_u32 s6, s6, gv.fptr1@rel32@lo+4
+; GCN-NEXT:    s_addc_u32 s7, s7, gv.fptr1@rel32@hi+12
+; GCN-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GCN-NEXT:    s_add_u32 s8, s4, 8
+; GCN-NEXT:    s_addc_u32 s9, s5, 0
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GCN-NEXT:    s_mov_b32 s14, s16
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GCN-NEXT:    s_endpgm
 ;
 ; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
@@ -313,21 +299,21 @@
 ; GISEL-NEXT:     enable_mem_ordered = 0
 ; GISEL-NEXT:     enable_fwd_progress = 0
 ; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GISEL-NEXT:     user_sgpr_count = 14
+; GISEL-NEXT:     user_sgpr_count = 8
 ; GISEL-NEXT:     enable_trap_handler = 0
 ; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 0
 ; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
-; GISEL-NEXT:     enable_vgpr_workitem_id = 2
+; GISEL-NEXT:     enable_vgpr_workitem_id = 0
 ; GISEL-NEXT:     enable_exception_msb = 0
 ; GISEL-NEXT:     granulated_lds_size = 0
 ; GISEL-NEXT:     enable_exception = 0
 ; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
+; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GISEL-NEXT:     enable_sgpr_queue_ptr = 0
 ; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
+; GISEL-NEXT:     enable_sgpr_dispatch_id = 0
 ; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
 ; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
 ; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
@@ -343,7 +329,7 @@
 ; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
 ; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
 ; GISEL-NEXT:     gds_segment_byte_size = 0
-; GISEL-NEXT:     kernarg_segment_byte_size = 64
+; GISEL-NEXT:     kernarg_segment_byte_size = 4
 ; GISEL-NEXT:     workgroup_fbarrier_count = 0
 ; GISEL-NEXT:     wavefront_sgpr_count = 68
 ; GISEL-NEXT:     workitem_vgpr_count = 42
@@ -362,27 +348,23 @@
 ; GISEL-NEXT:    .end_amd_kernel_code_t
 ; GISEL-NEXT:  ; %bb.0:
 ; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
+; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s7
+; GISEL-NEXT:    s_add_i32 s6, s6, s9
+; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
+; GISEL-NEXT:    s_add_u32 s0, s0, s9
 ; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL-NEXT:    v_mov_b32_e32 v31, v0
+; GISEL-NEXT:    s_getpc_b64 s[6:7]
+; GISEL-NEXT:    s_add_u32 s6, s6, gv.fptr1@rel32@lo+4
+; GISEL-NEXT:    s_addc_u32 s7, s7, gv.fptr1@rel32@hi+12
+; GISEL-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GISEL-NEXT:    s_add_u32 s4, s4, 8
+; GISEL-NEXT:    s_addc_u32 s5, s5, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT:    s_mov_b32 s14, s16
+; GISEL-NEXT:    s_mov_b32 s12, s8
+; GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
 ; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
 ; GISEL-NEXT:    s_endpgm
   %fptr = load ptr, ptr addrspace(4) @gv.fptr1
   call void %fptr(i32 123)
Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -43,9 +43,9 @@
 ; GFX9-LABEL: test_simple_indirect_call:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x4
-; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s12, s17
-; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s13, 0
-; GFX9-NEXT:    s_add_u32 s0, s0, s17
+; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s6, s9
+; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s7, 0
+; GFX9-NEXT:    s_add_u32 s0, s0, s9
 ; GFX9-NEXT:    s_addc_u32 s1, s1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshr_b32 s4, s4, 16
@@ -74,5 +74,5 @@
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
 ;.
 ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.