Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -15,10 +15,13 @@
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CycleAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO/Attributor.h"
+#include <optional>
 
 #define DEBUG_TYPE "amdgpu-attributor"
 
@@ -944,16 +947,44 @@
         {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
          &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
          &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
-         &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
+         &AAIndirectCallInfo::ID, &AAPotentialConstantValues::ID,
+         &AAUnderlyingObjects::ID});
 
     AttributorConfig AC(CGUpdater);
     AC.Allowed = &Allowed;
     AC.IsModulePass = true;
     AC.DefaultInitializeLiveInternals = false;
+    AC.IsClosedWorldModule = true;
     AC.IPOAmendableCB = [](const Function &F) {
       return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
     };
 
+    // Callback to determine if we should specialize an indirect call site with
+    // a specific callee. It's effectively a heuristic and we can add checks for
+    // the callee size, PGO, etc. For now, we check for single potential callees
+    // and kernel arguments as they are known uniform values.
+    AC.IndirectCalleeSpecializationCallback = [&](Attributor &A,
+                                                  const AbstractAttribute &AA,
+                                                  CallBase &CB,
+                                                  Function &Callee) {
+      bool UsedAssumedInformation = false;
+      std::optional<Value *> SimpleV = A.getAssumedSimplified(
+          *CB.getCalledOperand(), AA, UsedAssumedInformation,
+          AA::ValueScope::AnyScope);
+      assert(SimpleV.has_value() && "No value but potential callee?");
+      // Unknown value; has_value() is re-checked so NDEBUG builds stay safe.
+      if (!SimpleV.has_value() || !*SimpleV)
+        return false;
+      // Singleton function.
+      if (isa<Function>(*SimpleV))
+        return true;
+      // Uniform (kernel argument) value.
+      if (auto *Arg = dyn_cast<Argument>(*SimpleV))
+        if (Arg->getParent()->getCallingConv() == CallingConv::AMDGPU_KERNEL)
+          return true;
+      return false;
+    };
+
     Attributor A(Functions, InfoCache, AC);
 
     for (Function &F : M) {
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+; RUN: llc -global-isel -stop-after=irtranslator -attributor-assume-closed-world=false -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CHECK %s
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CWRLD %s
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) {
   ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr
@@ -52,24 +53,31 @@
   ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
   ; CHECK-NEXT:   S_ENDPGM 0
+  ;
+  ; CWRLD-LABEL: name: test_indirect_call_sgpr_ptr
+  ; CWRLD: bb.1 (%ir-block.0):
+  ; CWRLD-NEXT:   liveins: $sgpr4_sgpr5
+  ; CWRLD-NEXT: {{  $}}
+  ; CWRLD-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+  ; CWRLD-NEXT:   [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
   call void %fptr()
   ret void
 }
 
 define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) {
-  ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr
-  ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
-  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
-  ; CHECK-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
-  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; CHECK-NEXT:   SI_RETURN
+  ; SAMEC-LABEL: name: test_gfx_indirect_call_sgpr_ptr
+  ; SAMEC: bb.1 (%ir-block.0):
+  ; SAMEC-NEXT:   liveins: $vgpr0, $vgpr1
+  ; SAMEC-NEXT: {{  $}}
+  ; SAMEC-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; SAMEC-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; SAMEC-NEXT:   [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; SAMEC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; SAMEC-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; SAMEC-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>)
+  ; SAMEC-NEXT:   $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3
+  ; SAMEC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; SAMEC-NEXT:   SI_RETURN
   call amdgpu_gfx void %fptr()
   ret void
 }
Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_HSA %s
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor -attributor-assume-closed-world=false < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,OWRLD_ATTR_HSA %s
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,CWRLD_ATTR_HSA %s
 
 ; TODO: The test contains UB which is refined by the Attributor and should be removed.
 
@@ -18,6 +19,16 @@
 declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
 declare i64 @llvm.amdgcn.dispatch.id() #0
 
+@G1 = global ptr undef
+@G2 = global ptr undef
+
+;.
+; AKF_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef
+; AKF_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef
+;.
+; ATTRIBUTOR_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef
+; ATTRIBUTOR_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef
+;.
 define void @use_workitem_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
 ; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] {
@@ -766,19 +777,55 @@
 ; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; AKF_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; AKF_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
-; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
-; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
-; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
-; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
+; OWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; OWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; OWRLD_ATTR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
+; OWRLD_ATTR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; OWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; OWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
+; OWRLD_ATTR_HSA-NEXT:    ret float [[FADD]]
+;
+; CWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
+; CWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] {
+; CWRLD_ATTR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]](), !callees !0
+; CWRLD_ATTR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
+; CWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee1, ptr @G1, align 8
+; CWRLD_ATTR_HSA-NEXT:    store ptr @indirect_callee2, ptr @G2, align 8
+; CWRLD_ATTR_HSA-NEXT:    ret float [[FADD]]
 ;
   %f = call float %fptr()
   %fadd = fadd float %f, 1.0
+  store ptr @indirect_callee1, ptr @G1
+  store ptr @indirect_callee2, ptr @G2
   ret float %fadd
 }
 
+define float @indirect_callee1() {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee1() {
+; AKF_HSA-NEXT:    ret float 0x40091EB860000000
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee1
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-NEXT:    ret float 0x40091EB860000000
+;
+  ret float 0x40091EB860000000
+}
+define float @indirect_callee2(float noundef %arg) {
+; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; AKF_HSA-SAME: (float noundef [[ARG:%.*]]) {
+; AKF_HSA-NEXT:    ret float [[ARG]]
+;
+; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee2
+; ATTRIBUTOR_HSA-SAME: (float noundef [[ARG:%.*]]) #[[ATTR19]] {
+; ATTRIBUTOR_HSA-NEXT:    ret float [[ARG]]
+;
+  ret float %arg
+}
+
 declare float @extern() #3
 define float @func_extern_call() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
@@ -845,7 +892,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -861,7 +908,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -877,7 +924,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -893,7 +940,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -928,7 +975,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -941,7 +988,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_decl()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -956,7 +1003,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_def()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -969,7 +1016,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -980,7 +1027,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -994,7 +1041,7 @@
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @known_func() #[[ATTR29:[0-9]+]]
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -1040,15 +1087,17 @@
 ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR25:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "uniform-work-group-size"="false" }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" }
 ;.
+; CWRLD_ATTR_HSA: [[META0:![0-9]+]] = !{ptr @indirect_callee1, ptr @indirect_callee2}
+;.
Index: llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
+++ llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor  -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD
 
 %0 = type { ptr, ptr }
 
@@ -20,19 +21,32 @@
 }
 
 define internal fastcc double @baz(ptr %arg) {
-; CHECK-LABEL: define {{[^@]+}}@baz
-; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call double [[TMP1]]()
-; CHECK-NEXT:    br label [[BB3:%.*]]
-; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
-; CHECK-NEXT:    br label [[BB5:%.*]]
-; CHECK:       bb5:
-; CHECK-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
-; CHECK-NEXT:    br label [[BB5]]
+; OWRLD-LABEL: define {{[^@]+}}@baz
+; OWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
+; OWRLD-NEXT:  bb:
+; OWRLD-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
+; OWRLD-NEXT:    [[TMP2:%.*]] = tail call double [[TMP1]]()
+; OWRLD-NEXT:    br label [[BB3:%.*]]
+; OWRLD:       bb3:
+; OWRLD-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
+; OWRLD-NEXT:    br label [[BB5:%.*]]
+; OWRLD:       bb5:
+; OWRLD-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+; OWRLD-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
+; OWRLD-NEXT:    br label [[BB5]]
+;
+; CWRLD-LABEL: define {{[^@]+}}@baz
+; CWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
+; CWRLD-NEXT:  bb:
+; CWRLD-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8
+; CWRLD-NEXT:    unreachable
+; CWRLD:       bb3:
+; CWRLD-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1
+; CWRLD-NEXT:    br label [[BB5:%.*]]
+; CWRLD:       bb5:
+; CWRLD-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CWRLD-NEXT:    [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]])
+; CWRLD-NEXT:    br label [[BB5]]
 ;
 bb:
   %tmp1 = load ptr, ptr %arg, align 8
@@ -49,13 +63,19 @@
   br label %bb5
 }
 
-define amdgpu_kernel void @entry() {
-; CHECK-LABEL: define {{[^@]+}}@entry
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
-; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
-; CHECK-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
-; CHECK-NEXT:    ret void
+define amdgpu_kernel void @entry() { ; OWRLD-LABEL: define {{[^@]+}}@entry
+; OWRLD-SAME: () #[[ATTR0]] {
+; OWRLD-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
+; OWRLD-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; OWRLD-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@entry
+; CWRLD-SAME: () #[[ATTR1:[0-9]+]] {
+; CWRLD-NEXT:    [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5)
+; CWRLD-NEXT:    [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CWRLD-NEXT:    [[ARST:%.*]] = call double @baz(ptr [[CAST]])
+; CWRLD-NEXT:    ret void
 ;
   %alloca = alloca %0, align 8, addrspace(5)
   %cast = addrspacecast ptr addrspace(5) %alloca to ptr
@@ -63,5 +83,6 @@
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CWRLD: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor  -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD
 
 define internal void @indirect() {
 ; CHECK-LABEL: define {{[^@]+}}@indirect
@@ -10,13 +11,21 @@
 }
 
 define internal void @direct() {
-; CHECK-LABEL: define {{[^@]+}}@direct
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; CHECK-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; CHECK-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; CHECK-NEXT:    call void [[FP]]()
-; CHECK-NEXT:    ret void
+; OWRLD-LABEL: define {{[^@]+}}@direct
+; OWRLD-SAME: () #[[ATTR1:[0-9]+]] {
+; OWRLD-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; OWRLD-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; OWRLD-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; OWRLD-NEXT:    call void [[FP]]()
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@direct
+; CWRLD-SAME: () #[[ATTR0]] {
+; CWRLD-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; CWRLD-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; CWRLD-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; CWRLD-NEXT:    call void @indirect()
+; CWRLD-NEXT:    ret void
 ;
   %fptr = alloca ptr, addrspace(5)
   store ptr @indirect, ptr addrspace(5) %fptr
@@ -26,15 +35,22 @@
 }
 
 define amdgpu_kernel void @test_direct_indirect_call() {
-; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:    call void @direct()
-; CHECK-NEXT:    ret void
+; OWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call
+; OWRLD-SAME: () #[[ATTR1]] {
+; OWRLD-NEXT:    call void @direct()
+; OWRLD-NEXT:    ret void
+;
+; CWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call
+; CWRLD-SAME: () #[[ATTR0]] {
+; CWRLD-NEXT:    call void @direct()
+; CWRLD-NEXT:    ret void
 ;
   call void @direct()
   ret void
 }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; OWRLD: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+;.
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features  %s | FileCheck -check-prefix=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor  -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_OWR
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_CWR
 
 define internal void @indirect() {
 ; AKF_GCN-LABEL: define {{[^@]+}}@indirect() {
@@ -22,13 +23,21 @@
 ; AKF_GCN-NEXT:    call void [[FP]]()
 ; AKF_GCN-NEXT:    ret void
 ;
-; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
-; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
-; ATTRIBUTOR_GCN-NEXT:    call void [[FP]]()
-; ATTRIBUTOR_GCN-NEXT:    ret void
+; ATTRIBUTOR_OWR-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_OWR-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_OWR-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_OWR-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_OWR-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_OWR-NEXT:    call void [[FP]]()
+; ATTRIBUTOR_OWR-NEXT:    ret void
+;
+; ATTRIBUTOR_CWR-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_CWR-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_CWR-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_CWR-NEXT:    store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_CWR-NEXT:    [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
+; ATTRIBUTOR_CWR-NEXT:    call void @indirect()
+; ATTRIBUTOR_CWR-NEXT:    ret void
 ;
   %fptr = alloca ptr, addrspace(5)
   store ptr @indirect, ptr addrspace(5) %fptr
@@ -42,6 +51,9 @@
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_OWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_OWR: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
+;.
+; ATTRIBUTOR_CWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CWR: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 ;.
Index: llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
+++ llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
@@ -1,18 +1,26 @@
-; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV5C %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV4C %s
+; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV5O %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV4O %s
 
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
-; No stack objects, only indirect call has to enable scrathch
-; GCN-LABEL: test_indirect_call:
+; No stack objects, only indirect call has to enable scratch
+; GCNO-LABEL: test_indirect_call:
+; GCNC-LABEL: test_indirect_call:
 
-; COV5: .amdhsa_private_segment_fixed_size 0{{$}}
-; COV4: .amdhsa_private_segment_fixed_size 16384{{$}}
+; COV5O: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV5C: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4C: .amdhsa_private_segment_fixed_size 0{{$}}
+; COV4O: .amdhsa_private_segment_fixed_size 16384{{$}}
 
-; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCNO: .amdhsa_user_sgpr_private_segment_buffer 1
+; GCNC: .amdhsa_user_sgpr_private_segment_buffer 1
 
-; COV5: .amdhsa_uses_dynamic_stack 1
-; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; COV5O: .amdhsa_uses_dynamic_stack 1
+; COV5C: .amdhsa_uses_dynamic_stack 0
+; GCNO: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+; GCNC: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
 define amdgpu_kernel void @test_indirect_call() {
   %fptr = load ptr, ptr addrspace(4) @gv.fptr0
   call void %fptr()
Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/indirect-call.ll
@@ -1,1109 +1,1443 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_O %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_C %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_O %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_C %s
 
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) {
-; GCN-LABEL: test_indirect_call_sgpr_ptr:
-; GCN:         .amd_kernel_code_t
-; GCN-NEXT:     amd_code_version_major = 1
-; GCN-NEXT:     amd_code_version_minor = 2
-; GCN-NEXT:     amd_machine_kind = 1
-; GCN-NEXT:     amd_machine_version_major = 7
-; GCN-NEXT:     amd_machine_version_minor = 0
-; GCN-NEXT:     amd_machine_version_stepping = 0
-; GCN-NEXT:     kernel_code_entry_byte_offset = 256
-; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
-; GCN-NEXT:     granulated_workitem_vgpr_count = 10
-; GCN-NEXT:     granulated_wavefront_sgpr_count = 8
-; GCN-NEXT:     priority = 0
-; GCN-NEXT:     float_mode = 240
-; GCN-NEXT:     priv = 0
-; GCN-NEXT:     enable_dx10_clamp = 1
-; GCN-NEXT:     debug_mode = 0
-; GCN-NEXT:     enable_ieee_mode = 1
-; GCN-NEXT:     enable_wgp_mode = 0
-; GCN-NEXT:     enable_mem_ordered = 0
-; GCN-NEXT:     enable_fwd_progress = 0
-; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GCN-NEXT:     user_sgpr_count = 14
-; GCN-NEXT:     enable_trap_handler = 0
-; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GCN-NEXT:     enable_sgpr_workgroup_info = 0
-; GCN-NEXT:     enable_vgpr_workitem_id = 2
-; GCN-NEXT:     enable_exception_msb = 0
-; GCN-NEXT:     granulated_lds_size = 0
-; GCN-NEXT:     enable_exception = 0
-; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GCN-NEXT:     enable_sgpr_queue_ptr = 1
-; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GCN-NEXT:     enable_sgpr_dispatch_id = 1
-; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GCN-NEXT:     enable_sgpr_private_segment_size = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GCN-NEXT:     enable_wavefront_size32 = 0
-; GCN-NEXT:     enable_ordered_append_gds = 0
-; GCN-NEXT:     private_element_size = 1
-; GCN-NEXT:     is_ptr64 = 1
-; GCN-NEXT:     is_dynamic_callstack = 1
-; GCN-NEXT:     is_debug_enabled = 0
-; GCN-NEXT:     is_xnack_enabled = 0
-; GCN-NEXT:     workitem_private_segment_byte_size = 16384
-; GCN-NEXT:     workgroup_group_segment_byte_size = 0
-; GCN-NEXT:     gds_segment_byte_size = 0
-; GCN-NEXT:     kernarg_segment_byte_size = 64
-; GCN-NEXT:     workgroup_fbarrier_count = 0
-; GCN-NEXT:     wavefront_sgpr_count = 68
-; GCN-NEXT:     workitem_vgpr_count = 42
-; GCN-NEXT:     reserved_vgpr_first = 0
-; GCN-NEXT:     reserved_vgpr_count = 0
-; GCN-NEXT:     reserved_sgpr_first = 0
-; GCN-NEXT:     reserved_sgpr_count = 0
-; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GCN-NEXT:     kernarg_segment_alignment = 4
-; GCN-NEXT:     group_segment_alignment = 4
-; GCN-NEXT:     private_segment_alignment = 4
-; GCN-NEXT:     wavefront_size = 6
-; GCN-NEXT:     call_convention = -1
-; GCN-NEXT:     runtime_loader_kernel_symbol = 0
-; GCN-NEXT:    .end_amd_kernel_code_t
-; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
-; GCN-NEXT:    s_mov_b32 s14, s16
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GCN-NEXT:    s_endpgm
+; GCN_O-LABEL: test_indirect_call_sgpr_ptr:
+; GCN_O:         .amd_kernel_code_t
+; GCN_O-NEXT:     amd_code_version_major = 1
+; GCN_O-NEXT:     amd_code_version_minor = 2
+; GCN_O-NEXT:     amd_machine_kind = 1
+; GCN_O-NEXT:     amd_machine_version_major = 7
+; GCN_O-NEXT:     amd_machine_version_minor = 0
+; GCN_O-NEXT:     amd_machine_version_stepping = 0
+; GCN_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GCN_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GCN_O-NEXT:     priority = 0
+; GCN_O-NEXT:     float_mode = 240
+; GCN_O-NEXT:     priv = 0
+; GCN_O-NEXT:     enable_dx10_clamp = 1
+; GCN_O-NEXT:     debug_mode = 0
+; GCN_O-NEXT:     enable_ieee_mode = 1
+; GCN_O-NEXT:     enable_wgp_mode = 0
+; GCN_O-NEXT:     enable_mem_ordered = 0
+; GCN_O-NEXT:     enable_fwd_progress = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GCN_O-NEXT:     user_sgpr_count = 14
+; GCN_O-NEXT:     enable_trap_handler = 0
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_O-NEXT:     enable_vgpr_workitem_id = 2
+; GCN_O-NEXT:     enable_exception_msb = 0
+; GCN_O-NEXT:     granulated_lds_size = 0
+; GCN_O-NEXT:     enable_exception = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GCN_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GCN_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_O-NEXT:     enable_wavefront_size32 = 0
+; GCN_O-NEXT:     enable_ordered_append_gds = 0
+; GCN_O-NEXT:     private_element_size = 1
+; GCN_O-NEXT:     is_ptr64 = 1
+; GCN_O-NEXT:     is_dynamic_callstack = 1
+; GCN_O-NEXT:     is_debug_enabled = 0
+; GCN_O-NEXT:     is_xnack_enabled = 0
+; GCN_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GCN_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_O-NEXT:     gds_segment_byte_size = 0
+; GCN_O-NEXT:     kernarg_segment_byte_size = 64
+; GCN_O-NEXT:     workgroup_fbarrier_count = 0
+; GCN_O-NEXT:     wavefront_sgpr_count = 68
+; GCN_O-NEXT:     workitem_vgpr_count = 42
+; GCN_O-NEXT:     reserved_vgpr_first = 0
+; GCN_O-NEXT:     reserved_vgpr_count = 0
+; GCN_O-NEXT:     reserved_sgpr_first = 0
+; GCN_O-NEXT:     reserved_sgpr_count = 0
+; GCN_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_O-NEXT:     kernarg_segment_alignment = 4
+; GCN_O-NEXT:     group_segment_alignment = 4
+; GCN_O-NEXT:     private_segment_alignment = 4
+; GCN_O-NEXT:     wavefront_size = 6
+; GCN_O-NEXT:     call_convention = -1
+; GCN_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_O-NEXT:    .end_amd_kernel_code_t
+; GCN_O-NEXT:  ; %bb.0:
+; GCN_O-NEXT:    s_mov_b32 s32, 0
+; GCN_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GCN_O-NEXT:    s_add_i32 s12, s12, s17
+; GCN_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN_O-NEXT:    s_add_u32 s0, s0, s17
+; GCN_O-NEXT:    s_addc_u32 s1, s1, 0
+; GCN_O-NEXT:    s_mov_b32 s13, s15
+; GCN_O-NEXT:    s_mov_b32 s12, s14
+; GCN_O-NEXT:    s_getpc_b64 s[14:15]
+; GCN_O-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
+; GCN_O-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GCN_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GCN_O-NEXT:    s_add_u32 s8, s8, 8
+; GCN_O-NEXT:    s_addc_u32 s9, s9, 0
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GCN_O-NEXT:    s_mov_b32 s14, s16
+; GCN_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN_O-NEXT:    s_endpgm
 ;
-; GISEL-LABEL: test_indirect_call_sgpr_ptr:
-; GISEL:         .amd_kernel_code_t
-; GISEL-NEXT:     amd_code_version_major = 1
-; GISEL-NEXT:     amd_code_version_minor = 2
-; GISEL-NEXT:     amd_machine_kind = 1
-; GISEL-NEXT:     amd_machine_version_major = 7
-; GISEL-NEXT:     amd_machine_version_minor = 0
-; GISEL-NEXT:     amd_machine_version_stepping = 0
-; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
-; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
-; GISEL-NEXT:     granulated_workitem_vgpr_count = 10
-; GISEL-NEXT:     granulated_wavefront_sgpr_count = 8
-; GISEL-NEXT:     priority = 0
-; GISEL-NEXT:     float_mode = 240
-; GISEL-NEXT:     priv = 0
-; GISEL-NEXT:     enable_dx10_clamp = 1
-; GISEL-NEXT:     debug_mode = 0
-; GISEL-NEXT:     enable_ieee_mode = 1
-; GISEL-NEXT:     enable_wgp_mode = 0
-; GISEL-NEXT:     enable_mem_ordered = 0
-; GISEL-NEXT:     enable_fwd_progress = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GISEL-NEXT:     user_sgpr_count = 14
-; GISEL-NEXT:     enable_trap_handler = 0
-; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
-; GISEL-NEXT:     enable_vgpr_workitem_id = 2
-; GISEL-NEXT:     enable_exception_msb = 0
-; GISEL-NEXT:     granulated_lds_size = 0
-; GISEL-NEXT:     enable_exception = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
-; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
-; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GISEL-NEXT:     enable_wavefront_size32 = 0
-; GISEL-NEXT:     enable_ordered_append_gds = 0
-; GISEL-NEXT:     private_element_size = 1
-; GISEL-NEXT:     is_ptr64 = 1
-; GISEL-NEXT:     is_dynamic_callstack = 1
-; GISEL-NEXT:     is_debug_enabled = 0
-; GISEL-NEXT:     is_xnack_enabled = 0
-; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
-; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
-; GISEL-NEXT:     gds_segment_byte_size = 0
-; GISEL-NEXT:     kernarg_segment_byte_size = 64
-; GISEL-NEXT:     workgroup_fbarrier_count = 0
-; GISEL-NEXT:     wavefront_sgpr_count = 68
-; GISEL-NEXT:     workitem_vgpr_count = 42
-; GISEL-NEXT:     reserved_vgpr_first = 0
-; GISEL-NEXT:     reserved_vgpr_count = 0
-; GISEL-NEXT:     reserved_sgpr_first = 0
-; GISEL-NEXT:     reserved_sgpr_count = 0
-; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GISEL-NEXT:     kernarg_segment_alignment = 4
-; GISEL-NEXT:     group_segment_alignment = 4
-; GISEL-NEXT:     private_segment_alignment = 4
-; GISEL-NEXT:     wavefront_size = 6
-; GISEL-NEXT:     call_convention = -1
-; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
-; GISEL-NEXT:    .end_amd_kernel_code_t
-; GISEL-NEXT:  ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
-; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
-; GISEL-NEXT:    s_mov_b32 s14, s16
-; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GISEL-NEXT:    s_endpgm
+; GCN_C-LABEL: test_indirect_call_sgpr_ptr:
+; GCN_C:         .amd_kernel_code_t
+; GCN_C-NEXT:     amd_code_version_major = 1
+; GCN_C-NEXT:     amd_code_version_minor = 2
+; GCN_C-NEXT:     amd_machine_kind = 1
+; GCN_C-NEXT:     amd_machine_version_major = 7
+; GCN_C-NEXT:     amd_machine_version_minor = 0
+; GCN_C-NEXT:     amd_machine_version_stepping = 0
+; GCN_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GCN_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GCN_C-NEXT:     priority = 0
+; GCN_C-NEXT:     float_mode = 240
+; GCN_C-NEXT:     priv = 0
+; GCN_C-NEXT:     enable_dx10_clamp = 1
+; GCN_C-NEXT:     debug_mode = 0
+; GCN_C-NEXT:     enable_ieee_mode = 1
+; GCN_C-NEXT:     enable_wgp_mode = 0
+; GCN_C-NEXT:     enable_mem_ordered = 0
+; GCN_C-NEXT:     enable_fwd_progress = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GCN_C-NEXT:     user_sgpr_count = 6
+; GCN_C-NEXT:     enable_trap_handler = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_C-NEXT:     enable_vgpr_workitem_id = 0
+; GCN_C-NEXT:     enable_exception_msb = 0
+; GCN_C-NEXT:     granulated_lds_size = 0
+; GCN_C-NEXT:     enable_exception = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GCN_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_C-NEXT:     enable_wavefront_size32 = 0
+; GCN_C-NEXT:     enable_ordered_append_gds = 0
+; GCN_C-NEXT:     private_element_size = 1
+; GCN_C-NEXT:     is_ptr64 = 1
+; GCN_C-NEXT:     is_dynamic_callstack = 0
+; GCN_C-NEXT:     is_debug_enabled = 0
+; GCN_C-NEXT:     is_xnack_enabled = 0
+; GCN_C-NEXT:     workitem_private_segment_byte_size = 0
+; GCN_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_C-NEXT:     gds_segment_byte_size = 0
+; GCN_C-NEXT:     kernarg_segment_byte_size = 4
+; GCN_C-NEXT:     workgroup_fbarrier_count = 0
+; GCN_C-NEXT:     wavefront_sgpr_count = 0
+; GCN_C-NEXT:     workitem_vgpr_count = 0
+; GCN_C-NEXT:     reserved_vgpr_first = 0
+; GCN_C-NEXT:     reserved_vgpr_count = 0
+; GCN_C-NEXT:     reserved_sgpr_first = 0
+; GCN_C-NEXT:     reserved_sgpr_count = 0
+; GCN_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_C-NEXT:     kernarg_segment_alignment = 4
+; GCN_C-NEXT:     group_segment_alignment = 4
+; GCN_C-NEXT:     private_segment_alignment = 4
+; GCN_C-NEXT:     wavefront_size = 6
+; GCN_C-NEXT:     call_convention = -1
+; GCN_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_C-NEXT:    .end_amd_kernel_code_t
+; GCN_C-NEXT:  ; %bb.0:
+;
+; GISEL_O-LABEL: test_indirect_call_sgpr_ptr:
+; GISEL_O:         .amd_kernel_code_t
+; GISEL_O-NEXT:     amd_code_version_major = 1
+; GISEL_O-NEXT:     amd_code_version_minor = 2
+; GISEL_O-NEXT:     amd_machine_kind = 1
+; GISEL_O-NEXT:     amd_machine_version_major = 7
+; GISEL_O-NEXT:     amd_machine_version_minor = 0
+; GISEL_O-NEXT:     amd_machine_version_stepping = 0
+; GISEL_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GISEL_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GISEL_O-NEXT:     priority = 0
+; GISEL_O-NEXT:     float_mode = 240
+; GISEL_O-NEXT:     priv = 0
+; GISEL_O-NEXT:     enable_dx10_clamp = 1
+; GISEL_O-NEXT:     debug_mode = 0
+; GISEL_O-NEXT:     enable_ieee_mode = 1
+; GISEL_O-NEXT:     enable_wgp_mode = 0
+; GISEL_O-NEXT:     enable_mem_ordered = 0
+; GISEL_O-NEXT:     enable_fwd_progress = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL_O-NEXT:     user_sgpr_count = 14
+; GISEL_O-NEXT:     enable_trap_handler = 0
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_O-NEXT:     enable_vgpr_workitem_id = 2
+; GISEL_O-NEXT:     enable_exception_msb = 0
+; GISEL_O-NEXT:     granulated_lds_size = 0
+; GISEL_O-NEXT:     enable_exception = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GISEL_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GISEL_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_O-NEXT:     enable_wavefront_size32 = 0
+; GISEL_O-NEXT:     enable_ordered_append_gds = 0
+; GISEL_O-NEXT:     private_element_size = 1
+; GISEL_O-NEXT:     is_ptr64 = 1
+; GISEL_O-NEXT:     is_dynamic_callstack = 1
+; GISEL_O-NEXT:     is_debug_enabled = 0
+; GISEL_O-NEXT:     is_xnack_enabled = 0
+; GISEL_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GISEL_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_O-NEXT:     gds_segment_byte_size = 0
+; GISEL_O-NEXT:     kernarg_segment_byte_size = 64
+; GISEL_O-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_O-NEXT:     wavefront_sgpr_count = 68
+; GISEL_O-NEXT:     workitem_vgpr_count = 42
+; GISEL_O-NEXT:     reserved_vgpr_first = 0
+; GISEL_O-NEXT:     reserved_vgpr_count = 0
+; GISEL_O-NEXT:     reserved_sgpr_first = 0
+; GISEL_O-NEXT:     reserved_sgpr_count = 0
+; GISEL_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_O-NEXT:     kernarg_segment_alignment = 4
+; GISEL_O-NEXT:     group_segment_alignment = 4
+; GISEL_O-NEXT:     private_segment_alignment = 4
+; GISEL_O-NEXT:     wavefront_size = 6
+; GISEL_O-NEXT:     call_convention = -1
+; GISEL_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_O-NEXT:    .end_amd_kernel_code_t
+; GISEL_O-NEXT:  ; %bb.0:
+; GISEL_O-NEXT:    s_mov_b32 s32, 0
+; GISEL_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL_O-NEXT:    s_add_i32 s12, s12, s17
+; GISEL_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL_O-NEXT:    s_add_u32 s0, s0, s17
+; GISEL_O-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL_O-NEXT:    s_mov_b32 s13, s15
+; GISEL_O-NEXT:    s_mov_b32 s12, s14
+; GISEL_O-NEXT:    s_getpc_b64 s[14:15]
+; GISEL_O-NEXT:    s_add_u32 s14, s14, gv.fptr0@rel32@lo+4
+; GISEL_O-NEXT:    s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL_O-NEXT:    s_add_u32 s8, s8, 8
+; GISEL_O-NEXT:    s_addc_u32 s9, s9, 0
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
+; GISEL_O-NEXT:    v_or_b32_e32 v31, v0, v1
+; GISEL_O-NEXT:    s_mov_b32 s14, s16
+; GISEL_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL_O-NEXT:    s_endpgm
+;
+; GISEL_C-LABEL: test_indirect_call_sgpr_ptr:
+; GISEL_C:         .amd_kernel_code_t
+; GISEL_C-NEXT:     amd_code_version_major = 1
+; GISEL_C-NEXT:     amd_code_version_minor = 2
+; GISEL_C-NEXT:     amd_machine_kind = 1
+; GISEL_C-NEXT:     amd_machine_version_major = 7
+; GISEL_C-NEXT:     amd_machine_version_minor = 0
+; GISEL_C-NEXT:     amd_machine_version_stepping = 0
+; GISEL_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GISEL_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     priority = 0
+; GISEL_C-NEXT:     float_mode = 240
+; GISEL_C-NEXT:     priv = 0
+; GISEL_C-NEXT:     enable_dx10_clamp = 1
+; GISEL_C-NEXT:     debug_mode = 0
+; GISEL_C-NEXT:     enable_ieee_mode = 1
+; GISEL_C-NEXT:     enable_wgp_mode = 0
+; GISEL_C-NEXT:     enable_mem_ordered = 0
+; GISEL_C-NEXT:     enable_fwd_progress = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GISEL_C-NEXT:     user_sgpr_count = 6
+; GISEL_C-NEXT:     enable_trap_handler = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_C-NEXT:     enable_vgpr_workitem_id = 0
+; GISEL_C-NEXT:     enable_exception_msb = 0
+; GISEL_C-NEXT:     granulated_lds_size = 0
+; GISEL_C-NEXT:     enable_exception = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GISEL_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_C-NEXT:     enable_wavefront_size32 = 0
+; GISEL_C-NEXT:     enable_ordered_append_gds = 0
+; GISEL_C-NEXT:     private_element_size = 1
+; GISEL_C-NEXT:     is_ptr64 = 1
+; GISEL_C-NEXT:     is_dynamic_callstack = 0
+; GISEL_C-NEXT:     is_debug_enabled = 0
+; GISEL_C-NEXT:     is_xnack_enabled = 0
+; GISEL_C-NEXT:     workitem_private_segment_byte_size = 0
+; GISEL_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_C-NEXT:     gds_segment_byte_size = 0
+; GISEL_C-NEXT:     kernarg_segment_byte_size = 4
+; GISEL_C-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_C-NEXT:     wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     workitem_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_vgpr_first = 0
+; GISEL_C-NEXT:     reserved_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_sgpr_first = 0
+; GISEL_C-NEXT:     reserved_sgpr_count = 0
+; GISEL_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_C-NEXT:     kernarg_segment_alignment = 4
+; GISEL_C-NEXT:     group_segment_alignment = 4
+; GISEL_C-NEXT:     private_segment_alignment = 4
+; GISEL_C-NEXT:     wavefront_size = 6
+; GISEL_C-NEXT:     call_convention = -1
+; GISEL_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_C-NEXT:    .end_amd_kernel_code_t
+; GISEL_C-NEXT:  ; %bb.0:
   %fptr = load ptr, ptr addrspace(4) @gv.fptr0
   call void %fptr()
   ret void
 }
 
 define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) {
-; GCN-LABEL: test_indirect_call_sgpr_ptr_arg:
-; GCN:         .amd_kernel_code_t
-; GCN-NEXT:     amd_code_version_major = 1
-; GCN-NEXT:     amd_code_version_minor = 2
-; GCN-NEXT:     amd_machine_kind = 1
-; GCN-NEXT:     amd_machine_version_major = 7
-; GCN-NEXT:     amd_machine_version_minor = 0
-; GCN-NEXT:     amd_machine_version_stepping = 0
-; GCN-NEXT:     kernel_code_entry_byte_offset = 256
-; GCN-NEXT:     kernel_code_prefetch_byte_size = 0
-; GCN-NEXT:     granulated_workitem_vgpr_count = 10
-; GCN-NEXT:     granulated_wavefront_sgpr_count = 8
-; GCN-NEXT:     priority = 0
-; GCN-NEXT:     float_mode = 240
-; GCN-NEXT:     priv = 0
-; GCN-NEXT:     enable_dx10_clamp = 1
-; GCN-NEXT:     debug_mode = 0
-; GCN-NEXT:     enable_ieee_mode = 1
-; GCN-NEXT:     enable_wgp_mode = 0
-; GCN-NEXT:     enable_mem_ordered = 0
-; GCN-NEXT:     enable_fwd_progress = 0
-; GCN-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GCN-NEXT:     user_sgpr_count = 14
-; GCN-NEXT:     enable_trap_handler = 0
-; GCN-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GCN-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GCN-NEXT:     enable_sgpr_workgroup_info = 0
-; GCN-NEXT:     enable_vgpr_workitem_id = 2
-; GCN-NEXT:     enable_exception_msb = 0
-; GCN-NEXT:     granulated_lds_size = 0
-; GCN-NEXT:     enable_exception = 0
-; GCN-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GCN-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GCN-NEXT:     enable_sgpr_queue_ptr = 1
-; GCN-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GCN-NEXT:     enable_sgpr_dispatch_id = 1
-; GCN-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GCN-NEXT:     enable_sgpr_private_segment_size = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GCN-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GCN-NEXT:     enable_wavefront_size32 = 0
-; GCN-NEXT:     enable_ordered_append_gds = 0
-; GCN-NEXT:     private_element_size = 1
-; GCN-NEXT:     is_ptr64 = 1
-; GCN-NEXT:     is_dynamic_callstack = 1
-; GCN-NEXT:     is_debug_enabled = 0
-; GCN-NEXT:     is_xnack_enabled = 0
-; GCN-NEXT:     workitem_private_segment_byte_size = 16384
-; GCN-NEXT:     workgroup_group_segment_byte_size = 0
-; GCN-NEXT:     gds_segment_byte_size = 0
-; GCN-NEXT:     kernarg_segment_byte_size = 64
-; GCN-NEXT:     workgroup_fbarrier_count = 0
-; GCN-NEXT:     wavefront_sgpr_count = 68
-; GCN-NEXT:     workitem_vgpr_count = 42
-; GCN-NEXT:     reserved_vgpr_first = 0
-; GCN-NEXT:     reserved_vgpr_count = 0
-; GCN-NEXT:     reserved_sgpr_first = 0
-; GCN-NEXT:     reserved_sgpr_count = 0
-; GCN-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GCN-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GCN-NEXT:     kernarg_segment_alignment = 4
-; GCN-NEXT:     group_segment_alignment = 4
-; GCN-NEXT:     private_segment_alignment = 4
-; GCN-NEXT:     wavefront_size = 6
-; GCN-NEXT:     call_convention = -1
-; GCN-NEXT:     runtime_loader_kernel_symbol = 0
-; GCN-NEXT:    .end_amd_kernel_code_t
-; GCN-NEXT:  ; %bb.0:
-; GCN-NEXT:    s_mov_b32 s32, 0
-; GCN-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GCN-NEXT:    s_add_i32 s12, s12, s17
-; GCN-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GCN-NEXT:    s_add_u32 s0, s0, s17
-; GCN-NEXT:    s_addc_u32 s1, s1, 0
-; GCN-NEXT:    s_mov_b32 s13, s15
-; GCN-NEXT:    s_mov_b32 s12, s14
-; GCN-NEXT:    s_getpc_b64 s[14:15]
-; GCN-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GCN-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GCN-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GCN-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GCN-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GCN-NEXT:    s_add_u32 s8, s8, 8
-; GCN-NEXT:    s_addc_u32 s9, s9, 0
-; GCN-NEXT:    v_or_b32_e32 v0, v0, v1
-; GCN-NEXT:    v_or_b32_e32 v31, v0, v2
-; GCN-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GCN-NEXT:    s_mov_b32 s14, s16
-; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GCN-NEXT:    s_endpgm
+; GCN_O-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GCN_O:         .amd_kernel_code_t
+; GCN_O-NEXT:     amd_code_version_major = 1
+; GCN_O-NEXT:     amd_code_version_minor = 2
+; GCN_O-NEXT:     amd_machine_kind = 1
+; GCN_O-NEXT:     amd_machine_version_major = 7
+; GCN_O-NEXT:     amd_machine_version_minor = 0
+; GCN_O-NEXT:     amd_machine_version_stepping = 0
+; GCN_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GCN_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GCN_O-NEXT:     priority = 0
+; GCN_O-NEXT:     float_mode = 240
+; GCN_O-NEXT:     priv = 0
+; GCN_O-NEXT:     enable_dx10_clamp = 1
+; GCN_O-NEXT:     debug_mode = 0
+; GCN_O-NEXT:     enable_ieee_mode = 1
+; GCN_O-NEXT:     enable_wgp_mode = 0
+; GCN_O-NEXT:     enable_mem_ordered = 0
+; GCN_O-NEXT:     enable_fwd_progress = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GCN_O-NEXT:     user_sgpr_count = 14
+; GCN_O-NEXT:     enable_trap_handler = 0
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GCN_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_O-NEXT:     enable_vgpr_workitem_id = 2
+; GCN_O-NEXT:     enable_exception_msb = 0
+; GCN_O-NEXT:     granulated_lds_size = 0
+; GCN_O-NEXT:     enable_exception = 0
+; GCN_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GCN_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GCN_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_O-NEXT:     enable_wavefront_size32 = 0
+; GCN_O-NEXT:     enable_ordered_append_gds = 0
+; GCN_O-NEXT:     private_element_size = 1
+; GCN_O-NEXT:     is_ptr64 = 1
+; GCN_O-NEXT:     is_dynamic_callstack = 1
+; GCN_O-NEXT:     is_debug_enabled = 0
+; GCN_O-NEXT:     is_xnack_enabled = 0
+; GCN_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GCN_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_O-NEXT:     gds_segment_byte_size = 0
+; GCN_O-NEXT:     kernarg_segment_byte_size = 64
+; GCN_O-NEXT:     workgroup_fbarrier_count = 0
+; GCN_O-NEXT:     wavefront_sgpr_count = 68
+; GCN_O-NEXT:     workitem_vgpr_count = 42
+; GCN_O-NEXT:     reserved_vgpr_first = 0
+; GCN_O-NEXT:     reserved_vgpr_count = 0
+; GCN_O-NEXT:     reserved_sgpr_first = 0
+; GCN_O-NEXT:     reserved_sgpr_count = 0
+; GCN_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_O-NEXT:     kernarg_segment_alignment = 4
+; GCN_O-NEXT:     group_segment_alignment = 4
+; GCN_O-NEXT:     private_segment_alignment = 4
+; GCN_O-NEXT:     wavefront_size = 6
+; GCN_O-NEXT:     call_convention = -1
+; GCN_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_O-NEXT:    .end_amd_kernel_code_t
+; GCN_O-NEXT:  ; %bb.0:
+; GCN_O-NEXT:    s_mov_b32 s32, 0
+; GCN_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GCN_O-NEXT:    s_add_i32 s12, s12, s17
+; GCN_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GCN_O-NEXT:    s_add_u32 s0, s0, s17
+; GCN_O-NEXT:    s_addc_u32 s1, s1, 0
+; GCN_O-NEXT:    s_mov_b32 s13, s15
+; GCN_O-NEXT:    s_mov_b32 s12, s14
+; GCN_O-NEXT:    s_getpc_b64 s[14:15]
+; GCN_O-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
+; GCN_O-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GCN_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GCN_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GCN_O-NEXT:    s_add_u32 s8, s8, 8
+; GCN_O-NEXT:    s_addc_u32 s9, s9, 0
+; GCN_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GCN_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GCN_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GCN_O-NEXT:    s_mov_b32 s14, s16
+; GCN_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GCN_O-NEXT:    s_endpgm
+;
+; GCN_C-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GCN_C:         .amd_kernel_code_t
+; GCN_C-NEXT:     amd_code_version_major = 1
+; GCN_C-NEXT:     amd_code_version_minor = 2
+; GCN_C-NEXT:     amd_machine_kind = 1
+; GCN_C-NEXT:     amd_machine_version_major = 7
+; GCN_C-NEXT:     amd_machine_version_minor = 0
+; GCN_C-NEXT:     amd_machine_version_stepping = 0
+; GCN_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GCN_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GCN_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GCN_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GCN_C-NEXT:     priority = 0
+; GCN_C-NEXT:     float_mode = 240
+; GCN_C-NEXT:     priv = 0
+; GCN_C-NEXT:     enable_dx10_clamp = 1
+; GCN_C-NEXT:     debug_mode = 0
+; GCN_C-NEXT:     enable_ieee_mode = 1
+; GCN_C-NEXT:     enable_wgp_mode = 0
+; GCN_C-NEXT:     enable_mem_ordered = 0
+; GCN_C-NEXT:     enable_fwd_progress = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GCN_C-NEXT:     user_sgpr_count = 6
+; GCN_C-NEXT:     enable_trap_handler = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GCN_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GCN_C-NEXT:     enable_vgpr_workitem_id = 0
+; GCN_C-NEXT:     enable_exception_msb = 0
+; GCN_C-NEXT:     granulated_lds_size = 0
+; GCN_C-NEXT:     enable_exception = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GCN_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GCN_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GCN_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GCN_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GCN_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GCN_C-NEXT:     enable_wavefront_size32 = 0
+; GCN_C-NEXT:     enable_ordered_append_gds = 0
+; GCN_C-NEXT:     private_element_size = 1
+; GCN_C-NEXT:     is_ptr64 = 1
+; GCN_C-NEXT:     is_dynamic_callstack = 0
+; GCN_C-NEXT:     is_debug_enabled = 0
+; GCN_C-NEXT:     is_xnack_enabled = 0
+; GCN_C-NEXT:     workitem_private_segment_byte_size = 0
+; GCN_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GCN_C-NEXT:     gds_segment_byte_size = 0
+; GCN_C-NEXT:     kernarg_segment_byte_size = 4
+; GCN_C-NEXT:     workgroup_fbarrier_count = 0
+; GCN_C-NEXT:     wavefront_sgpr_count = 0
+; GCN_C-NEXT:     workitem_vgpr_count = 0
+; GCN_C-NEXT:     reserved_vgpr_first = 0
+; GCN_C-NEXT:     reserved_vgpr_count = 0
+; GCN_C-NEXT:     reserved_sgpr_first = 0
+; GCN_C-NEXT:     reserved_sgpr_count = 0
+; GCN_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GCN_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GCN_C-NEXT:     kernarg_segment_alignment = 4
+; GCN_C-NEXT:     group_segment_alignment = 4
+; GCN_C-NEXT:     private_segment_alignment = 4
+; GCN_C-NEXT:     wavefront_size = 6
+; GCN_C-NEXT:     call_convention = -1
+; GCN_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GCN_C-NEXT:    .end_amd_kernel_code_t
+; GCN_C-NEXT:  ; %bb.0:
 ;
-; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg:
-; GISEL:         .amd_kernel_code_t
-; GISEL-NEXT:     amd_code_version_major = 1
-; GISEL-NEXT:     amd_code_version_minor = 2
-; GISEL-NEXT:     amd_machine_kind = 1
-; GISEL-NEXT:     amd_machine_version_major = 7
-; GISEL-NEXT:     amd_machine_version_minor = 0
-; GISEL-NEXT:     amd_machine_version_stepping = 0
-; GISEL-NEXT:     kernel_code_entry_byte_offset = 256
-; GISEL-NEXT:     kernel_code_prefetch_byte_size = 0
-; GISEL-NEXT:     granulated_workitem_vgpr_count = 10
-; GISEL-NEXT:     granulated_wavefront_sgpr_count = 8
-; GISEL-NEXT:     priority = 0
-; GISEL-NEXT:     float_mode = 240
-; GISEL-NEXT:     priv = 0
-; GISEL-NEXT:     enable_dx10_clamp = 1
-; GISEL-NEXT:     debug_mode = 0
-; GISEL-NEXT:     enable_ieee_mode = 1
-; GISEL-NEXT:     enable_wgp_mode = 0
-; GISEL-NEXT:     enable_mem_ordered = 0
-; GISEL-NEXT:     enable_fwd_progress = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
-; GISEL-NEXT:     user_sgpr_count = 14
-; GISEL-NEXT:     enable_trap_handler = 0
-; GISEL-NEXT:     enable_sgpr_workgroup_id_x = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_y = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_id_z = 1
-; GISEL-NEXT:     enable_sgpr_workgroup_info = 0
-; GISEL-NEXT:     enable_vgpr_workitem_id = 2
-; GISEL-NEXT:     enable_exception_msb = 0
-; GISEL-NEXT:     granulated_lds_size = 0
-; GISEL-NEXT:     enable_exception = 0
-; GISEL-NEXT:     enable_sgpr_private_segment_buffer = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_ptr = 1
-; GISEL-NEXT:     enable_sgpr_queue_ptr = 1
-; GISEL-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
-; GISEL-NEXT:     enable_sgpr_dispatch_id = 1
-; GISEL-NEXT:     enable_sgpr_flat_scratch_init = 1
-; GISEL-NEXT:     enable_sgpr_private_segment_size = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
-; GISEL-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
-; GISEL-NEXT:     enable_wavefront_size32 = 0
-; GISEL-NEXT:     enable_ordered_append_gds = 0
-; GISEL-NEXT:     private_element_size = 1
-; GISEL-NEXT:     is_ptr64 = 1
-; GISEL-NEXT:     is_dynamic_callstack = 1
-; GISEL-NEXT:     is_debug_enabled = 0
-; GISEL-NEXT:     is_xnack_enabled = 0
-; GISEL-NEXT:     workitem_private_segment_byte_size = 16384
-; GISEL-NEXT:     workgroup_group_segment_byte_size = 0
-; GISEL-NEXT:     gds_segment_byte_size = 0
-; GISEL-NEXT:     kernarg_segment_byte_size = 64
-; GISEL-NEXT:     workgroup_fbarrier_count = 0
-; GISEL-NEXT:     wavefront_sgpr_count = 68
-; GISEL-NEXT:     workitem_vgpr_count = 42
-; GISEL-NEXT:     reserved_vgpr_first = 0
-; GISEL-NEXT:     reserved_vgpr_count = 0
-; GISEL-NEXT:     reserved_sgpr_first = 0
-; GISEL-NEXT:     reserved_sgpr_count = 0
-; GISEL-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
-; GISEL-NEXT:     debug_private_segment_buffer_sgpr = 0
-; GISEL-NEXT:     kernarg_segment_alignment = 4
-; GISEL-NEXT:     group_segment_alignment = 4
-; GISEL-NEXT:     private_segment_alignment = 4
-; GISEL-NEXT:     wavefront_size = 6
-; GISEL-NEXT:     call_convention = -1
-; GISEL-NEXT:     runtime_loader_kernel_symbol = 0
-; GISEL-NEXT:    .end_amd_kernel_code_t
-; GISEL-NEXT:  ; %bb.0:
-; GISEL-NEXT:    s_mov_b32 s32, 0
-; GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s13
-; GISEL-NEXT:    s_add_i32 s12, s12, s17
-; GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
-; GISEL-NEXT:    s_add_u32 s0, s0, s17
-; GISEL-NEXT:    s_addc_u32 s1, s1, 0
-; GISEL-NEXT:    s_mov_b32 s13, s15
-; GISEL-NEXT:    s_mov_b32 s12, s14
-; GISEL-NEXT:    s_getpc_b64 s[14:15]
-; GISEL-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
-; GISEL-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
-; GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
-; GISEL-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
-; GISEL-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-NEXT:    s_add_u32 s8, s8, 8
-; GISEL-NEXT:    s_addc_u32 s9, s9, 0
-; GISEL-NEXT:    v_or_b32_e32 v31, v0, v2
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT:    s_mov_b32 s14, s16
-; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[18:19]
-; GISEL-NEXT:    s_endpgm
+; GISEL_O-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GISEL_O:         .amd_kernel_code_t
+; GISEL_O-NEXT:     amd_code_version_major = 1
+; GISEL_O-NEXT:     amd_code_version_minor = 2
+; GISEL_O-NEXT:     amd_machine_kind = 1
+; GISEL_O-NEXT:     amd_machine_version_major = 7
+; GISEL_O-NEXT:     amd_machine_version_minor = 0
+; GISEL_O-NEXT:     amd_machine_version_stepping = 0
+; GISEL_O-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_O-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_O-NEXT:     granulated_workitem_vgpr_count = 10
+; GISEL_O-NEXT:     granulated_wavefront_sgpr_count = 8
+; GISEL_O-NEXT:     priority = 0
+; GISEL_O-NEXT:     float_mode = 240
+; GISEL_O-NEXT:     priv = 0
+; GISEL_O-NEXT:     enable_dx10_clamp = 1
+; GISEL_O-NEXT:     debug_mode = 0
+; GISEL_O-NEXT:     enable_ieee_mode = 1
+; GISEL_O-NEXT:     enable_wgp_mode = 0
+; GISEL_O-NEXT:     enable_mem_ordered = 0
+; GISEL_O-NEXT:     enable_fwd_progress = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 1
+; GISEL_O-NEXT:     user_sgpr_count = 14
+; GISEL_O-NEXT:     enable_trap_handler = 0
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_y = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_id_z = 1
+; GISEL_O-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_O-NEXT:     enable_vgpr_workitem_id = 2
+; GISEL_O-NEXT:     enable_exception_msb = 0
+; GISEL_O-NEXT:     granulated_lds_size = 0
+; GISEL_O-NEXT:     enable_exception = 0
+; GISEL_O-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_queue_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_O-NEXT:     enable_sgpr_dispatch_id = 1
+; GISEL_O-NEXT:     enable_sgpr_flat_scratch_init = 1
+; GISEL_O-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_O-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_O-NEXT:     enable_wavefront_size32 = 0
+; GISEL_O-NEXT:     enable_ordered_append_gds = 0
+; GISEL_O-NEXT:     private_element_size = 1
+; GISEL_O-NEXT:     is_ptr64 = 1
+; GISEL_O-NEXT:     is_dynamic_callstack = 1
+; GISEL_O-NEXT:     is_debug_enabled = 0
+; GISEL_O-NEXT:     is_xnack_enabled = 0
+; GISEL_O-NEXT:     workitem_private_segment_byte_size = 16384
+; GISEL_O-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_O-NEXT:     gds_segment_byte_size = 0
+; GISEL_O-NEXT:     kernarg_segment_byte_size = 64
+; GISEL_O-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_O-NEXT:     wavefront_sgpr_count = 68
+; GISEL_O-NEXT:     workitem_vgpr_count = 42
+; GISEL_O-NEXT:     reserved_vgpr_first = 0
+; GISEL_O-NEXT:     reserved_vgpr_count = 0
+; GISEL_O-NEXT:     reserved_sgpr_first = 0
+; GISEL_O-NEXT:     reserved_sgpr_count = 0
+; GISEL_O-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_O-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_O-NEXT:     kernarg_segment_alignment = 4
+; GISEL_O-NEXT:     group_segment_alignment = 4
+; GISEL_O-NEXT:     private_segment_alignment = 4
+; GISEL_O-NEXT:     wavefront_size = 6
+; GISEL_O-NEXT:     call_convention = -1
+; GISEL_O-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_O-NEXT:    .end_amd_kernel_code_t
+; GISEL_O-NEXT:  ; %bb.0:
+; GISEL_O-NEXT:    s_mov_b32 s32, 0
+; GISEL_O-NEXT:    s_mov_b32 flat_scratch_lo, s13
+; GISEL_O-NEXT:    s_add_i32 s12, s12, s17
+; GISEL_O-NEXT:    s_lshr_b32 flat_scratch_hi, s12, 8
+; GISEL_O-NEXT:    s_add_u32 s0, s0, s17
+; GISEL_O-NEXT:    s_addc_u32 s1, s1, 0
+; GISEL_O-NEXT:    s_mov_b32 s13, s15
+; GISEL_O-NEXT:    s_mov_b32 s12, s14
+; GISEL_O-NEXT:    s_getpc_b64 s[14:15]
+; GISEL_O-NEXT:    s_add_u32 s14, s14, gv.fptr1@rel32@lo+4
+; GISEL_O-NEXT:    s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
+; GISEL_O-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
+; GISEL_O-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
+; GISEL_O-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL_O-NEXT:    s_add_u32 s8, s8, 8
+; GISEL_O-NEXT:    s_addc_u32 s9, s9, 0
+; GISEL_O-NEXT:    v_or_b32_e32 v31, v0, v2
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL_O-NEXT:    s_mov_b32 s14, s16
+; GISEL_O-NEXT:    s_waitcnt lgkmcnt(0)
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[18:19]
+; GISEL_O-NEXT:    s_endpgm
+;
+; GISEL_C-LABEL: test_indirect_call_sgpr_ptr_arg:
+; GISEL_C:         .amd_kernel_code_t
+; GISEL_C-NEXT:     amd_code_version_major = 1
+; GISEL_C-NEXT:     amd_code_version_minor = 2
+; GISEL_C-NEXT:     amd_machine_kind = 1
+; GISEL_C-NEXT:     amd_machine_version_major = 7
+; GISEL_C-NEXT:     amd_machine_version_minor = 0
+; GISEL_C-NEXT:     amd_machine_version_stepping = 0
+; GISEL_C-NEXT:     kernel_code_entry_byte_offset = 256
+; GISEL_C-NEXT:     kernel_code_prefetch_byte_size = 0
+; GISEL_C-NEXT:     granulated_workitem_vgpr_count = 0
+; GISEL_C-NEXT:     granulated_wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     priority = 0
+; GISEL_C-NEXT:     float_mode = 240
+; GISEL_C-NEXT:     priv = 0
+; GISEL_C-NEXT:     enable_dx10_clamp = 1
+; GISEL_C-NEXT:     debug_mode = 0
+; GISEL_C-NEXT:     enable_ieee_mode = 1
+; GISEL_C-NEXT:     enable_wgp_mode = 0
+; GISEL_C-NEXT:     enable_mem_ordered = 0
+; GISEL_C-NEXT:     enable_fwd_progress = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_wave_byte_offset = 0
+; GISEL_C-NEXT:     user_sgpr_count = 6
+; GISEL_C-NEXT:     enable_trap_handler = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_x = 1
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_y = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_id_z = 0
+; GISEL_C-NEXT:     enable_sgpr_workgroup_info = 0
+; GISEL_C-NEXT:     enable_vgpr_workitem_id = 0
+; GISEL_C-NEXT:     enable_exception_msb = 0
+; GISEL_C-NEXT:     granulated_lds_size = 0
+; GISEL_C-NEXT:     enable_exception = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_buffer = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_queue_ptr = 0
+; GISEL_C-NEXT:     enable_sgpr_kernarg_segment_ptr = 1
+; GISEL_C-NEXT:     enable_sgpr_dispatch_id = 0
+; GISEL_C-NEXT:     enable_sgpr_flat_scratch_init = 0
+; GISEL_C-NEXT:     enable_sgpr_private_segment_size = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_x = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_y = 0
+; GISEL_C-NEXT:     enable_sgpr_grid_workgroup_count_z = 0
+; GISEL_C-NEXT:     enable_wavefront_size32 = 0
+; GISEL_C-NEXT:     enable_ordered_append_gds = 0
+; GISEL_C-NEXT:     private_element_size = 1
+; GISEL_C-NEXT:     is_ptr64 = 1
+; GISEL_C-NEXT:     is_dynamic_callstack = 0
+; GISEL_C-NEXT:     is_debug_enabled = 0
+; GISEL_C-NEXT:     is_xnack_enabled = 0
+; GISEL_C-NEXT:     workitem_private_segment_byte_size = 0
+; GISEL_C-NEXT:     workgroup_group_segment_byte_size = 0
+; GISEL_C-NEXT:     gds_segment_byte_size = 0
+; GISEL_C-NEXT:     kernarg_segment_byte_size = 4
+; GISEL_C-NEXT:     workgroup_fbarrier_count = 0
+; GISEL_C-NEXT:     wavefront_sgpr_count = 0
+; GISEL_C-NEXT:     workitem_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_vgpr_first = 0
+; GISEL_C-NEXT:     reserved_vgpr_count = 0
+; GISEL_C-NEXT:     reserved_sgpr_first = 0
+; GISEL_C-NEXT:     reserved_sgpr_count = 0
+; GISEL_C-NEXT:     debug_wavefront_private_segment_offset_sgpr = 0
+; GISEL_C-NEXT:     debug_private_segment_buffer_sgpr = 0
+; GISEL_C-NEXT:     kernarg_segment_alignment = 4
+; GISEL_C-NEXT:     group_segment_alignment = 4
+; GISEL_C-NEXT:     private_segment_alignment = 4
+; GISEL_C-NEXT:     wavefront_size = 6
+; GISEL_C-NEXT:     call_convention = -1
+; GISEL_C-NEXT:     runtime_loader_kernel_symbol = 0
+; GISEL_C-NEXT:    .end_amd_kernel_code_t
+; GISEL_C-NEXT:  ; %bb.0:
   %fptr = load ptr, ptr addrspace(4) @gv.fptr1
   call void %fptr(i32 123)
   ret void
 }
 
 define void @test_indirect_call_vgpr_ptr(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB2_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB2_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB2_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB2_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB2_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call void %fptr()
   ret void
 }
 
 define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:    v_mov_b32_e32 v2, 0x7b
-; GCN-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    v_mov_b32_e32 v0, v2
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    ; implicit-def: $vgpr2
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB3_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:    v_mov_b32_e32 v2, 0x7b
+; GCN_O-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v2
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    ; implicit-def: $vgpr2
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB3_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB3_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB3_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call void %fptr(i32 123)
   ret void
 }
 
 define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_ret:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 18
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    s_mov_b64 s[46:47], exec
-; GCN-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    v_mov_b32_e32 v2, v0
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GCN-NEXT:    s_cbranch_execnz .LBB4_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[46:47]
-; GCN-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 18
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    s_mov_b64 s[46:47], exec
+; GCN_O-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    v_mov_b32_e32 v2, v0
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB4_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GCN_O-NEXT:    v_add_i32_e32 v0, vcc, 1, v2
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 18
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    s_mov_b64 s[46:47], exec
+; GISEL_O-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[48:49], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    v_mov_b32_e32 v1, v0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[48:49]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB4_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[46:47]
+; GISEL_O-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 18
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 18
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    s_mov_b64 s[46:47], exec
-; GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[48:49], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    v_mov_b32_e32 v1, v0
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[48:49]
-; GISEL-NEXT:    s_cbranch_execnz .LBB4_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[46:47]
-; GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v1
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 18
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_ret:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   %a = call i32 %fptr()
   %b = add i32 %a, 1
   ret i32 %b
 }
 
 define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch:
-; GCN:       ; %bb.0: ; %bb0
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s16, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[18:19]
-; GCN-NEXT:    v_writelane_b32 v40, s16, 20
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    s_mov_b32 s42, s15
-; GCN-NEXT:    s_mov_b32 s43, s14
-; GCN-NEXT:    s_mov_b32 s44, s13
-; GCN-NEXT:    s_mov_b32 s45, s12
-; GCN-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GCN-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GCN-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GCN-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
-; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
-; GCN-NEXT:    s_and_saveexec_b64 s[46:47], vcc
-; GCN-NEXT:    s_cbranch_execz .LBB5_4
-; GCN-NEXT:  ; %bb.1: ; %bb1
-; GCN-NEXT:    s_mov_b64 s[48:49], exec
-; GCN-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s16, v0
-; GCN-NEXT:    v_readfirstlane_b32 s17, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[50:51], vcc
-; GCN-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GCN-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GCN-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GCN-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GCN-NEXT:    s_mov_b32 s12, s45
-; GCN-NEXT:    s_mov_b32 s13, s44
-; GCN-NEXT:    s_mov_b32 s14, s43
-; GCN-NEXT:    s_mov_b32 s15, s42
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    ; implicit-def: $vgpr31
-; GCN-NEXT:    s_xor_b64 exec, exec, s[50:51]
-; GCN-NEXT:    s_cbranch_execnz .LBB5_2
-; GCN-NEXT:  ; %bb.3:
-; GCN-NEXT:    s_mov_b64 exec, s[48:49]
-; GCN-NEXT:  .LBB5_4: ; %bb2
-; GCN-NEXT:    s_or_b64 exec, exec, s[46:47]
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    v_readlane_b32 s4, v40, 20
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s4
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GCN_O:       ; %bb.0: ; %bb0
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s16, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GCN_O-NEXT:    v_writelane_b32 v40, s16, 20
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    s_mov_b32 s42, s15
+; GCN_O-NEXT:    s_mov_b32 s43, s14
+; GCN_O-NEXT:    s_mov_b32 s44, s13
+; GCN_O-NEXT:    s_mov_b32 s45, s12
+; GCN_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GCN_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GCN_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GCN_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GCN_O-NEXT:    v_and_b32_e32 v2, 1, v2
+; GCN_O-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
+; GCN_O-NEXT:    s_and_saveexec_b64 s[46:47], vcc
+; GCN_O-NEXT:    s_cbranch_execz .LBB5_4
+; GCN_O-NEXT:  ; %bb.1: ; %bb1
+; GCN_O-NEXT:    s_mov_b64 s[48:49], exec
+; GCN_O-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[50:51], vcc
+; GCN_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GCN_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GCN_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GCN_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GCN_O-NEXT:    s_mov_b32 s12, s45
+; GCN_O-NEXT:    s_mov_b32 s13, s44
+; GCN_O-NEXT:    s_mov_b32 s14, s43
+; GCN_O-NEXT:    s_mov_b32 s15, s42
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    ; implicit-def: $vgpr31
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[50:51]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB5_2
+; GCN_O-NEXT:  ; %bb.3:
+; GCN_O-NEXT:    s_mov_b64 exec, s[48:49]
+; GCN_O-NEXT:  .LBB5_4: ; %bb2
+; GCN_O-NEXT:    s_or_b64 exec, exec, s[46:47]
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    v_readlane_b32 s4, v40, 20
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s4
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch:
-; GISEL:       ; %bb.0: ; %bb0
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s16, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[18:19], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[18:19]
-; GISEL-NEXT:    v_writelane_b32 v40, s16, 20
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    s_mov_b32 s42, s15
-; GISEL-NEXT:    s_mov_b32 s43, s14
-; GISEL-NEXT:    s_mov_b32 s44, s13
-; GISEL-NEXT:    s_mov_b32 s45, s12
-; GISEL-NEXT:    s_mov_b64 s[34:35], s[10:11]
-; GISEL-NEXT:    s_mov_b64 s[36:37], s[8:9]
-; GISEL-NEXT:    s_mov_b64 s[38:39], s[6:7]
-; GISEL-NEXT:    s_mov_b64 s[40:41], s[4:5]
-; GISEL-NEXT:    v_and_b32_e32 v2, 1, v2
-; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
-; GISEL-NEXT:    s_and_saveexec_b64 s[46:47], vcc
-; GISEL-NEXT:    s_cbranch_execz .LBB5_4
-; GISEL-NEXT:  ; %bb.1: ; %bb1
-; GISEL-NEXT:    s_mov_b64 s[48:49], exec
-; GISEL-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s16, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s17, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[50:51], vcc
-; GISEL-NEXT:    s_mov_b64 s[4:5], s[40:41]
-; GISEL-NEXT:    s_mov_b64 s[6:7], s[38:39]
-; GISEL-NEXT:    s_mov_b64 s[8:9], s[36:37]
-; GISEL-NEXT:    s_mov_b64 s[10:11], s[34:35]
-; GISEL-NEXT:    s_mov_b32 s12, s45
-; GISEL-NEXT:    s_mov_b32 s13, s44
-; GISEL-NEXT:    s_mov_b32 s14, s43
-; GISEL-NEXT:    s_mov_b32 s15, s42
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[16:17]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    ; implicit-def: $vgpr31
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[50:51]
-; GISEL-NEXT:    s_cbranch_execnz .LBB5_2
-; GISEL-NEXT:  ; %bb.3:
-; GISEL-NEXT:    s_mov_b64 exec, s[48:49]
-; GISEL-NEXT:  .LBB5_4: ; %bb2
-; GISEL-NEXT:    s_or_b64 exec, exec, s[46:47]
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    v_readlane_b32 s4, v40, 20
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s4
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GCN_C:       ; %bb.0: ; %bb0
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_C-NEXT:    v_and_b32_e32 v0, 1, v2
+; GCN_C-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; GCN_C-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN_C-NEXT:  ; %bb.1: ; %bb1
+; GCN_C-NEXT:    ; divergent unreachable
+; GCN_C-NEXT:  ; %bb.2: ; %UnifiedReturnBlock
+; GCN_C-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN_C-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GISEL_O:       ; %bb.0: ; %bb0
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s16, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[18:19]
+; GISEL_O-NEXT:    v_writelane_b32 v40, s16, 20
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    s_mov_b32 s42, s15
+; GISEL_O-NEXT:    s_mov_b32 s43, s14
+; GISEL_O-NEXT:    s_mov_b32 s44, s13
+; GISEL_O-NEXT:    s_mov_b32 s45, s12
+; GISEL_O-NEXT:    s_mov_b64 s[34:35], s[10:11]
+; GISEL_O-NEXT:    s_mov_b64 s[36:37], s[8:9]
+; GISEL_O-NEXT:    s_mov_b64 s[38:39], s[6:7]
+; GISEL_O-NEXT:    s_mov_b64 s[40:41], s[4:5]
+; GISEL_O-NEXT:    v_and_b32_e32 v2, 1, v2
+; GISEL_O-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[46:47], vcc
+; GISEL_O-NEXT:    s_cbranch_execz .LBB5_4
+; GISEL_O-NEXT:  ; %bb.1: ; %bb1
+; GISEL_O-NEXT:    s_mov_b64 s[48:49], exec
+; GISEL_O-NEXT:  .LBB5_2: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s16, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s17, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[50:51], vcc
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], s[40:41]
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], s[38:39]
+; GISEL_O-NEXT:    s_mov_b64 s[8:9], s[36:37]
+; GISEL_O-NEXT:    s_mov_b64 s[10:11], s[34:35]
+; GISEL_O-NEXT:    s_mov_b32 s12, s45
+; GISEL_O-NEXT:    s_mov_b32 s13, s44
+; GISEL_O-NEXT:    s_mov_b32 s14, s43
+; GISEL_O-NEXT:    s_mov_b32 s15, s42
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr31
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[50:51]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB5_2
+; GISEL_O-NEXT:  ; %bb.3:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[48:49]
+; GISEL_O-NEXT:  .LBB5_4: ; %bb2
+; GISEL_O-NEXT:    s_or_b64 exec, exec, s[46:47]
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    v_readlane_b32 s4, v40, 20
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s4
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_in_branch:
+; GISEL_C:       ; %bb.0: ; %bb0
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_C-NEXT:    v_and_b32_e32 v0, 1, v2
+; GISEL_C-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL_C-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GISEL_C-NEXT:  ; %bb.1: ; %bb1
+; GISEL_C-NEXT:    ; divergent unreachable
+; GISEL_C-NEXT:  ; %bb.2: ; %UnifiedReturnBlock
+; GISEL_C-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GISEL_C-NEXT:    s_setpc_b64 s[30:31]
 bb0:
   br i1 %cond, label %bb1, label %bb2
 
@@ -1116,393 +1450,409 @@
 }
 
 define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s5, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[6:7], exec
-; GCN-NEXT:    s_movk_i32 s4, 0x7b
-; GCN-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s8, v0
-; GCN-NEXT:    v_readfirstlane_b32 s9, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    s_xor_b64 exec, exec, s[10:11]
-; GCN-NEXT:    s_cbranch_execnz .LBB6_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[6:7]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s5
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s5, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[6:7], exec
+; GCN_O-NEXT:    s_movk_i32 s4, 0x7b
+; GCN_O-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s8, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s9, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[10:11]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB6_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s5
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s5, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[6:7], exec
-; GISEL-NEXT:    s_movk_i32 s4, 0x7b
-; GISEL-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s8, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s9, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[10:11], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[10:11]
-; GISEL-NEXT:    s_cbranch_execnz .LBB6_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[6:7], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[6:7]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s5
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s5, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[6:7], exec
+; GISEL_O-NEXT:    s_movk_i32 s4, 0x7b
+; GISEL_O-NEXT:  .LBB6_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s8, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s9, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[10:11], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[10:11]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB6_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[6:7]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s5
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call amdgpu_gfx void %fptr(i32 inreg 123)
   ret void
 }
 
 define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    v_mov_b32_e32 v41, v0
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v1
-; GCN-NEXT:    v_readfirstlane_b32 s7, v2
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    v_mov_b32_e32 v0, v41
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB7_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, v41
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    v_mov_b32_e32 v41, v0
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v41
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    ; implicit-def: $vgpr1_vgpr2
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB7_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v41
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    v_mov_b32_e32 v41, v0
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s6, v1
-; GISEL-NEXT:    v_readfirstlane_b32 s7, v2
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GISEL-NEXT:    v_mov_b32_e32 v0, v41
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GISEL-NEXT:    ; implicit-def: $vgpr1
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execnz .LBB7_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_mov_b32_e32 v0, v41
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    v_mov_b32_e32 v41, v0
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v41
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr1
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB7_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v41
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   call amdgpu_gfx void %fptr(i32 %i)
   ret i32 %i
 }
@@ -1512,391 +1862,410 @@
 ; allocator is not able to do that because the return value clashes with the liverange of an
 ; IMPLICIT_DEF of the argument.
 define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) {
-; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v1
-; GCN-NEXT:    v_readfirstlane_b32 s7, v2
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    v_mov_b32_e32 v3, v0
-; GCN-NEXT:    ; implicit-def: $vgpr1_vgpr2
-; GCN-NEXT:    ; implicit-def: $vgpr0
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB8_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, v3
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v1
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v2
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    v_mov_b32_e32 v3, v0
+; GCN_O-NEXT:    ; implicit-def: $vgpr1_vgpr2
+; GCN_O-NEXT:    ; implicit-def: $vgpr0
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB8_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_mov_b32_e32 v0, v3
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s8, v1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s9, v2
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[8:9]
+; GISEL_O-NEXT:    v_mov_b32_e32 v2, v0
+; GISEL_O-NEXT:    ; implicit-def: $vgpr1
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[6:7]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB8_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_mov_b32_e32 v0, v2
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s8, v1
-; GISEL-NEXT:    v_readfirstlane_b32 s9, v2
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
-; GISEL-NEXT:    s_and_saveexec_b64 s[6:7], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[8:9]
-; GISEL-NEXT:    v_mov_b32_e32 v2, v0
-; GISEL-NEXT:    ; implicit-def: $vgpr1
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[6:7]
-; GISEL-NEXT:    s_cbranch_execnz .LBB8_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_mov_b32_e32 v0, v2
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   %ret = call amdgpu_gfx i32 %fptr(i32 %i)
   ret i32 %ret
 }
 
 ; Calling a vgpr can never be a tail call.
 define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) {
-; GCN-LABEL: test_indirect_tail_call_vgpr_ptr:
-; GCN:       ; %bb.0:
-; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s10, s33
-; GCN-NEXT:    s_mov_b32 s33, s32
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0x400
-; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    v_writelane_b32 v40, s34, 2
-; GCN-NEXT:    v_writelane_b32 v40, s35, 3
-; GCN-NEXT:    v_writelane_b32 v40, s36, 4
-; GCN-NEXT:    v_writelane_b32 v40, s37, 5
-; GCN-NEXT:    v_writelane_b32 v40, s38, 6
-; GCN-NEXT:    v_writelane_b32 v40, s39, 7
-; GCN-NEXT:    v_writelane_b32 v40, s40, 8
-; GCN-NEXT:    v_writelane_b32 v40, s41, 9
-; GCN-NEXT:    v_writelane_b32 v40, s42, 10
-; GCN-NEXT:    v_writelane_b32 v40, s43, 11
-; GCN-NEXT:    v_writelane_b32 v40, s44, 12
-; GCN-NEXT:    v_writelane_b32 v40, s45, 13
-; GCN-NEXT:    v_writelane_b32 v40, s46, 14
-; GCN-NEXT:    v_writelane_b32 v40, s47, 15
-; GCN-NEXT:    v_writelane_b32 v40, s48, 16
-; GCN-NEXT:    v_writelane_b32 v40, s49, 17
-; GCN-NEXT:    v_writelane_b32 v40, s50, 18
-; GCN-NEXT:    v_writelane_b32 v40, s51, 19
-; GCN-NEXT:    v_writelane_b32 v40, s52, 20
-; GCN-NEXT:    v_writelane_b32 v40, s53, 21
-; GCN-NEXT:    v_writelane_b32 v40, s54, 22
-; GCN-NEXT:    v_writelane_b32 v40, s55, 23
-; GCN-NEXT:    v_writelane_b32 v40, s56, 24
-; GCN-NEXT:    v_writelane_b32 v40, s57, 25
-; GCN-NEXT:    v_writelane_b32 v40, s58, 26
-; GCN-NEXT:    v_writelane_b32 v40, s59, 27
-; GCN-NEXT:    v_writelane_b32 v40, s60, 28
-; GCN-NEXT:    v_writelane_b32 v40, s61, 29
-; GCN-NEXT:    v_writelane_b32 v40, s62, 30
-; GCN-NEXT:    v_writelane_b32 v40, s63, 31
-; GCN-NEXT:    s_mov_b64 s[4:5], exec
-; GCN-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GCN-NEXT:    v_readfirstlane_b32 s6, v0
-; GCN-NEXT:    v_readfirstlane_b32 s7, v1
-; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
-; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GCN-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT:    ; implicit-def: $vgpr0_vgpr1
-; GCN-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GCN-NEXT:    s_cbranch_execnz .LBB9_1
-; GCN-NEXT:  ; %bb.2:
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    v_readlane_b32 s63, v40, 31
-; GCN-NEXT:    v_readlane_b32 s62, v40, 30
-; GCN-NEXT:    v_readlane_b32 s61, v40, 29
-; GCN-NEXT:    v_readlane_b32 s60, v40, 28
-; GCN-NEXT:    v_readlane_b32 s59, v40, 27
-; GCN-NEXT:    v_readlane_b32 s58, v40, 26
-; GCN-NEXT:    v_readlane_b32 s57, v40, 25
-; GCN-NEXT:    v_readlane_b32 s56, v40, 24
-; GCN-NEXT:    v_readlane_b32 s55, v40, 23
-; GCN-NEXT:    v_readlane_b32 s54, v40, 22
-; GCN-NEXT:    v_readlane_b32 s53, v40, 21
-; GCN-NEXT:    v_readlane_b32 s52, v40, 20
-; GCN-NEXT:    v_readlane_b32 s51, v40, 19
-; GCN-NEXT:    v_readlane_b32 s50, v40, 18
-; GCN-NEXT:    v_readlane_b32 s49, v40, 17
-; GCN-NEXT:    v_readlane_b32 s48, v40, 16
-; GCN-NEXT:    v_readlane_b32 s47, v40, 15
-; GCN-NEXT:    v_readlane_b32 s46, v40, 14
-; GCN-NEXT:    v_readlane_b32 s45, v40, 13
-; GCN-NEXT:    v_readlane_b32 s44, v40, 12
-; GCN-NEXT:    v_readlane_b32 s43, v40, 11
-; GCN-NEXT:    v_readlane_b32 s42, v40, 10
-; GCN-NEXT:    v_readlane_b32 s41, v40, 9
-; GCN-NEXT:    v_readlane_b32 s40, v40, 8
-; GCN-NEXT:    v_readlane_b32 s39, v40, 7
-; GCN-NEXT:    v_readlane_b32 s38, v40, 6
-; GCN-NEXT:    v_readlane_b32 s37, v40, 5
-; GCN-NEXT:    v_readlane_b32 s36, v40, 4
-; GCN-NEXT:    v_readlane_b32 s35, v40, 3
-; GCN-NEXT:    v_readlane_b32 s34, v40, 2
-; GCN-NEXT:    v_readlane_b32 s31, v40, 1
-; GCN-NEXT:    v_readlane_b32 s30, v40, 0
-; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GCN-NEXT:    s_mov_b64 exec, s[4:5]
-; GCN-NEXT:    s_addk_i32 s32, 0xfc00
-; GCN-NEXT:    s_mov_b32 s33, s10
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_setpc_b64 s[30:31]
+; GCN_O-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GCN_O:       ; %bb.0:
+; GCN_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN_O-NEXT:    s_mov_b32 s10, s33
+; GCN_O-NEXT:    s_mov_b32 s33, s32
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0x400
+; GCN_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GCN_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GCN_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GCN_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GCN_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GCN_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GCN_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GCN_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GCN_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GCN_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GCN_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GCN_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GCN_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GCN_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GCN_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GCN_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GCN_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GCN_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GCN_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GCN_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GCN_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GCN_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GCN_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GCN_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GCN_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GCN_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GCN_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GCN_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GCN_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GCN_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GCN_O-NEXT:    s_mov_b64 s[4:5], exec
+; GCN_O-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
+; GCN_O-NEXT:    v_readfirstlane_b32 s6, v0
+; GCN_O-NEXT:    v_readfirstlane_b32 s7, v1
+; GCN_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
+; GCN_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GCN_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GCN_O-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; GCN_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GCN_O-NEXT:    s_cbranch_execnz .LBB9_1
+; GCN_O-NEXT:  ; %bb.2:
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GCN_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GCN_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GCN_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GCN_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GCN_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GCN_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GCN_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GCN_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GCN_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GCN_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GCN_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GCN_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GCN_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GCN_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GCN_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GCN_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GCN_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GCN_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GCN_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GCN_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GCN_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GCN_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GCN_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GCN_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GCN_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GCN_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GCN_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GCN_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GCN_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GCN_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GCN_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GCN_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GCN_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GCN_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GCN_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GCN_O-NEXT:    s_mov_b32 s33, s10
+; GCN_O-NEXT:    s_waitcnt vmcnt(0)
+; GCN_O-NEXT:    s_setpc_b64 s[30:31]
+;
+; GCN_C-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GCN_C:       ; %bb.0:
+; GCN_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+;
+; GISEL_O-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GISEL_O:       ; %bb.0:
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL_O-NEXT:    s_mov_b32 s10, s33
+; GISEL_O-NEXT:    s_mov_b32 s33, s32
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0x400
+; GISEL_O-NEXT:    v_writelane_b32 v40, s30, 0
+; GISEL_O-NEXT:    v_writelane_b32 v40, s31, 1
+; GISEL_O-NEXT:    v_writelane_b32 v40, s34, 2
+; GISEL_O-NEXT:    v_writelane_b32 v40, s35, 3
+; GISEL_O-NEXT:    v_writelane_b32 v40, s36, 4
+; GISEL_O-NEXT:    v_writelane_b32 v40, s37, 5
+; GISEL_O-NEXT:    v_writelane_b32 v40, s38, 6
+; GISEL_O-NEXT:    v_writelane_b32 v40, s39, 7
+; GISEL_O-NEXT:    v_writelane_b32 v40, s40, 8
+; GISEL_O-NEXT:    v_writelane_b32 v40, s41, 9
+; GISEL_O-NEXT:    v_writelane_b32 v40, s42, 10
+; GISEL_O-NEXT:    v_writelane_b32 v40, s43, 11
+; GISEL_O-NEXT:    v_writelane_b32 v40, s44, 12
+; GISEL_O-NEXT:    v_writelane_b32 v40, s45, 13
+; GISEL_O-NEXT:    v_writelane_b32 v40, s46, 14
+; GISEL_O-NEXT:    v_writelane_b32 v40, s47, 15
+; GISEL_O-NEXT:    v_writelane_b32 v40, s48, 16
+; GISEL_O-NEXT:    v_writelane_b32 v40, s49, 17
+; GISEL_O-NEXT:    v_writelane_b32 v40, s50, 18
+; GISEL_O-NEXT:    v_writelane_b32 v40, s51, 19
+; GISEL_O-NEXT:    v_writelane_b32 v40, s52, 20
+; GISEL_O-NEXT:    v_writelane_b32 v40, s53, 21
+; GISEL_O-NEXT:    v_writelane_b32 v40, s54, 22
+; GISEL_O-NEXT:    v_writelane_b32 v40, s55, 23
+; GISEL_O-NEXT:    v_writelane_b32 v40, s56, 24
+; GISEL_O-NEXT:    v_writelane_b32 v40, s57, 25
+; GISEL_O-NEXT:    v_writelane_b32 v40, s58, 26
+; GISEL_O-NEXT:    v_writelane_b32 v40, s59, 27
+; GISEL_O-NEXT:    v_writelane_b32 v40, s60, 28
+; GISEL_O-NEXT:    v_writelane_b32 v40, s61, 29
+; GISEL_O-NEXT:    v_writelane_b32 v40, s62, 30
+; GISEL_O-NEXT:    v_writelane_b32 v40, s63, 31
+; GISEL_O-NEXT:    s_mov_b64 s[4:5], exec
+; GISEL_O-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
+; GISEL_O-NEXT:    v_readfirstlane_b32 s6, v0
+; GISEL_O-NEXT:    v_readfirstlane_b32 s7, v1
+; GISEL_O-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
+; GISEL_O-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GISEL_O-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GISEL_O-NEXT:    ; implicit-def: $vgpr0
+; GISEL_O-NEXT:    s_xor_b64 exec, exec, s[8:9]
+; GISEL_O-NEXT:    s_cbranch_execnz .LBB9_1
+; GISEL_O-NEXT:  ; %bb.2:
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    v_readlane_b32 s63, v40, 31
+; GISEL_O-NEXT:    v_readlane_b32 s62, v40, 30
+; GISEL_O-NEXT:    v_readlane_b32 s61, v40, 29
+; GISEL_O-NEXT:    v_readlane_b32 s60, v40, 28
+; GISEL_O-NEXT:    v_readlane_b32 s59, v40, 27
+; GISEL_O-NEXT:    v_readlane_b32 s58, v40, 26
+; GISEL_O-NEXT:    v_readlane_b32 s57, v40, 25
+; GISEL_O-NEXT:    v_readlane_b32 s56, v40, 24
+; GISEL_O-NEXT:    v_readlane_b32 s55, v40, 23
+; GISEL_O-NEXT:    v_readlane_b32 s54, v40, 22
+; GISEL_O-NEXT:    v_readlane_b32 s53, v40, 21
+; GISEL_O-NEXT:    v_readlane_b32 s52, v40, 20
+; GISEL_O-NEXT:    v_readlane_b32 s51, v40, 19
+; GISEL_O-NEXT:    v_readlane_b32 s50, v40, 18
+; GISEL_O-NEXT:    v_readlane_b32 s49, v40, 17
+; GISEL_O-NEXT:    v_readlane_b32 s48, v40, 16
+; GISEL_O-NEXT:    v_readlane_b32 s47, v40, 15
+; GISEL_O-NEXT:    v_readlane_b32 s46, v40, 14
+; GISEL_O-NEXT:    v_readlane_b32 s45, v40, 13
+; GISEL_O-NEXT:    v_readlane_b32 s44, v40, 12
+; GISEL_O-NEXT:    v_readlane_b32 s43, v40, 11
+; GISEL_O-NEXT:    v_readlane_b32 s42, v40, 10
+; GISEL_O-NEXT:    v_readlane_b32 s41, v40, 9
+; GISEL_O-NEXT:    v_readlane_b32 s40, v40, 8
+; GISEL_O-NEXT:    v_readlane_b32 s39, v40, 7
+; GISEL_O-NEXT:    v_readlane_b32 s38, v40, 6
+; GISEL_O-NEXT:    v_readlane_b32 s37, v40, 5
+; GISEL_O-NEXT:    v_readlane_b32 s36, v40, 4
+; GISEL_O-NEXT:    v_readlane_b32 s35, v40, 3
+; GISEL_O-NEXT:    v_readlane_b32 s34, v40, 2
+; GISEL_O-NEXT:    v_readlane_b32 s31, v40, 1
+; GISEL_O-NEXT:    v_readlane_b32 s30, v40, 0
+; GISEL_O-NEXT:    s_or_saveexec_b64 s[4:5], -1
+; GISEL_O-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; GISEL_O-NEXT:    s_mov_b64 exec, s[4:5]
+; GISEL_O-NEXT:    s_addk_i32 s32, 0xfc00
+; GISEL_O-NEXT:    s_mov_b32 s33, s10
+; GISEL_O-NEXT:    s_waitcnt vmcnt(0)
+; GISEL_O-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr:
-; GISEL:       ; %bb.0:
-; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GISEL-NEXT:    s_mov_b32 s10, s33
-; GISEL-NEXT:    s_mov_b32 s33, s32
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0x400
-; GISEL-NEXT:    v_writelane_b32 v40, s30, 0
-; GISEL-NEXT:    v_writelane_b32 v40, s31, 1
-; GISEL-NEXT:    v_writelane_b32 v40, s34, 2
-; GISEL-NEXT:    v_writelane_b32 v40, s35, 3
-; GISEL-NEXT:    v_writelane_b32 v40, s36, 4
-; GISEL-NEXT:    v_writelane_b32 v40, s37, 5
-; GISEL-NEXT:    v_writelane_b32 v40, s38, 6
-; GISEL-NEXT:    v_writelane_b32 v40, s39, 7
-; GISEL-NEXT:    v_writelane_b32 v40, s40, 8
-; GISEL-NEXT:    v_writelane_b32 v40, s41, 9
-; GISEL-NEXT:    v_writelane_b32 v40, s42, 10
-; GISEL-NEXT:    v_writelane_b32 v40, s43, 11
-; GISEL-NEXT:    v_writelane_b32 v40, s44, 12
-; GISEL-NEXT:    v_writelane_b32 v40, s45, 13
-; GISEL-NEXT:    v_writelane_b32 v40, s46, 14
-; GISEL-NEXT:    v_writelane_b32 v40, s47, 15
-; GISEL-NEXT:    v_writelane_b32 v40, s48, 16
-; GISEL-NEXT:    v_writelane_b32 v40, s49, 17
-; GISEL-NEXT:    v_writelane_b32 v40, s50, 18
-; GISEL-NEXT:    v_writelane_b32 v40, s51, 19
-; GISEL-NEXT:    v_writelane_b32 v40, s52, 20
-; GISEL-NEXT:    v_writelane_b32 v40, s53, 21
-; GISEL-NEXT:    v_writelane_b32 v40, s54, 22
-; GISEL-NEXT:    v_writelane_b32 v40, s55, 23
-; GISEL-NEXT:    v_writelane_b32 v40, s56, 24
-; GISEL-NEXT:    v_writelane_b32 v40, s57, 25
-; GISEL-NEXT:    v_writelane_b32 v40, s58, 26
-; GISEL-NEXT:    v_writelane_b32 v40, s59, 27
-; GISEL-NEXT:    v_writelane_b32 v40, s60, 28
-; GISEL-NEXT:    v_writelane_b32 v40, s61, 29
-; GISEL-NEXT:    v_writelane_b32 v40, s62, 30
-; GISEL-NEXT:    v_writelane_b32 v40, s63, 31
-; GISEL-NEXT:    s_mov_b64 s[4:5], exec
-; GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
-; GISEL-NEXT:    v_readfirstlane_b32 s6, v0
-; GISEL-NEXT:    v_readfirstlane_b32 s7, v1
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1]
-; GISEL-NEXT:    s_and_saveexec_b64 s[8:9], vcc
-; GISEL-NEXT:    s_swappc_b64 s[30:31], s[6:7]
-; GISEL-NEXT:    ; implicit-def: $vgpr0
-; GISEL-NEXT:    s_xor_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    s_cbranch_execnz .LBB9_1
-; GISEL-NEXT:  ; %bb.2:
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    v_readlane_b32 s63, v40, 31
-; GISEL-NEXT:    v_readlane_b32 s62, v40, 30
-; GISEL-NEXT:    v_readlane_b32 s61, v40, 29
-; GISEL-NEXT:    v_readlane_b32 s60, v40, 28
-; GISEL-NEXT:    v_readlane_b32 s59, v40, 27
-; GISEL-NEXT:    v_readlane_b32 s58, v40, 26
-; GISEL-NEXT:    v_readlane_b32 s57, v40, 25
-; GISEL-NEXT:    v_readlane_b32 s56, v40, 24
-; GISEL-NEXT:    v_readlane_b32 s55, v40, 23
-; GISEL-NEXT:    v_readlane_b32 s54, v40, 22
-; GISEL-NEXT:    v_readlane_b32 s53, v40, 21
-; GISEL-NEXT:    v_readlane_b32 s52, v40, 20
-; GISEL-NEXT:    v_readlane_b32 s51, v40, 19
-; GISEL-NEXT:    v_readlane_b32 s50, v40, 18
-; GISEL-NEXT:    v_readlane_b32 s49, v40, 17
-; GISEL-NEXT:    v_readlane_b32 s48, v40, 16
-; GISEL-NEXT:    v_readlane_b32 s47, v40, 15
-; GISEL-NEXT:    v_readlane_b32 s46, v40, 14
-; GISEL-NEXT:    v_readlane_b32 s45, v40, 13
-; GISEL-NEXT:    v_readlane_b32 s44, v40, 12
-; GISEL-NEXT:    v_readlane_b32 s43, v40, 11
-; GISEL-NEXT:    v_readlane_b32 s42, v40, 10
-; GISEL-NEXT:    v_readlane_b32 s41, v40, 9
-; GISEL-NEXT:    v_readlane_b32 s40, v40, 8
-; GISEL-NEXT:    v_readlane_b32 s39, v40, 7
-; GISEL-NEXT:    v_readlane_b32 s38, v40, 6
-; GISEL-NEXT:    v_readlane_b32 s37, v40, 5
-; GISEL-NEXT:    v_readlane_b32 s36, v40, 4
-; GISEL-NEXT:    v_readlane_b32 s35, v40, 3
-; GISEL-NEXT:    v_readlane_b32 s34, v40, 2
-; GISEL-NEXT:    v_readlane_b32 s31, v40, 1
-; GISEL-NEXT:    v_readlane_b32 s30, v40, 0
-; GISEL-NEXT:    s_or_saveexec_b64 s[4:5], -1
-; GISEL-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
-; GISEL-NEXT:    s_mov_b64 exec, s[4:5]
-; GISEL-NEXT:    s_addk_i32 s32, 0xfc00
-; GISEL-NEXT:    s_mov_b32 s33, s10
-; GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL_C-LABEL: test_indirect_tail_call_vgpr_ptr:
+; GISEL_C:       ; %bb.0:
+; GISEL_C-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   tail call amdgpu_gfx void %fptr()
   ret void
 }
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdgpu_code_object_version", i32 200}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
+; GISEL: {{.*}}
Index: llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -1,4 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_C %s
+; RUN: FileCheck -check-prefix=REMARK %s < %t
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -attributor-assume-closed-world=false -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_O %s
 ; RUN: FileCheck -check-prefix=REMARK %s < %t
 
 ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
@@ -157,16 +159,27 @@
   ret void
 }
 
-; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
-; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs: 39
-; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs: 32
-; STDERR-NEXT: remark: foo.cl:64:0:     AGPRs: 10
-; STDERR-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
-; STDERR-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+; STDERR_O: remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR_O-NEXT: remark: foo.cl:64:0:     SGPRs: 39
+; STDERR_O-NEXT: remark: foo.cl:64:0:     VGPRs: 32
+; STDERR_O-NEXT: remark: foo.cl:64:0:     AGPRs: 10
+; STDERR_O-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     Dynamic Stack: True
+; STDERR_O-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR_O-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
+
+; STDERR_C:      remark: foo.cl:64:0: Function Name: test_indirect_call
+; STDERR_C-NEXT: remark: foo.cl:64:0:     SGPRs: 4
+; STDERR_C-NEXT: remark: foo.cl:64:0:     VGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     AGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     ScratchSize [bytes/lane]: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     Dynamic Stack: False
+; STDERR_C-NEXT: remark: foo.cl:64:0:     Occupancy [waves/SIMD]: 8
+; STDERR_C-NEXT: remark: foo.cl:64:0:     SGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     VGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:64:0:     LDS Size [bytes/block]: 0
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
 define amdgpu_kernel void @test_indirect_call() !dbg !9 {
@@ -175,17 +188,27 @@
   ret void
 }
 
-; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
-; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs: 39
-; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs: 32
-; STDERR-NEXT: remark: foo.cl:74:0:     AGPRs: 10
-; STDERR-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
-; STDERR-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
-; STDERR-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
-; STDERR-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
-; STDERR-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
+; STDERR_O: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR_O-NEXT: remark: foo.cl:74:0:     SGPRs: 39
+; STDERR_O-NEXT: remark: foo.cl:74:0:     VGPRs: 32
+; STDERR_O-NEXT: remark: foo.cl:74:0:     AGPRs: 10
+; STDERR_O-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
+; STDERR_O-NEXT: remark: foo.cl:74:0:     Dynamic Stack: True
+; STDERR_O-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR_O-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR_O-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
 
+; STDERR_C:      remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
+; STDERR_C-NEXT: remark: foo.cl:74:0:     SGPRs: 12
+; STDERR_C-NEXT: remark: foo.cl:74:0:     VGPRs: 1
+; STDERR_C-NEXT: remark: foo.cl:74:0:     AGPRs: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     ScratchSize [bytes/lane]: 144
+; STDERR_C-NEXT: remark: foo.cl:74:0:     Dynamic Stack: False
+; STDERR_C-NEXT: remark: foo.cl:74:0:     Occupancy [waves/SIMD]: 8
+; STDERR_C-NEXT: remark: foo.cl:74:0:     SGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
+; STDERR_C-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg)
 
 define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
Index: llvm/test/CodeGen/AMDGPU/sibling-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 target datalayout = "A5"
 
 ; FIXME: Why is this commuted only sometimes?
Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -1,8 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features  %s | FileCheck -check-prefix=AKF_GCN %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_CW %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_OW %s
 
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -attributor-assume-closed-world=false < %s | FileCheck -check-prefix=GFX9 %s
 
 target datalayout = "A5"
 
@@ -21,6 +22,17 @@
   ret void
 }
 
+define ptr @helper() {
+; AKF_GCN-LABEL: define {{[^@]+}}@helper() {
+; AKF_GCN-NEXT:    ret ptr @indirect
+;
+; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@helper
+; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] {
+; ATTRIBUTOR_GCN-NEXT:    ret ptr @indirect
+;
+  ret ptr @indirect
+}
+
 define amdgpu_kernel void @test_simple_indirect_call() {
 ; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
 ; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] {
@@ -31,14 +43,23 @@
 ; AKF_GCN-NEXT:    call void [[FP]]()
 ; AKF_GCN-NEXT:    ret void
 ;
-; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
-; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
-; ATTRIBUTOR_GCN-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
-; ATTRIBUTOR_GCN-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
-; ATTRIBUTOR_GCN-NEXT:    call void [[FP]]()
-; ATTRIBUTOR_GCN-NEXT:    ret void
+; ATTRIBUTOR_GCN_CW-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_GCN_CW-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN_CW-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_CW-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_CW-NEXT:    call void @indirect()
+; ATTRIBUTOR_GCN_CW-NEXT:    ret void
+;
+; ATTRIBUTOR_GCN_OW-LABEL: define {{[^@]+}}@test_simple_indirect_call
+; ATTRIBUTOR_GCN_OW-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
+; ATTRIBUTOR_GCN_OW-NEXT:    store ptr @indirect, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_OW-NEXT:    [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
+; ATTRIBUTOR_GCN_OW-NEXT:    call void [[FP]]()
+; ATTRIBUTOR_GCN_OW-NEXT:    ret void
 ;
 ; GFX9-LABEL: test_simple_indirect_call:
 ; GFX9:       ; %bb.0:
@@ -73,6 +94,9 @@
 ;.
 ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
 ;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_CW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+;.
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
 ;.