Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -15,10 +15,13 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CycleAnalysis.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" +#include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/Attributor.h" +#include <optional> #define DEBUG_TYPE "amdgpu-attributor" @@ -944,16 +947,44 @@ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID, - &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID}); + &AAIndirectCallInfo::ID, &AAPotentialConstantValues::ID, + &AAUnderlyingObjects::ID}); AttributorConfig AC(CGUpdater); AC.Allowed = &Allowed; AC.IsModulePass = true; AC.DefaultInitializeLiveInternals = false; + AC.IsClosedWorldModule = true; AC.IPOAmendableCB = [](const Function &F) { return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; }; + // Callback to determine if we should specialize an indirect call site with a + // specific callee. It's effectively a heuristic and we can add checks for + // the callee size, PGO, etc. For now, we check for single potential callees + // and kernel arguments as they are known uniform values. + AC.IndirectCalleeSpecializationCallback = [&](Attributor &A, + const AbstractAttribute &AA, + CallBase &CB, + Function &Callee) { + bool UsedAssumedInformation = false; + std::optional<Value *> SimpleV = A.getAssumedSimplified( + *CB.getCalledOperand(), AA, UsedAssumedInformation, + AA::ValueScope::AnyScope); + assert(SimpleV.has_value() && "No value but potential callee?"); + // Unknown value. + if (!SimpleV.value()) + return false; + // Singleton function. 
+ if (isa<Function>(SimpleV.value())) + return true; + // Uniform (kernel argument) value. + if (auto *Arg = dyn_cast_or_null<Argument>(SimpleV.value())) + if (Arg->getParent()->getCallingConv() == CallingConv::AMDGPU_KERNEL) + return true; + return false; + }; + Attributor A(Functions, InfoCache, AC); for (Function &F : M) { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -attributor-assume-closed-world=false -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CHECK %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CWRLD %s define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr @@ -52,24 +53,31 @@ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 + ; + ; CWRLD-LABEL: name: test_indirect_call_sgpr_ptr + ; CWRLD: bb.1 (%ir-block.0): + ; CWRLD-NEXT: liveins: $sgpr4_sgpr5 + ; CWRLD-NEXT: {{ $}} + ; CWRLD-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; CWRLD-NEXT: 
[[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) call void %fptr() ret void } define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) { - ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; SAMEC-LABEL: name: test_gfx_indirect_call_sgpr_ptr + ; SAMEC: bb.1 (%ir-block.0): + ; SAMEC-NEXT: liveins: $vgpr0, $vgpr1 + ; SAMEC-NEXT: {{ $}} + ; SAMEC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SAMEC-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SAMEC-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; SAMEC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; SAMEC-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; SAMEC-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; SAMEC-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; SAMEC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; SAMEC-NEXT: SI_RETURN call amdgpu_gfx void %fptr() ret void } Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -1,6 
+1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_HSA %s -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor -attributor-assume-closed-world=false < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,OWRLD_ATTR_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,CWRLD_ATTR_HSA %s ; TODO: The test contains UB which is refined by the Attributor and should be removed. @@ -18,6 +19,16 @@ declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 declare i64 @llvm.amdgcn.dispatch.id() #0 +@G1 = global ptr undef +@G2 = global ptr undef + +;. +; AKF_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +; AKF_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +;. +; ATTRIBUTOR_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +; ATTRIBUTOR_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +;. 
define void @use_workitem_id_x() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x ; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] { @@ -766,19 +777,55 @@ ; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] { ; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; AKF_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8 +; AKF_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8 ; AKF_HSA-NEXT: ret float [[FADD]] ; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call -; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { -; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() -; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 -; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] +; OWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call +; OWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { +; OWRLD_ATTR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() +; OWRLD_ATTR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; OWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8 +; OWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8 +; OWRLD_ATTR_HSA-NEXT: ret float [[FADD]] +; +; CWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call +; CWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] { +; CWRLD_ATTR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]](), !callees !0 +; CWRLD_ATTR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; CWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8 +; CWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8 +; CWRLD_ATTR_HSA-NEXT: ret float [[FADD]] ; %f = call float %fptr() %fadd = fadd float %f, 1.0 + store ptr @indirect_callee1, ptr @G1 + store ptr @indirect_callee2, ptr @G2 ret float %fadd } +define float @indirect_callee1() { +; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee1() { +; AKF_HSA-NEXT: ret float 0x40091EB860000000 +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee1 +; 
ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: ret float 0x40091EB860000000 +; + ret float 0x40091EB860000000 +} +define float @indirect_callee2(float noundef %arg) { +; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee2 +; AKF_HSA-SAME: (float noundef [[ARG:%.*]]) { +; AKF_HSA-NEXT: ret float [[ARG]] +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee2 +; ATTRIBUTOR_HSA-SAME: (float noundef [[ARG:%.*]]) #[[ATTR19]] { +; ATTRIBUTOR_HSA-NEXT: ret float [[ARG]] +; + ret float %arg +} + declare float @extern() #3 define float @func_extern_call() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call @@ -845,7 +892,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -861,7 +908,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -877,7 +924,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -893,7 +940,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -928,7 +975,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def -; 
ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -941,7 +988,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl -; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -956,7 +1003,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -969,7 +1016,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -980,7 +1027,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -994,7 +1041,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR29:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -1040,15 +1087,17 @@ ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" 
"amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" 
"amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" 
"amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR25:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" } ;. +; CWRLD_ATTR_HSA: [[META0:![0-9]+]] = !{ptr @indirect_callee1, ptr @indirect_callee2} +;. 
Index: llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll +++ llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD %0 = type { ptr, ptr } @@ -20,19 +21,32 @@ } define internal fastcc double @baz(ptr %arg) { -; CHECK-LABEL: define {{[^@]+}}@baz -; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = tail call double [[TMP1]]() -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1 -; CHECK-NEXT: br label [[BB5:%.*]] -; CHECK: bb5: -; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]]) -; CHECK-NEXT: br label [[BB5]] +; OWRLD-LABEL: define {{[^@]+}}@baz +; OWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] { +; OWRLD-NEXT: bb: +; OWRLD-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8 +; OWRLD-NEXT: [[TMP2:%.*]] = tail call double [[TMP1]]() +; OWRLD-NEXT: br label [[BB3:%.*]] +; OWRLD: bb3: +; OWRLD-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1 +; OWRLD-NEXT: br label [[BB5:%.*]] +; OWRLD: bb5: +; OWRLD-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8 +; OWRLD-NEXT: [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]]) +; OWRLD-NEXT: br label [[BB5]] +; +; CWRLD-LABEL: define 
{{[^@]+}}@baz +; CWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] { +; CWRLD-NEXT: bb: +; CWRLD-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8 +; CWRLD-NEXT: unreachable +; CWRLD: bb3: +; CWRLD-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1 +; CWRLD-NEXT: br label [[BB5:%.*]] +; CWRLD: bb5: +; CWRLD-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CWRLD-NEXT: [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]]) +; CWRLD-NEXT: br label [[BB5]] ; bb: %tmp1 = load ptr, ptr %arg, align 8 @@ -49,13 +63,19 @@ br label %bb5 } -define amdgpu_kernel void @entry() { -; CHECK-LABEL: define {{[^@]+}}@entry -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr -; CHECK-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) -; CHECK-NEXT: ret void +define amdgpu_kernel void @entry() { ; OWRLD-LABEL: define {{[^@]+}}@entry +; OWRLD-SAME: () #[[ATTR0]] { +; OWRLD-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) +; OWRLD-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr +; OWRLD-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) +; OWRLD-NEXT: ret void +; +; CWRLD-LABEL: define {{[^@]+}}@entry +; CWRLD-SAME: () #[[ATTR1:[0-9]+]] { +; CWRLD-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) +; CWRLD-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr +; CWRLD-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) +; CWRLD-NEXT: ret void ; %alloca = alloca %0, align 8, addrspace(5) %cast = addrspacecast ptr addrspace(5) %alloca to ptr @@ -63,5 +83,6 @@ ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CWRLD: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. 
Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD define internal void @indirect() { ; CHECK-LABEL: define {{[^@]+}}@indirect @@ -10,13 +11,21 @@ } define internal void @direct() { -; CHECK-LABEL: define {{[^@]+}}@direct -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; CHECK-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 -; CHECK-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 -; CHECK-NEXT: call void [[FP]]() -; CHECK-NEXT: ret void +; OWRLD-LABEL: define {{[^@]+}}@direct +; OWRLD-SAME: () #[[ATTR1:[0-9]+]] { +; OWRLD-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; OWRLD-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; OWRLD-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; OWRLD-NEXT: call void [[FP]]() +; OWRLD-NEXT: ret void +; +; CWRLD-LABEL: define {{[^@]+}}@direct +; CWRLD-SAME: () #[[ATTR0]] { +; CWRLD-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; CWRLD-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; CWRLD-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; CWRLD-NEXT: call void @indirect() +; CWRLD-NEXT: ret void ; %fptr = alloca ptr, addrspace(5) store ptr @indirect, ptr addrspace(5) %fptr @@ -26,15 +35,22 @@ } define amdgpu_kernel void 
@test_direct_indirect_call() { -; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @direct() -; CHECK-NEXT: ret void +; OWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call +; OWRLD-SAME: () #[[ATTR1]] { +; OWRLD-NEXT: call void @direct() +; OWRLD-NEXT: ret void +; +; CWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call +; CWRLD-SAME: () #[[ATTR0]] { +; CWRLD-NEXT: call void @direct() +; CWRLD-NEXT: ret void ; call void @direct() ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; OWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; OWRLD: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +;. 
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_OWR +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_CWR define internal void @indirect() { ; AKF_GCN-LABEL: define {{[^@]+}}@indirect() { @@ -22,13 +23,21 @@ ; AKF_GCN-NEXT: call void [[FP]]() ; AKF_GCN-NEXT: ret void ; -; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() -; 
ATTRIBUTOR_GCN-NEXT: ret void +; ATTRIBUTOR_OWR-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_OWR-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_OWR-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_OWR-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_OWR-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_OWR-NEXT: call void [[FP]]() +; ATTRIBUTOR_OWR-NEXT: ret void +; +; ATTRIBUTOR_CWR-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_CWR-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CWR-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_CWR-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_CWR-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_CWR-NEXT: call void @indirect() +; ATTRIBUTOR_CWR-NEXT: ret void ; %fptr = alloca ptr, addrspace(5) store ptr @indirect, ptr addrspace(5) %fptr @@ -42,6 +51,9 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. 
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } +; ATTRIBUTOR_OWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_OWR: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } +;. 
+; ATTRIBUTOR_CWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CWR: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
Index: llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll +++ llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll @@ -1,18 +1,26 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV5C %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV4C %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV5O %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV4O %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 -; No stack objects, only indirect call has to enable scrathch -; GCN-LABEL: test_indirect_call: +; No stack objects, only indirect call has to enable scratch +; GCNO-LABEL: test_indirect_call: +; GCNC-LABEL: test_indirect_call: -; COV5: .amdhsa_private_segment_fixed_size 0{{$}} -; COV4: .amdhsa_private_segment_fixed_size 16384{{$}} +; COV5O: .amdhsa_private_segment_fixed_size 0{{$}} +; COV5C: .amdhsa_private_segment_fixed_size 0{{$}} +; COV4C: .amdhsa_private_segment_fixed_size 0{{$}} +; COV4O: .amdhsa_private_segment_fixed_size 16384{{$}} -; GCN: .amdhsa_user_sgpr_private_segment_buffer 1 +; GCNO: .amdhsa_user_sgpr_private_segment_buffer 1 +; GCNC: .amdhsa_user_sgpr_private_segment_buffer 1 -; COV5: .amdhsa_uses_dynamic_stack 1 -; GCN: 
.amdhsa_system_sgpr_private_segment_wavefront_offset 1 +; COV5O: .amdhsa_uses_dynamic_stack 1 +; COV5C: .amdhsa_uses_dynamic_stack 0 +; GCNO: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +; GCNC: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 define amdgpu_kernel void @test_indirect_call() { %fptr = load ptr, ptr addrspace(4) @gv.fptr0 call void %fptr() Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -1,1109 +1,1443 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_O %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_C %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_O %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_C %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) { -; GCN-LABEL: test_indirect_call_sgpr_ptr: -; GCN: .amd_kernel_code_t -; GCN-NEXT: amd_code_version_major = 1 -; GCN-NEXT: amd_code_version_minor = 2 -; GCN-NEXT: amd_machine_kind = 1 -; GCN-NEXT: amd_machine_version_major = 7 -; GCN-NEXT: amd_machine_version_minor = 0 -; GCN-NEXT: amd_machine_version_stepping = 0 -; GCN-NEXT: 
kernel_code_entry_byte_offset = 256 -; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 10 -; GCN-NEXT: granulated_wavefront_sgpr_count = 8 -; GCN-NEXT: priority = 0 -; GCN-NEXT: float_mode = 240 -; GCN-NEXT: priv = 0 -; GCN-NEXT: enable_dx10_clamp = 1 -; GCN-NEXT: debug_mode = 0 -; GCN-NEXT: enable_ieee_mode = 1 -; GCN-NEXT: enable_wgp_mode = 0 -; GCN-NEXT: enable_mem_ordered = 0 -; GCN-NEXT: enable_fwd_progress = 0 -; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GCN-NEXT: user_sgpr_count = 14 -; GCN-NEXT: enable_trap_handler = 0 -; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 -; GCN-NEXT: enable_sgpr_workgroup_info = 0 -; GCN-NEXT: enable_vgpr_workitem_id = 2 -; GCN-NEXT: enable_exception_msb = 0 -; GCN-NEXT: granulated_lds_size = 0 -; GCN-NEXT: enable_exception = 0 -; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 -; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 -; GCN-NEXT: enable_sgpr_queue_ptr = 1 -; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GCN-NEXT: enable_sgpr_dispatch_id = 1 -; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 -; GCN-NEXT: enable_sgpr_private_segment_size = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GCN-NEXT: enable_wavefront_size32 = 0 -; GCN-NEXT: enable_ordered_append_gds = 0 -; GCN-NEXT: private_element_size = 1 -; GCN-NEXT: is_ptr64 = 1 -; GCN-NEXT: is_dynamic_callstack = 1 -; GCN-NEXT: is_debug_enabled = 0 -; GCN-NEXT: is_xnack_enabled = 0 -; GCN-NEXT: workitem_private_segment_byte_size = 16384 -; GCN-NEXT: workgroup_group_segment_byte_size = 0 -; GCN-NEXT: gds_segment_byte_size = 0 -; GCN-NEXT: kernarg_segment_byte_size = 64 -; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 68 -; GCN-NEXT: workitem_vgpr_count = 42 -; GCN-NEXT: reserved_vgpr_first = 0 
-; GCN-NEXT: reserved_vgpr_count = 0 -; GCN-NEXT: reserved_sgpr_first = 0 -; GCN-NEXT: reserved_sgpr_count = 0 -; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 -; GCN-NEXT: kernarg_segment_alignment = 4 -; GCN-NEXT: group_segment_alignment = 4 -; GCN-NEXT: private_segment_alignment = 4 -; GCN-NEXT: wavefront_size = 6 -; GCN-NEXT: call_convention = -1 -; GCN-NEXT: runtime_loader_kernel_symbol = 0 -; GCN-NEXT: .end_amd_kernel_code_t -; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GCN-NEXT: s_add_i32 s12, s12, s17 -; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GCN-NEXT: s_add_u32 s0, s0, s17 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_mov_b32 s13, s15 -; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_getpc_b64 s[14:15] -; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GCN-NEXT: s_add_u32 s8, s8, 8 -; GCN-NEXT: s_addc_u32 s9, s9, 0 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: v_or_b32_e32 v0, v0, v1 -; GCN-NEXT: v_or_b32_e32 v31, v0, v2 -; GCN-NEXT: s_mov_b32 s14, s16 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GCN-NEXT: s_endpgm +; GCN_O-LABEL: test_indirect_call_sgpr_ptr: +; GCN_O: .amd_kernel_code_t +; GCN_O-NEXT: amd_code_version_major = 1 +; GCN_O-NEXT: amd_code_version_minor = 2 +; GCN_O-NEXT: amd_machine_kind = 1 +; GCN_O-NEXT: amd_machine_version_major = 7 +; GCN_O-NEXT: amd_machine_version_minor = 0 +; GCN_O-NEXT: amd_machine_version_stepping = 0 +; GCN_O-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_O-NEXT: granulated_workitem_vgpr_count = 10 +; GCN_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GCN_O-NEXT: priority = 0 +; GCN_O-NEXT: float_mode = 240 +; GCN_O-NEXT: priv = 0 +; GCN_O-NEXT: 
enable_dx10_clamp = 1 +; GCN_O-NEXT: debug_mode = 0 +; GCN_O-NEXT: enable_ieee_mode = 1 +; GCN_O-NEXT: enable_wgp_mode = 0 +; GCN_O-NEXT: enable_mem_ordered = 0 +; GCN_O-NEXT: enable_fwd_progress = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GCN_O-NEXT: user_sgpr_count = 14 +; GCN_O-NEXT: enable_trap_handler = 0 +; GCN_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_O-NEXT: enable_vgpr_workitem_id = 2 +; GCN_O-NEXT: enable_exception_msb = 0 +; GCN_O-NEXT: granulated_lds_size = 0 +; GCN_O-NEXT: enable_exception = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GCN_O-NEXT: enable_sgpr_queue_ptr = 1 +; GCN_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_id = 1 +; GCN_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GCN_O-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_O-NEXT: enable_wavefront_size32 = 0 +; GCN_O-NEXT: enable_ordered_append_gds = 0 +; GCN_O-NEXT: private_element_size = 1 +; GCN_O-NEXT: is_ptr64 = 1 +; GCN_O-NEXT: is_dynamic_callstack = 1 +; GCN_O-NEXT: is_debug_enabled = 0 +; GCN_O-NEXT: is_xnack_enabled = 0 +; GCN_O-NEXT: workitem_private_segment_byte_size = 16384 +; GCN_O-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_O-NEXT: gds_segment_byte_size = 0 +; GCN_O-NEXT: kernarg_segment_byte_size = 64 +; GCN_O-NEXT: workgroup_fbarrier_count = 0 +; GCN_O-NEXT: wavefront_sgpr_count = 68 +; GCN_O-NEXT: workitem_vgpr_count = 42 +; GCN_O-NEXT: reserved_vgpr_first = 0 +; GCN_O-NEXT: reserved_vgpr_count = 0 +; GCN_O-NEXT: reserved_sgpr_first = 0 +; GCN_O-NEXT: reserved_sgpr_count = 0 +; GCN_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; 
GCN_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_O-NEXT: kernarg_segment_alignment = 4 +; GCN_O-NEXT: group_segment_alignment = 4 +; GCN_O-NEXT: private_segment_alignment = 4 +; GCN_O-NEXT: wavefront_size = 6 +; GCN_O-NEXT: call_convention = -1 +; GCN_O-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_O-NEXT: .end_amd_kernel_code_t +; GCN_O-NEXT: ; %bb.0: +; GCN_O-NEXT: s_mov_b32 s32, 0 +; GCN_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GCN_O-NEXT: s_add_i32 s12, s12, s17 +; GCN_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GCN_O-NEXT: s_add_u32 s0, s0, s17 +; GCN_O-NEXT: s_addc_u32 s1, s1, 0 +; GCN_O-NEXT: s_mov_b32 s13, s15 +; GCN_O-NEXT: s_mov_b32 s12, s14 +; GCN_O-NEXT: s_getpc_b64 s[14:15] +; GCN_O-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 +; GCN_O-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 +; GCN_O-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GCN_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GCN_O-NEXT: s_add_u32 s8, s8, 8 +; GCN_O-NEXT: s_addc_u32 s9, s9, 0 +; GCN_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GCN_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GCN_O-NEXT: v_or_b32_e32 v31, v0, v2 +; GCN_O-NEXT: s_mov_b32 s14, s16 +; GCN_O-NEXT: s_waitcnt lgkmcnt(0) +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GCN_O-NEXT: s_endpgm ; -; GISEL-LABEL: test_indirect_call_sgpr_ptr: -; GISEL: .amd_kernel_code_t -; GISEL-NEXT: amd_code_version_major = 1 -; GISEL-NEXT: amd_code_version_minor = 2 -; GISEL-NEXT: amd_machine_kind = 1 -; GISEL-NEXT: amd_machine_version_major = 7 -; GISEL-NEXT: amd_machine_version_minor = 0 -; GISEL-NEXT: amd_machine_version_stepping = 0 -; GISEL-NEXT: kernel_code_entry_byte_offset = 256 -; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 10 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 -; GISEL-NEXT: priority = 0 -; GISEL-NEXT: float_mode = 240 -; GISEL-NEXT: priv = 0 -; GISEL-NEXT: enable_dx10_clamp = 1 -; GISEL-NEXT: debug_mode = 0 -; GISEL-NEXT: enable_ieee_mode = 1 -; GISEL-NEXT: 
enable_wgp_mode = 0 -; GISEL-NEXT: enable_mem_ordered = 0 -; GISEL-NEXT: enable_fwd_progress = 0 -; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GISEL-NEXT: user_sgpr_count = 14 -; GISEL-NEXT: enable_trap_handler = 0 -; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 -; GISEL-NEXT: enable_sgpr_workgroup_info = 0 -; GISEL-NEXT: enable_vgpr_workitem_id = 2 -; GISEL-NEXT: enable_exception_msb = 0 -; GISEL-NEXT: granulated_lds_size = 0 -; GISEL-NEXT: enable_exception = 0 -; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 -; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 -; GISEL-NEXT: enable_sgpr_queue_ptr = 1 -; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GISEL-NEXT: enable_sgpr_dispatch_id = 1 -; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 -; GISEL-NEXT: enable_sgpr_private_segment_size = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GISEL-NEXT: enable_wavefront_size32 = 0 -; GISEL-NEXT: enable_ordered_append_gds = 0 -; GISEL-NEXT: private_element_size = 1 -; GISEL-NEXT: is_ptr64 = 1 -; GISEL-NEXT: is_dynamic_callstack = 1 -; GISEL-NEXT: is_debug_enabled = 0 -; GISEL-NEXT: is_xnack_enabled = 0 -; GISEL-NEXT: workitem_private_segment_byte_size = 16384 -; GISEL-NEXT: workgroup_group_segment_byte_size = 0 -; GISEL-NEXT: gds_segment_byte_size = 0 -; GISEL-NEXT: kernarg_segment_byte_size = 64 -; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 68 -; GISEL-NEXT: workitem_vgpr_count = 42 -; GISEL-NEXT: reserved_vgpr_first = 0 -; GISEL-NEXT: reserved_vgpr_count = 0 -; GISEL-NEXT: reserved_sgpr_first = 0 -; GISEL-NEXT: reserved_sgpr_count = 0 -; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 -; GISEL-NEXT: kernarg_segment_alignment = 4 -; GISEL-NEXT: 
group_segment_alignment = 4 -; GISEL-NEXT: private_segment_alignment = 4 -; GISEL-NEXT: wavefront_size = 6 -; GISEL-NEXT: call_convention = -1 -; GISEL-NEXT: runtime_loader_kernel_symbol = 0 -; GISEL-NEXT: .end_amd_kernel_code_t -; GISEL-NEXT: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s32, 0 -; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GISEL-NEXT: s_add_i32 s12, s12, s17 -; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GISEL-NEXT: s_add_u32 s0, s0, s17 -; GISEL-NEXT: s_addc_u32 s1, s1, 0 -; GISEL-NEXT: s_mov_b32 s13, s15 -; GISEL-NEXT: s_mov_b32 s12, s14 -; GISEL-NEXT: s_getpc_b64 s[14:15] -; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 -; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: s_add_u32 s8, s8, 8 -; GISEL-NEXT: s_addc_u32 s9, s9, 0 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 -; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 -; GISEL-NEXT: s_mov_b32 s14, s16 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GISEL-NEXT: s_endpgm +; GCN_C-LABEL: test_indirect_call_sgpr_ptr: +; GCN_C: .amd_kernel_code_t +; GCN_C-NEXT: amd_code_version_major = 1 +; GCN_C-NEXT: amd_code_version_minor = 2 +; GCN_C-NEXT: amd_machine_kind = 1 +; GCN_C-NEXT: amd_machine_version_major = 7 +; GCN_C-NEXT: amd_machine_version_minor = 0 +; GCN_C-NEXT: amd_machine_version_stepping = 0 +; GCN_C-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_C-NEXT: granulated_workitem_vgpr_count = 0 +; GCN_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GCN_C-NEXT: priority = 0 +; GCN_C-NEXT: float_mode = 240 +; GCN_C-NEXT: priv = 0 +; GCN_C-NEXT: enable_dx10_clamp = 1 +; GCN_C-NEXT: debug_mode = 0 +; GCN_C-NEXT: enable_ieee_mode = 1 +; GCN_C-NEXT: enable_wgp_mode = 0 +; GCN_C-NEXT: enable_mem_ordered = 0 +; GCN_C-NEXT: enable_fwd_progress = 0 +; GCN_C-NEXT: 
enable_sgpr_private_segment_wave_byte_offset = 0 +; GCN_C-NEXT: user_sgpr_count = 6 +; GCN_C-NEXT: enable_trap_handler = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_C-NEXT: enable_vgpr_workitem_id = 0 +; GCN_C-NEXT: enable_exception_msb = 0 +; GCN_C-NEXT: granulated_lds_size = 0 +; GCN_C-NEXT: enable_exception = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GCN_C-NEXT: enable_sgpr_queue_ptr = 0 +; GCN_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_id = 0 +; GCN_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_C-NEXT: enable_wavefront_size32 = 0 +; GCN_C-NEXT: enable_ordered_append_gds = 0 +; GCN_C-NEXT: private_element_size = 1 +; GCN_C-NEXT: is_ptr64 = 1 +; GCN_C-NEXT: is_dynamic_callstack = 0 +; GCN_C-NEXT: is_debug_enabled = 0 +; GCN_C-NEXT: is_xnack_enabled = 0 +; GCN_C-NEXT: workitem_private_segment_byte_size = 0 +; GCN_C-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_C-NEXT: gds_segment_byte_size = 0 +; GCN_C-NEXT: kernarg_segment_byte_size = 4 +; GCN_C-NEXT: workgroup_fbarrier_count = 0 +; GCN_C-NEXT: wavefront_sgpr_count = 0 +; GCN_C-NEXT: workitem_vgpr_count = 0 +; GCN_C-NEXT: reserved_vgpr_first = 0 +; GCN_C-NEXT: reserved_vgpr_count = 0 +; GCN_C-NEXT: reserved_sgpr_first = 0 +; GCN_C-NEXT: reserved_sgpr_count = 0 +; GCN_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GCN_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_C-NEXT: kernarg_segment_alignment = 4 +; GCN_C-NEXT: group_segment_alignment = 4 +; GCN_C-NEXT: private_segment_alignment = 4 +; GCN_C-NEXT: wavefront_size = 6 +; GCN_C-NEXT: 
call_convention = -1 +; GCN_C-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_C-NEXT: .end_amd_kernel_code_t +; GCN_C-NEXT: ; %bb.0: +; +; GISEL_O-LABEL: test_indirect_call_sgpr_ptr: +; GISEL_O: .amd_kernel_code_t +; GISEL_O-NEXT: amd_code_version_major = 1 +; GISEL_O-NEXT: amd_code_version_minor = 2 +; GISEL_O-NEXT: amd_machine_kind = 1 +; GISEL_O-NEXT: amd_machine_version_major = 7 +; GISEL_O-NEXT: amd_machine_version_minor = 0 +; GISEL_O-NEXT: amd_machine_version_stepping = 0 +; GISEL_O-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_O-NEXT: granulated_workitem_vgpr_count = 10 +; GISEL_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GISEL_O-NEXT: priority = 0 +; GISEL_O-NEXT: float_mode = 240 +; GISEL_O-NEXT: priv = 0 +; GISEL_O-NEXT: enable_dx10_clamp = 1 +; GISEL_O-NEXT: debug_mode = 0 +; GISEL_O-NEXT: enable_ieee_mode = 1 +; GISEL_O-NEXT: enable_wgp_mode = 0 +; GISEL_O-NEXT: enable_mem_ordered = 0 +; GISEL_O-NEXT: enable_fwd_progress = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GISEL_O-NEXT: user_sgpr_count = 14 +; GISEL_O-NEXT: enable_trap_handler = 0 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_O-NEXT: enable_vgpr_workitem_id = 2 +; GISEL_O-NEXT: enable_exception_msb = 0 +; GISEL_O-NEXT: granulated_lds_size = 0 +; GISEL_O-NEXT: enable_exception = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_queue_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_id = 1 +; GISEL_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GISEL_O-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_O-NEXT: 
enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_O-NEXT: enable_wavefront_size32 = 0 +; GISEL_O-NEXT: enable_ordered_append_gds = 0 +; GISEL_O-NEXT: private_element_size = 1 +; GISEL_O-NEXT: is_ptr64 = 1 +; GISEL_O-NEXT: is_dynamic_callstack = 1 +; GISEL_O-NEXT: is_debug_enabled = 0 +; GISEL_O-NEXT: is_xnack_enabled = 0 +; GISEL_O-NEXT: workitem_private_segment_byte_size = 16384 +; GISEL_O-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_O-NEXT: gds_segment_byte_size = 0 +; GISEL_O-NEXT: kernarg_segment_byte_size = 64 +; GISEL_O-NEXT: workgroup_fbarrier_count = 0 +; GISEL_O-NEXT: wavefront_sgpr_count = 68 +; GISEL_O-NEXT: workitem_vgpr_count = 42 +; GISEL_O-NEXT: reserved_vgpr_first = 0 +; GISEL_O-NEXT: reserved_vgpr_count = 0 +; GISEL_O-NEXT: reserved_sgpr_first = 0 +; GISEL_O-NEXT: reserved_sgpr_count = 0 +; GISEL_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_O-NEXT: kernarg_segment_alignment = 4 +; GISEL_O-NEXT: group_segment_alignment = 4 +; GISEL_O-NEXT: private_segment_alignment = 4 +; GISEL_O-NEXT: wavefront_size = 6 +; GISEL_O-NEXT: call_convention = -1 +; GISEL_O-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_O-NEXT: .end_amd_kernel_code_t +; GISEL_O-NEXT: ; %bb.0: +; GISEL_O-NEXT: s_mov_b32 s32, 0 +; GISEL_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL_O-NEXT: s_add_i32 s12, s12, s17 +; GISEL_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL_O-NEXT: s_add_u32 s0, s0, s17 +; GISEL_O-NEXT: s_addc_u32 s1, s1, 0 +; GISEL_O-NEXT: s_mov_b32 s13, s15 +; GISEL_O-NEXT: s_mov_b32 s12, s14 +; GISEL_O-NEXT: s_getpc_b64 s[14:15] +; GISEL_O-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 +; GISEL_O-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 +; GISEL_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GISEL_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL_O-NEXT: s_add_u32 s8, s8, 8 +; GISEL_O-NEXT: s_addc_u32 s9, s9, 0 +; GISEL_O-NEXT: 
v_lshlrev_b32_e32 v1, 20, v2 +; GISEL_O-NEXT: v_or_b32_e32 v31, v0, v1 +; GISEL_O-NEXT: s_mov_b32 s14, s16 +; GISEL_O-NEXT: s_waitcnt lgkmcnt(0) +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL_O-NEXT: s_endpgm +; +; GISEL_C-LABEL: test_indirect_call_sgpr_ptr: +; GISEL_C: .amd_kernel_code_t +; GISEL_C-NEXT: amd_code_version_major = 1 +; GISEL_C-NEXT: amd_code_version_minor = 2 +; GISEL_C-NEXT: amd_machine_kind = 1 +; GISEL_C-NEXT: amd_machine_version_major = 7 +; GISEL_C-NEXT: amd_machine_version_minor = 0 +; GISEL_C-NEXT: amd_machine_version_stepping = 0 +; GISEL_C-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_C-NEXT: granulated_workitem_vgpr_count = 0 +; GISEL_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GISEL_C-NEXT: priority = 0 +; GISEL_C-NEXT: float_mode = 240 +; GISEL_C-NEXT: priv = 0 +; GISEL_C-NEXT: enable_dx10_clamp = 1 +; GISEL_C-NEXT: debug_mode = 0 +; GISEL_C-NEXT: enable_ieee_mode = 1 +; GISEL_C-NEXT: enable_wgp_mode = 0 +; GISEL_C-NEXT: enable_mem_ordered = 0 +; GISEL_C-NEXT: enable_fwd_progress = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; GISEL_C-NEXT: user_sgpr_count = 6 +; GISEL_C-NEXT: enable_trap_handler = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_C-NEXT: enable_vgpr_workitem_id = 0 +; GISEL_C-NEXT: enable_exception_msb = 0 +; GISEL_C-NEXT: granulated_lds_size = 0 +; GISEL_C-NEXT: enable_exception = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_queue_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_id = 0 +; GISEL_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_x 
= 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_C-NEXT: enable_wavefront_size32 = 0 +; GISEL_C-NEXT: enable_ordered_append_gds = 0 +; GISEL_C-NEXT: private_element_size = 1 +; GISEL_C-NEXT: is_ptr64 = 1 +; GISEL_C-NEXT: is_dynamic_callstack = 0 +; GISEL_C-NEXT: is_debug_enabled = 0 +; GISEL_C-NEXT: is_xnack_enabled = 0 +; GISEL_C-NEXT: workitem_private_segment_byte_size = 0 +; GISEL_C-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_C-NEXT: gds_segment_byte_size = 0 +; GISEL_C-NEXT: kernarg_segment_byte_size = 4 +; GISEL_C-NEXT: workgroup_fbarrier_count = 0 +; GISEL_C-NEXT: wavefront_sgpr_count = 0 +; GISEL_C-NEXT: workitem_vgpr_count = 0 +; GISEL_C-NEXT: reserved_vgpr_first = 0 +; GISEL_C-NEXT: reserved_vgpr_count = 0 +; GISEL_C-NEXT: reserved_sgpr_first = 0 +; GISEL_C-NEXT: reserved_sgpr_count = 0 +; GISEL_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_C-NEXT: kernarg_segment_alignment = 4 +; GISEL_C-NEXT: group_segment_alignment = 4 +; GISEL_C-NEXT: private_segment_alignment = 4 +; GISEL_C-NEXT: wavefront_size = 6 +; GISEL_C-NEXT: call_convention = -1 +; GISEL_C-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_C-NEXT: .end_amd_kernel_code_t +; GISEL_C-NEXT: ; %bb.0: %fptr = load ptr, ptr addrspace(4) @gv.fptr0 call void %fptr() ret void } define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) { -; GCN-LABEL: test_indirect_call_sgpr_ptr_arg: -; GCN: .amd_kernel_code_t -; GCN-NEXT: amd_code_version_major = 1 -; GCN-NEXT: amd_code_version_minor = 2 -; GCN-NEXT: amd_machine_kind = 1 -; GCN-NEXT: amd_machine_version_major = 7 -; GCN-NEXT: amd_machine_version_minor = 0 -; GCN-NEXT: amd_machine_version_stepping = 0 -; GCN-NEXT: kernel_code_entry_byte_offset = 256 -; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 10 -; GCN-NEXT: granulated_wavefront_sgpr_count = 8 -; 
GCN-NEXT: priority = 0 -; GCN-NEXT: float_mode = 240 -; GCN-NEXT: priv = 0 -; GCN-NEXT: enable_dx10_clamp = 1 -; GCN-NEXT: debug_mode = 0 -; GCN-NEXT: enable_ieee_mode = 1 -; GCN-NEXT: enable_wgp_mode = 0 -; GCN-NEXT: enable_mem_ordered = 0 -; GCN-NEXT: enable_fwd_progress = 0 -; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GCN-NEXT: user_sgpr_count = 14 -; GCN-NEXT: enable_trap_handler = 0 -; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 -; GCN-NEXT: enable_sgpr_workgroup_info = 0 -; GCN-NEXT: enable_vgpr_workitem_id = 2 -; GCN-NEXT: enable_exception_msb = 0 -; GCN-NEXT: granulated_lds_size = 0 -; GCN-NEXT: enable_exception = 0 -; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 -; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 -; GCN-NEXT: enable_sgpr_queue_ptr = 1 -; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GCN-NEXT: enable_sgpr_dispatch_id = 1 -; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 -; GCN-NEXT: enable_sgpr_private_segment_size = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GCN-NEXT: enable_wavefront_size32 = 0 -; GCN-NEXT: enable_ordered_append_gds = 0 -; GCN-NEXT: private_element_size = 1 -; GCN-NEXT: is_ptr64 = 1 -; GCN-NEXT: is_dynamic_callstack = 1 -; GCN-NEXT: is_debug_enabled = 0 -; GCN-NEXT: is_xnack_enabled = 0 -; GCN-NEXT: workitem_private_segment_byte_size = 16384 -; GCN-NEXT: workgroup_group_segment_byte_size = 0 -; GCN-NEXT: gds_segment_byte_size = 0 -; GCN-NEXT: kernarg_segment_byte_size = 64 -; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 68 -; GCN-NEXT: workitem_vgpr_count = 42 -; GCN-NEXT: reserved_vgpr_first = 0 -; GCN-NEXT: reserved_vgpr_count = 0 -; GCN-NEXT: reserved_sgpr_first = 0 -; GCN-NEXT: reserved_sgpr_count = 0 -; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GCN-NEXT: 
debug_private_segment_buffer_sgpr = 0 -; GCN-NEXT: kernarg_segment_alignment = 4 -; GCN-NEXT: group_segment_alignment = 4 -; GCN-NEXT: private_segment_alignment = 4 -; GCN-NEXT: wavefront_size = 6 -; GCN-NEXT: call_convention = -1 -; GCN-NEXT: runtime_loader_kernel_symbol = 0 -; GCN-NEXT: .end_amd_kernel_code_t -; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GCN-NEXT: s_add_i32 s12, s12, s17 -; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GCN-NEXT: s_add_u32 s0, s0, s17 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_mov_b32 s13, s15 -; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_getpc_b64 s[14:15] -; GCN-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GCN-NEXT: s_add_u32 s8, s8, 8 -; GCN-NEXT: s_addc_u32 s9, s9, 0 -; GCN-NEXT: v_or_b32_e32 v0, v0, v1 -; GCN-NEXT: v_or_b32_e32 v31, v0, v2 -; GCN-NEXT: v_mov_b32_e32 v0, 0x7b -; GCN-NEXT: s_mov_b32 s14, s16 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GCN-NEXT: s_endpgm +; GCN_O-LABEL: test_indirect_call_sgpr_ptr_arg: +; GCN_O: .amd_kernel_code_t +; GCN_O-NEXT: amd_code_version_major = 1 +; GCN_O-NEXT: amd_code_version_minor = 2 +; GCN_O-NEXT: amd_machine_kind = 1 +; GCN_O-NEXT: amd_machine_version_major = 7 +; GCN_O-NEXT: amd_machine_version_minor = 0 +; GCN_O-NEXT: amd_machine_version_stepping = 0 +; GCN_O-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_O-NEXT: granulated_workitem_vgpr_count = 10 +; GCN_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GCN_O-NEXT: priority = 0 +; GCN_O-NEXT: float_mode = 240 +; GCN_O-NEXT: priv = 0 +; GCN_O-NEXT: enable_dx10_clamp = 1 +; GCN_O-NEXT: debug_mode = 0 +; GCN_O-NEXT: enable_ieee_mode = 1 +; GCN_O-NEXT: enable_wgp_mode = 0 +; GCN_O-NEXT: 
enable_mem_ordered = 0 +; GCN_O-NEXT: enable_fwd_progress = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GCN_O-NEXT: user_sgpr_count = 14 +; GCN_O-NEXT: enable_trap_handler = 0 +; GCN_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_O-NEXT: enable_vgpr_workitem_id = 2 +; GCN_O-NEXT: enable_exception_msb = 0 +; GCN_O-NEXT: granulated_lds_size = 0 +; GCN_O-NEXT: enable_exception = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GCN_O-NEXT: enable_sgpr_queue_ptr = 1 +; GCN_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_id = 1 +; GCN_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GCN_O-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_O-NEXT: enable_wavefront_size32 = 0 +; GCN_O-NEXT: enable_ordered_append_gds = 0 +; GCN_O-NEXT: private_element_size = 1 +; GCN_O-NEXT: is_ptr64 = 1 +; GCN_O-NEXT: is_dynamic_callstack = 1 +; GCN_O-NEXT: is_debug_enabled = 0 +; GCN_O-NEXT: is_xnack_enabled = 0 +; GCN_O-NEXT: workitem_private_segment_byte_size = 16384 +; GCN_O-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_O-NEXT: gds_segment_byte_size = 0 +; GCN_O-NEXT: kernarg_segment_byte_size = 64 +; GCN_O-NEXT: workgroup_fbarrier_count = 0 +; GCN_O-NEXT: wavefront_sgpr_count = 68 +; GCN_O-NEXT: workitem_vgpr_count = 42 +; GCN_O-NEXT: reserved_vgpr_first = 0 +; GCN_O-NEXT: reserved_vgpr_count = 0 +; GCN_O-NEXT: reserved_sgpr_first = 0 +; GCN_O-NEXT: reserved_sgpr_count = 0 +; GCN_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GCN_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_O-NEXT: kernarg_segment_alignment = 4 +; GCN_O-NEXT: group_segment_alignment = 4 +; 
GCN_O-NEXT: private_segment_alignment = 4 +; GCN_O-NEXT: wavefront_size = 6 +; GCN_O-NEXT: call_convention = -1 +; GCN_O-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_O-NEXT: .end_amd_kernel_code_t +; GCN_O-NEXT: ; %bb.0: +; GCN_O-NEXT: s_mov_b32 s32, 0 +; GCN_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GCN_O-NEXT: s_add_i32 s12, s12, s17 +; GCN_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GCN_O-NEXT: s_add_u32 s0, s0, s17 +; GCN_O-NEXT: s_addc_u32 s1, s1, 0 +; GCN_O-NEXT: s_mov_b32 s13, s15 +; GCN_O-NEXT: s_mov_b32 s12, s14 +; GCN_O-NEXT: s_getpc_b64 s[14:15] +; GCN_O-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 +; GCN_O-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 +; GCN_O-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GCN_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GCN_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GCN_O-NEXT: s_add_u32 s8, s8, 8 +; GCN_O-NEXT: s_addc_u32 s9, s9, 0 +; GCN_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GCN_O-NEXT: v_or_b32_e32 v31, v0, v2 +; GCN_O-NEXT: v_mov_b32_e32 v0, 0x7b +; GCN_O-NEXT: s_mov_b32 s14, s16 +; GCN_O-NEXT: s_waitcnt lgkmcnt(0) +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GCN_O-NEXT: s_endpgm +; +; GCN_C-LABEL: test_indirect_call_sgpr_ptr_arg: +; GCN_C: .amd_kernel_code_t +; GCN_C-NEXT: amd_code_version_major = 1 +; GCN_C-NEXT: amd_code_version_minor = 2 +; GCN_C-NEXT: amd_machine_kind = 1 +; GCN_C-NEXT: amd_machine_version_major = 7 +; GCN_C-NEXT: amd_machine_version_minor = 0 +; GCN_C-NEXT: amd_machine_version_stepping = 0 +; GCN_C-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_C-NEXT: granulated_workitem_vgpr_count = 0 +; GCN_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GCN_C-NEXT: priority = 0 +; GCN_C-NEXT: float_mode = 240 +; GCN_C-NEXT: priv = 0 +; GCN_C-NEXT: enable_dx10_clamp = 1 +; GCN_C-NEXT: debug_mode = 0 +; GCN_C-NEXT: enable_ieee_mode = 1 +; GCN_C-NEXT: enable_wgp_mode = 0 +; GCN_C-NEXT: enable_mem_ordered = 0 +; GCN_C-NEXT: enable_fwd_progress = 0 +; 
GCN_C-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; GCN_C-NEXT: user_sgpr_count = 6 +; GCN_C-NEXT: enable_trap_handler = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_C-NEXT: enable_vgpr_workitem_id = 0 +; GCN_C-NEXT: enable_exception_msb = 0 +; GCN_C-NEXT: granulated_lds_size = 0 +; GCN_C-NEXT: enable_exception = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GCN_C-NEXT: enable_sgpr_queue_ptr = 0 +; GCN_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_id = 0 +; GCN_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_C-NEXT: enable_wavefront_size32 = 0 +; GCN_C-NEXT: enable_ordered_append_gds = 0 +; GCN_C-NEXT: private_element_size = 1 +; GCN_C-NEXT: is_ptr64 = 1 +; GCN_C-NEXT: is_dynamic_callstack = 0 +; GCN_C-NEXT: is_debug_enabled = 0 +; GCN_C-NEXT: is_xnack_enabled = 0 +; GCN_C-NEXT: workitem_private_segment_byte_size = 0 +; GCN_C-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_C-NEXT: gds_segment_byte_size = 0 +; GCN_C-NEXT: kernarg_segment_byte_size = 4 +; GCN_C-NEXT: workgroup_fbarrier_count = 0 +; GCN_C-NEXT: wavefront_sgpr_count = 0 +; GCN_C-NEXT: workitem_vgpr_count = 0 +; GCN_C-NEXT: reserved_vgpr_first = 0 +; GCN_C-NEXT: reserved_vgpr_count = 0 +; GCN_C-NEXT: reserved_sgpr_first = 0 +; GCN_C-NEXT: reserved_sgpr_count = 0 +; GCN_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GCN_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_C-NEXT: kernarg_segment_alignment = 4 +; GCN_C-NEXT: group_segment_alignment = 4 +; GCN_C-NEXT: private_segment_alignment = 4 +; GCN_C-NEXT: wavefront_size = 6 +; 
GCN_C-NEXT: call_convention = -1 +; GCN_C-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_C-NEXT: .end_amd_kernel_code_t +; GCN_C-NEXT: ; %bb.0: ; -; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg: -; GISEL: .amd_kernel_code_t -; GISEL-NEXT: amd_code_version_major = 1 -; GISEL-NEXT: amd_code_version_minor = 2 -; GISEL-NEXT: amd_machine_kind = 1 -; GISEL-NEXT: amd_machine_version_major = 7 -; GISEL-NEXT: amd_machine_version_minor = 0 -; GISEL-NEXT: amd_machine_version_stepping = 0 -; GISEL-NEXT: kernel_code_entry_byte_offset = 256 -; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 10 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 -; GISEL-NEXT: priority = 0 -; GISEL-NEXT: float_mode = 240 -; GISEL-NEXT: priv = 0 -; GISEL-NEXT: enable_dx10_clamp = 1 -; GISEL-NEXT: debug_mode = 0 -; GISEL-NEXT: enable_ieee_mode = 1 -; GISEL-NEXT: enable_wgp_mode = 0 -; GISEL-NEXT: enable_mem_ordered = 0 -; GISEL-NEXT: enable_fwd_progress = 0 -; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GISEL-NEXT: user_sgpr_count = 14 -; GISEL-NEXT: enable_trap_handler = 0 -; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 -; GISEL-NEXT: enable_sgpr_workgroup_info = 0 -; GISEL-NEXT: enable_vgpr_workitem_id = 2 -; GISEL-NEXT: enable_exception_msb = 0 -; GISEL-NEXT: granulated_lds_size = 0 -; GISEL-NEXT: enable_exception = 0 -; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 -; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 -; GISEL-NEXT: enable_sgpr_queue_ptr = 1 -; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GISEL-NEXT: enable_sgpr_dispatch_id = 1 -; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 -; GISEL-NEXT: enable_sgpr_private_segment_size = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GISEL-NEXT: enable_wavefront_size32 
= 0 -; GISEL-NEXT: enable_ordered_append_gds = 0 -; GISEL-NEXT: private_element_size = 1 -; GISEL-NEXT: is_ptr64 = 1 -; GISEL-NEXT: is_dynamic_callstack = 1 -; GISEL-NEXT: is_debug_enabled = 0 -; GISEL-NEXT: is_xnack_enabled = 0 -; GISEL-NEXT: workitem_private_segment_byte_size = 16384 -; GISEL-NEXT: workgroup_group_segment_byte_size = 0 -; GISEL-NEXT: gds_segment_byte_size = 0 -; GISEL-NEXT: kernarg_segment_byte_size = 64 -; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 68 -; GISEL-NEXT: workitem_vgpr_count = 42 -; GISEL-NEXT: reserved_vgpr_first = 0 -; GISEL-NEXT: reserved_vgpr_count = 0 -; GISEL-NEXT: reserved_sgpr_first = 0 -; GISEL-NEXT: reserved_sgpr_count = 0 -; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 -; GISEL-NEXT: kernarg_segment_alignment = 4 -; GISEL-NEXT: group_segment_alignment = 4 -; GISEL-NEXT: private_segment_alignment = 4 -; GISEL-NEXT: wavefront_size = 6 -; GISEL-NEXT: call_convention = -1 -; GISEL-NEXT: runtime_loader_kernel_symbol = 0 -; GISEL-NEXT: .end_amd_kernel_code_t -; GISEL-NEXT: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s32, 0 -; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GISEL-NEXT: s_add_i32 s12, s12, s17 -; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GISEL-NEXT: s_add_u32 s0, s0, s17 -; GISEL-NEXT: s_addc_u32 s1, s1, 0 -; GISEL-NEXT: s_mov_b32 s13, s15 -; GISEL-NEXT: s_mov_b32 s12, s14 -; GISEL-NEXT: s_getpc_b64 s[14:15] -; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 -; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: s_add_u32 s8, s8, 8 -; GISEL-NEXT: s_addc_u32 s9, s9, 0 -; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 -; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GISEL-NEXT: s_mov_b32 s14, s16 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GISEL-NEXT: s_endpgm +; GISEL_O-LABEL: test_indirect_call_sgpr_ptr_arg: +; GISEL_O: .amd_kernel_code_t +; GISEL_O-NEXT: amd_code_version_major = 1 +; GISEL_O-NEXT: amd_code_version_minor = 2 +; GISEL_O-NEXT: amd_machine_kind = 1 +; GISEL_O-NEXT: amd_machine_version_major = 7 +; GISEL_O-NEXT: amd_machine_version_minor = 0 +; GISEL_O-NEXT: amd_machine_version_stepping = 0 +; GISEL_O-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_O-NEXT: granulated_workitem_vgpr_count = 10 +; GISEL_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GISEL_O-NEXT: priority = 0 +; GISEL_O-NEXT: float_mode = 240 +; GISEL_O-NEXT: priv = 0 +; GISEL_O-NEXT: enable_dx10_clamp = 1 +; GISEL_O-NEXT: debug_mode = 0 +; GISEL_O-NEXT: enable_ieee_mode = 1 +; GISEL_O-NEXT: enable_wgp_mode = 0 +; GISEL_O-NEXT: enable_mem_ordered = 0 +; GISEL_O-NEXT: enable_fwd_progress = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GISEL_O-NEXT: user_sgpr_count = 14 +; GISEL_O-NEXT: enable_trap_handler = 0 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_O-NEXT: enable_vgpr_workitem_id = 2 +; GISEL_O-NEXT: enable_exception_msb = 0 +; GISEL_O-NEXT: granulated_lds_size = 0 +; GISEL_O-NEXT: enable_exception = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_queue_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_id = 1 +; GISEL_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GISEL_O-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_O-NEXT: 
enable_wavefront_size32 = 0 +; GISEL_O-NEXT: enable_ordered_append_gds = 0 +; GISEL_O-NEXT: private_element_size = 1 +; GISEL_O-NEXT: is_ptr64 = 1 +; GISEL_O-NEXT: is_dynamic_callstack = 1 +; GISEL_O-NEXT: is_debug_enabled = 0 +; GISEL_O-NEXT: is_xnack_enabled = 0 +; GISEL_O-NEXT: workitem_private_segment_byte_size = 16384 +; GISEL_O-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_O-NEXT: gds_segment_byte_size = 0 +; GISEL_O-NEXT: kernarg_segment_byte_size = 64 +; GISEL_O-NEXT: workgroup_fbarrier_count = 0 +; GISEL_O-NEXT: wavefront_sgpr_count = 68 +; GISEL_O-NEXT: workitem_vgpr_count = 42 +; GISEL_O-NEXT: reserved_vgpr_first = 0 +; GISEL_O-NEXT: reserved_vgpr_count = 0 +; GISEL_O-NEXT: reserved_sgpr_first = 0 +; GISEL_O-NEXT: reserved_sgpr_count = 0 +; GISEL_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_O-NEXT: kernarg_segment_alignment = 4 +; GISEL_O-NEXT: group_segment_alignment = 4 +; GISEL_O-NEXT: private_segment_alignment = 4 +; GISEL_O-NEXT: wavefront_size = 6 +; GISEL_O-NEXT: call_convention = -1 +; GISEL_O-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_O-NEXT: .end_amd_kernel_code_t +; GISEL_O-NEXT: ; %bb.0: +; GISEL_O-NEXT: s_mov_b32 s32, 0 +; GISEL_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL_O-NEXT: s_add_i32 s12, s12, s17 +; GISEL_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL_O-NEXT: s_add_u32 s0, s0, s17 +; GISEL_O-NEXT: s_addc_u32 s1, s1, 0 +; GISEL_O-NEXT: s_mov_b32 s13, s15 +; GISEL_O-NEXT: s_mov_b32 s12, s14 +; GISEL_O-NEXT: s_getpc_b64 s[14:15] +; GISEL_O-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 +; GISEL_O-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 +; GISEL_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL_O-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GISEL_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GISEL_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL_O-NEXT: s_add_u32 s8, s8, 8 +; GISEL_O-NEXT: s_addc_u32 s9, s9, 0 +; GISEL_O-NEXT: v_or_b32_e32 v31, v0, v2 
+; GISEL_O-NEXT: v_mov_b32_e32 v0, 0x7b +; GISEL_O-NEXT: s_mov_b32 s14, s16 +; GISEL_O-NEXT: s_waitcnt lgkmcnt(0) +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL_O-NEXT: s_endpgm +; +; GISEL_C-LABEL: test_indirect_call_sgpr_ptr_arg: +; GISEL_C: .amd_kernel_code_t +; GISEL_C-NEXT: amd_code_version_major = 1 +; GISEL_C-NEXT: amd_code_version_minor = 2 +; GISEL_C-NEXT: amd_machine_kind = 1 +; GISEL_C-NEXT: amd_machine_version_major = 7 +; GISEL_C-NEXT: amd_machine_version_minor = 0 +; GISEL_C-NEXT: amd_machine_version_stepping = 0 +; GISEL_C-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_C-NEXT: granulated_workitem_vgpr_count = 0 +; GISEL_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GISEL_C-NEXT: priority = 0 +; GISEL_C-NEXT: float_mode = 240 +; GISEL_C-NEXT: priv = 0 +; GISEL_C-NEXT: enable_dx10_clamp = 1 +; GISEL_C-NEXT: debug_mode = 0 +; GISEL_C-NEXT: enable_ieee_mode = 1 +; GISEL_C-NEXT: enable_wgp_mode = 0 +; GISEL_C-NEXT: enable_mem_ordered = 0 +; GISEL_C-NEXT: enable_fwd_progress = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; GISEL_C-NEXT: user_sgpr_count = 6 +; GISEL_C-NEXT: enable_trap_handler = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_C-NEXT: enable_vgpr_workitem_id = 0 +; GISEL_C-NEXT: enable_exception_msb = 0 +; GISEL_C-NEXT: granulated_lds_size = 0 +; GISEL_C-NEXT: enable_exception = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_queue_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_id = 0 +; GISEL_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GISEL_C-NEXT: 
enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_C-NEXT: enable_wavefront_size32 = 0 +; GISEL_C-NEXT: enable_ordered_append_gds = 0 +; GISEL_C-NEXT: private_element_size = 1 +; GISEL_C-NEXT: is_ptr64 = 1 +; GISEL_C-NEXT: is_dynamic_callstack = 0 +; GISEL_C-NEXT: is_debug_enabled = 0 +; GISEL_C-NEXT: is_xnack_enabled = 0 +; GISEL_C-NEXT: workitem_private_segment_byte_size = 0 +; GISEL_C-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_C-NEXT: gds_segment_byte_size = 0 +; GISEL_C-NEXT: kernarg_segment_byte_size = 4 +; GISEL_C-NEXT: workgroup_fbarrier_count = 0 +; GISEL_C-NEXT: wavefront_sgpr_count = 0 +; GISEL_C-NEXT: workitem_vgpr_count = 0 +; GISEL_C-NEXT: reserved_vgpr_first = 0 +; GISEL_C-NEXT: reserved_vgpr_count = 0 +; GISEL_C-NEXT: reserved_sgpr_first = 0 +; GISEL_C-NEXT: reserved_sgpr_count = 0 +; GISEL_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_C-NEXT: kernarg_segment_alignment = 4 +; GISEL_C-NEXT: group_segment_alignment = 4 +; GISEL_C-NEXT: private_segment_alignment = 4 +; GISEL_C-NEXT: wavefront_size = 6 +; GISEL_C-NEXT: call_convention = -1 +; GISEL_C-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_C-NEXT: .end_amd_kernel_code_t +; GISEL_C-NEXT: ; %bb.0: %fptr = load ptr, ptr addrspace(4) @gv.fptr1 call void %fptr(i32 123) ret void } define void @test_indirect_call_vgpr_ptr(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 18 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; 
GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[46:47], exec -; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] -; GCN-NEXT: s_cbranch_execnz .LBB2_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; 
GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 18 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: 
v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: s_mov_b64 s[46:47], exec +; GCN_O-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN_O-NEXT: s_cbranch_execnz .LBB2_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[46:47] +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: 
v_readlane_b32 s4, v40, 18 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 18 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 
s[46:47], exec -; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] -; GISEL-NEXT: s_cbranch_execnz .LBB2_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 18 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr: +; 
GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 18 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: s_mov_b64 s[46:47], exec +; GISEL_O-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: 
s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL_O-NEXT: s_cbranch_execnz .LBB2_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[46:47] +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call void %fptr() ret void } define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_arg: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: 
s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 18 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[46:47], exec -; GCN-NEXT: v_mov_b32_e32 v2, 0x7b -; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: v_mov_b32_e32 v0, v2 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: ; implicit-def: 
$vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] -; GCN-NEXT: s_cbranch_execnz .LBB3_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 18 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: 
v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: s_mov_b64 s[46:47], exec +; GCN_O-NEXT: v_mov_b32_e32 v2, 0x7b +; GCN_O-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: v_mov_b32_e32 v0, v2 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: ; implicit-def: $vgpr2 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN_O-NEXT: s_cbranch_execnz .LBB3_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[46:47] +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: 
v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: v_readlane_b32 s4, v40, 18 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 18 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 
10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: s_mov_b64 s[46:47], exec +; GISEL_O-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GISEL_O-NEXT: v_mov_b32_e32 v0, 0x7b +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL_O-NEXT: s_cbranch_execnz .LBB3_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[46:47] +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 
9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 18 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: 
s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 s[46:47], exec -; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] -; GISEL-NEXT: s_cbranch_execnz .LBB3_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 
18 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call void %fptr(i32 123) ret void } define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_ret: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 18 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: 
s_mov_b64 s[46:47], exec -; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_mov_b32_e32 v2, v0 -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] -; GCN-NEXT: s_cbranch_execnz .LBB4_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_ret: 
+; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 18 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: s_mov_b64 s[46:47], exec +; GCN_O-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, 
s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: v_mov_b32_e32 v2, v0 +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN_O-NEXT: s_cbranch_execnz .LBB4_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[46:47] +; GCN_O-NEXT: v_add_i32_e32 v0, vcc, 1, v2 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: v_readlane_b32 s4, v40, 18 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_ret: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_ret: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; 
GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 18 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: s_mov_b64 s[46:47], exec +; GISEL_O-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: v_mov_b32_e32 v1, v0 +; GISEL_O-NEXT: ; 
implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL_O-NEXT: s_cbranch_execnz .LBB4_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[46:47] +; GISEL_O-NEXT: v_add_i32_e32 v0, vcc, 1, v1 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 18 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; 
GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 s[46:47], exec -; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: v_mov_b32_e32 v1, v0 -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] -; GISEL-NEXT: s_cbranch_execnz .LBB4_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; 
GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 18 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_ret: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) %a = call i32 %fptr() %b = add i32 %a, 1 ret i32 %b } define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch: -; GCN: ; %bb.0: ; %bb0 -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 20 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; 
GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: v_and_b32_e32 v2, 1, v2 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc -; GCN-NEXT: s_cbranch_execz .LBB5_4 -; GCN-NEXT: ; %bb.1: ; %bb1 -; GCN-NEXT: s_mov_b64 s[48:49], exec -; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[50:51] -; GCN-NEXT: s_cbranch_execnz .LBB5_2 -; GCN-NEXT: ; %bb.3: -; GCN-NEXT: s_mov_b64 exec, s[48:49] -; GCN-NEXT: .LBB5_4: ; %bb2 -; GCN-NEXT: s_or_b64 exec, exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, 
v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 20 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GCN_O: ; %bb.0: ; %bb0 +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 20 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; 
GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: v_and_b32_e32 v2, 1, v2 +; GCN_O-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GCN_O-NEXT: s_and_saveexec_b64 s[46:47], vcc +; GCN_O-NEXT: s_cbranch_execz .LBB5_4 +; GCN_O-NEXT: ; %bb.1: ; %bb1 +; GCN_O-NEXT: s_mov_b64 s[48:49], exec +; GCN_O-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[50:51], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[50:51] +; GCN_O-NEXT: s_cbranch_execnz .LBB5_2 +; GCN_O-NEXT: ; %bb.3: +; GCN_O-NEXT: s_mov_b64 exec, s[48:49] +; GCN_O-NEXT: .LBB5_4: ; %bb2 +; GCN_O-NEXT: s_or_b64 exec, exec, s[46:47] +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: 
v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: v_readlane_b32 s4, v40, 20 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch: -; GISEL: ; %bb.0: ; %bb0 -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 20 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: 
v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc -; GISEL-NEXT: s_cbranch_execz .LBB5_4 -; GISEL-NEXT: ; %bb.1: ; %bb1 -; GISEL-NEXT: s_mov_b64 s[48:49], exec -; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51] -; GISEL-NEXT: s_cbranch_execnz .LBB5_2 -; GISEL-NEXT: ; %bb.3: -; GISEL-NEXT: s_mov_b64 exec, s[48:49] -; GISEL-NEXT: .LBB5_4: ; %bb2 -; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 
16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 20 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GCN_C: ; %bb.0: ; %bb0 +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_C-NEXT: v_and_b32_e32 v0, 1, v2 +; GCN_C-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GCN_C-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN_C-NEXT: ; %bb.1: ; %bb1 +; GCN_C-NEXT: ; divergent unreachable +; GCN_C-NEXT: ; %bb.2: ; %UnifiedReturnBlock +; GCN_C-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN_C-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GISEL_O: ; %bb.0: ; %bb0 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 20 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 
+; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: v_and_b32_e32 v2, 1, v2 +; GISEL_O-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL_O-NEXT: s_and_saveexec_b64 s[46:47], vcc +; GISEL_O-NEXT: s_cbranch_execz .LBB5_4 +; GISEL_O-NEXT: ; %bb.1: ; %bb1 +; GISEL_O-NEXT: s_mov_b64 s[48:49], exec +; GISEL_O-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[50:51], vcc +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: 
s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[50:51] +; GISEL_O-NEXT: s_cbranch_execnz .LBB5_2 +; GISEL_O-NEXT: ; %bb.3: +; GISEL_O-NEXT: s_mov_b64 exec, s[48:49] +; GISEL_O-NEXT: .LBB5_4: ; %bb2 +; GISEL_O-NEXT: s_or_b64 exec, exec, s[46:47] +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 20 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GISEL_C: ; %bb.0: ; %bb0 +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_C-NEXT: v_and_b32_e32 v0, 1, v2 +; GISEL_C-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL_C-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL_C-NEXT: ; %bb.1: ; %bb1 +; GISEL_C-NEXT: ; divergent unreachable +; 
GISEL_C-NEXT: ; %bb.2: ; %UnifiedReturnBlock +; GISEL_C-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL_C-NEXT: s_setpc_b64 s[30:31] bb0: br i1 %cond, label %bb1, label %bb2 @@ -1116,393 +1450,409 @@ } define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s5, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 
s[6:7], exec -; GCN-NEXT: s_movk_i32 s4, 0x7b -; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s8, v0 -; GCN-NEXT: v_readfirstlane_b32 s9, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] -; GCN-NEXT: s_cbranch_execnz .LBB6_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; 
GCN-NEXT: s_mov_b32 s33, s5 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s5, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: s_mov_b64 s[6:7], exec +; GCN_O-NEXT: s_movk_i32 s4, 0x7b +; GCN_O-NEXT: .LBB6_1: ; =>This Inner 
Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s8, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s9, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[10:11] +; GCN_O-NEXT: s_cbranch_execnz .LBB6_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 
0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s5 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s5, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; 
GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[6:7], exec -; GISEL-NEXT: s_movk_i32 s4, 0x7b -; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v0 -; GISEL-NEXT: v_readfirstlane_b32 s9, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] -; GISEL-NEXT: s_cbranch_execnz .LBB6_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 
-; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s5 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s5, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; 
GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: s_mov_b64 s[6:7], exec +; GISEL_O-NEXT: s_movk_i32 s4, 0x7b +; GISEL_O-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s8, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s9, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL_O-NEXT: s_cbranch_execnz .LBB6_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, 
v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s5 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call amdgpu_gfx void %fptr(i32 inreg 123) ret void } define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s10, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: 
v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: v_mov_b32_e32 v41, v0 -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s6, v1 -; GCN-NEXT: v_readfirstlane_b32 s7, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] -; GCN-NEXT: s_cbranch_execnz .LBB7_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; 
GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s10 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s10, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; 
GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: v_mov_b32_e32 v41, v0 +; GCN_O-NEXT: s_mov_b64 s[4:5], exec +; GCN_O-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s6, v1 +; GCN_O-NEXT: v_readfirstlane_b32 s7, v2 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GCN_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN_O-NEXT: v_mov_b32_e32 v0, v41 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN_O-NEXT: ; implicit-def: $vgpr1_vgpr2 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN_O-NEXT: s_cbranch_execnz .LBB7_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: v_mov_b32_e32 v0, v41 +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: 
v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s10 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s10, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: 
v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: v_mov_b32_e32 v41, v0 -; GISEL-NEXT: s_mov_b64 s[4:5], exec -; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v1 -; GISEL-NEXT: v_readfirstlane_b32 s7, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GISEL-NEXT: ; implicit-def: $vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] -; GISEL-NEXT: s_cbranch_execnz .LBB7_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 
26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s10 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s10, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded 
Spill +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: v_mov_b32_e32 v41, v0 +; GISEL_O-NEXT: s_mov_b64 s[4:5], exec +; GISEL_O-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL_O-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL_O-NEXT: v_mov_b32_e32 v0, v41 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL_O-NEXT: ; implicit-def: $vgpr1 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL_O-NEXT: 
s_cbranch_execnz .LBB7_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: v_mov_b32_e32 v0, v41 +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s10 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: 
test_indirect_call_vgpr_ptr_arg_and_reuse: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call amdgpu_gfx void %fptr(i32 %i) ret i32 %i } @@ -1512,391 +1862,410 @@ ; allocator is not able to do that because the return value clashes with the liverange of an ; IMPLICIT_DEF of the argument. define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s10, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: 
v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s6, v1 -; GCN-NEXT: v_readfirstlane_b32 s7, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: v_mov_b32_e32 v3, v0 -; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr0 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] -; GCN-NEXT: s_cbranch_execnz .LBB8_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 
s30, v40, 0 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s10 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s10, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 
v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: s_mov_b64 s[4:5], exec +; GCN_O-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s6, v1 +; GCN_O-NEXT: v_readfirstlane_b32 s7, v2 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GCN_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN_O-NEXT: v_mov_b32_e32 v3, v0 +; GCN_O-NEXT: ; implicit-def: $vgpr1_vgpr2 +; GCN_O-NEXT: ; implicit-def: $vgpr0 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN_O-NEXT: s_cbranch_execnz .LBB8_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: v_mov_b32_e32 v0, v3 +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 
s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s10 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s10, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 
22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: s_mov_b64 s[4:5], exec +; GISEL_O-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s8, v1 +; GISEL_O-NEXT: v_readfirstlane_b32 s9, v2 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GISEL_O-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL_O-NEXT: v_mov_b32_e32 v2, v0 +; GISEL_O-NEXT: ; implicit-def: $vgpr1 +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[6:7] +; GISEL_O-NEXT: s_cbranch_execnz .LBB8_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: v_mov_b32_e32 v0, v2 +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 
s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s10 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s10, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; 
GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[4:5], exec -; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GISEL-NEXT: v_mov_b32_e32 v2, v0 -; GISEL-NEXT: ; implicit-def: $vgpr1 -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7] -; GISEL-NEXT: s_cbranch_execnz .LBB8_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v0, v2 -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, 
v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s10 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) %ret = call amdgpu_gfx i32 %fptr(i32 %i) ret i32 %ret } ; Calling a vgpr can never be a tail call. 
define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { -; GCN-LABEL: test_indirect_tail_call_vgpr_ptr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s10, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s6, v0 -; GCN-NEXT: v_readfirstlane_b32 s7, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] -; 
GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] -; GCN-NEXT: s_cbranch_execnz .LBB9_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s10 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_tail_call_vgpr_ptr: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: 
s_mov_b32 s10, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: s_mov_b64 s[4:5], exec +; GCN_O-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s6, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s7, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN_O-NEXT: ; 
implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN_O-NEXT: s_cbranch_execnz .LBB9_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s10 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_tail_call_vgpr_ptr: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: 
test_indirect_tail_call_vgpr_ptr: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s10, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: s_mov_b64 s[4:5], exec +; GISEL_O-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s6, v0 +; 
GISEL_O-NEXT: v_readfirstlane_b32 s7, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL_O-NEXT: s_cbranch_execnz .LBB9_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: 
s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s10 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s10, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[4:5], exec -; GISEL-NEXT: .LBB9_1: ; =>This Inner 
Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v0 -; GISEL-NEXT: v_readfirstlane_b32 s7, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] -; GISEL-NEXT: s_cbranch_execnz .LBB9_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; 
GISEL-NEXT: s_mov_b32 s33, s10 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_tail_call_vgpr_ptr: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) tail call amdgpu_gfx void %fptr() ret void } !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GISEL: {{.*}} Index: llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll +++ llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -1,4 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_C %s +; RUN: FileCheck -check-prefix=REMARK %s < %t +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -attributor-assume-closed-world=false -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_O %s ; RUN: FileCheck -check-prefix=REMARK %s < %t ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel @@ -157,16 +159,27 @@ ret void } -; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call -; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39 -; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32 -; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10 -; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 -; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True -; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 -; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 -; 
STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 -; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 +; STDERR_O: remark: foo.cl:64:0: Function Name: test_indirect_call +; STDERR_O-NEXT: remark: foo.cl:64:0: SGPRs: 39 +; STDERR_O-NEXT: remark: foo.cl:64:0: VGPRs: 32 +; STDERR_O-NEXT: remark: foo.cl:64:0: AGPRs: 10 +; STDERR_O-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 +; STDERR_O-NEXT: remark: foo.cl:64:0: Dynamic Stack: True +; STDERR_O-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 +; STDERR_O-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 + +; STDERR_C: remark: foo.cl:64:0: Function Name: test_indirect_call +; STDERR_C-NEXT: remark: foo.cl:64:0: SGPRs: 4 +; STDERR_C-NEXT: remark: foo.cl:64:0: VGPRs: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: AGPRs: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: Dynamic Stack: False +; STDERR_C-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 +; STDERR_C-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 define amdgpu_kernel void @test_indirect_call() !dbg !9 { @@ -175,17 +188,27 @@ ret void } -; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack -; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39 -; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32 -; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10 -; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 -; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True -; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 -; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 -; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 -; STDERR-NEXT: remark: foo.cl:74:0: LDS Size 
[bytes/block]: 0 +; STDERR_O: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack +; STDERR_O-NEXT: remark: foo.cl:74:0: SGPRs: 39 +; STDERR_O-NEXT: remark: foo.cl:74:0: VGPRs: 32 +; STDERR_O-NEXT: remark: foo.cl:74:0: AGPRs: 10 +; STDERR_O-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 +; STDERR_O-NEXT: remark: foo.cl:74:0: Dynamic Stack: True +; STDERR_O-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 +; STDERR_O-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 +; STDERR_C: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack +; STDERR_C-NEXT: remark: foo.cl:74:0: SGPRs: 12 +; STDERR_C-NEXT: remark: foo.cl:74:0: VGPRs: 1 +; STDERR_C-NEXT: remark: foo.cl:74:0: AGPRs: 0 +; STDERR_C-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 +; STDERR_C-NEXT: remark: foo.cl:74:0: Dynamic Stack: False +; STDERR_C-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 +; STDERR_C-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg) define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 { Index: llvm/test/CodeGen/AMDGPU/sibling-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s target datalayout = "A5" ; FIXME: Why is this commuted only sometimes? Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -1,8 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_CW %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_OW %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -attributor-assume-closed-world=false < %s | FileCheck -check-prefix=GFX9 %s target datalayout = "A5" @@ -21,6 +22,17 @@ ret void } +define ptr @helper() { +; AKF_GCN-LABEL: define {{[^@]+}}@helper() { 
+; AKF_GCN-NEXT: ret ptr @indirect +; +; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@helper +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: ret ptr @indirect +; + ret ptr @indirect +} + define amdgpu_kernel void @test_simple_indirect_call() { ; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call ; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] { @@ -31,14 +43,23 @@ ; AKF_GCN-NEXT: call void [[FP]]() ; AKF_GCN-NEXT: ret void ; -; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr -; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 -; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 -; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() -; ATTRIBUTOR_GCN-NEXT: ret void +; ATTRIBUTOR_GCN_CW-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_GCN_CW-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_GCN_CW-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_GCN_CW-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr +; ATTRIBUTOR_GCN_CW-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_CW-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_CW-NEXT: call void @indirect() +; ATTRIBUTOR_GCN_CW-NEXT: ret void +; +; ATTRIBUTOR_GCN_OW-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_GCN_OW-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_GCN_OW-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_GCN_OW-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr +; ATTRIBUTOR_GCN_OW-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_OW-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_OW-NEXT: call void [[FP]]() +; ATTRIBUTOR_GCN_OW-NEXT: ret void ; ; 
GFX9-LABEL: test_simple_indirect_call: ; GFX9: ; %bb.0: @@ -73,6 +94,9 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN_CW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN_CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +;. 
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN_OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;.