Index: llvm/include/llvm/IR/Function.h =================================================================== --- llvm/include/llvm/IR/Function.h +++ llvm/include/llvm/IR/Function.h @@ -888,13 +888,14 @@ /// other than direct calls or invokes to it, or blockaddress expressions. /// Optionally passes back an offending user for diagnostic purposes, /// ignores callback uses, assume like pointer annotation calls, references in - /// llvm.used and llvm.compiler.used variables, and operand bundle - /// "clang.arc.attachedcall". - bool hasAddressTaken(const User ** = nullptr, - bool IgnoreCallbackUses = false, + /// llvm.used and llvm.compiler.used variables, operand bundle + /// "clang.arc.attachedcall", and direct calls with a different call site + /// signature (the function is implicitly cast). + bool hasAddressTaken(const User ** = nullptr, bool IgnoreCallbackUses = false, bool IgnoreAssumeLikeCalls = true, bool IngoreLLVMUsed = false, - bool IgnoreARCAttachedCall = false) const; + bool IgnoreARCAttachedCall = false, + bool IgnoreCastedDirectCall = false) const; /// isDefTriviallyDead - Return true if it is trivially safe to remove /// this function definition from the module (because it isn't externally Index: llvm/include/llvm/Transforms/IPO/Attributor.h =================================================================== --- llvm/include/llvm/Transforms/IPO/Attributor.h +++ llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1316,6 +1316,11 @@ return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX(); } + /// Return all functions that might be called indirectly, only valid for + /// closed world modules (see isClosedWorldModule). + const ArrayRef + getIndirectlyCallableFunctions(Attributor &A) const; + private: struct FunctionInfo { ~FunctionInfo(); @@ -1333,6 +1338,9 @@ /// Function contains a `musttail` call.
bool ContainsMustTailCall; + + /// Function is potentially called indirectly. + bool IsPotentiallyCalledIndirectly; }; /// A map type from functions to informatio about it. @@ -1348,6 +1356,10 @@ return *FI; } + /// Vector of functions that might be callable indirectly, e.g., via a + /// function pointer. + SmallVector IndirectlyCallableFunctions; + /// Initialize the function information cache \p FI for the function \p F. /// /// This method needs to be called for all function that might be looked at @@ -1413,13 +1425,18 @@ /// Flag to determine if we should skip all liveness checks early on. bool UseLiveness = true; + /// Flag to indicate if the entire world is contained in this module, that + /// is, no outside functions exist. + bool IsClosedWorldModule = false; + /// Callback function to be invoked on internal functions marked live. std::function InitializationCallback = nullptr; /// Callback function to determine if an indirect call targets should be made /// direct call targets (with an if-cascade). - std::function + std::function IndirectCalleeSpecializationCallback = nullptr; /// Helper to update an underlying call graph and to delete functions. @@ -1482,9 +1499,7 @@ /// \param Configuration The Attributor configuration which determines what /// generic features to use. Attributor(SetVector &Functions, InformationCache &InfoCache, - AttributorConfig Configuration) - : Allocator(InfoCache.Allocator), Functions(Functions), - InfoCache(InfoCache), Configuration(Configuration) {} + AttributorConfig Configuration); ~Attributor(); @@ -1695,13 +1710,18 @@ /// Return true if we should specialize the call site \b CB for the potential /// callee \p Fn. - bool shouldSpecializeCallSiteForCallee(CallBase &CB, Function &Callee) { + bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA, + CallBase &CB, Function &Callee) { return Configuration.IndirectCalleeSpecializationCallback - ? Configuration.IndirectCalleeSpecializationCallback(*this, CB, - Callee) + ?
Configuration.IndirectCalleeSpecializationCallback(*this, AA, + CB, Callee) : true; } + /// Return true if the module contains the whole world, thus, no outside + /// functions exist. + bool isClosedWorldModule() const; + /// Return true if we derive attributes for \p Fn bool isRunOn(Function &Fn) const { return isRunOn(&Fn); } bool isRunOn(Function *Fn) const { Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -1752,7 +1752,8 @@ bool Function::hasAddressTaken(const User **PutOffender, bool IgnoreCallbackUses, bool IgnoreAssumeLikeCalls, bool IgnoreLLVMUsed, - bool IgnoreARCAttachedCall) const { + bool IgnoreARCAttachedCall, + bool IgnoreCastedDirectCall) const { for (const Use &U : uses()) { const User *FU = U.getUser(); if (isa(FU)) @@ -1801,7 +1802,8 @@ continue; } - if (!Call->isCallee(&U) || Call->getFunctionType() != getFunctionType()) { + if (!Call->isCallee(&U) || (!IgnoreCastedDirectCall && + Call->getFunctionType() != getFunctionType())) { if (IgnoreARCAttachedCall && Call->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall, U.getOperandNo())) Index: llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -15,10 +15,13 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/Analysis/CycleAnalysis.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" +#include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/Attributor.h" +#include #define DEBUG_TYPE "amdgpu-attributor" @@ -944,16 +947,44 @@ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AAAMDWavesPerEU::ID, &AACallEdges::ID, 
&AAPointerInfo::ID, - &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID}); + &AAIndirectCallInfo::ID, &AAPotentialConstantValues::ID, + &AAUnderlyingObjects::ID}); AttributorConfig AC(CGUpdater); AC.Allowed = &Allowed; AC.IsModulePass = true; AC.DefaultInitializeLiveInternals = false; + AC.IsClosedWorldModule = true; AC.IPOAmendableCB = [](const Function &F) { return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; }; + // Callback to determine if we should specialize an indirect call site with a + // specific callee. It's effectively a heuristic and we can add checks for + // the callee size, PGO, etc. For now, we check for single potential callees + // and kernel arguments as they are known uniform values. + AC.IndirectCalleeSpecializationCallback = [&](Attributor &A, + const AbstractAttribute &AA, + CallBase &CB, + Function &Callee) { + bool UsedAssumedInformation = false; + std::optional SimpleV = A.getAssumedSimplified( + *CB.getCalledOperand(), AA, UsedAssumedInformation, + AA::ValueScope::AnyScope); + assert(SimpleV.has_value() && "No value but potential callee?"); + // Unknown value. + if (!SimpleV.value()) + return false; + // Singleton function. + if (isa(SimpleV.value())) + return true; + // Uniform (kernel argument) value. + if (auto *Arg = dyn_cast_or_null(SimpleV.value())) + if (Arg->getParent()->getCallingConv() == CallingConv::AMDGPU_KERNEL) + return true; + return false; + }; + Attributor A(Functions, InfoCache, AC); for (Function &F : M) { Index: llvm/lib/Transforms/IPO/Attributor.cpp =================================================================== --- llvm/lib/Transforms/IPO/Attributor.cpp +++ llvm/lib/Transforms/IPO/Attributor.cpp @@ -175,6 +175,10 @@ cl::desc("Try to simplify all loads."), cl::init(true)); +static cl::opt CloseWorldAssumption( + "attributor-assume-closed-world", cl::Hidden, + cl::desc("Should a closed world be assumed, or not. Default if not set.")); + /// Logic operators for the change status enum class.
/// ///{ @@ -1057,6 +1061,23 @@ return HasChanged; } +Attributor::Attributor(SetVector &Functions, + InformationCache &InfoCache, + AttributorConfig Configuration) + : Allocator(InfoCache.Allocator), Functions(Functions), + InfoCache(InfoCache), Configuration(Configuration) { + if (!isClosedWorldModule()) + return; + for (Function *Fn : Functions) + if (Fn->hasAddressTaken(/*PutOffender=*/nullptr, + /*IgnoreCallbackUses=*/true, + /*IgnoreAssumeLikeCalls=*/true, + /*IgnoreLLVMUsed=*/true, + /*IgnoreARCAttachedCall=*/false, + /*IgnoreCastedDirectCall=*/true)) + InfoCache.IndirectlyCallableFunctions.push_back(Fn); +} + bool Attributor::getAttrsFromAssumes(const IRPosition &IRP, Attribute::AttrKind AK, SmallVectorImpl &Attrs) { @@ -3251,6 +3272,12 @@ It.getSecond()->~InstructionVectorTy(); } +const ArrayRef +InformationCache::getIndirectlyCallableFunctions(Attributor &A) const { + assert(A.isClosedWorldModule() && "Cannot see all indirect callees!"); + return IndirectlyCallableFunctions; +} + void Attributor::recordDependence(const AbstractAttribute &FromAA, const AbstractAttribute &ToAA, DepClassTy DepClass) { @@ -3586,6 +3613,12 @@ assert(Success && "Expected the check call to be successful!"); } +bool Attributor::isClosedWorldModule() const { + if (CloseWorldAssumption.getNumOccurrences()) + return CloseWorldAssumption; + return isModulePass() && Configuration.IsClosedWorldModule; +} + /// Helpers to ease debugging through output streams and print calls. 
/// ///{ @@ -3752,18 +3785,19 @@ DenseMap>> IndirectCalleeTrackingMap; if (MaxSpecializationPerCB.getNumOccurrences()) { - AC.IndirectCalleeSpecializationCallback = [&](Attributor &, CallBase &CB, - Function &Callee) { - if (MaxSpecializationPerCB == 0) - return false; - auto &Set = IndirectCalleeTrackingMap[&CB]; - if (!Set) - Set = std::make_unique>(); - if (Set->size() >= MaxSpecializationPerCB) - return Set->contains(&Callee); - Set->insert(&Callee); - return true; - }; + AC.IndirectCalleeSpecializationCallback = + [&](Attributor &, const AbstractAttribute &AA, CallBase &CB, + Function &Callee) { + if (MaxSpecializationPerCB == 0) + return false; + auto &Set = IndirectCalleeTrackingMap[&CB]; + if (!Set) + Set = std::make_unique>(); + if (Set->size() >= MaxSpecializationPerCB) + return Set->contains(&Callee); + Set->insert(&Callee); + return true; + }; } Attributor A(Functions, InfoCache, AC); Index: llvm/lib/Transforms/IPO/AttributorAttributes.cpp =================================================================== --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -12066,11 +12066,22 @@ /// See AbstractAttribute::initialize(...). 
void initialize(Attributor &A) override { auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees); - if (!MD) + if (!MD && !A.isClosedWorldModule()) return; - for (const auto &Op : MD->operands()) - if (Function *Callee = mdconst::dyn_extract_or_null(Op)) - PotentialCallees.insert(Callee); + + if (MD) { + for (const auto &Op : MD->operands()) + if (Function *Callee = mdconst::dyn_extract_or_null(Op)) + PotentialCallees.insert(Callee); + } else if (A.isClosedWorldModule()) { + ArrayRef IndirectlyCallableFunctions = + A.getInfoCache().getIndirectlyCallableFunctions(A); + PotentialCallees.insert(IndirectlyCallableFunctions.begin(), + IndirectlyCallableFunctions.end()); + } + + if (PotentialCallees.empty()) + indicateOptimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { @@ -12205,7 +12216,7 @@ SmallVector SkippedAssumedCallees; SmallVector> NewCalls; for (Function *NewCallee : AssumedCallees) { - if (!A.shouldSpecializeCallSiteForCallee(*CB, *NewCallee)) { + if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) { SkippedAssumedCallees.push_back(NewCallee); SpecializedForAllCallees = false; continue; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -attributor-assume-closed-world=false -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CHECK %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 
-verify-machineinstrs -o - %s | FileCheck -enable-var-scope --check-prefixes=SAMEC,CWRLD %s define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr @@ -52,24 +53,31 @@ ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[LOAD]](p0), 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 + ; + ; CWRLD-LABEL: name: test_indirect_call_sgpr_ptr + ; CWRLD: bb.1 (%ir-block.0): + ; CWRLD-NEXT: liveins: $sgpr4_sgpr5 + ; CWRLD-NEXT: {{ $}} + ; CWRLD-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 + ; CWRLD-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) call void %fptr() ret void } define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) { - ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; SAMEC-LABEL: name: test_gfx_indirect_call_sgpr_ptr + ; SAMEC: bb.1 (%ir-block.0): + ; SAMEC-NEXT: liveins: $vgpr0, $vgpr1 + ; SAMEC-NEXT: {{ $}} + ; SAMEC-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SAMEC-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SAMEC-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; SAMEC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; SAMEC-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; SAMEC-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; SAMEC-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; SAMEC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; SAMEC-NEXT: SI_RETURN call amdgpu_gfx void %fptr() ret void } Index: llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_HSA %s -; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor -attributor-assume-closed-world=false < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,OWRLD_ATTR_HSA %s +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_HSA,CWRLD_ATTR_HSA %s ; TODO: The test contains UB which is refined by the Attributor and should be removed. @@ -18,6 +19,16 @@ declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 declare i64 @llvm.amdgcn.dispatch.id() #0 +@G1 = global ptr undef +@G2 = global ptr undef + +;. +; AKF_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +; AKF_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +;. 
+; ATTRIBUTOR_HSA: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +; ATTRIBUTOR_HSA: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global ptr undef +;. define void @use_workitem_id_x() #1 { ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x ; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] { @@ -766,19 +777,55 @@ ; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] { ; AKF_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() ; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; AKF_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8 +; AKF_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8 ; AKF_HSA-NEXT: ret float [[FADD]] ; -; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call -; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { -; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() -; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 -; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]] +; OWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call +; OWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] { +; OWRLD_ATTR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]]() +; OWRLD_ATTR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; OWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8 +; OWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8 +; OWRLD_ATTR_HSA-NEXT: ret float [[FADD]] +; +; CWRLD_ATTR_HSA-LABEL: define {{[^@]+}}@func_indirect_call +; CWRLD_ATTR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR17]] { +; CWRLD_ATTR_HSA-NEXT: [[F:%.*]] = call float [[FPTR]](), !callees !0 +; CWRLD_ATTR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00 +; CWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee1, ptr @G1, align 8 +; CWRLD_ATTR_HSA-NEXT: store ptr @indirect_callee2, ptr @G2, align 8 +; CWRLD_ATTR_HSA-NEXT: ret float [[FADD]] ; %f = call float %fptr() %fadd = fadd float %f, 1.0 + store ptr @indirect_callee1, ptr @G1 + store ptr @indirect_callee2, ptr @G2 ret float %fadd } +define float @indirect_callee1() { +; AKF_HSA-LABEL: define 
{{[^@]+}}@indirect_callee1() { +; AKF_HSA-NEXT: ret float 0x40091EB860000000 +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee1 +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-NEXT: ret float 0x40091EB860000000 +; + ret float 0x40091EB860000000 +} +define float @indirect_callee2(float noundef %arg) { +; AKF_HSA-LABEL: define {{[^@]+}}@indirect_callee2 +; AKF_HSA-SAME: (float noundef [[ARG:%.*]]) { +; AKF_HSA-NEXT: ret float [[ARG]] +; +; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_callee2 +; ATTRIBUTOR_HSA-SAME: (float noundef [[ARG:%.*]]) #[[ATTR19]] { +; ATTRIBUTOR_HSA-NEXT: ret float [[ARG]] +; + ret float %arg +} + declare float @extern() #3 define float @func_extern_call() #3 { ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call @@ -845,7 +892,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -861,7 +908,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, ptr addrspace(1) null, align 4 ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -877,7 +924,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -893,7 +940,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address -; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR23:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @func_sanitize_address() ; 
ATTRIBUTOR_HSA-NEXT: ret void ; @@ -928,7 +975,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -941,7 +988,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl -; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_decl() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -956,7 +1003,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: call void @enqueue_block_def() ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -969,7 +1016,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -980,7 +1027,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: ret void ; ret void @@ -994,7 +1041,7 @@ ; AKF_HSA-NEXT: ret void ; ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block -; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] { +; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] { ; ATTRIBUTOR_HSA-NEXT: call void @known_func() #[[ATTR29:[0-9]+]] ; ATTRIBUTOR_HSA-NEXT: ret void ; @@ -1040,15 +1087,17 @@ ; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" 
"amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" 
"amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" 
"amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR25:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" } +; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind } ; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" } ;. +; CWRLD_ATTR_HSA: [[META0:![0-9]+]] = !{ptr @indirect_callee1, ptr @indirect_callee2} +;. 
Index: llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll +++ llvm/test/CodeGen/AMDGPU/attributor-loop-issue-58639.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD %0 = type { ptr, ptr } @@ -20,19 +21,32 @@ } define internal fastcc double @baz(ptr %arg) { -; CHECK-LABEL: define {{[^@]+}}@baz -; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = tail call double [[TMP1]]() -; CHECK-NEXT: br label [[BB3:%.*]] -; CHECK: bb3: -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1 -; CHECK-NEXT: br label [[BB5:%.*]] -; CHECK: bb5: -; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]]) -; CHECK-NEXT: br label [[BB5]] +; OWRLD-LABEL: define {{[^@]+}}@baz +; OWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] { +; OWRLD-NEXT: bb: +; OWRLD-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8 +; OWRLD-NEXT: [[TMP2:%.*]] = tail call double [[TMP1]]() +; OWRLD-NEXT: br label [[BB3:%.*]] +; OWRLD: bb3: +; OWRLD-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1 +; OWRLD-NEXT: br label [[BB5:%.*]] +; OWRLD: bb5: +; OWRLD-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8 +; OWRLD-NEXT: [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]]) +; OWRLD-NEXT: br label [[BB5]] +; +; CWRLD-LABEL: define 
{{[^@]+}}@baz +; CWRLD-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] { +; CWRLD-NEXT: bb: +; CWRLD-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARG]], align 8 +; CWRLD-NEXT: unreachable +; CWRLD: bb3: +; CWRLD-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ARG]], i64 0, i32 1 +; CWRLD-NEXT: br label [[BB5:%.*]] +; CWRLD: bb5: +; CWRLD-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CWRLD-NEXT: [[TMP7:%.*]] = call fastcc i1 @widget(ptr [[TMP6]]) +; CWRLD-NEXT: br label [[BB5]] ; bb: %tmp1 = load ptr, ptr %arg, align 8 @@ -49,13 +63,19 @@ br label %bb5 } -define amdgpu_kernel void @entry() { -; CHECK-LABEL: define {{[^@]+}}@entry -; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr -; CHECK-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) -; CHECK-NEXT: ret void +define amdgpu_kernel void @entry() { ; OWRLD-LABEL: define {{[^@]+}}@entry +; OWRLD-SAME: () #[[ATTR0]] { +; OWRLD-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) +; OWRLD-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr +; OWRLD-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) +; OWRLD-NEXT: ret void +; +; CWRLD-LABEL: define {{[^@]+}}@entry +; CWRLD-SAME: () #[[ATTR1:[0-9]+]] { +; CWRLD-NEXT: [[ALLOCA:%.*]] = alloca [[TMP0:%.*]], align 8, addrspace(5) +; CWRLD-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr +; CWRLD-NEXT: [[ARST:%.*]] = call double @baz(ptr [[CAST]]) +; CWRLD-NEXT: ret void ; %alloca = alloca %0, align 8, addrspace(5) %cast = addrspacecast ptr addrspace(5) %alloca to ptr @@ -63,5 +83,6 @@ ret void } ;. 
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; CWRLD: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. 
Index: llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=CHECK,OWRLD +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=CHECK,CWRLD define internal void @indirect() { ; CHECK-LABEL: define {{[^@]+}}@indirect @@ -10,13 +11,21 @@ } define internal void @direct() { -; CHECK-LABEL: define {{[^@]+}}@direct -; CHECK-SAME: () #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; CHECK-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 -; CHECK-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 -; CHECK-NEXT: call void [[FP]]() -; CHECK-NEXT: ret void +; OWRLD-LABEL: define {{[^@]+}}@direct +; OWRLD-SAME: () #[[ATTR1:[0-9]+]] { +; OWRLD-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; OWRLD-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; OWRLD-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; OWRLD-NEXT: call void [[FP]]() +; OWRLD-NEXT: ret void +; +; CWRLD-LABEL: define {{[^@]+}}@direct +; CWRLD-SAME: () #[[ATTR0]] { +; CWRLD-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; CWRLD-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; CWRLD-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; CWRLD-NEXT: call void @indirect() +; CWRLD-NEXT: ret void ; %fptr = alloca ptr, addrspace(5) store ptr @indirect, ptr addrspace(5) %fptr @@ -26,15 +35,22 @@ } define amdgpu_kernel void 
@test_direct_indirect_call() { -; CHECK-LABEL: define {{[^@]+}}@test_direct_indirect_call -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @direct() -; CHECK-NEXT: ret void +; OWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call +; OWRLD-SAME: () #[[ATTR1]] { +; OWRLD-NEXT: call void @direct() +; OWRLD-NEXT: ret void +; +; CWRLD-LABEL: define {{[^@]+}}@test_direct_indirect_call +; CWRLD-SAME: () #[[ATTR0]] { +; CWRLD-NEXT: call void @direct() +; CWRLD-NEXT: ret void ; call void @direct() ret void } ;. -; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; OWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; OWRLD: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +;. 
+; CWRLD: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } ;. Index: llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll +++ llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_OWR +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s --check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_CWR define internal void @indirect() { ; AKF_GCN-LABEL: define {{[^@]+}}@indirect() { @@ -22,13 +23,21 @@ ; AKF_GCN-NEXT: call void [[FP]]() ; AKF_GCN-NEXT: ret void ; -; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 -; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() -; 
ATTRIBUTOR_GCN-NEXT: ret void +; ATTRIBUTOR_OWR-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_OWR-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_OWR-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_OWR-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_OWR-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_OWR-NEXT: call void [[FP]]() +; ATTRIBUTOR_OWR-NEXT: ret void +; +; ATTRIBUTOR_CWR-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_CWR-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_CWR-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_CWR-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_CWR-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8 +; ATTRIBUTOR_CWR-NEXT: call void @indirect() +; ATTRIBUTOR_CWR-NEXT: ret void ; %fptr = alloca ptr, addrspace(5) store ptr @indirect, ptr addrspace(5) %fptr @@ -42,6 +51,9 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" } ;. 
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } +; ATTRIBUTOR_OWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_OWR: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" } +;. 
+; ATTRIBUTOR_CWR: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_CWR: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. 
Index: llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll +++ llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll @@ -1,18 +1,26 @@ -; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s -; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV5C %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCNC,COV4C %s +; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV5O %s +; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 | FileCheck -check-prefixes=GCNO,COV4O %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 -; No stack objects, only indirect call has to enable scrathch -; GCN-LABEL: test_indirect_call: +; No stack objects, only indirect call has to enable scratch +; GCNO-LABEL: test_indirect_call: +; GCNC-LABEL: test_indirect_call: -; COV5: .amdhsa_private_segment_fixed_size 0{{$}} -; COV4: .amdhsa_private_segment_fixed_size 16384{{$}} +; COV5O: .amdhsa_private_segment_fixed_size 0{{$}} +; COV5C: .amdhsa_private_segment_fixed_size 0{{$}} +; COV4C: .amdhsa_private_segment_fixed_size 0{{$}} +; COV4O: .amdhsa_private_segment_fixed_size 16384{{$}} -; GCN: .amdhsa_user_sgpr_private_segment_buffer 1 +; GCNO: .amdhsa_user_sgpr_private_segment_buffer 1 +; GCNC: .amdhsa_user_sgpr_private_segment_buffer 1 -; COV5: .amdhsa_uses_dynamic_stack 1 -; GCN: 
.amdhsa_system_sgpr_private_segment_wavefront_offset 1 +; COV5O: .amdhsa_uses_dynamic_stack 1 +; COV5C: .amdhsa_uses_dynamic_stack 0 +; GCNO: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +; GCNC: .amdhsa_system_sgpr_private_segment_wavefront_offset 0 define amdgpu_kernel void @test_indirect_call() { %fptr = load ptr, ptr addrspace(4) @gv.fptr0 call void %fptr() Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -1,1109 +1,1443 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_O %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN_C %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_O %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL,GISEL_C %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 define amdgpu_kernel void @test_indirect_call_sgpr_ptr(i8) { -; GCN-LABEL: test_indirect_call_sgpr_ptr: -; GCN: .amd_kernel_code_t -; GCN-NEXT: amd_code_version_major = 1 -; GCN-NEXT: amd_code_version_minor = 2 -; GCN-NEXT: amd_machine_kind = 1 -; GCN-NEXT: amd_machine_version_major = 7 -; GCN-NEXT: amd_machine_version_minor = 0 -; GCN-NEXT: amd_machine_version_stepping = 0 -; GCN-NEXT: 
kernel_code_entry_byte_offset = 256 -; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 10 -; GCN-NEXT: granulated_wavefront_sgpr_count = 8 -; GCN-NEXT: priority = 0 -; GCN-NEXT: float_mode = 240 -; GCN-NEXT: priv = 0 -; GCN-NEXT: enable_dx10_clamp = 1 -; GCN-NEXT: debug_mode = 0 -; GCN-NEXT: enable_ieee_mode = 1 -; GCN-NEXT: enable_wgp_mode = 0 -; GCN-NEXT: enable_mem_ordered = 0 -; GCN-NEXT: enable_fwd_progress = 0 -; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GCN-NEXT: user_sgpr_count = 14 -; GCN-NEXT: enable_trap_handler = 0 -; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 -; GCN-NEXT: enable_sgpr_workgroup_info = 0 -; GCN-NEXT: enable_vgpr_workitem_id = 2 -; GCN-NEXT: enable_exception_msb = 0 -; GCN-NEXT: granulated_lds_size = 0 -; GCN-NEXT: enable_exception = 0 -; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 -; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 -; GCN-NEXT: enable_sgpr_queue_ptr = 1 -; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GCN-NEXT: enable_sgpr_dispatch_id = 1 -; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 -; GCN-NEXT: enable_sgpr_private_segment_size = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GCN-NEXT: enable_wavefront_size32 = 0 -; GCN-NEXT: enable_ordered_append_gds = 0 -; GCN-NEXT: private_element_size = 1 -; GCN-NEXT: is_ptr64 = 1 -; GCN-NEXT: is_dynamic_callstack = 1 -; GCN-NEXT: is_debug_enabled = 0 -; GCN-NEXT: is_xnack_enabled = 0 -; GCN-NEXT: workitem_private_segment_byte_size = 16384 -; GCN-NEXT: workgroup_group_segment_byte_size = 0 -; GCN-NEXT: gds_segment_byte_size = 0 -; GCN-NEXT: kernarg_segment_byte_size = 64 -; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 68 -; GCN-NEXT: workitem_vgpr_count = 42 -; GCN-NEXT: reserved_vgpr_first = 0 
-; GCN-NEXT: reserved_vgpr_count = 0 -; GCN-NEXT: reserved_sgpr_first = 0 -; GCN-NEXT: reserved_sgpr_count = 0 -; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GCN-NEXT: debug_private_segment_buffer_sgpr = 0 -; GCN-NEXT: kernarg_segment_alignment = 4 -; GCN-NEXT: group_segment_alignment = 4 -; GCN-NEXT: private_segment_alignment = 4 -; GCN-NEXT: wavefront_size = 6 -; GCN-NEXT: call_convention = -1 -; GCN-NEXT: runtime_loader_kernel_symbol = 0 -; GCN-NEXT: .end_amd_kernel_code_t -; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GCN-NEXT: s_add_i32 s12, s12, s17 -; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GCN-NEXT: s_add_u32 s0, s0, s17 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_mov_b32 s13, s15 -; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_getpc_b64 s[14:15] -; GCN-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GCN-NEXT: s_add_u32 s8, s8, 8 -; GCN-NEXT: s_addc_u32 s9, s9, 0 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: v_or_b32_e32 v0, v0, v1 -; GCN-NEXT: v_or_b32_e32 v31, v0, v2 -; GCN-NEXT: s_mov_b32 s14, s16 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GCN-NEXT: s_endpgm +; GCN_O-LABEL: test_indirect_call_sgpr_ptr: +; GCN_O: .amd_kernel_code_t +; GCN_O-NEXT: amd_code_version_major = 1 +; GCN_O-NEXT: amd_code_version_minor = 2 +; GCN_O-NEXT: amd_machine_kind = 1 +; GCN_O-NEXT: amd_machine_version_major = 7 +; GCN_O-NEXT: amd_machine_version_minor = 0 +; GCN_O-NEXT: amd_machine_version_stepping = 0 +; GCN_O-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_O-NEXT: granulated_workitem_vgpr_count = 10 +; GCN_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GCN_O-NEXT: priority = 0 +; GCN_O-NEXT: float_mode = 240 +; GCN_O-NEXT: priv = 0 +; GCN_O-NEXT: 
enable_dx10_clamp = 1 +; GCN_O-NEXT: debug_mode = 0 +; GCN_O-NEXT: enable_ieee_mode = 1 +; GCN_O-NEXT: enable_wgp_mode = 0 +; GCN_O-NEXT: enable_mem_ordered = 0 +; GCN_O-NEXT: enable_fwd_progress = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GCN_O-NEXT: user_sgpr_count = 14 +; GCN_O-NEXT: enable_trap_handler = 0 +; GCN_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_O-NEXT: enable_vgpr_workitem_id = 2 +; GCN_O-NEXT: enable_exception_msb = 0 +; GCN_O-NEXT: granulated_lds_size = 0 +; GCN_O-NEXT: enable_exception = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GCN_O-NEXT: enable_sgpr_queue_ptr = 1 +; GCN_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_id = 1 +; GCN_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GCN_O-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_O-NEXT: enable_wavefront_size32 = 0 +; GCN_O-NEXT: enable_ordered_append_gds = 0 +; GCN_O-NEXT: private_element_size = 1 +; GCN_O-NEXT: is_ptr64 = 1 +; GCN_O-NEXT: is_dynamic_callstack = 1 +; GCN_O-NEXT: is_debug_enabled = 0 +; GCN_O-NEXT: is_xnack_enabled = 0 +; GCN_O-NEXT: workitem_private_segment_byte_size = 16384 +; GCN_O-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_O-NEXT: gds_segment_byte_size = 0 +; GCN_O-NEXT: kernarg_segment_byte_size = 64 +; GCN_O-NEXT: workgroup_fbarrier_count = 0 +; GCN_O-NEXT: wavefront_sgpr_count = 68 +; GCN_O-NEXT: workitem_vgpr_count = 42 +; GCN_O-NEXT: reserved_vgpr_first = 0 +; GCN_O-NEXT: reserved_vgpr_count = 0 +; GCN_O-NEXT: reserved_sgpr_first = 0 +; GCN_O-NEXT: reserved_sgpr_count = 0 +; GCN_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; 
GCN_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_O-NEXT: kernarg_segment_alignment = 4 +; GCN_O-NEXT: group_segment_alignment = 4 +; GCN_O-NEXT: private_segment_alignment = 4 +; GCN_O-NEXT: wavefront_size = 6 +; GCN_O-NEXT: call_convention = -1 +; GCN_O-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_O-NEXT: .end_amd_kernel_code_t +; GCN_O-NEXT: ; %bb.0: +; GCN_O-NEXT: s_mov_b32 s32, 0 +; GCN_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GCN_O-NEXT: s_add_i32 s12, s12, s17 +; GCN_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GCN_O-NEXT: s_add_u32 s0, s0, s17 +; GCN_O-NEXT: s_addc_u32 s1, s1, 0 +; GCN_O-NEXT: s_mov_b32 s13, s15 +; GCN_O-NEXT: s_mov_b32 s12, s14 +; GCN_O-NEXT: s_getpc_b64 s[14:15] +; GCN_O-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 +; GCN_O-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 +; GCN_O-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GCN_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GCN_O-NEXT: s_add_u32 s8, s8, 8 +; GCN_O-NEXT: s_addc_u32 s9, s9, 0 +; GCN_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GCN_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GCN_O-NEXT: v_or_b32_e32 v31, v0, v2 +; GCN_O-NEXT: s_mov_b32 s14, s16 +; GCN_O-NEXT: s_waitcnt lgkmcnt(0) +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GCN_O-NEXT: s_endpgm ; -; GISEL-LABEL: test_indirect_call_sgpr_ptr: -; GISEL: .amd_kernel_code_t -; GISEL-NEXT: amd_code_version_major = 1 -; GISEL-NEXT: amd_code_version_minor = 2 -; GISEL-NEXT: amd_machine_kind = 1 -; GISEL-NEXT: amd_machine_version_major = 7 -; GISEL-NEXT: amd_machine_version_minor = 0 -; GISEL-NEXT: amd_machine_version_stepping = 0 -; GISEL-NEXT: kernel_code_entry_byte_offset = 256 -; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 10 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 -; GISEL-NEXT: priority = 0 -; GISEL-NEXT: float_mode = 240 -; GISEL-NEXT: priv = 0 -; GISEL-NEXT: enable_dx10_clamp = 1 -; GISEL-NEXT: debug_mode = 0 -; GISEL-NEXT: enable_ieee_mode = 1 -; GISEL-NEXT: 
enable_wgp_mode = 0 -; GISEL-NEXT: enable_mem_ordered = 0 -; GISEL-NEXT: enable_fwd_progress = 0 -; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GISEL-NEXT: user_sgpr_count = 14 -; GISEL-NEXT: enable_trap_handler = 0 -; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 -; GISEL-NEXT: enable_sgpr_workgroup_info = 0 -; GISEL-NEXT: enable_vgpr_workitem_id = 2 -; GISEL-NEXT: enable_exception_msb = 0 -; GISEL-NEXT: granulated_lds_size = 0 -; GISEL-NEXT: enable_exception = 0 -; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 -; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 -; GISEL-NEXT: enable_sgpr_queue_ptr = 1 -; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GISEL-NEXT: enable_sgpr_dispatch_id = 1 -; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 -; GISEL-NEXT: enable_sgpr_private_segment_size = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GISEL-NEXT: enable_wavefront_size32 = 0 -; GISEL-NEXT: enable_ordered_append_gds = 0 -; GISEL-NEXT: private_element_size = 1 -; GISEL-NEXT: is_ptr64 = 1 -; GISEL-NEXT: is_dynamic_callstack = 1 -; GISEL-NEXT: is_debug_enabled = 0 -; GISEL-NEXT: is_xnack_enabled = 0 -; GISEL-NEXT: workitem_private_segment_byte_size = 16384 -; GISEL-NEXT: workgroup_group_segment_byte_size = 0 -; GISEL-NEXT: gds_segment_byte_size = 0 -; GISEL-NEXT: kernarg_segment_byte_size = 64 -; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 68 -; GISEL-NEXT: workitem_vgpr_count = 42 -; GISEL-NEXT: reserved_vgpr_first = 0 -; GISEL-NEXT: reserved_vgpr_count = 0 -; GISEL-NEXT: reserved_sgpr_first = 0 -; GISEL-NEXT: reserved_sgpr_count = 0 -; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 -; GISEL-NEXT: kernarg_segment_alignment = 4 -; GISEL-NEXT: 
group_segment_alignment = 4 -; GISEL-NEXT: private_segment_alignment = 4 -; GISEL-NEXT: wavefront_size = 6 -; GISEL-NEXT: call_convention = -1 -; GISEL-NEXT: runtime_loader_kernel_symbol = 0 -; GISEL-NEXT: .end_amd_kernel_code_t -; GISEL-NEXT: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s32, 0 -; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GISEL-NEXT: s_add_i32 s12, s12, s17 -; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GISEL-NEXT: s_add_u32 s0, s0, s17 -; GISEL-NEXT: s_addc_u32 s1, s1, 0 -; GISEL-NEXT: s_mov_b32 s13, s15 -; GISEL-NEXT: s_mov_b32 s12, s14 -; GISEL-NEXT: s_getpc_b64 s[14:15] -; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 -; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: s_add_u32 s8, s8, 8 -; GISEL-NEXT: s_addc_u32 s9, s9, 0 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 -; GISEL-NEXT: v_or_b32_e32 v31, v0, v1 -; GISEL-NEXT: s_mov_b32 s14, s16 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GISEL-NEXT: s_endpgm +; GCN_C-LABEL: test_indirect_call_sgpr_ptr: +; GCN_C: .amd_kernel_code_t +; GCN_C-NEXT: amd_code_version_major = 1 +; GCN_C-NEXT: amd_code_version_minor = 2 +; GCN_C-NEXT: amd_machine_kind = 1 +; GCN_C-NEXT: amd_machine_version_major = 7 +; GCN_C-NEXT: amd_machine_version_minor = 0 +; GCN_C-NEXT: amd_machine_version_stepping = 0 +; GCN_C-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_C-NEXT: granulated_workitem_vgpr_count = 0 +; GCN_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GCN_C-NEXT: priority = 0 +; GCN_C-NEXT: float_mode = 240 +; GCN_C-NEXT: priv = 0 +; GCN_C-NEXT: enable_dx10_clamp = 1 +; GCN_C-NEXT: debug_mode = 0 +; GCN_C-NEXT: enable_ieee_mode = 1 +; GCN_C-NEXT: enable_wgp_mode = 0 +; GCN_C-NEXT: enable_mem_ordered = 0 +; GCN_C-NEXT: enable_fwd_progress = 0 +; GCN_C-NEXT: 
enable_sgpr_private_segment_wave_byte_offset = 0 +; GCN_C-NEXT: user_sgpr_count = 6 +; GCN_C-NEXT: enable_trap_handler = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_C-NEXT: enable_vgpr_workitem_id = 0 +; GCN_C-NEXT: enable_exception_msb = 0 +; GCN_C-NEXT: granulated_lds_size = 0 +; GCN_C-NEXT: enable_exception = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GCN_C-NEXT: enable_sgpr_queue_ptr = 0 +; GCN_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_id = 0 +; GCN_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_C-NEXT: enable_wavefront_size32 = 0 +; GCN_C-NEXT: enable_ordered_append_gds = 0 +; GCN_C-NEXT: private_element_size = 1 +; GCN_C-NEXT: is_ptr64 = 1 +; GCN_C-NEXT: is_dynamic_callstack = 0 +; GCN_C-NEXT: is_debug_enabled = 0 +; GCN_C-NEXT: is_xnack_enabled = 0 +; GCN_C-NEXT: workitem_private_segment_byte_size = 0 +; GCN_C-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_C-NEXT: gds_segment_byte_size = 0 +; GCN_C-NEXT: kernarg_segment_byte_size = 4 +; GCN_C-NEXT: workgroup_fbarrier_count = 0 +; GCN_C-NEXT: wavefront_sgpr_count = 0 +; GCN_C-NEXT: workitem_vgpr_count = 0 +; GCN_C-NEXT: reserved_vgpr_first = 0 +; GCN_C-NEXT: reserved_vgpr_count = 0 +; GCN_C-NEXT: reserved_sgpr_first = 0 +; GCN_C-NEXT: reserved_sgpr_count = 0 +; GCN_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GCN_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_C-NEXT: kernarg_segment_alignment = 4 +; GCN_C-NEXT: group_segment_alignment = 4 +; GCN_C-NEXT: private_segment_alignment = 4 +; GCN_C-NEXT: wavefront_size = 6 +; GCN_C-NEXT: 
call_convention = -1 +; GCN_C-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_C-NEXT: .end_amd_kernel_code_t +; GCN_C-NEXT: ; %bb.0: +; +; GISEL_O-LABEL: test_indirect_call_sgpr_ptr: +; GISEL_O: .amd_kernel_code_t +; GISEL_O-NEXT: amd_code_version_major = 1 +; GISEL_O-NEXT: amd_code_version_minor = 2 +; GISEL_O-NEXT: amd_machine_kind = 1 +; GISEL_O-NEXT: amd_machine_version_major = 7 +; GISEL_O-NEXT: amd_machine_version_minor = 0 +; GISEL_O-NEXT: amd_machine_version_stepping = 0 +; GISEL_O-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_O-NEXT: granulated_workitem_vgpr_count = 10 +; GISEL_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GISEL_O-NEXT: priority = 0 +; GISEL_O-NEXT: float_mode = 240 +; GISEL_O-NEXT: priv = 0 +; GISEL_O-NEXT: enable_dx10_clamp = 1 +; GISEL_O-NEXT: debug_mode = 0 +; GISEL_O-NEXT: enable_ieee_mode = 1 +; GISEL_O-NEXT: enable_wgp_mode = 0 +; GISEL_O-NEXT: enable_mem_ordered = 0 +; GISEL_O-NEXT: enable_fwd_progress = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GISEL_O-NEXT: user_sgpr_count = 14 +; GISEL_O-NEXT: enable_trap_handler = 0 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_O-NEXT: enable_vgpr_workitem_id = 2 +; GISEL_O-NEXT: enable_exception_msb = 0 +; GISEL_O-NEXT: granulated_lds_size = 0 +; GISEL_O-NEXT: enable_exception = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_queue_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_id = 1 +; GISEL_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GISEL_O-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_O-NEXT: 
enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_O-NEXT: enable_wavefront_size32 = 0 +; GISEL_O-NEXT: enable_ordered_append_gds = 0 +; GISEL_O-NEXT: private_element_size = 1 +; GISEL_O-NEXT: is_ptr64 = 1 +; GISEL_O-NEXT: is_dynamic_callstack = 1 +; GISEL_O-NEXT: is_debug_enabled = 0 +; GISEL_O-NEXT: is_xnack_enabled = 0 +; GISEL_O-NEXT: workitem_private_segment_byte_size = 16384 +; GISEL_O-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_O-NEXT: gds_segment_byte_size = 0 +; GISEL_O-NEXT: kernarg_segment_byte_size = 64 +; GISEL_O-NEXT: workgroup_fbarrier_count = 0 +; GISEL_O-NEXT: wavefront_sgpr_count = 68 +; GISEL_O-NEXT: workitem_vgpr_count = 42 +; GISEL_O-NEXT: reserved_vgpr_first = 0 +; GISEL_O-NEXT: reserved_vgpr_count = 0 +; GISEL_O-NEXT: reserved_sgpr_first = 0 +; GISEL_O-NEXT: reserved_sgpr_count = 0 +; GISEL_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_O-NEXT: kernarg_segment_alignment = 4 +; GISEL_O-NEXT: group_segment_alignment = 4 +; GISEL_O-NEXT: private_segment_alignment = 4 +; GISEL_O-NEXT: wavefront_size = 6 +; GISEL_O-NEXT: call_convention = -1 +; GISEL_O-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_O-NEXT: .end_amd_kernel_code_t +; GISEL_O-NEXT: ; %bb.0: +; GISEL_O-NEXT: s_mov_b32 s32, 0 +; GISEL_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL_O-NEXT: s_add_i32 s12, s12, s17 +; GISEL_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL_O-NEXT: s_add_u32 s0, s0, s17 +; GISEL_O-NEXT: s_addc_u32 s1, s1, 0 +; GISEL_O-NEXT: s_mov_b32 s13, s15 +; GISEL_O-NEXT: s_mov_b32 s12, s14 +; GISEL_O-NEXT: s_getpc_b64 s[14:15] +; GISEL_O-NEXT: s_add_u32 s14, s14, gv.fptr0@rel32@lo+4 +; GISEL_O-NEXT: s_addc_u32 s15, s15, gv.fptr0@rel32@hi+12 +; GISEL_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GISEL_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL_O-NEXT: s_add_u32 s8, s8, 8 +; GISEL_O-NEXT: s_addc_u32 s9, s9, 0 +; GISEL_O-NEXT: 
v_lshlrev_b32_e32 v1, 20, v2 +; GISEL_O-NEXT: v_or_b32_e32 v31, v0, v1 +; GISEL_O-NEXT: s_mov_b32 s14, s16 +; GISEL_O-NEXT: s_waitcnt lgkmcnt(0) +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL_O-NEXT: s_endpgm +; +; GISEL_C-LABEL: test_indirect_call_sgpr_ptr: +; GISEL_C: .amd_kernel_code_t +; GISEL_C-NEXT: amd_code_version_major = 1 +; GISEL_C-NEXT: amd_code_version_minor = 2 +; GISEL_C-NEXT: amd_machine_kind = 1 +; GISEL_C-NEXT: amd_machine_version_major = 7 +; GISEL_C-NEXT: amd_machine_version_minor = 0 +; GISEL_C-NEXT: amd_machine_version_stepping = 0 +; GISEL_C-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_C-NEXT: granulated_workitem_vgpr_count = 0 +; GISEL_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GISEL_C-NEXT: priority = 0 +; GISEL_C-NEXT: float_mode = 240 +; GISEL_C-NEXT: priv = 0 +; GISEL_C-NEXT: enable_dx10_clamp = 1 +; GISEL_C-NEXT: debug_mode = 0 +; GISEL_C-NEXT: enable_ieee_mode = 1 +; GISEL_C-NEXT: enable_wgp_mode = 0 +; GISEL_C-NEXT: enable_mem_ordered = 0 +; GISEL_C-NEXT: enable_fwd_progress = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; GISEL_C-NEXT: user_sgpr_count = 6 +; GISEL_C-NEXT: enable_trap_handler = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_C-NEXT: enable_vgpr_workitem_id = 0 +; GISEL_C-NEXT: enable_exception_msb = 0 +; GISEL_C-NEXT: granulated_lds_size = 0 +; GISEL_C-NEXT: enable_exception = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_queue_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_id = 0 +; GISEL_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_x 
= 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_C-NEXT: enable_wavefront_size32 = 0 +; GISEL_C-NEXT: enable_ordered_append_gds = 0 +; GISEL_C-NEXT: private_element_size = 1 +; GISEL_C-NEXT: is_ptr64 = 1 +; GISEL_C-NEXT: is_dynamic_callstack = 0 +; GISEL_C-NEXT: is_debug_enabled = 0 +; GISEL_C-NEXT: is_xnack_enabled = 0 +; GISEL_C-NEXT: workitem_private_segment_byte_size = 0 +; GISEL_C-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_C-NEXT: gds_segment_byte_size = 0 +; GISEL_C-NEXT: kernarg_segment_byte_size = 4 +; GISEL_C-NEXT: workgroup_fbarrier_count = 0 +; GISEL_C-NEXT: wavefront_sgpr_count = 0 +; GISEL_C-NEXT: workitem_vgpr_count = 0 +; GISEL_C-NEXT: reserved_vgpr_first = 0 +; GISEL_C-NEXT: reserved_vgpr_count = 0 +; GISEL_C-NEXT: reserved_sgpr_first = 0 +; GISEL_C-NEXT: reserved_sgpr_count = 0 +; GISEL_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_C-NEXT: kernarg_segment_alignment = 4 +; GISEL_C-NEXT: group_segment_alignment = 4 +; GISEL_C-NEXT: private_segment_alignment = 4 +; GISEL_C-NEXT: wavefront_size = 6 +; GISEL_C-NEXT: call_convention = -1 +; GISEL_C-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_C-NEXT: .end_amd_kernel_code_t +; GISEL_C-NEXT: ; %bb.0: %fptr = load ptr, ptr addrspace(4) @gv.fptr0 call void %fptr() ret void } define amdgpu_kernel void @test_indirect_call_sgpr_ptr_arg(i8) { -; GCN-LABEL: test_indirect_call_sgpr_ptr_arg: -; GCN: .amd_kernel_code_t -; GCN-NEXT: amd_code_version_major = 1 -; GCN-NEXT: amd_code_version_minor = 2 -; GCN-NEXT: amd_machine_kind = 1 -; GCN-NEXT: amd_machine_version_major = 7 -; GCN-NEXT: amd_machine_version_minor = 0 -; GCN-NEXT: amd_machine_version_stepping = 0 -; GCN-NEXT: kernel_code_entry_byte_offset = 256 -; GCN-NEXT: kernel_code_prefetch_byte_size = 0 -; GCN-NEXT: granulated_workitem_vgpr_count = 10 -; GCN-NEXT: granulated_wavefront_sgpr_count = 8 -; 
GCN-NEXT: priority = 0 -; GCN-NEXT: float_mode = 240 -; GCN-NEXT: priv = 0 -; GCN-NEXT: enable_dx10_clamp = 1 -; GCN-NEXT: debug_mode = 0 -; GCN-NEXT: enable_ieee_mode = 1 -; GCN-NEXT: enable_wgp_mode = 0 -; GCN-NEXT: enable_mem_ordered = 0 -; GCN-NEXT: enable_fwd_progress = 0 -; GCN-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GCN-NEXT: user_sgpr_count = 14 -; GCN-NEXT: enable_trap_handler = 0 -; GCN-NEXT: enable_sgpr_workgroup_id_x = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_y = 1 -; GCN-NEXT: enable_sgpr_workgroup_id_z = 1 -; GCN-NEXT: enable_sgpr_workgroup_info = 0 -; GCN-NEXT: enable_vgpr_workitem_id = 2 -; GCN-NEXT: enable_exception_msb = 0 -; GCN-NEXT: granulated_lds_size = 0 -; GCN-NEXT: enable_exception = 0 -; GCN-NEXT: enable_sgpr_private_segment_buffer = 1 -; GCN-NEXT: enable_sgpr_dispatch_ptr = 1 -; GCN-NEXT: enable_sgpr_queue_ptr = 1 -; GCN-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GCN-NEXT: enable_sgpr_dispatch_id = 1 -; GCN-NEXT: enable_sgpr_flat_scratch_init = 1 -; GCN-NEXT: enable_sgpr_private_segment_size = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GCN-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GCN-NEXT: enable_wavefront_size32 = 0 -; GCN-NEXT: enable_ordered_append_gds = 0 -; GCN-NEXT: private_element_size = 1 -; GCN-NEXT: is_ptr64 = 1 -; GCN-NEXT: is_dynamic_callstack = 1 -; GCN-NEXT: is_debug_enabled = 0 -; GCN-NEXT: is_xnack_enabled = 0 -; GCN-NEXT: workitem_private_segment_byte_size = 16384 -; GCN-NEXT: workgroup_group_segment_byte_size = 0 -; GCN-NEXT: gds_segment_byte_size = 0 -; GCN-NEXT: kernarg_segment_byte_size = 64 -; GCN-NEXT: workgroup_fbarrier_count = 0 -; GCN-NEXT: wavefront_sgpr_count = 68 -; GCN-NEXT: workitem_vgpr_count = 42 -; GCN-NEXT: reserved_vgpr_first = 0 -; GCN-NEXT: reserved_vgpr_count = 0 -; GCN-NEXT: reserved_sgpr_first = 0 -; GCN-NEXT: reserved_sgpr_count = 0 -; GCN-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GCN-NEXT: 
debug_private_segment_buffer_sgpr = 0 -; GCN-NEXT: kernarg_segment_alignment = 4 -; GCN-NEXT: group_segment_alignment = 4 -; GCN-NEXT: private_segment_alignment = 4 -; GCN-NEXT: wavefront_size = 6 -; GCN-NEXT: call_convention = -1 -; GCN-NEXT: runtime_loader_kernel_symbol = 0 -; GCN-NEXT: .end_amd_kernel_code_t -; GCN-NEXT: ; %bb.0: -; GCN-NEXT: s_mov_b32 s32, 0 -; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GCN-NEXT: s_add_i32 s12, s12, s17 -; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GCN-NEXT: s_add_u32 s0, s0, s17 -; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: s_mov_b32 s13, s15 -; GCN-NEXT: s_mov_b32 s12, s14 -; GCN-NEXT: s_getpc_b64 s[14:15] -; GCN-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 -; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GCN-NEXT: s_add_u32 s8, s8, 8 -; GCN-NEXT: s_addc_u32 s9, s9, 0 -; GCN-NEXT: v_or_b32_e32 v0, v0, v1 -; GCN-NEXT: v_or_b32_e32 v31, v0, v2 -; GCN-NEXT: v_mov_b32_e32 v0, 0x7b -; GCN-NEXT: s_mov_b32 s14, s16 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GCN-NEXT: s_endpgm +; GCN_O-LABEL: test_indirect_call_sgpr_ptr_arg: +; GCN_O: .amd_kernel_code_t +; GCN_O-NEXT: amd_code_version_major = 1 +; GCN_O-NEXT: amd_code_version_minor = 2 +; GCN_O-NEXT: amd_machine_kind = 1 +; GCN_O-NEXT: amd_machine_version_major = 7 +; GCN_O-NEXT: amd_machine_version_minor = 0 +; GCN_O-NEXT: amd_machine_version_stepping = 0 +; GCN_O-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_O-NEXT: granulated_workitem_vgpr_count = 10 +; GCN_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GCN_O-NEXT: priority = 0 +; GCN_O-NEXT: float_mode = 240 +; GCN_O-NEXT: priv = 0 +; GCN_O-NEXT: enable_dx10_clamp = 1 +; GCN_O-NEXT: debug_mode = 0 +; GCN_O-NEXT: enable_ieee_mode = 1 +; GCN_O-NEXT: enable_wgp_mode = 0 +; GCN_O-NEXT: 
enable_mem_ordered = 0 +; GCN_O-NEXT: enable_fwd_progress = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GCN_O-NEXT: user_sgpr_count = 14 +; GCN_O-NEXT: enable_trap_handler = 0 +; GCN_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GCN_O-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_O-NEXT: enable_vgpr_workitem_id = 2 +; GCN_O-NEXT: enable_exception_msb = 0 +; GCN_O-NEXT: granulated_lds_size = 0 +; GCN_O-NEXT: enable_exception = 0 +; GCN_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GCN_O-NEXT: enable_sgpr_queue_ptr = 1 +; GCN_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_O-NEXT: enable_sgpr_dispatch_id = 1 +; GCN_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GCN_O-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_O-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_O-NEXT: enable_wavefront_size32 = 0 +; GCN_O-NEXT: enable_ordered_append_gds = 0 +; GCN_O-NEXT: private_element_size = 1 +; GCN_O-NEXT: is_ptr64 = 1 +; GCN_O-NEXT: is_dynamic_callstack = 1 +; GCN_O-NEXT: is_debug_enabled = 0 +; GCN_O-NEXT: is_xnack_enabled = 0 +; GCN_O-NEXT: workitem_private_segment_byte_size = 16384 +; GCN_O-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_O-NEXT: gds_segment_byte_size = 0 +; GCN_O-NEXT: kernarg_segment_byte_size = 64 +; GCN_O-NEXT: workgroup_fbarrier_count = 0 +; GCN_O-NEXT: wavefront_sgpr_count = 68 +; GCN_O-NEXT: workitem_vgpr_count = 42 +; GCN_O-NEXT: reserved_vgpr_first = 0 +; GCN_O-NEXT: reserved_vgpr_count = 0 +; GCN_O-NEXT: reserved_sgpr_first = 0 +; GCN_O-NEXT: reserved_sgpr_count = 0 +; GCN_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GCN_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_O-NEXT: kernarg_segment_alignment = 4 +; GCN_O-NEXT: group_segment_alignment = 4 +; 
GCN_O-NEXT: private_segment_alignment = 4 +; GCN_O-NEXT: wavefront_size = 6 +; GCN_O-NEXT: call_convention = -1 +; GCN_O-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_O-NEXT: .end_amd_kernel_code_t +; GCN_O-NEXT: ; %bb.0: +; GCN_O-NEXT: s_mov_b32 s32, 0 +; GCN_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GCN_O-NEXT: s_add_i32 s12, s12, s17 +; GCN_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GCN_O-NEXT: s_add_u32 s0, s0, s17 +; GCN_O-NEXT: s_addc_u32 s1, s1, 0 +; GCN_O-NEXT: s_mov_b32 s13, s15 +; GCN_O-NEXT: s_mov_b32 s12, s14 +; GCN_O-NEXT: s_getpc_b64 s[14:15] +; GCN_O-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 +; GCN_O-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 +; GCN_O-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GCN_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GCN_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GCN_O-NEXT: s_add_u32 s8, s8, 8 +; GCN_O-NEXT: s_addc_u32 s9, s9, 0 +; GCN_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GCN_O-NEXT: v_or_b32_e32 v31, v0, v2 +; GCN_O-NEXT: v_mov_b32_e32 v0, 0x7b +; GCN_O-NEXT: s_mov_b32 s14, s16 +; GCN_O-NEXT: s_waitcnt lgkmcnt(0) +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GCN_O-NEXT: s_endpgm +; +; GCN_C-LABEL: test_indirect_call_sgpr_ptr_arg: +; GCN_C: .amd_kernel_code_t +; GCN_C-NEXT: amd_code_version_major = 1 +; GCN_C-NEXT: amd_code_version_minor = 2 +; GCN_C-NEXT: amd_machine_kind = 1 +; GCN_C-NEXT: amd_machine_version_major = 7 +; GCN_C-NEXT: amd_machine_version_minor = 0 +; GCN_C-NEXT: amd_machine_version_stepping = 0 +; GCN_C-NEXT: kernel_code_entry_byte_offset = 256 +; GCN_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GCN_C-NEXT: granulated_workitem_vgpr_count = 0 +; GCN_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GCN_C-NEXT: priority = 0 +; GCN_C-NEXT: float_mode = 240 +; GCN_C-NEXT: priv = 0 +; GCN_C-NEXT: enable_dx10_clamp = 1 +; GCN_C-NEXT: debug_mode = 0 +; GCN_C-NEXT: enable_ieee_mode = 1 +; GCN_C-NEXT: enable_wgp_mode = 0 +; GCN_C-NEXT: enable_mem_ordered = 0 +; GCN_C-NEXT: enable_fwd_progress = 0 +; 
GCN_C-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; GCN_C-NEXT: user_sgpr_count = 6 +; GCN_C-NEXT: enable_trap_handler = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GCN_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GCN_C-NEXT: enable_sgpr_workgroup_info = 0 +; GCN_C-NEXT: enable_vgpr_workitem_id = 0 +; GCN_C-NEXT: enable_exception_msb = 0 +; GCN_C-NEXT: granulated_lds_size = 0 +; GCN_C-NEXT: enable_exception = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GCN_C-NEXT: enable_sgpr_queue_ptr = 0 +; GCN_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GCN_C-NEXT: enable_sgpr_dispatch_id = 0 +; GCN_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GCN_C-NEXT: enable_sgpr_private_segment_size = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GCN_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GCN_C-NEXT: enable_wavefront_size32 = 0 +; GCN_C-NEXT: enable_ordered_append_gds = 0 +; GCN_C-NEXT: private_element_size = 1 +; GCN_C-NEXT: is_ptr64 = 1 +; GCN_C-NEXT: is_dynamic_callstack = 0 +; GCN_C-NEXT: is_debug_enabled = 0 +; GCN_C-NEXT: is_xnack_enabled = 0 +; GCN_C-NEXT: workitem_private_segment_byte_size = 0 +; GCN_C-NEXT: workgroup_group_segment_byte_size = 0 +; GCN_C-NEXT: gds_segment_byte_size = 0 +; GCN_C-NEXT: kernarg_segment_byte_size = 4 +; GCN_C-NEXT: workgroup_fbarrier_count = 0 +; GCN_C-NEXT: wavefront_sgpr_count = 0 +; GCN_C-NEXT: workitem_vgpr_count = 0 +; GCN_C-NEXT: reserved_vgpr_first = 0 +; GCN_C-NEXT: reserved_vgpr_count = 0 +; GCN_C-NEXT: reserved_sgpr_first = 0 +; GCN_C-NEXT: reserved_sgpr_count = 0 +; GCN_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GCN_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GCN_C-NEXT: kernarg_segment_alignment = 4 +; GCN_C-NEXT: group_segment_alignment = 4 +; GCN_C-NEXT: private_segment_alignment = 4 +; GCN_C-NEXT: wavefront_size = 6 +; 
GCN_C-NEXT: call_convention = -1 +; GCN_C-NEXT: runtime_loader_kernel_symbol = 0 +; GCN_C-NEXT: .end_amd_kernel_code_t +; GCN_C-NEXT: ; %bb.0: ; -; GISEL-LABEL: test_indirect_call_sgpr_ptr_arg: -; GISEL: .amd_kernel_code_t -; GISEL-NEXT: amd_code_version_major = 1 -; GISEL-NEXT: amd_code_version_minor = 2 -; GISEL-NEXT: amd_machine_kind = 1 -; GISEL-NEXT: amd_machine_version_major = 7 -; GISEL-NEXT: amd_machine_version_minor = 0 -; GISEL-NEXT: amd_machine_version_stepping = 0 -; GISEL-NEXT: kernel_code_entry_byte_offset = 256 -; GISEL-NEXT: kernel_code_prefetch_byte_size = 0 -; GISEL-NEXT: granulated_workitem_vgpr_count = 10 -; GISEL-NEXT: granulated_wavefront_sgpr_count = 8 -; GISEL-NEXT: priority = 0 -; GISEL-NEXT: float_mode = 240 -; GISEL-NEXT: priv = 0 -; GISEL-NEXT: enable_dx10_clamp = 1 -; GISEL-NEXT: debug_mode = 0 -; GISEL-NEXT: enable_ieee_mode = 1 -; GISEL-NEXT: enable_wgp_mode = 0 -; GISEL-NEXT: enable_mem_ordered = 0 -; GISEL-NEXT: enable_fwd_progress = 0 -; GISEL-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 -; GISEL-NEXT: user_sgpr_count = 14 -; GISEL-NEXT: enable_trap_handler = 0 -; GISEL-NEXT: enable_sgpr_workgroup_id_x = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_y = 1 -; GISEL-NEXT: enable_sgpr_workgroup_id_z = 1 -; GISEL-NEXT: enable_sgpr_workgroup_info = 0 -; GISEL-NEXT: enable_vgpr_workitem_id = 2 -; GISEL-NEXT: enable_exception_msb = 0 -; GISEL-NEXT: granulated_lds_size = 0 -; GISEL-NEXT: enable_exception = 0 -; GISEL-NEXT: enable_sgpr_private_segment_buffer = 1 -; GISEL-NEXT: enable_sgpr_dispatch_ptr = 1 -; GISEL-NEXT: enable_sgpr_queue_ptr = 1 -; GISEL-NEXT: enable_sgpr_kernarg_segment_ptr = 1 -; GISEL-NEXT: enable_sgpr_dispatch_id = 1 -; GISEL-NEXT: enable_sgpr_flat_scratch_init = 1 -; GISEL-NEXT: enable_sgpr_private_segment_size = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_x = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_y = 0 -; GISEL-NEXT: enable_sgpr_grid_workgroup_count_z = 0 -; GISEL-NEXT: enable_wavefront_size32 
= 0 -; GISEL-NEXT: enable_ordered_append_gds = 0 -; GISEL-NEXT: private_element_size = 1 -; GISEL-NEXT: is_ptr64 = 1 -; GISEL-NEXT: is_dynamic_callstack = 1 -; GISEL-NEXT: is_debug_enabled = 0 -; GISEL-NEXT: is_xnack_enabled = 0 -; GISEL-NEXT: workitem_private_segment_byte_size = 16384 -; GISEL-NEXT: workgroup_group_segment_byte_size = 0 -; GISEL-NEXT: gds_segment_byte_size = 0 -; GISEL-NEXT: kernarg_segment_byte_size = 64 -; GISEL-NEXT: workgroup_fbarrier_count = 0 -; GISEL-NEXT: wavefront_sgpr_count = 68 -; GISEL-NEXT: workitem_vgpr_count = 42 -; GISEL-NEXT: reserved_vgpr_first = 0 -; GISEL-NEXT: reserved_vgpr_count = 0 -; GISEL-NEXT: reserved_sgpr_first = 0 -; GISEL-NEXT: reserved_sgpr_count = 0 -; GISEL-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 -; GISEL-NEXT: debug_private_segment_buffer_sgpr = 0 -; GISEL-NEXT: kernarg_segment_alignment = 4 -; GISEL-NEXT: group_segment_alignment = 4 -; GISEL-NEXT: private_segment_alignment = 4 -; GISEL-NEXT: wavefront_size = 6 -; GISEL-NEXT: call_convention = -1 -; GISEL-NEXT: runtime_loader_kernel_symbol = 0 -; GISEL-NEXT: .end_amd_kernel_code_t -; GISEL-NEXT: ; %bb.0: -; GISEL-NEXT: s_mov_b32 s32, 0 -; GISEL-NEXT: s_mov_b32 flat_scratch_lo, s13 -; GISEL-NEXT: s_add_i32 s12, s12, s17 -; GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 -; GISEL-NEXT: s_add_u32 s0, s0, s17 -; GISEL-NEXT: s_addc_u32 s1, s1, 0 -; GISEL-NEXT: s_mov_b32 s13, s15 -; GISEL-NEXT: s_mov_b32 s12, s14 -; GISEL-NEXT: s_getpc_b64 s[14:15] -; GISEL-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 -; GISEL-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GISEL-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: s_add_u32 s8, s8, 8 -; GISEL-NEXT: s_addc_u32 s9, s9, 0 -; GISEL-NEXT: v_or_b32_e32 v31, v0, v2 -; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GISEL-NEXT: s_mov_b32 s14, s16 -; GISEL-NEXT: s_waitcnt lgkmcnt(0) -; 
GISEL-NEXT: s_swappc_b64 s[30:31], s[18:19] -; GISEL-NEXT: s_endpgm +; GISEL_O-LABEL: test_indirect_call_sgpr_ptr_arg: +; GISEL_O: .amd_kernel_code_t +; GISEL_O-NEXT: amd_code_version_major = 1 +; GISEL_O-NEXT: amd_code_version_minor = 2 +; GISEL_O-NEXT: amd_machine_kind = 1 +; GISEL_O-NEXT: amd_machine_version_major = 7 +; GISEL_O-NEXT: amd_machine_version_minor = 0 +; GISEL_O-NEXT: amd_machine_version_stepping = 0 +; GISEL_O-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_O-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_O-NEXT: granulated_workitem_vgpr_count = 10 +; GISEL_O-NEXT: granulated_wavefront_sgpr_count = 8 +; GISEL_O-NEXT: priority = 0 +; GISEL_O-NEXT: float_mode = 240 +; GISEL_O-NEXT: priv = 0 +; GISEL_O-NEXT: enable_dx10_clamp = 1 +; GISEL_O-NEXT: debug_mode = 0 +; GISEL_O-NEXT: enable_ieee_mode = 1 +; GISEL_O-NEXT: enable_wgp_mode = 0 +; GISEL_O-NEXT: enable_mem_ordered = 0 +; GISEL_O-NEXT: enable_fwd_progress = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_wave_byte_offset = 1 +; GISEL_O-NEXT: user_sgpr_count = 14 +; GISEL_O-NEXT: enable_trap_handler = 0 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_y = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_id_z = 1 +; GISEL_O-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_O-NEXT: enable_vgpr_workitem_id = 2 +; GISEL_O-NEXT: enable_exception_msb = 0 +; GISEL_O-NEXT: granulated_lds_size = 0 +; GISEL_O-NEXT: enable_exception = 0 +; GISEL_O-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_queue_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_O-NEXT: enable_sgpr_dispatch_id = 1 +; GISEL_O-NEXT: enable_sgpr_flat_scratch_init = 1 +; GISEL_O-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_O-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_O-NEXT: 
enable_wavefront_size32 = 0 +; GISEL_O-NEXT: enable_ordered_append_gds = 0 +; GISEL_O-NEXT: private_element_size = 1 +; GISEL_O-NEXT: is_ptr64 = 1 +; GISEL_O-NEXT: is_dynamic_callstack = 1 +; GISEL_O-NEXT: is_debug_enabled = 0 +; GISEL_O-NEXT: is_xnack_enabled = 0 +; GISEL_O-NEXT: workitem_private_segment_byte_size = 16384 +; GISEL_O-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_O-NEXT: gds_segment_byte_size = 0 +; GISEL_O-NEXT: kernarg_segment_byte_size = 64 +; GISEL_O-NEXT: workgroup_fbarrier_count = 0 +; GISEL_O-NEXT: wavefront_sgpr_count = 68 +; GISEL_O-NEXT: workitem_vgpr_count = 42 +; GISEL_O-NEXT: reserved_vgpr_first = 0 +; GISEL_O-NEXT: reserved_vgpr_count = 0 +; GISEL_O-NEXT: reserved_sgpr_first = 0 +; GISEL_O-NEXT: reserved_sgpr_count = 0 +; GISEL_O-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_O-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_O-NEXT: kernarg_segment_alignment = 4 +; GISEL_O-NEXT: group_segment_alignment = 4 +; GISEL_O-NEXT: private_segment_alignment = 4 +; GISEL_O-NEXT: wavefront_size = 6 +; GISEL_O-NEXT: call_convention = -1 +; GISEL_O-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_O-NEXT: .end_amd_kernel_code_t +; GISEL_O-NEXT: ; %bb.0: +; GISEL_O-NEXT: s_mov_b32 s32, 0 +; GISEL_O-NEXT: s_mov_b32 flat_scratch_lo, s13 +; GISEL_O-NEXT: s_add_i32 s12, s12, s17 +; GISEL_O-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; GISEL_O-NEXT: s_add_u32 s0, s0, s17 +; GISEL_O-NEXT: s_addc_u32 s1, s1, 0 +; GISEL_O-NEXT: s_mov_b32 s13, s15 +; GISEL_O-NEXT: s_mov_b32 s12, s14 +; GISEL_O-NEXT: s_getpc_b64 s[14:15] +; GISEL_O-NEXT: s_add_u32 s14, s14, gv.fptr1@rel32@lo+4 +; GISEL_O-NEXT: s_addc_u32 s15, s15, gv.fptr1@rel32@hi+12 +; GISEL_O-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; GISEL_O-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; GISEL_O-NEXT: s_load_dwordx2 s[18:19], s[14:15], 0x0 +; GISEL_O-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL_O-NEXT: s_add_u32 s8, s8, 8 +; GISEL_O-NEXT: s_addc_u32 s9, s9, 0 +; GISEL_O-NEXT: v_or_b32_e32 v31, v0, v2 
+; GISEL_O-NEXT: v_mov_b32_e32 v0, 0x7b +; GISEL_O-NEXT: s_mov_b32 s14, s16 +; GISEL_O-NEXT: s_waitcnt lgkmcnt(0) +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[18:19] +; GISEL_O-NEXT: s_endpgm +; +; GISEL_C-LABEL: test_indirect_call_sgpr_ptr_arg: +; GISEL_C: .amd_kernel_code_t +; GISEL_C-NEXT: amd_code_version_major = 1 +; GISEL_C-NEXT: amd_code_version_minor = 2 +; GISEL_C-NEXT: amd_machine_kind = 1 +; GISEL_C-NEXT: amd_machine_version_major = 7 +; GISEL_C-NEXT: amd_machine_version_minor = 0 +; GISEL_C-NEXT: amd_machine_version_stepping = 0 +; GISEL_C-NEXT: kernel_code_entry_byte_offset = 256 +; GISEL_C-NEXT: kernel_code_prefetch_byte_size = 0 +; GISEL_C-NEXT: granulated_workitem_vgpr_count = 0 +; GISEL_C-NEXT: granulated_wavefront_sgpr_count = 0 +; GISEL_C-NEXT: priority = 0 +; GISEL_C-NEXT: float_mode = 240 +; GISEL_C-NEXT: priv = 0 +; GISEL_C-NEXT: enable_dx10_clamp = 1 +; GISEL_C-NEXT: debug_mode = 0 +; GISEL_C-NEXT: enable_ieee_mode = 1 +; GISEL_C-NEXT: enable_wgp_mode = 0 +; GISEL_C-NEXT: enable_mem_ordered = 0 +; GISEL_C-NEXT: enable_fwd_progress = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; GISEL_C-NEXT: user_sgpr_count = 6 +; GISEL_C-NEXT: enable_trap_handler = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_x = 1 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_y = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_id_z = 0 +; GISEL_C-NEXT: enable_sgpr_workgroup_info = 0 +; GISEL_C-NEXT: enable_vgpr_workitem_id = 0 +; GISEL_C-NEXT: enable_exception_msb = 0 +; GISEL_C-NEXT: granulated_lds_size = 0 +; GISEL_C-NEXT: enable_exception = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_buffer = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_queue_ptr = 0 +; GISEL_C-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; GISEL_C-NEXT: enable_sgpr_dispatch_id = 0 +; GISEL_C-NEXT: enable_sgpr_flat_scratch_init = 0 +; GISEL_C-NEXT: enable_sgpr_private_segment_size = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; GISEL_C-NEXT: 
enable_sgpr_grid_workgroup_count_y = 0 +; GISEL_C-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; GISEL_C-NEXT: enable_wavefront_size32 = 0 +; GISEL_C-NEXT: enable_ordered_append_gds = 0 +; GISEL_C-NEXT: private_element_size = 1 +; GISEL_C-NEXT: is_ptr64 = 1 +; GISEL_C-NEXT: is_dynamic_callstack = 0 +; GISEL_C-NEXT: is_debug_enabled = 0 +; GISEL_C-NEXT: is_xnack_enabled = 0 +; GISEL_C-NEXT: workitem_private_segment_byte_size = 0 +; GISEL_C-NEXT: workgroup_group_segment_byte_size = 0 +; GISEL_C-NEXT: gds_segment_byte_size = 0 +; GISEL_C-NEXT: kernarg_segment_byte_size = 4 +; GISEL_C-NEXT: workgroup_fbarrier_count = 0 +; GISEL_C-NEXT: wavefront_sgpr_count = 0 +; GISEL_C-NEXT: workitem_vgpr_count = 0 +; GISEL_C-NEXT: reserved_vgpr_first = 0 +; GISEL_C-NEXT: reserved_vgpr_count = 0 +; GISEL_C-NEXT: reserved_sgpr_first = 0 +; GISEL_C-NEXT: reserved_sgpr_count = 0 +; GISEL_C-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; GISEL_C-NEXT: debug_private_segment_buffer_sgpr = 0 +; GISEL_C-NEXT: kernarg_segment_alignment = 4 +; GISEL_C-NEXT: group_segment_alignment = 4 +; GISEL_C-NEXT: private_segment_alignment = 4 +; GISEL_C-NEXT: wavefront_size = 6 +; GISEL_C-NEXT: call_convention = -1 +; GISEL_C-NEXT: runtime_loader_kernel_symbol = 0 +; GISEL_C-NEXT: .end_amd_kernel_code_t +; GISEL_C-NEXT: ; %bb.0: %fptr = load ptr, ptr addrspace(4) @gv.fptr1 call void %fptr(i32 123) ret void } define void @test_indirect_call_vgpr_ptr(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 18 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; 
GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[46:47], exec -; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] -; GCN-NEXT: s_cbranch_execnz .LBB2_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; 
GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 18 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: 
v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: s_mov_b64 s[46:47], exec +; GCN_O-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN_O-NEXT: s_cbranch_execnz .LBB2_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[46:47] +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: 
v_readlane_b32 s4, v40, 18 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 18 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 
s[46:47], exec -; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] -; GISEL-NEXT: s_cbranch_execnz .LBB2_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 18 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr: +; 
GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 18 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: s_mov_b64 s[46:47], exec +; GISEL_O-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: 
s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL_O-NEXT: s_cbranch_execnz .LBB2_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[46:47] +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call void %fptr() ret void } define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_arg: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: 
s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 18 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[46:47], exec -; GCN-NEXT: v_mov_b32_e32 v2, 0x7b -; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: v_mov_b32_e32 v0, v2 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: ; implicit-def: 
$vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] -; GCN-NEXT: s_cbranch_execnz .LBB3_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 18 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: 
v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: s_mov_b64 s[46:47], exec +; GCN_O-NEXT: v_mov_b32_e32 v2, 0x7b +; GCN_O-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: v_mov_b32_e32 v0, v2 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: ; implicit-def: $vgpr2 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN_O-NEXT: s_cbranch_execnz .LBB3_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[46:47] +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: 
v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: v_readlane_b32 s4, v40, 18 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 18 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 
10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: s_mov_b64 s[46:47], exec +; GISEL_O-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GISEL_O-NEXT: v_mov_b32_e32 v0, 0x7b +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL_O-NEXT: s_cbranch_execnz .LBB3_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[46:47] +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 
9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 18 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: 
s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 s[46:47], exec -; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] -; GISEL-NEXT: s_cbranch_execnz .LBB3_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 
18 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call void %fptr(i32 123) ret void } define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_ret: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 18 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: 
s_mov_b64 s[46:47], exec -; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_mov_b32_e32 v2, v0 -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] -; GCN-NEXT: s_cbranch_execnz .LBB4_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 18 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_ret: 
+; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 18 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: s_mov_b64 s[46:47], exec +; GCN_O-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, 
s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: v_mov_b32_e32 v2, v0 +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN_O-NEXT: s_cbranch_execnz .LBB4_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[46:47] +; GCN_O-NEXT: v_add_i32_e32 v0, vcc, 1, v2 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: v_readlane_b32 s4, v40, 18 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_ret: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_ret: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; 
GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 18 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: s_mov_b64 s[46:47], exec +; GISEL_O-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: v_mov_b32_e32 v1, v0 +; GISEL_O-NEXT: ; 
implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL_O-NEXT: s_cbranch_execnz .LBB4_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[46:47] +; GISEL_O-NEXT: v_add_i32_e32 v0, vcc, 1, v1 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 18 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 18 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; 
GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 s[46:47], exec -; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: v_mov_b32_e32 v1, v0 -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] -; GISEL-NEXT: s_cbranch_execnz .LBB4_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; 
GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 18 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_ret: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) %a = call i32 %fptr() %b = add i32 %a, 1 ret i32 %b } define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_in_branch: -; GCN: ; %bb.0: ; %bb0 -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s16, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[18:19] -; GCN-NEXT: v_writelane_b32 v40, s16, 20 -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; 
GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: v_and_b32_e32 v2, 1, v2 -; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc -; GCN-NEXT: s_cbranch_execz .LBB5_4 -; GCN-NEXT: ; %bb.1: ; %bb1 -; GCN-NEXT: s_mov_b64 s[48:49], exec -; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: v_readfirstlane_b32 s17, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[50:51] -; GCN-NEXT: s_cbranch_execnz .LBB5_2 -; GCN-NEXT: ; %bb.3: -; GCN-NEXT: s_mov_b64 exec, s[48:49] -; GCN-NEXT: .LBB5_4: ; %bb2 -; GCN-NEXT: s_or_b64 exec, exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, 
v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: v_readlane_b32 s4, v40, 20 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s4 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GCN_O: ; %bb.0: ; %bb0 +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s16, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[18:19] +; GCN_O-NEXT: v_writelane_b32 v40, s16, 20 +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; 
GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: s_mov_b32 s42, s15 +; GCN_O-NEXT: s_mov_b32 s43, s14 +; GCN_O-NEXT: s_mov_b32 s44, s13 +; GCN_O-NEXT: s_mov_b32 s45, s12 +; GCN_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GCN_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GCN_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GCN_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN_O-NEXT: v_and_b32_e32 v2, 1, v2 +; GCN_O-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GCN_O-NEXT: s_and_saveexec_b64 s[46:47], vcc +; GCN_O-NEXT: s_cbranch_execz .LBB5_4 +; GCN_O-NEXT: ; %bb.1: ; %bb1 +; GCN_O-NEXT: s_mov_b64 s[48:49], exec +; GCN_O-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s16, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s17, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[50:51], vcc +; GCN_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GCN_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GCN_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GCN_O-NEXT: s_mov_b32 s12, s45 +; GCN_O-NEXT: s_mov_b32 s13, s44 +; GCN_O-NEXT: s_mov_b32 s14, s43 +; GCN_O-NEXT: s_mov_b32 s15, s42 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: ; implicit-def: $vgpr31 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[50:51] +; GCN_O-NEXT: s_cbranch_execnz .LBB5_2 +; GCN_O-NEXT: ; %bb.3: +; GCN_O-NEXT: s_mov_b64 exec, s[48:49] +; GCN_O-NEXT: .LBB5_4: ; %bb2 +; GCN_O-NEXT: s_or_b64 exec, exec, s[46:47] +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: 
v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: v_readlane_b32 s4, v40, 20 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s4 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_in_branch: -; GISEL: ; %bb.0: ; %bb0 -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s16, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[18:19] -; GISEL-NEXT: v_writelane_b32 v40, s16, 20 -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: 
v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 -; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] -; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 -; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc -; GISEL-NEXT: s_cbranch_execz .LBB5_4 -; GISEL-NEXT: ; %bb.1: ; %bb1 -; GISEL-NEXT: s_mov_b64 s[48:49], exec -; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s16, v0 -; GISEL-NEXT: v_readfirstlane_b32 s17, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] -; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] -; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51] -; GISEL-NEXT: s_cbranch_execnz .LBB5_2 -; GISEL-NEXT: ; %bb.3: -; GISEL-NEXT: s_mov_b64 exec, s[48:49] -; GISEL-NEXT: .LBB5_4: ; %bb2 -; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 
16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: v_readlane_b32 s4, v40, 20 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GCN_C: ; %bb.0: ; %bb0 +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_C-NEXT: v_and_b32_e32 v0, 1, v2 +; GCN_C-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GCN_C-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN_C-NEXT: ; %bb.1: ; %bb1 +; GCN_C-NEXT: ; divergent unreachable +; GCN_C-NEXT: ; %bb.2: ; %UnifiedReturnBlock +; GCN_C-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN_C-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GISEL_O: ; %bb.0: ; %bb0 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s16, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[18:19] +; GISEL_O-NEXT: v_writelane_b32 v40, s16, 20 +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 
+; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: s_mov_b32 s42, s15 +; GISEL_O-NEXT: s_mov_b32 s43, s14 +; GISEL_O-NEXT: s_mov_b32 s44, s13 +; GISEL_O-NEXT: s_mov_b32 s45, s12 +; GISEL_O-NEXT: s_mov_b64 s[34:35], s[10:11] +; GISEL_O-NEXT: s_mov_b64 s[36:37], s[8:9] +; GISEL_O-NEXT: s_mov_b64 s[38:39], s[6:7] +; GISEL_O-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL_O-NEXT: v_and_b32_e32 v2, 1, v2 +; GISEL_O-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL_O-NEXT: s_and_saveexec_b64 s[46:47], vcc +; GISEL_O-NEXT: s_cbranch_execz .LBB5_4 +; GISEL_O-NEXT: ; %bb.1: ; %bb1 +; GISEL_O-NEXT: s_mov_b64 s[48:49], exec +; GISEL_O-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s16, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s17, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[50:51], vcc +; GISEL_O-NEXT: s_mov_b64 s[4:5], s[40:41] +; GISEL_O-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL_O-NEXT: s_mov_b64 s[8:9], s[36:37] +; GISEL_O-NEXT: s_mov_b64 s[10:11], s[34:35] +; GISEL_O-NEXT: s_mov_b32 s12, s45 +; GISEL_O-NEXT: s_mov_b32 s13, s44 +; GISEL_O-NEXT: s_mov_b32 s14, s43 +; GISEL_O-NEXT: 
s_mov_b32 s15, s42 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: ; implicit-def: $vgpr31 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[50:51] +; GISEL_O-NEXT: s_cbranch_execnz .LBB5_2 +; GISEL_O-NEXT: ; %bb.3: +; GISEL_O-NEXT: s_mov_b64 exec, s[48:49] +; GISEL_O-NEXT: .LBB5_4: ; %bb2 +; GISEL_O-NEXT: s_or_b64 exec, exec, s[46:47] +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: v_readlane_b32 s4, v40, 20 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s4 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_in_branch: +; GISEL_C: ; %bb.0: ; %bb0 +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_C-NEXT: v_and_b32_e32 v0, 1, v2 +; GISEL_C-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL_C-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GISEL_C-NEXT: ; %bb.1: ; %bb1 +; GISEL_C-NEXT: ; divergent unreachable +; 
GISEL_C-NEXT: ; %bb.2: ; %UnifiedReturnBlock +; GISEL_C-NEXT: s_or_b64 exec, exec, s[4:5] +; GISEL_C-NEXT: s_setpc_b64 s[30:31] bb0: br i1 %cond, label %bb1, label %bb2 @@ -1116,393 +1450,409 @@ } define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s5, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 
s[6:7], exec -; GCN-NEXT: s_movk_i32 s4, 0x7b -; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s8, v0 -; GCN-NEXT: v_readfirstlane_b32 s9, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] -; GCN-NEXT: s_cbranch_execnz .LBB6_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; 
GCN-NEXT: s_mov_b32 s33, s5 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s5, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: s_mov_b64 s[6:7], exec +; GCN_O-NEXT: s_movk_i32 s4, 0x7b +; GCN_O-NEXT: .LBB6_1: ; =>This Inner 
Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s8, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s9, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN_O-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[10:11] +; GCN_O-NEXT: s_cbranch_execnz .LBB6_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[6:7] +; GCN_O-NEXT: s_addk_i32 s32, 
0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s5 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s5, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; 
GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[6:7], exec -; GISEL-NEXT: s_movk_i32 s4, 0x7b -; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v0 -; GISEL-NEXT: v_readfirstlane_b32 s9, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] -; GISEL-NEXT: s_cbranch_execnz .LBB6_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 
-; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s5 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s5, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; 
GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: s_mov_b64 s[6:7], exec +; GISEL_O-NEXT: s_movk_i32 s4, 0x7b +; GISEL_O-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s8, v0 +; GISEL_O-NEXT: v_readfirstlane_b32 s9, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL_O-NEXT: s_cbranch_execnz .LBB6_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, 
v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[6:7] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s5 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call amdgpu_gfx void %fptr(i32 inreg 123) ret void } define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s10, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: 
v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: v_mov_b32_e32 v41, v0 -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s6, v1 -; GCN-NEXT: v_readfirstlane_b32 s7, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] -; GCN-NEXT: s_cbranch_execnz .LBB7_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; 
GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s10 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s10, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; 
GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: v_mov_b32_e32 v41, v0 +; GCN_O-NEXT: s_mov_b64 s[4:5], exec +; GCN_O-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s6, v1 +; GCN_O-NEXT: v_readfirstlane_b32 s7, v2 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GCN_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN_O-NEXT: v_mov_b32_e32 v0, v41 +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN_O-NEXT: ; implicit-def: $vgpr1_vgpr2 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN_O-NEXT: s_cbranch_execnz .LBB7_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: v_mov_b32_e32 v0, v41 +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: 
v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s10 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s10, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: 
v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: v_mov_b32_e32 v41, v0 -; GISEL-NEXT: s_mov_b64 s[4:5], exec -; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v1 -; GISEL-NEXT: v_readfirstlane_b32 s7, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GISEL-NEXT: ; implicit-def: $vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] -; GISEL-NEXT: s_cbranch_execnz .LBB7_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 
26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s10 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s10, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded 
Spill +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: v_mov_b32_e32 v41, v0 +; GISEL_O-NEXT: s_mov_b64 s[4:5], exec +; GISEL_O-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL_O-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL_O-NEXT: v_mov_b32_e32 v0, v41 +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL_O-NEXT: ; implicit-def: $vgpr1 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL_O-NEXT: 
s_cbranch_execnz .LBB7_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: v_mov_b32_e32 v0, v41 +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s10 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] +; +; GISEL_C-LABEL: 
test_indirect_call_vgpr_ptr_arg_and_reuse: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) call amdgpu_gfx void %fptr(i32 %i) ret i32 %i } @@ -1512,391 +1862,410 @@ ; allocator is not able to do that because the return value clashes with the liverange of an ; IMPLICIT_DEF of the argument. define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { -; GCN-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s10, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: 
v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s6, v1 -; GCN-NEXT: v_readfirstlane_b32 s7, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: v_mov_b32_e32 v3, v0 -; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: ; implicit-def: $vgpr0 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] -; GCN-NEXT: s_cbranch_execnz .LBB8_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 
s30, v40, 0 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s10 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: s_mov_b32 s10, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 
v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: s_mov_b64 s[4:5], exec +; GCN_O-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s6, v1 +; GCN_O-NEXT: v_readfirstlane_b32 s7, v2 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GCN_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN_O-NEXT: v_mov_b32_e32 v3, v0 +; GCN_O-NEXT: ; implicit-def: $vgpr1_vgpr2 +; GCN_O-NEXT: ; implicit-def: $vgpr0 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN_O-NEXT: s_cbranch_execnz .LBB8_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: v_mov_b32_e32 v0, v3 +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 
s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s10 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s10, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 
22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: s_mov_b64 s[4:5], exec +; GISEL_O-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s8, v1 +; GISEL_O-NEXT: v_readfirstlane_b32 s9, v2 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GISEL_O-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL_O-NEXT: v_mov_b32_e32 v2, v0 +; GISEL_O-NEXT: ; implicit-def: $vgpr1 +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[6:7] +; GISEL_O-NEXT: s_cbranch_execnz .LBB8_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: v_mov_b32_e32 v0, v2 +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 
s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s10 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s10, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; 
GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[4:5], exec -; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GISEL-NEXT: v_mov_b32_e32 v2, v0 -; GISEL-NEXT: ; implicit-def: $vgpr1 -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[6:7] -; GISEL-NEXT: s_cbranch_execnz .LBB8_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_mov_b32_e32 v0, v2 -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, 
v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: s_mov_b32 s33, s10 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) %ret = call amdgpu_gfx i32 %fptr(i32 %i) ret i32 %ret } ; Calling a vgpr can never be a tail call. 
define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { -; GCN-LABEL: test_indirect_tail_call_vgpr_ptr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s10, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s6, v0 -; GCN-NEXT: v_readfirstlane_b32 s7, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] -; 
GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] -; GCN-NEXT: s_cbranch_execnz .LBB9_1 -; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: s_mov_b32 s33, s10 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN_O-LABEL: test_indirect_tail_call_vgpr_ptr: +; GCN_O: ; %bb.0: +; GCN_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN_O-NEXT: 
s_mov_b32 s10, s33 +; GCN_O-NEXT: s_mov_b32 s33, s32 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0x400 +; GCN_O-NEXT: v_writelane_b32 v40, s30, 0 +; GCN_O-NEXT: v_writelane_b32 v40, s31, 1 +; GCN_O-NEXT: v_writelane_b32 v40, s34, 2 +; GCN_O-NEXT: v_writelane_b32 v40, s35, 3 +; GCN_O-NEXT: v_writelane_b32 v40, s36, 4 +; GCN_O-NEXT: v_writelane_b32 v40, s37, 5 +; GCN_O-NEXT: v_writelane_b32 v40, s38, 6 +; GCN_O-NEXT: v_writelane_b32 v40, s39, 7 +; GCN_O-NEXT: v_writelane_b32 v40, s40, 8 +; GCN_O-NEXT: v_writelane_b32 v40, s41, 9 +; GCN_O-NEXT: v_writelane_b32 v40, s42, 10 +; GCN_O-NEXT: v_writelane_b32 v40, s43, 11 +; GCN_O-NEXT: v_writelane_b32 v40, s44, 12 +; GCN_O-NEXT: v_writelane_b32 v40, s45, 13 +; GCN_O-NEXT: v_writelane_b32 v40, s46, 14 +; GCN_O-NEXT: v_writelane_b32 v40, s47, 15 +; GCN_O-NEXT: v_writelane_b32 v40, s48, 16 +; GCN_O-NEXT: v_writelane_b32 v40, s49, 17 +; GCN_O-NEXT: v_writelane_b32 v40, s50, 18 +; GCN_O-NEXT: v_writelane_b32 v40, s51, 19 +; GCN_O-NEXT: v_writelane_b32 v40, s52, 20 +; GCN_O-NEXT: v_writelane_b32 v40, s53, 21 +; GCN_O-NEXT: v_writelane_b32 v40, s54, 22 +; GCN_O-NEXT: v_writelane_b32 v40, s55, 23 +; GCN_O-NEXT: v_writelane_b32 v40, s56, 24 +; GCN_O-NEXT: v_writelane_b32 v40, s57, 25 +; GCN_O-NEXT: v_writelane_b32 v40, s58, 26 +; GCN_O-NEXT: v_writelane_b32 v40, s59, 27 +; GCN_O-NEXT: v_writelane_b32 v40, s60, 28 +; GCN_O-NEXT: v_writelane_b32 v40, s61, 29 +; GCN_O-NEXT: v_writelane_b32 v40, s62, 30 +; GCN_O-NEXT: v_writelane_b32 v40, s63, 31 +; GCN_O-NEXT: s_mov_b64 s[4:5], exec +; GCN_O-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 +; GCN_O-NEXT: v_readfirstlane_b32 s6, v0 +; GCN_O-NEXT: v_readfirstlane_b32 s7, v1 +; GCN_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] +; GCN_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN_O-NEXT: ; 
implicit-def: $vgpr0_vgpr1 +; GCN_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN_O-NEXT: s_cbranch_execnz .LBB9_1 +; GCN_O-NEXT: ; %bb.2: +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: v_readlane_b32 s63, v40, 31 +; GCN_O-NEXT: v_readlane_b32 s62, v40, 30 +; GCN_O-NEXT: v_readlane_b32 s61, v40, 29 +; GCN_O-NEXT: v_readlane_b32 s60, v40, 28 +; GCN_O-NEXT: v_readlane_b32 s59, v40, 27 +; GCN_O-NEXT: v_readlane_b32 s58, v40, 26 +; GCN_O-NEXT: v_readlane_b32 s57, v40, 25 +; GCN_O-NEXT: v_readlane_b32 s56, v40, 24 +; GCN_O-NEXT: v_readlane_b32 s55, v40, 23 +; GCN_O-NEXT: v_readlane_b32 s54, v40, 22 +; GCN_O-NEXT: v_readlane_b32 s53, v40, 21 +; GCN_O-NEXT: v_readlane_b32 s52, v40, 20 +; GCN_O-NEXT: v_readlane_b32 s51, v40, 19 +; GCN_O-NEXT: v_readlane_b32 s50, v40, 18 +; GCN_O-NEXT: v_readlane_b32 s49, v40, 17 +; GCN_O-NEXT: v_readlane_b32 s48, v40, 16 +; GCN_O-NEXT: v_readlane_b32 s47, v40, 15 +; GCN_O-NEXT: v_readlane_b32 s46, v40, 14 +; GCN_O-NEXT: v_readlane_b32 s45, v40, 13 +; GCN_O-NEXT: v_readlane_b32 s44, v40, 12 +; GCN_O-NEXT: v_readlane_b32 s43, v40, 11 +; GCN_O-NEXT: v_readlane_b32 s42, v40, 10 +; GCN_O-NEXT: v_readlane_b32 s41, v40, 9 +; GCN_O-NEXT: v_readlane_b32 s40, v40, 8 +; GCN_O-NEXT: v_readlane_b32 s39, v40, 7 +; GCN_O-NEXT: v_readlane_b32 s38, v40, 6 +; GCN_O-NEXT: v_readlane_b32 s37, v40, 5 +; GCN_O-NEXT: v_readlane_b32 s36, v40, 4 +; GCN_O-NEXT: v_readlane_b32 s35, v40, 3 +; GCN_O-NEXT: v_readlane_b32 s34, v40, 2 +; GCN_O-NEXT: v_readlane_b32 s31, v40, 1 +; GCN_O-NEXT: v_readlane_b32 s30, v40, 0 +; GCN_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN_O-NEXT: s_mov_b64 exec, s[4:5] +; GCN_O-NEXT: s_addk_i32 s32, 0xfc00 +; GCN_O-NEXT: s_mov_b32 s33, s10 +; GCN_O-NEXT: s_waitcnt vmcnt(0) +; GCN_O-NEXT: s_setpc_b64 s[30:31] +; +; GCN_C-LABEL: test_indirect_tail_call_vgpr_ptr: +; GCN_C: ; %bb.0: +; GCN_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; +; GISEL_O-LABEL: 
test_indirect_tail_call_vgpr_ptr: +; GISEL_O: ; %bb.0: +; GISEL_O-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL_O-NEXT: s_mov_b32 s10, s33 +; GISEL_O-NEXT: s_mov_b32 s33, s32 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: s_addk_i32 s32, 0x400 +; GISEL_O-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL_O-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL_O-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL_O-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL_O-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL_O-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL_O-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL_O-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL_O-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL_O-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL_O-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL_O-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL_O-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL_O-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL_O-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL_O-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL_O-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL_O-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL_O-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL_O-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL_O-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL_O-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL_O-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL_O-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL_O-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL_O-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL_O-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL_O-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL_O-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL_O-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL_O-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL_O-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL_O-NEXT: s_mov_b64 s[4:5], exec +; GISEL_O-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 +; GISEL_O-NEXT: v_readfirstlane_b32 s6, v0 +; 
GISEL_O-NEXT: v_readfirstlane_b32 s7, v1 +; GISEL_O-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] +; GISEL_O-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL_O-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL_O-NEXT: ; implicit-def: $vgpr0 +; GISEL_O-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL_O-NEXT: s_cbranch_execnz .LBB9_1 +; GISEL_O-NEXT: ; %bb.2: +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL_O-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL_O-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL_O-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL_O-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL_O-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL_O-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL_O-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL_O-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL_O-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL_O-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL_O-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL_O-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL_O-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL_O-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL_O-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL_O-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL_O-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL_O-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL_O-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL_O-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL_O-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL_O-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL_O-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL_O-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL_O-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL_O-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL_O-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL_O-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL_O-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL_O-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL_O-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL_O-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL_O-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GISEL_O-NEXT: s_mov_b64 exec, s[4:5] +; GISEL_O-NEXT: 
s_addk_i32 s32, 0xfc00 +; GISEL_O-NEXT: s_mov_b32 s33, s10 +; GISEL_O-NEXT: s_waitcnt vmcnt(0) +; GISEL_O-NEXT: s_setpc_b64 s[30:31] ; -; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: s_mov_b32 s10, s33 -; GISEL-NEXT: s_mov_b32 s33, s32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[4:5], exec -; GISEL-NEXT: .LBB9_1: ; =>This Inner 
Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v0 -; GISEL-NEXT: v_readfirstlane_b32 s7, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] -; GISEL-NEXT: s_cbranch_execnz .LBB9_1 -; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; 
GISEL-NEXT: s_mov_b32 s33, s10 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL_C-LABEL: test_indirect_tail_call_vgpr_ptr: +; GISEL_C: ; %bb.0: +; GISEL_C-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) tail call amdgpu_gfx void %fptr() ret void } !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdgpu_code_object_version", i32 200} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GISEL: {{.*}} Index: llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll +++ llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll @@ -1,4 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_C %s +; RUN: FileCheck -check-prefix=REMARK %s < %t +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -attributor-assume-closed-world=false -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefixes=STDERR,STDERR_O %s ; RUN: FileCheck -check-prefix=REMARK %s < %t ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel @@ -157,16 +159,27 @@ ret void } -; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call -; STDERR-NEXT: remark: foo.cl:64:0: SGPRs: 39 -; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: 32 -; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: 10 -; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 -; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True -; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 -; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 -; 
STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 -; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 +; STDERR_O: remark: foo.cl:64:0: Function Name: test_indirect_call +; STDERR_O-NEXT: remark: foo.cl:64:0: SGPRs: 39 +; STDERR_O-NEXT: remark: foo.cl:64:0: VGPRs: 32 +; STDERR_O-NEXT: remark: foo.cl:64:0: AGPRs: 10 +; STDERR_O-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 +; STDERR_O-NEXT: remark: foo.cl:64:0: Dynamic Stack: True +; STDERR_O-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 +; STDERR_O-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 + +; STDERR_C: remark: foo.cl:64:0: Function Name: test_indirect_call +; STDERR_C-NEXT: remark: foo.cl:64:0: SGPRs: 4 +; STDERR_C-NEXT: remark: foo.cl:64:0: VGPRs: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: AGPRs: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: Dynamic Stack: False +; STDERR_C-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: 8 +; STDERR_C-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4 define amdgpu_kernel void @test_indirect_call() !dbg !9 { @@ -175,17 +188,27 @@ ret void } -; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack -; STDERR-NEXT: remark: foo.cl:74:0: SGPRs: 39 -; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: 32 -; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: 10 -; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 -; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True -; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 -; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 -; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 -; STDERR-NEXT: remark: foo.cl:74:0: LDS Size 
[bytes/block]: 0 +; STDERR_O: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack +; STDERR_O-NEXT: remark: foo.cl:74:0: SGPRs: 39 +; STDERR_O-NEXT: remark: foo.cl:74:0: VGPRs: 32 +; STDERR_O-NEXT: remark: foo.cl:74:0: AGPRs: 10 +; STDERR_O-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 +; STDERR_O-NEXT: remark: foo.cl:74:0: Dynamic Stack: True +; STDERR_O-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 +; STDERR_O-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 +; STDERR_O-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 +; STDERR_C: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack +; STDERR_C-NEXT: remark: foo.cl:74:0: SGPRs: 12 +; STDERR_C-NEXT: remark: foo.cl:74:0: VGPRs: 1 +; STDERR_C-NEXT: remark: foo.cl:74:0: AGPRs: 0 +; STDERR_C-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144 +; STDERR_C-NEXT: remark: foo.cl:74:0: Dynamic Stack: False +; STDERR_C-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: 8 +; STDERR_C-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0 +; STDERR_C-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg) define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 { Index: llvm/test/CodeGen/AMDGPU/sibling-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 
-verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=hawaii -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -attributor-assume-closed-world=false -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s target datalayout = "A5" ; FIXME: Why is this commuted only sometimes? Index: llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll @@ -1,8 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=AKF_GCN %s -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefix=ATTRIBUTOR_GCN %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_CW %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor -attributor-assume-closed-world=false %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN,ATTRIBUTOR_GCN_OW %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -attributor-assume-closed-world=false < %s | FileCheck -check-prefix=GFX9 %s target datalayout = "A5" @@ -21,6 +22,17 @@ ret void } +define ptr @helper() { +; AKF_GCN-LABEL: define {{[^@]+}}@helper() { 
+; AKF_GCN-NEXT: ret ptr @indirect +; +; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@helper +; ATTRIBUTOR_GCN-SAME: () #[[ATTR0]] { +; ATTRIBUTOR_GCN-NEXT: ret ptr @indirect +; + ret ptr @indirect +} + define amdgpu_kernel void @test_simple_indirect_call() { ; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call ; AKF_GCN-SAME: () #[[ATTR0:[0-9]+]] { @@ -31,14 +43,23 @@ ; AKF_GCN-NEXT: call void [[FP]]() ; AKF_GCN-NEXT: ret void ; -; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call -; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] { -; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) -; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr -; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 -; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 -; ATTRIBUTOR_GCN-NEXT: call void [[FP]]() -; ATTRIBUTOR_GCN-NEXT: ret void +; ATTRIBUTOR_GCN_CW-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_GCN_CW-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_GCN_CW-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_GCN_CW-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr +; ATTRIBUTOR_GCN_CW-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_CW-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_CW-NEXT: call void @indirect() +; ATTRIBUTOR_GCN_CW-NEXT: ret void +; +; ATTRIBUTOR_GCN_OW-LABEL: define {{[^@]+}}@test_simple_indirect_call +; ATTRIBUTOR_GCN_OW-SAME: () #[[ATTR1:[0-9]+]] { +; ATTRIBUTOR_GCN_OW-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5) +; ATTRIBUTOR_GCN_OW-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr +; ATTRIBUTOR_GCN_OW-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_OW-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8 +; ATTRIBUTOR_GCN_OW-NEXT: call void [[FP]]() +; ATTRIBUTOR_GCN_OW-NEXT: ret void ; ; 
GFX9-LABEL: test_simple_indirect_call: ; GFX9: ; %bb.0: @@ -73,6 +94,9 @@ ;. ; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN_CW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN_CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +;. 
+; ATTRIBUTOR_GCN_OW: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" } +; ATTRIBUTOR_GCN_OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" } ;. Index: llvm/test/Transforms/Attributor/callgraph.ll =================================================================== --- llvm/test/Transforms/Attributor/callgraph.ll +++ llvm/test/Transforms/Attributor/callgraph.ll @@ -3,6 +3,7 @@ ; RUN: opt -passes=attributor -attributor-print-call-graph -S -disable-output < %s | FileCheck %s --check-prefixes=DOT ; RUN: opt -passes=attributor --attributor-max-specializations-per-call-base=2 -S < %s | FileCheck %s --check-prefixes=CHECK,UPTO2,LIMI2 ; RUN: opt -passes=attributor --attributor-max-specializations-per-call-base=0 -S < %s | FileCheck %s --check-prefixes=CHECK,LIMI0 +; RUN: opt -passes=attributor --attributor-assume-closed-world -S < %s | FileCheck %s --check-prefixes=CHECK,UPTO2,UNLIM,CWRLD define dso_local void @func1() { ; CHECK-LABEL: @func1( @@ -224,33 +225,6 @@ } define void @non_matching_fp2(i1 %c1, i1 %c2, i1 %c, ptr %unknown) { -; UNLIM-LABEL: @non_matching_fp2( -; UNLIM-NEXT: [[FP1:%.*]] = select i1 [[C1:%.*]], ptr @retI32, ptr @takeI32 -; UNLIM-NEXT: [[FP2:%.*]] = select i1 [[C2:%.*]], ptr @retFloatTakeFloat, ptr [[UNKNOWN:%.*]] -; UNLIM-NEXT: [[FP:%.*]] = select i1 [[C:%.*]], ptr [[FP1]], ptr [[FP2]] -; UNLIM-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FP]], @takeI32 -; UNLIM-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] -; UNLIM: 2: -; UNLIM-NEXT: call void @takeI32() -; UNLIM-NEXT: br label 
[[TMP10:%.*]] -; UNLIM: 3: -; UNLIM-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @retI32 -; UNLIM-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]] -; UNLIM: 5: -; UNLIM-NEXT: call void @retI32() -; UNLIM-NEXT: br label [[TMP10]] -; UNLIM: 6: -; UNLIM-NEXT: [[TMP7:%.*]] = icmp eq ptr [[FP]], @retFloatTakeFloat -; UNLIM-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]] -; UNLIM: 8: -; UNLIM-NEXT: call void @retFloatTakeFloat() -; UNLIM-NEXT: br label [[TMP10]] -; UNLIM: 9: -; UNLIM-NEXT: call void [[FP]]() -; UNLIM-NEXT: br label [[TMP10]] -; UNLIM: 10: -; UNLIM-NEXT: ret void -; ; LIMI2-LABEL: @non_matching_fp2( ; LIMI2-NEXT: [[FP1:%.*]] = select i1 [[C1:%.*]], ptr @retI32, ptr @takeI32 ; LIMI2-NEXT: [[FP2:%.*]] = select i1 [[C2:%.*]], ptr @retFloatTakeFloat, ptr [[UNKNOWN:%.*]] @@ -278,6 +252,55 @@ ; LIMI0-NEXT: [[FP:%.*]] = select i1 [[C:%.*]], ptr [[FP1]], ptr [[FP2]] ; LIMI0-NEXT: call void [[FP]]() ; LIMI0-NEXT: ret void +; +; CWRLD-LABEL: @non_matching_fp2( +; CWRLD-NEXT: [[FP1:%.*]] = select i1 [[C1:%.*]], ptr @retI32, ptr @takeI32 +; CWRLD-NEXT: [[FP2:%.*]] = select i1 [[C2:%.*]], ptr @retFloatTakeFloat, ptr [[UNKNOWN:%.*]] +; CWRLD-NEXT: [[FP:%.*]] = select i1 [[C:%.*]], ptr [[FP1]], ptr [[FP2]] +; CWRLD-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FP]], @takeI32 +; CWRLD-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CWRLD: 2: +; CWRLD-NEXT: call void @takeI32() +; CWRLD-NEXT: br label [[TMP21:%.*]] +; CWRLD: 3: +; CWRLD-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @retI32 +; CWRLD-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]] +; CWRLD: 5: +; CWRLD-NEXT: call void @retI32() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 6: +; CWRLD-NEXT: [[TMP7:%.*]] = icmp eq ptr [[FP]], @func3 +; CWRLD-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; CWRLD: 8: +; CWRLD-NEXT: call void @func3() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 9: +; CWRLD-NEXT: [[TMP10:%.*]] = icmp eq ptr [[FP]], @func4 +; CWRLD-NEXT: br i1 
[[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]] +; CWRLD: 11: +; CWRLD-NEXT: call void @func4() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 12: +; CWRLD-NEXT: [[TMP13:%.*]] = icmp eq ptr [[FP]], @retFloatTakeFloat +; CWRLD-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP15:%.*]] +; CWRLD: 14: +; CWRLD-NEXT: call void @retFloatTakeFloat() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 15: +; CWRLD-NEXT: [[TMP16:%.*]] = icmp eq ptr [[FP]], @retFloatTakeFloatFloatNoundef +; CWRLD-NEXT: br i1 [[TMP16]], label [[TMP17:%.*]], label [[TMP18:%.*]] +; CWRLD: 17: +; CWRLD-NEXT: call void @retFloatTakeFloatFloatNoundef() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 18: +; CWRLD-NEXT: br i1 true, label [[TMP19:%.*]], label [[TMP20:%.*]] +; CWRLD: 19: +; CWRLD-NEXT: call void @void() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 20: +; CWRLD-NEXT: unreachable +; CWRLD: 21: +; CWRLD-NEXT: ret void ; %fp1 = select i1 %c1, ptr @retI32, ptr @takeI32 %fp2 = select i1 %c2, ptr @retFloatTakeFloat, ptr %unknown @@ -287,24 +310,74 @@ } define i32 @non_matching_unknown(i1 %c, ptr %fn) { -; UPTO2-LABEL: @non_matching_unknown( -; UPTO2-NEXT: [[FP:%.*]] = select i1 [[C:%.*]], ptr @retI32, ptr [[FN:%.*]] -; UPTO2-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FP]], @retI32 -; UPTO2-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] -; UPTO2: 2: -; UPTO2-NEXT: [[CALL1:%.*]] = call i32 @retI32(i32 42) -; UPTO2-NEXT: br label [[TMP4:%.*]] -; UPTO2: 3: -; UPTO2-NEXT: [[CALL2:%.*]] = call i32 [[FP]](i32 42) -; UPTO2-NEXT: br label [[TMP4]] -; UPTO2: 4: -; UPTO2-NEXT: [[CALL_PHI:%.*]] = phi i32 [ [[CALL1]], [[TMP2]] ], [ [[CALL2]], [[TMP3]] ] -; UPTO2-NEXT: ret i32 [[CALL_PHI]] +; LIMI2-LABEL: @non_matching_unknown( +; LIMI2-NEXT: [[FP:%.*]] = select i1 [[C:%.*]], ptr @retI32, ptr [[FN:%.*]] +; LIMI2-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FP]], @retI32 +; LIMI2-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; LIMI2: 2: +; LIMI2-NEXT: [[CALL1:%.*]] = call i32 @retI32(i32 42) +; 
LIMI2-NEXT: br label [[TMP4:%.*]] +; LIMI2: 3: +; LIMI2-NEXT: [[CALL2:%.*]] = call i32 [[FP]](i32 42) +; LIMI2-NEXT: br label [[TMP4]] +; LIMI2: 4: +; LIMI2-NEXT: [[CALL_PHI:%.*]] = phi i32 [ [[CALL1]], [[TMP2]] ], [ [[CALL2]], [[TMP3]] ] +; LIMI2-NEXT: ret i32 [[CALL_PHI]] ; ; LIMI0-LABEL: @non_matching_unknown( ; LIMI0-NEXT: [[FP:%.*]] = select i1 [[C:%.*]], ptr @retI32, ptr [[FN:%.*]] ; LIMI0-NEXT: [[CALL:%.*]] = call i32 [[FP]](i32 42) ; LIMI0-NEXT: ret i32 [[CALL]] +; +; CWRLD-LABEL: @non_matching_unknown( +; CWRLD-NEXT: [[FP:%.*]] = select i1 [[C:%.*]], ptr @retI32, ptr [[FN:%.*]] +; CWRLD-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FP]], @func3 +; CWRLD-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CWRLD: 2: +; CWRLD-NEXT: [[CALL1:%.*]] = call i32 @func3(i32 42) +; CWRLD-NEXT: br label [[TMP24:%.*]] +; CWRLD: 3: +; CWRLD-NEXT: [[TMP4:%.*]] = icmp eq ptr [[FP]], @func4 +; CWRLD-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]] +; CWRLD: 5: +; CWRLD-NEXT: [[CALL2:%.*]] = call i32 @func4(i32 42) +; CWRLD-NEXT: br label [[TMP24]] +; CWRLD: 6: +; CWRLD-NEXT: [[TMP7:%.*]] = icmp eq ptr [[FP]], @retI32 +; CWRLD-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; CWRLD: 8: +; CWRLD-NEXT: [[CALL3:%.*]] = call i32 @retI32(i32 42) +; CWRLD-NEXT: br label [[TMP24]] +; CWRLD: 9: +; CWRLD-NEXT: [[TMP10:%.*]] = icmp eq ptr [[FP]], @takeI32 +; CWRLD-NEXT: br i1 [[TMP10]], label [[TMP11:%.*]], label [[TMP12:%.*]] +; CWRLD: 11: +; CWRLD-NEXT: [[CALL4:%.*]] = call i32 @takeI32(i32 42) +; CWRLD-NEXT: br label [[TMP24]] +; CWRLD: 12: +; CWRLD-NEXT: [[TMP13:%.*]] = icmp eq ptr [[FP]], @retFloatTakeFloat +; CWRLD-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP18:%.*]] +; CWRLD: 14: +; CWRLD-NEXT: [[TMP15:%.*]] = bitcast i32 42 to float +; CWRLD-NEXT: [[TMP16:%.*]] = call float @retFloatTakeFloat(float [[TMP15]]) +; CWRLD-NEXT: [[TMP17:%.*]] = bitcast float [[TMP16]] to i32 +; CWRLD-NEXT: br label [[TMP24]] +; CWRLD: 18: +; CWRLD-NEXT: 
[[TMP19:%.*]] = icmp eq ptr [[FP]], @retFloatTakeFloatFloatNoundef +; CWRLD-NEXT: br i1 [[TMP19]], label [[TMP20:%.*]], label [[TMP21:%.*]] +; CWRLD: 20: +; CWRLD-NEXT: [[CALL5:%.*]] = call i32 @retFloatTakeFloatFloatNoundef(i32 42) +; CWRLD-NEXT: br label [[TMP24]] +; CWRLD: 21: +; CWRLD-NEXT: br i1 true, label [[TMP22:%.*]], label [[TMP23:%.*]] +; CWRLD: 22: +; CWRLD-NEXT: [[CALL6:%.*]] = call i32 @void(i32 42) +; CWRLD-NEXT: br label [[TMP24]] +; CWRLD: 23: +; CWRLD-NEXT: unreachable +; CWRLD: 24: +; CWRLD-NEXT: [[CALL_PHI:%.*]] = phi i32 [ [[CALL1]], [[TMP2]] ], [ [[CALL2]], [[TMP5]] ], [ [[CALL3]], [[TMP8]] ], [ [[CALL4]], [[TMP11]] ], [ [[TMP17]], [[TMP14]] ], [ [[CALL5]], [[TMP20]] ], [ [[CALL6]], [[TMP22]] ] +; CWRLD-NEXT: ret i32 [[CALL_PHI]] ; %fp = select i1 %c, ptr @retI32, ptr %fn %call = call i32 %fp(i32 42) @@ -312,9 +385,59 @@ } define void @broker(ptr %unknown) !callback !0 { -; CHECK-LABEL: @broker( -; CHECK-NEXT: call void [[UNKNOWN:%.*]]() -; CHECK-NEXT: ret void +; LIMI2-LABEL: @broker( +; LIMI2-NEXT: call void [[UNKNOWN:%.*]]() +; LIMI2-NEXT: ret void +; +; LIMI0-LABEL: @broker( +; LIMI0-NEXT: call void [[UNKNOWN:%.*]]() +; LIMI0-NEXT: ret void +; +; CWRLD-LABEL: @broker( +; CWRLD-NEXT: [[TMP1:%.*]] = icmp eq ptr [[UNKNOWN:%.*]], @func3 +; CWRLD-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CWRLD: 2: +; CWRLD-NEXT: call void @func3() +; CWRLD-NEXT: br label [[TMP21:%.*]] +; CWRLD: 3: +; CWRLD-NEXT: [[TMP4:%.*]] = icmp eq ptr [[UNKNOWN]], @func4 +; CWRLD-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]] +; CWRLD: 5: +; CWRLD-NEXT: call void @func4() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 6: +; CWRLD-NEXT: [[TMP7:%.*]] = icmp eq ptr [[UNKNOWN]], @retI32 +; CWRLD-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]] +; CWRLD: 8: +; CWRLD-NEXT: call void @retI32() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 9: +; CWRLD-NEXT: [[TMP10:%.*]] = icmp eq ptr [[UNKNOWN]], @takeI32 +; CWRLD-NEXT: br i1 [[TMP10]], 
label [[TMP11:%.*]], label [[TMP12:%.*]] +; CWRLD: 11: +; CWRLD-NEXT: call void @takeI32() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 12: +; CWRLD-NEXT: [[TMP13:%.*]] = icmp eq ptr [[UNKNOWN]], @retFloatTakeFloat +; CWRLD-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP15:%.*]] +; CWRLD: 14: +; CWRLD-NEXT: call void @retFloatTakeFloat() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 15: +; CWRLD-NEXT: [[TMP16:%.*]] = icmp eq ptr [[UNKNOWN]], @retFloatTakeFloatFloatNoundef +; CWRLD-NEXT: br i1 [[TMP16]], label [[TMP17:%.*]], label [[TMP18:%.*]] +; CWRLD: 17: +; CWRLD-NEXT: call void @retFloatTakeFloatFloatNoundef() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 18: +; CWRLD-NEXT: br i1 true, label [[TMP19:%.*]], label [[TMP20:%.*]] +; CWRLD: 19: +; CWRLD-NEXT: call void @void() +; CWRLD-NEXT: br label [[TMP21]] +; CWRLD: 20: +; CWRLD-NEXT: unreachable +; CWRLD: 21: +; CWRLD-NEXT: ret void ; call void %unknown() ret void