Index: llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp @@ -173,14 +173,7 @@ const AMDGPUFunctionArgInfo & AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const { auto I = ArgInfoMap.find(&F); - if (I == ArgInfoMap.end()) { - if (AMDGPUTargetMachine::EnableFixedFunctionABI) - return FixedABIFunctionInfo; - - // Without the fixed ABI, we assume no function has special inputs. - assert(F.isDeclaration()); - return ExternFunctionInfo; - } - + if (I == ArgInfoMap.end()) + return FixedABIFunctionInfo; return I->second; } Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -717,8 +717,7 @@ if (!IsEntryFunc) { // For the fixed ABI, pass workitem IDs in the last argument register. - if (AMDGPUTargetMachine::EnableFixedFunctionABI) - TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info); + TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info); } IncomingValueAssigner Assigner(AssignFn); @@ -731,11 +730,6 @@ uint64_t StackOffset = Assigner.StackOffset; - if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) { - // Special inputs come after user arguments. - TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info); - } - // Start adding system SGPRs. if (IsEntryFunc) { TLI.allocateSystemSGPRs(CCInfo, MF, *Info, CC, IsGraphics); @@ -1236,8 +1230,7 @@ // after the ordinary user argument registers. SmallVector, 12> ImplicitArgRegs; - if (AMDGPUTargetMachine::EnableFixedFunctionABI && - Info.CallConv != CallingConv::AMDGPU_Gfx) { + if (Info.CallConv != CallingConv::AMDGPU_Gfx) { // With a fixed ABI, allocate fixed registers before user arguments. if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info)) return false; @@ -1303,12 +1296,6 @@ const SITargetLowering &TLI = *getTLI(); const DataLayout &DL = F.getParent()->getDataLayout(); - if (!AMDGPUTargetMachine::EnableFixedFunctionABI && - Info.CallConv != CallingConv::AMDGPU_Gfx) { - LLVM_DEBUG(dbgs() << "Variable function ABI not implemented\n"); - return false; - } - SmallVector OutArgs; for (auto &OrigArg : Info.OrigArgs) splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv); @@ -1362,8 +1349,7 @@ // after the ordinary user argument registers. SmallVector, 12> ImplicitArgRegs; - if (AMDGPUTargetMachine::EnableFixedFunctionABI && - Info.CallConv != CallingConv::AMDGPU_Gfx) { + if (Info.CallConv != CallingConv::AMDGPU_Gfx) { // With a fixed ABI, allocate fixed registers before user arguments. if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info)) return false; Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -37,7 +37,6 @@ public: static bool EnableLateStructurizeCFG; static bool EnableFunctionCalls; - static bool EnableFixedFunctionABI; static bool EnableLowerModuleLDS; AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -231,13 +231,6 @@ cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden); -static cl::opt EnableAMDGPUFixedFunctionABIOpt( - "amdgpu-fixed-function-abi", - cl::desc("Enable all implicit function arguments"), - cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), - cl::init(true), - cl::Hidden); - // Enable lib calls simplifications static cl::opt EnableLibCallSimplify( "amdgpu-simplify-libcall", @@ -505,7 +498,6 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false; bool AMDGPUTargetMachine::EnableFunctionCalls = false; -bool AMDGPUTargetMachine::EnableFixedFunctionABI = false; bool AMDGPUTargetMachine::EnableLowerModuleLDS = true; AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2063,33 +2063,30 @@ SIMachineFunctionInfo &Info) const { auto &ArgInfo = Info.getArgInfo(); - // We need to allocate these in place regardless of their use. - const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI; - // TODO: Unify handling with private memory pointers. - if (IsFixed || Info.hasDispatchPtr()) + if (Info.hasDispatchPtr()) allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr); - if (IsFixed || Info.hasQueuePtr()) + if (Info.hasQueuePtr()) allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr); // Implicit arg ptr takes the place of the kernarg segment pointer. This is a // constant offset from the kernarg segment. - if (IsFixed || Info.hasImplicitArgPtr()) + if (Info.hasImplicitArgPtr()) allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr); - if (IsFixed || Info.hasDispatchID()) + if (Info.hasDispatchID()) allocateSGPR64Input(CCInfo, ArgInfo.DispatchID); // flat_scratch_init is not applicable for non-kernel functions. - if (IsFixed || Info.hasWorkGroupIDX()) + if (Info.hasWorkGroupIDX()) allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX); - if (IsFixed || Info.hasWorkGroupIDY()) + if (Info.hasWorkGroupIDY()) allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY); - if (IsFixed || Info.hasWorkGroupIDZ()) + if (Info.hasWorkGroupIDZ()) allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ); } @@ -2422,8 +2419,7 @@ allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info); } else { // For the fixed ABI, pass workitem IDs in the last argument register. - if (AMDGPUTargetMachine::EnableFixedFunctionABI) - allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info); + allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info); } if (IsKernel) { @@ -2550,11 +2546,6 @@ InVals.push_back(Val); } - if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) { - // Special inputs come after user arguments. - allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info); - } - // Start adding system SGPRs. if (IsEntryFunc) { allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics); @@ -3124,8 +3115,7 @@ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg); - if (AMDGPUTargetMachine::EnableFixedFunctionABI && - CallConv != CallingConv::AMDGPU_Gfx) { + if (CallConv != CallingConv::AMDGPU_Gfx) { // With a fixed ABI, allocate fixed registers before user arguments. passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain); } @@ -3264,12 +3254,6 @@ } } - if (!AMDGPUTargetMachine::EnableFixedFunctionABI && - CallConv != CallingConv::AMDGPU_Gfx) { - // Copy special input registers after user input arguments. - passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain); - } - if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -62,11 +62,6 @@ // calls. const bool HasCalls = F.hasFnAttribute("amdgpu-calls"); - // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't - // have any calls. - const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI && - CC != CallingConv::AMDGPU_Gfx && - (!isEntryFunction() || HasCalls); const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL; @@ -80,7 +75,7 @@ } if (!isEntryFunction()) { - if (UseFixedABI) + if (CC != CallingConv::AMDGPU_Gfx) ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo; // TODO: Pick a high register, and shift down, similar to a kernel. Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -33,8 +33,8 @@ ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %16(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %14(s32), %bb.2 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %9(s64), %bb.2, [[C1]](s64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s ; Test that we don't insert code to pass implicit arguments we know ; the callee does not need. Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s -; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX900 %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope -check-prefix=GFX908 %s ; Workitem IDs are passed to the kernel differently for gfx908 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare i1 @external_i1_func_void() #0 declare zeroext i1 @external_i1_zeroext_func_void() #0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s declare hidden void @external_void_func_void() #0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -41,8 +41,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %10:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -57,8 +57,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -115,8 +115,8 @@ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %14:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %14(<2 x s32>) + ; CHECK-NEXT: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] @@ -155,8 +155,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -171,8 +171,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -187,8 +187,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -203,8 +203,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %10:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %10(s32) + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") @@ -220,8 +220,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %11:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY %11(s32) + ; CHECK-NEXT: %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY %4(s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -amdgpu-fixed-function-abi=1 -stop-after=irtranslator -o - %s | FileCheck --check-prefix=FIXED %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -amdgpu-fixed-function-abi=0 -stop-after=irtranslator -o - %s | FileCheck --check-prefix=VARABI %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=irtranslator -o - %s | FileCheck --check-prefix=FIXED %s ; Make sure arg1 is not allocated in v31, which is reserved for ; workitem IDs with -amdgpu-fixed-function-abi. @@ -47,46 +46,6 @@ ; FIXED: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; FIXED: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] ; FIXED: S_SETPC_B64_return [[COPY32]] - ; VARABI-LABEL: name: void_a31i32_i32 - ; VARABI: bb.1 (%ir-block.0): - ; VARABI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr30_sgpr31 - ; VARABI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VARABI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VARABI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VARABI: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; VARABI: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; VARABI: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; VARABI: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; VARABI: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; VARABI: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; VARABI: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; VARABI: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; VARABI: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; VARABI: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; VARABI: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; VARABI: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; VARABI: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; VARABI: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; VARABI: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; VARABI: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; VARABI: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; VARABI: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; VARABI: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; VARABI: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; VARABI: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; VARABI: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; VARABI: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; VARABI: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; VARABI: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; VARABI: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; VARABI: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; VARABI: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 - ; VARABI: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 - ; VARABI: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; VARABI: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; VARABI: G_STORE [[COPY31]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; VARABI: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; VARABI: S_SETPC_B64_return [[COPY33]] store i32 %arg1, i32 addrspace(1)* undef ret void } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -54,9 +54,9 @@ ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %8, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %9, !0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %2, !0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -90,8 +90,8 @@ ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 @@ -106,8 +106,8 @@ ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 @@ -123,9 +123,9 @@ ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %8, 1835018 /* regdef:VGPR_32 */, def %9 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835018 /* regdef:VGPR_32 */, def %2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] @@ -217,8 +217,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 1835017 /* reguse:VGPR_32 */, [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 1835017 /* reguse:VGPR_32 */, [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 @@ -234,8 +234,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %9, 196622 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %9 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 196622 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 @@ -253,8 +253,8 @@ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %11 + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 @@ -269,14 +269,14 @@ ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %10 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %10 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %12, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %12 + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %5 ; CHECK-NEXT: $vgpr0 = COPY [[COPY5]](s32) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0 @@ -300,10 +300,10 @@ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %11, 1835018 /* regdef:VGPR_32 */, def %12, 1835018 /* regdef:VGPR_32 */, def %13, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %11 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %12 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %13 + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %6 ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) @@ -325,11 +325,11 @@ ; CHECK-NEXT: liveins: $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %8 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %8 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %10, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %10 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; This is a copy of sibling-call.ll, but stops after the IRTranslator. define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s declare hidden void @external_void_func_void() Index: llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=0 < %s | FileCheck -check-prefix=VARABI %s -; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s -; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s +; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s +; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s ; Test with gfx803 so that ; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require @@ -15,31 +14,6 @@ ; does not require the implicit arguments to the function. Make sure ; we do not crash. define void @parent_func_missing_inputs() #0 { -; VARABI-LABEL: parent_func_missing_inputs: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: s_or_saveexec_b64 s[4:5], -1 -; VARABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; VARABI-NEXT: s_mov_b64 exec, s[4:5] -; VARABI-NEXT: v_writelane_b32 v40, s33, 2 -; VARABI-NEXT: s_mov_b32 s33, s32 -; VARABI-NEXT: s_addk_i32 s32, 0x400 -; VARABI-NEXT: v_writelane_b32 v40, s30, 0 -; VARABI-NEXT: v_writelane_b32 v40, s31, 1 -; VARABI-NEXT: s_getpc_b64 s[4:5] -; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4 -; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12 -; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VARABI-NEXT: v_readlane_b32 s4, v40, 0 -; VARABI-NEXT: v_readlane_b32 s5, v40, 1 -; VARABI-NEXT: s_addk_i32 s32, 0xfc00 -; VARABI-NEXT: v_readlane_b32 s33, v40, 2 -; VARABI-NEXT: s_or_saveexec_b64 s[6:7], -1 -; VARABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; VARABI-NEXT: s_mov_b64 exec, s[6:7] -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[4:5] -; ; FIXEDABI-LABEL: parent_func_missing_inputs: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -69,20 +43,6 @@ } define amdgpu_kernel void @parent_kernel_missing_inputs() #0 { -; VARABI-LABEL: parent_kernel_missing_inputs: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_add_i32 s4, s4, s9 -; VARABI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 -; VARABI-NEXT: s_add_u32 s0, s0, s9 -; VARABI-NEXT: s_addc_u32 s1, s1, 0 -; VARABI-NEXT: s_mov_b32 s32, 0 -; VARABI-NEXT: s_mov_b32 flat_scratch_lo, s5 -; VARABI-NEXT: s_getpc_b64 s[4:5] -; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4 -; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12 -; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VARABI-NEXT: s_endpgm -; ; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs: ; FIXEDABI-SDAG: ; %bb.0: ; FIXEDABI-SDAG-NEXT: s_add_i32 s4, s4, s9 @@ -132,20 +92,6 @@ ; Function is marked with amdgpu-no-workitem-id-* but uses them anyway define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 { -; VARABI-LABEL: marked_func_use_workitem_id: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: v_and_b32_e32 v3, 0x3ff, v2 -; VARABI-NEXT: flat_store_dword v[0:1], v3 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: v_bfe_u32 v3, v2, 10, 10 -; VARABI-NEXT: v_bfe_u32 v2, v2, 20, 10 -; VARABI-NEXT: flat_store_dword v[0:1], v3 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[30:31] -; ; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id: ; FIXEDABI-SDAG: ; %bb.0: ; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -184,20 +130,6 @@ ; Function is marked with amdgpu-no-workitem-id-* but uses them anyway define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr) #0 { -; VARABI-LABEL: marked_kernel_use_workitem_id: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; VARABI-NEXT: s_waitcnt lgkmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v4, s1 -; VARABI-NEXT: v_mov_b32_e32 v3, s0 -; VARABI-NEXT: flat_store_dword v[3:4], v0 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: flat_store_dword v[3:4], v1 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: flat_store_dword v[3:4], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_endpgm -; ; FIXEDABI-LABEL: marked_kernel_use_workitem_id: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -221,20 +153,6 @@ } define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 { -; VARABI-LABEL: marked_func_use_workgroup_id: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v2, s4 -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v2, s5 -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v2, s6 -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[30:31] -; ; FIXEDABI-LABEL: marked_func_use_workgroup_id: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -258,23 +176,6 @@ } define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr) #0 { -; VARABI-LABEL: marked_kernel_use_workgroup_id: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; VARABI-NEXT: v_mov_b32_e32 v2, s6 -; VARABI-NEXT: s_waitcnt lgkmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v0, s0 -; VARABI-NEXT: v_mov_b32_e32 v1, s1 -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v2, s7 -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v2, s8 -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_endpgm -; ; FIXEDABI-LABEL: marked_kernel_use_workgroup_id: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -301,17 +202,6 @@ } define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 { -; VARABI-LABEL: marked_func_use_other_sgpr: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[30:31] -; ; FIXEDABI-LABEL: marked_func_use_other_sgpr: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -344,19 +234,6 @@ } define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr) #0 { -; VARABI-LABEL: marked_kernel_use_other_sgpr: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_add_u32 s0, s4, 8 -; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc -; VARABI-NEXT: s_addc_u32 s1, s5, 0 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v0, s0 -; VARABI-NEXT: v_mov_b32_e32 v1, s1 -; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc -; VARABI-NEXT: s_endpgm -; ; FIXEDABI-LABEL: marked_kernel_use_other_sgpr: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_add_u32 s0, s4, 8 @@ -381,13 +258,6 @@ } define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 { -; VARABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr: -; VARABI: ; %bb.0: -; VARABI-NEXT: v_mov_b32_e32 v0, 0 -; VARABI-NEXT: v_mov_b32_e32 v1, 0 -; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc -; VARABI-NEXT: s_endpgm -; ; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: v_mov_b32_e32 v0, 0 @@ -401,23 +271,6 @@ ; On gfx8, the queue ptr is required for this addrspacecast. define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) #0 { -; VARABI-LABEL: addrspacecast_requires_queue_ptr: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 -; VARABI-NEXT: v_mov_b32_e32 v3, 0 -; VARABI-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc -; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 -; VARABI-NEXT: v_mov_b32_e32 v4, 1 -; VARABI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VARABI-NEXT: v_mov_b32_e32 v1, v3 -; VARABI-NEXT: flat_store_dword v[2:3], v4 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: v_mov_b32_e32 v2, 2 -; VARABI-NEXT: flat_store_dword v[0:1], v2 -; VARABI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[30:31] -; ; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr: ; FIXEDABI-SDAG: ; %bb.0: ; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -469,14 +322,6 @@ } define void @is_shared_requires_queue_ptr(i8* %ptr) #0 { -; VARABI-LABEL: is_shared_requires_queue_ptr: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] -; VARABI-NEXT: flat_store_dword v[0:1], v0 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[30:31] -; ; FIXEDABI-LABEL: is_shared_requires_queue_ptr: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -494,14 +339,6 @@ } define void @is_private_requires_queue_ptr(i8* %ptr) #0 { -; VARABI-LABEL: is_private_requires_queue_ptr: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] -; VARABI-NEXT: flat_store_dword v[0:1], v0 -; VARABI-NEXT: s_waitcnt vmcnt(0) -; VARABI-NEXT: s_setpc_b64 s[30:31] -; ; FIXEDABI-LABEL: is_private_requires_queue_ptr: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -519,12 +356,6 @@ } define void @trap_requires_queue() #0 { -; VARABI-LABEL: trap_requires_queue: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: s_mov_b64 s[0:1], 0 -; VARABI-NEXT: s_trap 2 -; ; FIXEDABI-LABEL: trap_requires_queue: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -535,11 +366,6 @@ } define void @debugtrap_requires_queue() #0 { -; VARABI-LABEL: debugtrap_requires_queue: -; VARABI: ; %bb.0: -; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VARABI-NEXT: s_trap 3 -; ; FIXEDABI-LABEL: debugtrap_requires_queue: ; FIXEDABI: ; %bb.0: ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) Index: llvm/test/CodeGen/AMDGPU/amdpal-callable.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -2,9 +2,6 @@ ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s ; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s -; Make sure this interacts well with -amdgpu-fixed-function-abi -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgpu-fixed-function-abi -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s - declare float @extern_func(float) #0 declare float @extern_func_many_args(<64 x float>) #0 Index: llvm/test/CodeGen/AMDGPU/call-constant.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-constant.ll +++ llvm/test/CodeGen/AMDGPU/call-constant.ll @@ -1,5 +1,5 @@ -; RUN: llc -global-isel=0 -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s -; RUN: llc -global-isel=1 -amdgpu-fixed-function-abi=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=GCN,GISEL %s ; GCN-LABEL: {{^}}test_call_undef: ; GCN: s_endpgm Index: llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -1,5 +1,5 @@ -; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}use_dispatch_ptr: ; GCN: s_load_dword s{{[0-9]+}}, s[4:5] @@ -10,6 +10,16 @@ ret void } +; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr: +; GCN-NOT: s[4:5] +; GCN-NOT: s4 +; GCN-NOT: s5 +; GCN: .amdhsa_user_sgpr_dispatch_ptr 1 +define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 { + call void @use_dispatch_ptr() + ret void +} + ; GCN-LABEL: {{^}}use_queue_ptr: ; GCN: s_load_dword s{{[0-9]+}}, s[6:7] define hidden void @use_queue_ptr() #1 { @@ -19,6 +29,39 @@ ret void } +; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr: +; GCN: s_mov_b64 s[6:7], s[4:5] +; GCN: .amdhsa_user_sgpr_queue_ptr 1 +define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 { + call void @use_queue_ptr() + ret void +} + +; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast: +; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[6:7], 0x10 +; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]] +; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]] +; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}} +; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}} +define hidden void @use_queue_ptr_addrspacecast() #1 { + %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32* + store volatile i32 0, i32* %asc + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast: +; CIVI: s_mov_b64 s[6:7], s[4:5] +; CIVI: .amdhsa_user_sgpr_queue_ptr 1 + +; GFX9-NOT: s_mov_b64 s[6:7] +; GFX9: .amdhsa_user_sgpr_queue_ptr 0 +define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 { + call void @use_queue_ptr_addrspacecast() + ret void +} + +; Not really supported in callable functions. ; GCN-LABEL: {{^}}use_kernarg_segment_ptr: ; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0 ; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0 @@ -38,6 +81,13 @@ ret void } +; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr: +; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1 +define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 { + call void @use_kernarg_segment_ptr() + ret void +} + ; GCN-LABEL: {{^}}use_dispatch_id: ; GCN: ; use s[10:11] define hidden void @use_dispatch_id() #1 { @@ -45,6 +95,18 @@ call void asm sideeffect "; use $0", "s"(i64 %id) ret void } + +; No kernarg segment so that there is a mov to check. With kernarg +; pointer enabled, it happens to end up in the right place anyway. + +; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id: +; GCN: s_mov_b64 s[10:11], s[4:5] +; GCN: .amdhsa_user_sgpr_dispatch_id 1 +define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 { + call void @use_dispatch_id() + ret void +} + ; GCN-LABEL: {{^}}use_workgroup_id_x: ; GCN: s_waitcnt ; GCN: ; use s12 @@ -133,6 +195,123 @@ ret void } +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x: +; GCN-NOT: s6 +; GCN: s_mov_b32 s12, s6 +; GCN: s_mov_b32 s32, 0 +; GCN: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, use_workgroup_id_x@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, use_workgroup_id_x@rel32@hi+12 +; GCN: s_swappc_b64 +; GCN-NEXT: s_endpgm + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 0 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 0 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 { + call void @use_workgroup_id_x() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y: +; GCN-NOT: s12 +; GCN: s_mov_b32 s13, s7 +; GCN-NOT: s12 +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 0 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { + call void @use_workgroup_id_y() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z: +; GCN-NOT: s12 +; GCN-NOT: s13 +; GCN: s_mov_b32 s14, s7 +; GCN-NOT: s12 +; GCN-NOT: s13 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 0 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 1 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { + call void @use_workgroup_id_z() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy: +; GCN-NOT: s14 +; GCN: s_mov_b32 s12, s6 +; GCN-NEXT: s_mov_b32 s13, s7 +; GCN-NOT: s14 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 0 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 { + call void @use_workgroup_id_xy() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz: +; GCN: s_mov_b32 s12, s6 +; GCN: s_mov_b32 s13, s7 +; GCN: s_mov_b32 s14, s8 +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 1 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 { + call void @use_workgroup_id_xyz() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz: + +; GCN-NOT: s13 +; GCN: s_mov_b32 s12, s6 +; GCN-NEXT: s_mov_b32 s14, s7 +; GCN-NOT: s13 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 0 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 1 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 { + call void @use_workgroup_id_xz() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz: + +; GCN: s_mov_b32 s13, s7 +; GCN: s_mov_b32 s14, s8 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 1 +define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 { + call void @use_workgroup_id_yz() + ret void +} + ; Argument is in right place already ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x: ; GCN-NOT: s12 @@ -197,6 +376,56 @@ ret void } +; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x: + +; GCN-NOT: s13 +; GCN-NOT: s14 +; GCN-DAG: s_mov_b32 s12, s6 +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-NOT: s13 +; GCN-NOT: s14 + +; GCN-DAG: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 0 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 0 +define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 { + call void @other_arg_use_workgroup_id_x(i32 555) + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y: +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: s_mov_b32 s13, s7 + +; GCN-DAG: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 0 +define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 { + call void @other_arg_use_workgroup_id_y(i32 555) + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z: +; GCN-DAG: v_mov_b32_e32 v0, 0x22b +; GCN-DAG: s_mov_b32 s14, s7 + +; GCN: s_mov_b32 s32, 0 +; GCN: s_swappc_b64 + +; GCN: .amdhsa_system_sgpr_workgroup_id_x 1 +; GCN: .amdhsa_system_sgpr_workgroup_id_y 0 +; GCN: .amdhsa_system_sgpr_workgroup_id_z 1 +define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 { + call void @other_arg_use_workgroup_id_z(i32 555) + ret void +} + ; GCN-LABEL: {{^}}use_every_sgpr_input: ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}} ; GCN: s_load_dword s{{[0-9]+}}, s[4:5] Index: llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ /dev/null @@ -1,616 +0,0 @@ -; RUN: llc -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s -; RUN: llc -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s - -; GCN-LABEL: {{^}}use_dispatch_ptr: -; GCN: s_load_dword s{{[0-9]+}}, s[4:5] -define hidden void @use_dispatch_ptr() #1 { - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %header_ptr - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr: -; GCN: enable_sgpr_dispatch_ptr = 1 -; GCN-NOT: s[4:5] -; GCN-NOT: s4 -; GCN-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 { - call void @use_dispatch_ptr() - ret void -} - -; GCN-LABEL: {{^}}use_queue_ptr: -; GCN: s_load_dword s{{[0-9]+}}, s[4:5] -define hidden void @use_queue_ptr() #1 { - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %header_ptr - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr: -; GCN: enable_sgpr_queue_ptr = 1 -; GCN-NOT: s[4:5] -; GCN-NOT: s4 -; GCN-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 { - call void @use_queue_ptr() - ret void -} - -; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast: -; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x10 -; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]] -; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16 -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]] -; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}} -; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}} -define hidden void @use_queue_ptr_addrspacecast() #1 { - %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32* - store volatile i32 0, i32* %asc - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast: -; CIVI: enable_sgpr_queue_ptr = 1 -; CIVI-NOT: s[4:5] -; CIVI-NOT: s4 -; CIVI-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 { - call void @use_queue_ptr_addrspacecast() - ret void -} - -; Not really supported in callable functions. -; GCN-LABEL: {{^}}use_kernarg_segment_ptr: -; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0{{$}} -; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0{{$}} -define hidden void @use_kernarg_segment_ptr() #1 { - %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %header_ptr - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr: -; GCN: enable_sgpr_kernarg_segment_ptr = 1 -define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 { - call void @use_kernarg_segment_ptr() - ret void -} - -; GCN-LABEL: {{^}}use_dispatch_id: -; GCN: ; use s[4:5] -define hidden void @use_dispatch_id() #1 { - %id = call i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %id) - ret void -} - -; No kernarg segment so that there is a mov to check. With kernarg -; pointer enabled, it happens to end up in the right place anyway. - -; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id: -; GCN: enable_sgpr_dispatch_id = 1 -; GCN-NOT: s[4:5] -; GCN-NOT: s4 -; GCN-NOT: s5 -define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 { - call void @use_dispatch_id() - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_x: -; GCN: s_waitcnt -; GCN: ; use s4 -define hidden void @use_workgroup_id_x() #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_stack_workgroup_id_x: -; GCN: s_waitcnt -; GCN-NOT: s32 -; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}} -; GCN: ; use s4 -; GCN: s_setpc_b64 -define hidden void @use_stack_workgroup_id_x() #1 { - %alloca = alloca i32, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca - %val = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_y: -; GCN: s_waitcnt -; GCN: ; use s4 -define hidden void @use_workgroup_id_y() #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_z: -; GCN: s_waitcnt -; GCN: ; use s4 -define hidden void @use_workgroup_id_z() #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_xy: -; GCN: ; use s4 -; GCN: ; use s5 -define hidden void @use_workgroup_id_xy() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.x() - %val1 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_xyz: -; GCN: ; use s4 -; GCN: ; use s5 -; GCN: ; use s6 -define hidden void @use_workgroup_id_xyz() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.x() - %val1 = call i32 @llvm.amdgcn.workgroup.id.y() - %val2 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - call void asm sideeffect "; use $0", "s"(i32 %val2) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_xz: -; GCN: ; use s4 -; GCN: ; use s5 -define hidden void @use_workgroup_id_xz() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.x() - %val1 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - ret void -} - -; GCN-LABEL: {{^}}use_workgroup_id_yz: -; GCN: ; use s4 -; GCN: ; use s5 -define hidden void @use_workgroup_id_yz() #1 { - %val0 = call i32 @llvm.amdgcn.workgroup.id.y() - %val1 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val0) - call void asm sideeffect "; use $0", "s"(i32 %val1) - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN-NOT: s6 -; GCN: s_mov_b32 s4, s6 -; GCN: s_mov_b32 s32, 0 -; GCN: s_getpc_b64 s[6:7] -; GCN-NEXT: s_add_u32 s6, s6, use_workgroup_id_x@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s7, s7, use_workgroup_id_x@rel32@hi+12 -; GCN: s_swappc_b64 -; GCN-NEXT: s_endpgm -define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 { - call void @use_workgroup_id_x() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN: s_mov_b32 s4, s7 -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 { - call void @use_workgroup_id_y() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s4, s7 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 { - call void @use_workgroup_id_z() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN: s_mov_b32 s5, s7 -; GCN: s_mov_b32 s4, s6 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 { - call void @use_workgroup_id_xy() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s5, s7 -; GCN: s_mov_b32 s4, s6 -; GCN: s_mov_b32 s6, s8 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 { - call void @use_workgroup_id_xyz() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s5, s7 -; GCN: s_mov_b32 s4, s6 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 { - call void @use_workgroup_id_xz() - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN: s_mov_b32 s5, s8 -; GCN: s_mov_b32 s4, s7 - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 { - call void @use_workgroup_id_yz() - ret void -} - -; Argument is in right place already -; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 -define hidden void @func_indirect_use_workgroup_id_x() #1 { - call void @use_workgroup_id_x() - ret void -} - -; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 -define hidden void @func_indirect_use_workgroup_id_y() #1 { - call void @use_workgroup_id_y() - ret void -} - -; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 -define hidden void @func_indirect_use_workgroup_id_z() #1 { - call void @use_workgroup_id_z() - ret void -} - -; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x: -; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN: ; use s4 -define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %arg0, i32 addrspace(1)* undef - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y: -; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN: ; use s4 -define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %arg0, i32 addrspace(1)* undef - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z: -; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN: ; use s4 -define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 { - %val = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %arg0, i32 addrspace(1)* undef - call void asm sideeffect "; use $0", "s"(i32 %val) - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN-DAG: s_mov_b32 s4, s6 - -; GCN-DAG: s_mov_b32 s32, 0 -; GCN-NOT: s4 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 { - call void @other_arg_use_workgroup_id_x(i32 555) - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 0 - -; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN-DAG: s_mov_b32 s4, s7 - -; GCN-DAG: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 { - call void @other_arg_use_workgroup_id_y(i32 555) - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 0 -; GCN: enable_sgpr_workgroup_id_z = 1 - -; GCN-DAG: v_mov_b32_e32 v0, 0x22b - -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 { - call void @other_arg_use_workgroup_id_z(i32 555) - ret void -} - -; GCN-LABEL: {{^}}use_every_sgpr_input: -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}} -; GCN: s_load_dword s{{[0-9]+}}, s[4:5] -; GCN: s_load_dword s{{[0-9]+}}, s[6:7] -; GCN: s_load_dword s{{[0-9]+}}, s[8:9] - -; GCN: ; use s[10:11] -; GCN: ; use s12 -; GCN: ; use s13 -; GCN: ; use s14 -define hidden void @use_every_sgpr_input() #1 { - %alloca = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca - - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc - - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc - - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 - %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc - - %val3 = call i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %val3) - - %val4 = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val4) - - %val5 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val5) - - %val6 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val6) - - ret void -} - -; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 -; GCN: enable_sgpr_workgroup_info = 0 - -; GCN: enable_sgpr_private_segment_buffer = 1 -; GCN: enable_sgpr_dispatch_ptr = 1 -; GCN: enable_sgpr_queue_ptr = 1 -; GCN: enable_sgpr_kernarg_segment_ptr = 1 -; GCN: enable_sgpr_dispatch_id = 1 -; GCN: enable_sgpr_flat_scratch_init = 1 - -; GCN: s_mov_b32 s13, s15 -; GCN: s_mov_b32 s12, s14 -; GCN: s_mov_b32 s14, s16 -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 { - call void @use_every_sgpr_input() - ret void -} - -; We have to pass the kernarg segment, but there are no kernel -; arguments so null is passed. -; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input_no_kernargs: -; GCN: enable_sgpr_workgroup_id_x = 1 -; GCN: enable_sgpr_workgroup_id_y = 1 -; GCN: enable_sgpr_workgroup_id_z = 1 -; GCN: enable_sgpr_workgroup_info = 0 - -; GCN: enable_sgpr_private_segment_buffer = 1 -; GCN: enable_sgpr_dispatch_ptr = 1 -; GCN: enable_sgpr_queue_ptr = 1 -; GCN: enable_sgpr_kernarg_segment_ptr = 0 -; GCN: enable_sgpr_dispatch_id = 1 -; GCN: enable_sgpr_flat_scratch_init = 1 - -; GCN: s_mov_b64 s[10:11], s[8:9] -; GCN: s_mov_b64 s[8:9], 0{{$}} -; GCN: s_mov_b32 s32, 0 -; GCN: s_swappc_b64 -define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 { - call void @use_every_sgpr_input() - ret void -} - -; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input: -; GCN-NOT: s6 -; GCN-NOT: s7 -; GCN-NOT: s8 -; GCN-NOT: s9 -; GCN-NOT: s10 -; GCN-NOT: s11 -; GCN-NOT: s12 -; GCN-NOT: s13 -; GCN-NOT: s[6:7] -; GCN-NOT: s[8:9] -; GCN-NOT: s[10:11] -; GCN-NOT: s[12:13] -; GCN: s_or_saveexec_b64 s[16:17], -1 -define hidden void @func_indirect_use_every_sgpr_input() #1 { - call void @use_every_sgpr_input() - ret void -} - -; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz: -; GCN: s_mov_b32 s4, s12 -; GCN: s_mov_b32 s5, s13 -; GCN: s_mov_b32 s6, s14 -; GCN: ; use s[10:11] -; GCN: ; use s12 -; GCN: ; use s13 -; GCN: ; use s14 - -; GCN: s_swappc_b64 -define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 { - %alloca = alloca i32, align 4, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca - - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc - - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc - - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 - %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc - - %val3 = call i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %val3) - - %val4 = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val4) - - %val5 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val5) - - %val6 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val6) - - call void @use_workgroup_id_xyz() - ret void -} - -; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill: -; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: s_addk_i32 s32, 0x400 -; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5] -; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7] - -; GCN: s_mov_b32 s4, s12 -; GCN: s_mov_b32 s5, s13 -; GCN: s_mov_b32 s6, s14 - -; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-68-9][0-9]*]], s14 -; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-57-9][0-9]*]], s13 -; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s12 -; GCN: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[8:9] - -; GCN: s_swappc_b64 - -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}} -; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_X]]:[[HI_X]]{{\]}}, 0x0 -; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Y]]:[[HI_Y]]{{\]}}, 0x0 -; GCN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO_Z]]:[[HI_Z]]{{\]}}, 0x0 -; GCN: ; use -; GCN: ; use [[SAVE_X]] -; GCN: ; use [[SAVE_Y]] -; GCN: ; use [[SAVE_Z]] -define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 { - %alloca = alloca i32, align 4, addrspace(5) - call void @use_workgroup_id_xyz() - - store volatile i32 0, i32 addrspace(5)* %alloca - - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc - - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc - - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 - %implicitarg.ptr.bc = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val2 = load volatile i32, i32 addrspace(4)* %implicitarg.ptr.bc - - %val3 = call i64 @llvm.amdgcn.dispatch.id() - call void asm sideeffect "; use $0", "s"(i64 %val3) - - %val4 = call i32 @llvm.amdgcn.workgroup.id.x() - call void asm sideeffect "; use $0", "s"(i32 %val4) - - %val5 = call i32 @llvm.amdgcn.workgroup.id.y() - call void asm sideeffect "; use $0", "s"(i32 %val5) - - %val6 = call i32 @llvm.amdgcn.workgroup.id.z() - call void asm sideeffect "; use $0", "s"(i32 %val6) - - ret void -} - -declare i32 @llvm.amdgcn.workgroup.id.x() #0 -declare i32 @llvm.amdgcn.workgroup.id.y() #0 -declare i32 @llvm.amdgcn.workgroup.id.z() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 -declare i64 @llvm.amdgcn.dispatch.id() #0 -declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - -attributes #0 = { nounwind readnone speculatable } -attributes #1 = { nounwind noinline } -attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" } Index: llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,9 +1,7 @@ -; RUN: llc -amdgpu-fixed-function-abi=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VARABI %s -; RUN: llc -amdgpu-fixed-function-abi=1 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIXEDABI %s ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt -; VARABI: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v0 ; FIXEDABI: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]] ; GCN-NEXT: s_waitcnt @@ -16,7 +14,6 @@ ; GCN-LABEL: {{^}}use_workitem_id_y: ; GCN: s_waitcnt -; VARABI: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 ; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]] ; GCN-NEXT: s_waitcnt @@ -29,7 +26,6 @@ ; GCN-LABEL: {{^}}use_workitem_id_z: ; GCN: s_waitcnt -; VARABI: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 ; FIXEDABI: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10 ; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]] ; GCN-NEXT: s_waitcnt @@ -42,9 +38,6 @@ ; GCN-LABEL: {{^}}use_workitem_id_xy: ; GCN: s_waitcnt -; VARABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0 -; VARABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10 - ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10 @@ -63,10 +56,6 @@ ; GCN-LABEL: {{^}}use_workitem_id_xyz: ; GCN: s_waitcnt -; VARABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0 -; VARABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10 -; VARABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10 - ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31 ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10 @@ -89,9 +78,6 @@ ; GCN-LABEL: {{^}}use_workitem_id_xz: ; GCN: s_waitcnt -; VARABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0 -; VARABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10 - ; FIXEDABI-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10 @@ -109,9 +95,6 @@ ; GCN-LABEL: {{^}}use_workitem_id_yz: ; GCN: s_waitcnt -; VARABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10 -; VARABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10 - ; FIXEDABI-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10 ; FIXEDABI-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10 @@ -136,9 +119,7 @@ ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v31 -; VARABI-NOT: v31 ; GCN: s_swappc_b64 -; VARABI-NOT: v31 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 { call void @use_workitem_id_x() ret void @@ -147,9 +128,6 @@ ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y: ; GCN: enable_vgpr_workitem_id = 1 -; VARABI-NOT: v31 -; VARABI: v_lshlrev_b32_e32 v0, 10, v1 - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 @@ -158,8 +136,6 @@ ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 -; VARABI-NOT: v31 - ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 { call void @use_workitem_id_y() @@ -169,12 +145,6 @@ ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z: ; GCN: enable_vgpr_workitem_id = 2 -; VARABI-NOT: v0 -; VARABI-NOT: v2 -; VARABI: v_lshlrev_b32_e32 v0, 20, v2 -; VARABI-NOT: v0 -; VARABI-NOT: v1 - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2 @@ -188,13 +158,6 @@ } ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy: -; VARABI-NOT: v0 -; VARABI-NOT: v1 -; VARABI: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1 -; VARABI: v_or_b32_e32 v0, v0, [[IDY]] -; VARABI-NOT: v0 -; VARABI-NOT: v1 - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 @@ -211,14 +174,6 @@ } ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz: -; VARABI-NOT: v0 -; VARABI-NOT: v2 -; VARABI: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2 -; VARABI: v_or_b32_e32 v0, v0, [[IDZ]] -; VARABI-NOT: v0 -; VARABI-NOT: v2 - - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 @@ -235,14 +190,6 @@ } ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz: -; VARABI-NOT: v1 -; VARABI-NOT: v2 -; VARABI-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1 -; VARABI-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2 -; VARABI: v_or_b32_e32 v0, [[IDY]], [[IDZ]] -; VARABI-NOT: v1 -; VARABI-NOT: v2 - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 @@ -260,17 +207,6 @@ } ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz: -; VARABI-NOT: v0 -; VARABI-NOT: v1 -; VARABI-NOT: v2 -; VARABI-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1 -; VARABI-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2 -; VARABI-DAG: v_or_b32_e32 v0, v0, [[IDY]] -; VARABI-DAG: v_or_b32_e32 v0, v0, [[IDZ]] -; VARABI-NOT: v0 -; VARABI-NOT: v1 -; VARABI-NOT: v2 - ; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 ; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 ; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] @@ -311,7 +247,6 @@ ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x: ; GCN: s_waitcnt -; VARABI-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v1 ; FIXEDABI-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v31 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 @@ -325,7 +260,6 @@ ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y: ; GCN: s_waitcnt -; VARABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 10, 10 ; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 10, 10 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]] @@ -338,7 +272,6 @@ ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z: ; GCN: s_waitcnt -; VARABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 20, 10 ; FIXEDABI-DAG: v_bfe_u32 [[ID:v[0-9]+]], v31, 20, 10 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 ; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]] @@ -353,9 +286,6 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x: ; GCN: enable_vgpr_workitem_id = 0 -; VARABI: v_mov_b32_e32 v1, v0 -; VARABI: v_mov_b32_e32 v0, 0x22b - ; FIXEDABI-NOT: v0 ; FIXEDABI: v_mov_b32_e32 v31, v0 ; FIXEDABI: v_mov_b32_e32 v0, 0x22b @@ -370,13 +300,6 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y: ; GCN: enable_vgpr_workitem_id = 1 -; VARABI: v_lshlrev_b32_e32 v1, 10, v1 -; VARABI-NOT: v1 -; VARABI: v_mov_b32_e32 v0, 0x22b -; VARABI-NOT: v1 -; VARABI: s_swappc_b64 -; VARABI-NOT: v0 - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 @@ -390,11 +313,6 @@ ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z: ; GCN: enable_vgpr_workitem_id = 2 -; VARABI-DAG: v_mov_b32_e32 v0, 0x22b -; VARABI-DAG: v_lshlrev_b32_e32 v1, 20, v2 -; VARABI: s_swappc_b64 -; VARABI-NOT: v0 - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 @@ -406,11 +324,6 @@ } ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x: -; VARABI: buffer_load_dword v32, off, s[0:3], s32{{$}} -; VARABI: v_and_b32_e32 v32, 0x3ff, v32 -; VARABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32 -; VARABI: s_setpc_b64 - ; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31 ; FIXEDABI: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}} define void @too_many_args_use_workitem_id_x( @@ -463,11 +376,6 @@ ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x: ; GCN: enable_vgpr_workitem_id = 0 -; VARABI: s_mov_b32 s32, 0 -; VARABI: buffer_store_dword v0, off, s[0:3], s32{{$}} -; VARABI: s_swappc_b64 - - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 ; FIXEDABI-NOT: v2 @@ -491,8 +399,6 @@ } ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x: -; VARABI: s_mov_b32 s33, s32 -; VARABI: buffer_store_dword v1, off, s[0:3], s32{{$}} ; Touching the workitem id register is not necessary. ; FIXEDABI-NOT: v31 @@ -548,13 +454,6 @@ ; frame[2] = VGPR spill slot ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval: -; VARABI: buffer_load_dword v32, off, s[0:3], s32 offset:4 -; VARABI-NEXT: s_waitcnt -; VARABI-NEXT: v_and_b32_e32 v32, 0x3ff, v32 -; VARABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32 -; VARABI: buffer_load_dword v0, off, s[0:3], s32 glc{{$}} -; VARABI: s_setpc_b64 - ; FIXEDABI: v_and_b32_e32 v31, 0x3ff, v31 ; FIXEDABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31 @@ -616,17 +515,6 @@ ; sp[2] = stack passed workitem ID x ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval: -; VARABI: enable_vgpr_workitem_id = 0 -; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} -; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4 -; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4 -; VARABI: s_movk_i32 s32, 0x400{{$}} -; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4 - -; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} -; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]], -; VARABI: s_swappc_b64 - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 @@ -662,15 +550,6 @@ } ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval: -; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} -; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}} -; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}} -; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4 -; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} -; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]], -; VARABI: s_swappc_b64 - - ; FIXED-ABI-NOT: v31 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}} ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}} @@ -699,21 +578,6 @@ } ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz: -; VARABI-NOT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}} -; VARABI: buffer_load_dword v32, off, s[0:3], s32{{$}} -; VARABI-NOT: buffer_load_dword - -; VARABI: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v32 -; VARABI-NOT: buffer_load_dword -; VARABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]] -; VARABI-NOT: buffer_load_dword -; VARABI: v_bfe_u32 [[BFE_Y:v[0-9]+]], v32, 10, 10 -; VARABI-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v32, 20, 10 -; VARABI-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]] -; VARABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]] -; VARABI: s_setpc_b64 - - ; FIXEDABI: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v31 ; FIXEDABI-NOT: buffer_load_dword ; FIXEDABI: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]] @@ -783,8 +647,6 @@ ; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140 ; GCN-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 ; GCN-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; VARABI-DAG: v_or_b32_e32 [[PACKEDID:v[0-9]+]], [[TMP2]], [[TMP0]] -; VARABI: buffer_store_dword [[PACKEDID]], off, s[0:3], s32{{$}} ; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}} ; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] Index: llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll +++ llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll @@ -12,25 +12,25 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %13:vgpr_32, %14:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32, %16:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %15, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %15, 0, %17, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %22:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %21, 0, %17, 0, %17, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %23:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %13, 0, %22, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %24:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %15, 0, %23, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %25:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %24, 0, %22, 0, %23, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %26:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %15, 0, %25, 0, %13, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %14 - ; GCN-NEXT: %27:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %26, 0, %22, 0, %25, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %28:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %27, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vcc = COPY %7 + ; GCN-NEXT: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: $vgpr0 = COPY %28 + ; GCN-NEXT: $vgpr0 = COPY %21 ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 entry: @@ -46,25 +46,25 @@ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %13:vgpr_32, %14:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32, %16:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %15, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %15, 0, %17, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %22:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %21, 0, %17, 0, %17, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %23:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %13, 0, %22, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %24:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %15, 0, %23, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %25:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %24, 0, %22, 0, %23, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %26:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %15, 0, %25, 0, %13, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %14 - ; GCN-NEXT: %27:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %26, 0, %22, 0, %25, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %28:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %27, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vcc = COPY %7 + ; GCN-NEXT: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: $vgpr0 = COPY %28 + ; GCN-NEXT: $vgpr0 = COPY %21 ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 entry: Index: llvm/test/CodeGen/AMDGPU/indirect-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=GISEL %s @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant void()*, align 4 @gv.fptr1 = external hidden unnamed_addr addrspace(4) constant void(i32)*, align 4 Index: llvm/test/CodeGen/AMDGPU/returnaddress.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/returnaddress.ll +++ llvm/test/CodeGen/AMDGPU/returnaddress.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s -; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s ; Test with zero frame ; GCN-LABEL: {{^}}func1