Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -753,6 +753,11 @@
                                               CallLoweringInfo &Info) const {
   MachineFunction &MF = MIRBuilder.getMF();
 
+  // If there's no call site, this doesn't correspond to a call from the IR and
+  // doesn't need implicit inputs.
+  if (!Info.CB)
+    return true;
+
   const AMDGPUFunctionArgInfo *CalleeArgInfo
       = &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
@@ -773,17 +778,32 @@
     AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
   };
 
+  static constexpr StringLiteral ImplicitAttrNames[] = {
+    "amdgpu-no-dispatch-ptr",
+    "amdgpu-no-queue-ptr",
+    "amdgpu-no-implicitarg-ptr",
+    "amdgpu-no-dispatch-id",
+    "amdgpu-no-workgroup-id-x",
+    "amdgpu-no-workgroup-id-y",
+    "amdgpu-no-workgroup-id-z"
+  };
+
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const AMDGPULegalizerInfo *LI
       = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
 
+  unsigned I = 0;
   for (auto InputID : InputRegs) {
     const ArgDescriptor *OutgoingArg;
     const TargetRegisterClass *ArgRC;
     LLT ArgTy;
 
+    // If the callee does not use the attribute value, skip copying the value.
+    if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
+      continue;
+
     std::tie(OutgoingArg, ArgRC, ArgTy) =
         CalleeArgInfo->getPreloadedValue(InputID);
     if (!OutgoingArg)
@@ -843,16 +863,22 @@
   const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
 
   const LLT S32 = LLT::scalar(32);
 
+  const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+  const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+  const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
   // If incoming ids are not packed we need to pack them.
   // FIXME: Should consider known workgroup size to eliminate known 0 cases.
   Register InputReg;
-  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
+  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+      NeedWorkItemIDX) {
     InputReg = MRI.createGenericVirtualRegister(S32);
     LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
                        std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
   }
 
-  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+      NeedWorkItemIDY) {
     Register Y = MRI.createGenericVirtualRegister(S32);
     LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
                        std::get<2>(WorkitemIDY));
@@ -861,7 +887,8 @@
     InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
   }
 
-  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+      NeedWorkItemIDZ) {
     Register Z = MRI.createGenericVirtualRegister(S32);
     LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
                        std::get<2>(WorkitemIDZ));
@@ -870,7 +897,7 @@
     InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
   }
 
-  if (!InputReg) {
+  if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
     InputReg = MRI.createGenericVirtualRegister(S32);
 
     // Workitem ids are already packed, any of present incoming arguments will
@@ -883,7 +910,9 @@
   }
 
   if (OutgoingArg->isRegister()) {
-    ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+    if (InputReg)
+      ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+
     if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
       report_fatal_error("failed to allocate implicit input argument");
   } else {
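The GlobalISel path above indexes `ImplicitAttrNames` with a manually incremented `I` while iterating `InputRegs`, so the two tables must stay the same length and in the same order. A minimal sketch of that contract, using `std::string_view` and a plain enum as stand-ins for LLVM's `StringLiteral` and `AMDGPUFunctionArgInfo::PreloadedValue`:

```cpp
#include <iterator>
#include <string_view>

// Stand-ins for the LLVM types used in the patch.
enum PreloadedValue {
  DISPATCH_PTR, QUEUE_PTR, IMPLICIT_ARG_PTR, DISPATCH_ID,
  WORKGROUP_ID_X, WORKGROUP_ID_Y, WORKGROUP_ID_Z
};

constexpr PreloadedValue InputRegs[] = {
    DISPATCH_PTR,   QUEUE_PTR,      IMPLICIT_ARG_PTR, DISPATCH_ID,
    WORKGROUP_ID_X, WORKGROUP_ID_Y, WORKGROUP_ID_Z};

// One attribute name per entry of InputRegs, in the same order.
constexpr std::string_view ImplicitAttrNames[] = {
    "amdgpu-no-dispatch-ptr",    "amdgpu-no-queue-ptr",
    "amdgpu-no-implicitarg-ptr", "amdgpu-no-dispatch-id",
    "amdgpu-no-workgroup-id-x",  "amdgpu-no-workgroup-id-y",
    "amdgpu-no-workgroup-id-z"};

// The loop reads ImplicitAttrNames[I++] while iterating InputRegs, so a
// size or order mismatch would silently test the wrong attribute.
static_assert(std::size(InputRegs) == std::size(ImplicitAttrNames),
              "attribute table must stay in sync with the input table");
```

The SelectionDAG version later in this patch avoids the parallel-array hazard by fusing the two tables into a single array of pairs.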
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2889,10 +2889,16 @@
   std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
   if (!Arg) {
-    assert(ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
-    // The intrinsic may appear when we have a 0 sized kernarg segment, in which
-    // case the pointer argument may be missing and we use null.
-    B.buildConstant(DstReg, 0);
+    if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
+      // The intrinsic may appear when we have a 0 sized kernarg segment, in
+      // which case the pointer argument may be missing and we use null.
+      B.buildConstant(DstReg, 0);
+      return true;
+    }
+
+    // It's undefined behavior if a function marked with the amdgpu-no-*
+    // attributes uses the corresponding intrinsic.
+    B.buildUndef(DstReg);
     return true;
   }
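Both lowering paths now share one fallback rule when `getPreloadedValue` comes back empty. A compact model of that decision, with illustrative stand-in enumerators rather than LLVM's types:

```cpp
// Illustrative stand-ins for AMDGPUFunctionArgInfo's argument kinds.
enum class ArgType { KernargSegmentPtr, QueuePtr, DispatchPtr /* ... */ };
enum class Fallback { Null, Undef };

// A zero-sized kernarg segment legitimately has no pointer, so it lowers
// to null. Any other missing input means a function marked amdgpu-no-*
// used the corresponding intrinsic anyway; that is undefined behavior,
// so undef is as good an answer as any.
constexpr Fallback missingArgFallback(ArgType T) {
  return T == ArgType::KernargSegmentPtr ? Fallback::Null : Fallback::Undef;
}

static_assert(missingArgFallback(ArgType::QueuePtr) == Fallback::Undef);
```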
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1819,11 +1819,16 @@
   std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
   if (!Reg) {
-    // It's possible for a kernarg intrinsic call to appear in a kernel with no
-    // allocated segment, in which case we do not add the user sgpr argument, so
-    // just return null.
-    assert(PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR);
-    return DAG.getConstant(0, SDLoc(), VT);
+    if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
+      // It's possible for a kernarg intrinsic call to appear in a kernel with
+      // no allocated segment, in which case we do not add the user sgpr
+      // argument, so just return null.
+      return DAG.getConstant(0, SDLoc(), VT);
+    }
+
+    // It's undefined behavior if a function marked with the amdgpu-no-*
+    // attributes uses the corresponding intrinsic.
+    return DAG.getUNDEF(VT);
   }
 
   return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
@@ -2041,31 +2046,33 @@
                                             SIMachineFunctionInfo &Info) const {
   auto &ArgInfo = Info.getArgInfo();
 
-  // TODO: Unify handling with private memory pointers.
+  // We need to allocate these in place regardless of their use.
+  const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;
 
-  if (Info.hasDispatchPtr())
+  // TODO: Unify handling with private memory pointers.
+  if (IsFixed || Info.hasDispatchPtr())
     allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
 
-  if (Info.hasQueuePtr())
+  if (IsFixed || Info.hasQueuePtr())
     allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
 
   // Implicit arg ptr takes the place of the kernarg segment pointer. This is a
   // constant offset from the kernarg segment.
-  if (Info.hasImplicitArgPtr())
+  if (IsFixed || Info.hasImplicitArgPtr())
     allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
 
-  if (Info.hasDispatchID())
+  if (IsFixed || Info.hasDispatchID())
     allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
 
   // flat_scratch_init is not applicable for non-kernel functions.
 
-  if (Info.hasWorkGroupIDX())
+  if (IsFixed || Info.hasWorkGroupIDX())
     allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
 
-  if (Info.hasWorkGroupIDY())
+  if (IsFixed || Info.hasWorkGroupIDY())
     allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
 
-  if (Info.hasWorkGroupIDZ())
+  if (IsFixed || Info.hasWorkGroupIDZ())
     allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
 }
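The `IsFixed ||` pattern above means every implicit input SGPR is allocated under the fixed function ABI even when the function never reads it, so the register layout is identical for every function. A toy model of that allocation rule, with hypothetical names rather than LLVM's:

```cpp
#include <vector>

// Toy model: with the fixed ABI, every implicit input consumes its
// registers whether or not the function needs it, so the slot layout
// never shifts between callers and callees.
struct SGPRAlloc {
  int NextReg = 4;            // first SGPR available for implicit inputs
  std::vector<int> Assigned;  // starting register of each allocated input
};

void allocateInput(SGPRAlloc &A, bool FunctionUsesIt, bool FixedABI,
                   int NumRegs) {
  if (!FixedABI && !FunctionUsesIt)
    return;                       // old behavior: skip unused inputs
  A.Assigned.push_back(A.NextReg);
  A.NextReg += NumRegs;           // fixed ABI: always consume the slot
}
```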
@@ -2765,21 +2772,28 @@
   // TODO: Unify with private memory register handling. This is complicated by
   // the fact that at least in kernels, the input argument is not necessarily
   // in the same location as the input.
-  AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
-    AMDGPUFunctionArgInfo::DISPATCH_PTR,
-    AMDGPUFunctionArgInfo::QUEUE_PTR,
-    AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
-    AMDGPUFunctionArgInfo::DISPATCH_ID,
-    AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
-    AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
-    AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+  static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
+                             StringLiteral> ImplicitAttrs[] = {
+    {AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
+    {AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr"},
+    {AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
+    {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
+    {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
+    {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
+    {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"}
   };
 
-  for (auto InputID : InputRegs) {
+  for (auto Attr : ImplicitAttrs) {
     const ArgDescriptor *OutgoingArg;
     const TargetRegisterClass *ArgRC;
     LLT ArgTy;
 
+    AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
+
+    // If the callee does not use the attribute value, skip copying the value.
+    if (CLI.CB->hasFnAttr(Attr.second))
+      continue;
+
     std::tie(OutgoingArg, ArgRC, ArgTy) =
         CalleeArgInfo->getPreloadedValue(InputID);
     if (!OutgoingArg)
@@ -2845,11 +2859,17 @@
   SDValue InputReg;
   SDLoc SL;
 
+  const bool NeedWorkItemIDX = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+  const bool NeedWorkItemIDY = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+  const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
   // If incoming ids are not packed we need to pack them.
-  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX)
+  if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+      NeedWorkItemIDX)
     InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
 
-  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+  if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+      NeedWorkItemIDY) {
     SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
     Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
                     DAG.getShiftAmountConstant(10, MVT::i32, SL));
@@ -2857,7 +2877,8 @@
       DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
   }
 
-  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+  if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+      NeedWorkItemIDZ) {
     SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
     Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
                     DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2865,7 +2886,7 @@
       DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
   }
 
-  if (!InputReg.getNode()) {
+  if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
     // Workitem ids are already packed, any of present incoming arguments
     // will carry all required fields.
     ArgDescriptor IncomingArg = ArgDescriptor::createArg(
@@ -2876,13 +2897,17 @@
   }
 
   if (OutgoingArg->isRegister()) {
-    RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+    if (InputReg)
+      RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+
     CCInfo.AllocateReg(OutgoingArg->getRegister());
   } else {
     unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
-    SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
-                                            SpecialArgOffset);
-    MemOpChains.push_back(ArgStore);
+    if (InputReg) {
+      SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+                                              SpecialArgOffset);
+      MemOpChains.push_back(ArgStore);
+    }
   }
 }
@@ -5291,9 +5316,18 @@
   MachineFunction &MF = DAG.getMachineFunction();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   Register UserSGPR = Info->getQueuePtrUserSGPR();
-  assert(UserSGPR != AMDGPU::NoRegister);
-  SDValue QueuePtr = CreateLiveInRegister(
-    DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+
+  SDValue QueuePtr;
+  if (UserSGPR == AMDGPU::NoRegister) {
+    // We probably are in a function incorrectly marked with
+    // amdgpu-no-queue-ptr. This is undefined. We don't want to delete the trap,
+    // so just use a null pointer.
+    QueuePtr = DAG.getConstant(0, SL, MVT::i64);
+  } else {
+    QueuePtr = CreateLiveInRegister(
+        DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+  }
+
   SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
   SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01, QueuePtr, SDValue());
@@ -5370,7 +5404,11 @@
   MachineFunction &MF = DAG.getMachineFunction();
   SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
   Register UserSGPR = Info->getQueuePtrUserSGPR();
-  assert(UserSGPR != AMDGPU::NoRegister);
+  if (UserSGPR == AMDGPU::NoRegister) {
+    // We probably are in a function incorrectly marked with
+    // amdgpu-no-queue-ptr. This is undefined.
+    return DAG.getUNDEF(MVT::i32);
+  }
 
   SDValue QueuePtr = CreateLiveInRegister(
     DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
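The shift amounts in the packing code above come from the packed workitem-ID layout: x, y, and z each occupy ten bits of a single VGPR. A small self-contained check of that layout:

```cpp
#include <cstdint>

// Workitem IDs travel to callees packed into one 32-bit VGPR:
// bits [9:0] = x, [19:10] = y, [29:20] = z, matching the
// getShiftAmountConstant(10/20) nodes built in the patch.
constexpr uint32_t packWorkItemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  return X | (Y << 10) | (Z << 20);
}

static_assert(packWorkItemIDs(1, 2, 3) == 0x00300801u);
```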
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll
@@ -9,47 +9,34 @@
 define amdgpu_kernel void @kernel_call_no_workitem_ids() {
   ; CHECK-LABEL: name: kernel_call_no_workitem_ids
   ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
-  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
-  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
-  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14
-  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13
-  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12
-  ; CHECK:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
-  ; CHECK:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
-  ; CHECK:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
   ; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $scc
   ; CHECK:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern
-  ; CHECK:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]]
-  ; CHECK:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK:   [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK:   [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]]
   ; CHECK:   [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
   ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
   ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64)
-  ; CHECK:   [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
-  ; CHECK:   [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
-  ; CHECK:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
-  ; CHECK:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]]
-  ; CHECK:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32)
-  ; CHECK:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-  ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
-  ; CHECK:   [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32)
-  ; CHECK:   [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]]
-  ; CHECK:   [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
-  ; CHECK:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-  ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32)
-  ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
-  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
-  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]]
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK:   [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK:   [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]]
+  ; CHECK:   [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>)
+  ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY6]](p4)
+  ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY7]](p4)
   ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
- ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK: $vgpr31 = COPY [[OR1]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: $sgpr10_sgpr11 = COPY [[COPY8]](s64) + ; CHECK: $sgpr12 = COPY [[COPY9]](s32) + ; CHECK: $sgpr13 = COPY [[COPY10]](s32) + ; CHECK: $sgpr14 = COPY [[COPY11]](s32) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK: S_ENDPGM 0 call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" @@ -59,47 +46,38 @@ define amdgpu_kernel void @kernel_call_no_workgroup_ids() { ; CHECK-LABEL: name: kernel_call_no_workgroup_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY4]] ; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64) - ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY3]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C2]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[C3:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 20 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C3]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK: [[COPY12:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY12]](<4 x s32>) + ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4) + ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4) ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK: $sgpr10_sgpr11 = COPY [[COPY8]](s64) ; CHECK: $vgpr31 = COPY [[OR1]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK: S_ENDPGM 0 call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" @@ -109,47 +87,29 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() { ; CHECK-LABEL: name: kernel_call_no_other_sgprs ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK: [[C:%[0-9]+]]:_(p4) = G_CONSTANT i64 0 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C]], [[C1]](s64) - ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], 
[[C2]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY17]], [[C3]](s32) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C3]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) ; CHECK: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK: $sgpr14 = COPY [[COPY14]](s32) ; CHECK: $vgpr31 = COPY [[OR1]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK: S_ENDPGM 0 call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" @@ -159,40 +119,37 @@ define void @func_call_no_workitem_ids() { ; CHECK-LABEL: name: func_call_no_workitem_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: 
[[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]] + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]] + ; CHECK: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>) + ; CHECK: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY18]] + ; CHECK: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]] + ; CHECK: S_SETPC_B64_return [[COPY16]] call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" ret void } @@ -200,40 +157,31 @@ define void @func_call_no_workgroup_ids() { ; CHECK-LABEL: name: func_call_no_workgroup_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: 
[[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]] + ; CHECK: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]] + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>) + ; CHECK: $sgpr4_sgpr5 = COPY [[COPY6]](p4) + ; CHECK: $sgpr6_sgpr7 = COPY [[COPY7]](p4) + ; CHECK: $sgpr8_sgpr9 = COPY [[COPY8]](p4) + ; CHECK: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; CHECK: $vgpr31 = COPY [[COPY10]](s32) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY18]] + ; CHECK: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] + ; CHECK: S_SETPC_B64_return [[COPY12]] call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" ret void } @@ -241,40 +189,22 @@ define void @func_call_no_other_sgprs() { ; CHECK-LABEL: name: func_call_no_other_sgprs ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @extern - ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = 
COPY [[COPY5]] - ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK: $vgpr31 = COPY [[COPY16]](s32) - ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; CHECK: $sgpr8_sgpr9 = COPY [[COPY3]](p4) + ; CHECK: $vgpr31 = COPY [[COPY4]](s32) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31 ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK: S_SETPC_B64_return [[COPY18]] + ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY6]] call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" ret void } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -43,41 +43,17 @@ define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32 ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN: 
[[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN: $sgpr12 = COPY [[COPY16]](s32) - ; GCN: $sgpr13 = COPY [[COPY17]](s32) - ; GCN: $sgpr14 = COPY [[COPY18]](s32) - ; GCN: $vgpr31 = COPY [[COPY19]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) ret i32 %ret @@ -86,46 +62,22 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - 
; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN: $sgpr12 = COPY [[COPY16]](s32) - ; GCN: $sgpr13 = COPY [[COPY17]](s32) - ; GCN: $sgpr14 = COPY [[COPY18]](s32) - ; GCN: $vgpr31 = COPY [[COPY19]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 @@ -137,46 +89,22 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; 
GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN: $sgpr12 = COPY [[COPY16]](s32) - ; GCN: $sgpr13 = COPY [[COPY17]](s32) - ; GCN: $sgpr14 = COPY [[COPY18]](s32) - ; GCN: $vgpr31 = COPY [[COPY19]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 @@ -188,41 +116,17 @@ define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN: $sgpr8_sgpr9 = COPY 
[[COPY14]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN: $sgpr12 = COPY [[COPY16]](s32) - ; GCN: $sgpr13 = COPY [[COPY17]](s32) - ; GCN: $sgpr14 = COPY [[COPY18]](s32) - ; GCN: $vgpr31 = COPY [[COPY19]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) ret void @@ -232,17 +136,8 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: kernel_call_i32_fastcc_i32_i32_unused_result ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 - ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN: liveins: $sgpr8_sgpr9 + ; GCN: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GCN: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) @@ -253,38 +148,12 @@ ; GCN: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64) ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) - ; GCN: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) - ; GCN: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) - ; GCN: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) - ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN: $vgpr0 = COPY [[EVEC]](s32) ; GCN: $vgpr1 = COPY [[EVEC1]](s32) - ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; 
GCN: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN: $sgpr12 = COPY [[COPY14]](s32) - ; GCN: $sgpr13 = COPY [[COPY15]](s32) - ; GCN: $sgpr14 = COPY [[COPY16]](s32) - ; GCN: $vgpr31 = COPY [[OR1]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: S_ENDPGM 0 entry: @@ -314,52 +183,28 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 - ; GCN: [[COPY9:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: [[COPY20:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C]](s32) + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32) ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load 
(s32) from %ir.b.byval, addrspace 5) - ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN: $sgpr12 = COPY [[COPY16]](s32) - ; GCN: $sgpr13 = COPY [[COPY17]](s32) - ; GCN: $sgpr14 = COPY [[COPY18]](s32) - ; GCN: $vgpr31 = COPY [[COPY19]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 - ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5) + ; GCN: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GCN: $vgpr0 = COPY [[COPY22]](s32) - ; GCN: [[COPY23:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY11]] - ; GCN: S_SETPC_B64_return [[COPY23]], implicit $vgpr0 + ; GCN: $vgpr0 = COPY [[COPY6]](s32) + ; GCN: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GCN: S_SETPC_B64_return [[COPY7]], implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval) ret i32 %ret @@ -371,77 +216,53 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32 ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN: 
[[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 - ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY 
[[COPY5]] - ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr0 = COPY [[COPY]](s32) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) - ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN: $sgpr12 = COPY [[COPY44]](s32) - ; GCN: $sgpr13 = COPY [[COPY45]](s32) - ; GCN: $sgpr14 = COPY [[COPY46]](s32) - ; GCN: $vgpr31 = COPY [[COPY47]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*)) ret i32 %ret @@ -506,110 +327,86 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32 ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN: 
[[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 - 
; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: $vgpr2 = COPY [[COPY10]](s32) - ; GCN: $vgpr3 = COPY [[COPY11]](s32) - ; GCN: $vgpr4 = COPY [[COPY12]](s32) - ; GCN: $vgpr5 = COPY [[COPY13]](s32) - ; GCN: $vgpr6 = COPY [[COPY14]](s32) - ; GCN: $vgpr7 = COPY [[COPY15]](s32) - ; GCN: $vgpr8 = COPY [[COPY16]](s32) - ; GCN: $vgpr9 = COPY [[COPY17]](s32) - ; GCN: $vgpr10 = COPY [[COPY18]](s32) - ; GCN: $vgpr11 = COPY [[COPY19]](s32) - ; GCN: $vgpr12 = COPY [[COPY20]](s32) - ; GCN: $vgpr13 = COPY [[COPY21]](s32) - ; GCN: $vgpr14 = COPY [[COPY22]](s32) - ; GCN: $vgpr15 = COPY [[COPY23]](s32) - ; GCN: $vgpr16 = COPY [[COPY24]](s32) - ; GCN: $vgpr17 = COPY [[COPY25]](s32) - ; GCN: $vgpr18 = COPY [[COPY26]](s32) - ; GCN: $vgpr19 = COPY [[COPY27]](s32) - ; GCN: $vgpr20 = COPY [[COPY28]](s32) - ; GCN: $vgpr21 = COPY [[COPY29]](s32) - ; GCN: $vgpr22 = COPY [[COPY30]](s32) - ; GCN: $vgpr23 = COPY [[COPY31]](s32) - ; GCN: $vgpr24 = COPY [[COPY32]](s32) - ; GCN: $vgpr25 = COPY [[COPY33]](s32) - ; GCN: $vgpr26 = COPY [[COPY34]](s32) - ; GCN: $vgpr27 = COPY [[COPY35]](s32) - ; GCN: $vgpr28 = COPY [[COPY36]](s32) - ; GCN: $vgpr29 = COPY [[COPY37]](s32) - ; GCN: $vgpr30 = COPY [[COPY38]](s32) + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: $vgpr2 = COPY [[COPY2]](s32) + ; GCN: $vgpr3 = COPY [[COPY3]](s32) + ; GCN: $vgpr4 = COPY [[COPY4]](s32) + ; GCN: $vgpr5 = COPY [[COPY5]](s32) + ; GCN: $vgpr6 = COPY [[COPY6]](s32) + ; GCN: $vgpr7 = COPY [[COPY7]](s32) + ; GCN: $vgpr8 = COPY [[COPY8]](s32) + ; GCN: $vgpr9 = COPY [[COPY9]](s32) + ; GCN: $vgpr10 = COPY [[COPY10]](s32) + ; GCN: $vgpr11 = COPY [[COPY11]](s32) + ; GCN: $vgpr12 = COPY [[COPY12]](s32) + ; GCN: $vgpr13 = COPY [[COPY13]](s32) + ; GCN: $vgpr14 = COPY [[COPY14]](s32) + ; GCN: $vgpr15 = COPY [[COPY15]](s32) + ; GCN: $vgpr16 = COPY [[COPY16]](s32) + ; GCN: $vgpr17 = COPY [[COPY17]](s32) + ; GCN: $vgpr18 = COPY [[COPY18]](s32) + ; GCN: $vgpr19 = COPY [[COPY19]](s32) + ; GCN: $vgpr20 = COPY [[COPY20]](s32) + ; GCN: $vgpr21 = COPY [[COPY21]](s32) + ; GCN: $vgpr22 = COPY [[COPY22]](s32) + ; GCN: $vgpr23 = COPY [[COPY23]](s32) + ; GCN: $vgpr24 = COPY [[COPY24]](s32) + ; GCN: $vgpr25 = COPY [[COPY25]](s32) + ; GCN: $vgpr26 = COPY [[COPY26]](s32) + ; GCN: $vgpr27 = COPY [[COPY27]](s32) + ; GCN: $vgpr28 = COPY [[COPY28]](s32) + ; GCN: $vgpr29 = COPY [[COPY29]](s32) + ; GCN: $vgpr30 = COPY [[COPY30]](s32) ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = 
COPY [[COPY40]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN: $sgpr12 = COPY [[COPY44]](s32) - ; GCN: $sgpr13 = COPY [[COPY45]](s32) - ; GCN: $sgpr14 = COPY [[COPY46]](s32) - ; GCN: $vgpr31 = COPY [[COPY47]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) ret i32 %ret @@ -618,115 +415,91 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN: 
[[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 + ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 - ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: $vgpr2 = COPY [[COPY10]](s32) - ; GCN: $vgpr3 = COPY [[COPY11]](s32) - ; GCN: $vgpr4 = COPY [[COPY12]](s32) - ; GCN: $vgpr5 = COPY [[COPY13]](s32) - ; GCN: $vgpr6 = COPY [[COPY14]](s32) - ; GCN: $vgpr7 = COPY [[COPY15]](s32) - ; GCN: $vgpr8 = COPY [[COPY16]](s32) - ; GCN: $vgpr9 = COPY [[COPY17]](s32) - ; GCN: $vgpr10 = COPY [[COPY18]](s32) - ; GCN: $vgpr11 = COPY [[COPY19]](s32) - ; GCN: $vgpr12 = COPY [[COPY20]](s32) - ; GCN: $vgpr13 = COPY [[COPY21]](s32) - ; GCN: $vgpr14 = COPY [[COPY22]](s32) - ; GCN: $vgpr15 = COPY [[COPY23]](s32) - ; GCN: $vgpr16 = COPY [[COPY24]](s32) - ; GCN: $vgpr17 = COPY [[COPY25]](s32) - ; GCN: $vgpr18 = COPY [[COPY26]](s32) - ; GCN: $vgpr19 = COPY [[COPY27]](s32) - ; GCN: $vgpr20 = COPY [[COPY28]](s32) - ; GCN: $vgpr21 = COPY [[COPY29]](s32) - ; GCN: $vgpr22 = COPY [[COPY30]](s32) - ; GCN: $vgpr23 = COPY [[COPY31]](s32) - ; GCN: $vgpr24 = COPY [[COPY32]](s32) - ; GCN: $vgpr25 = COPY [[COPY33]](s32) - ; GCN: $vgpr26 = COPY [[COPY34]](s32) - ; GCN: $vgpr27 = COPY [[COPY35]](s32) - ; GCN: $vgpr28 = COPY [[COPY36]](s32) - ; GCN: $vgpr29 = COPY [[COPY37]](s32) - ; GCN: $vgpr30 = COPY [[COPY38]](s32) + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: $vgpr2 = COPY [[COPY2]](s32) + ; GCN: $vgpr3 = COPY [[COPY3]](s32) + ; GCN: $vgpr4 = COPY [[COPY4]](s32) + ; GCN: $vgpr5 = COPY [[COPY5]](s32) + ; GCN: $vgpr6 = COPY [[COPY6]](s32) + ; GCN: $vgpr7 = COPY [[COPY7]](s32) + ; GCN: $vgpr8 = COPY [[COPY8]](s32) + ; GCN: $vgpr9 = COPY [[COPY9]](s32) + ; GCN: $vgpr10 = COPY [[COPY10]](s32) + ; GCN: $vgpr11 = COPY [[COPY11]](s32) + ; GCN: $vgpr12 = COPY [[COPY12]](s32) + ; GCN: $vgpr13 = COPY [[COPY13]](s32) + ; GCN: $vgpr14 = COPY [[COPY14]](s32) + ; GCN: $vgpr15 = COPY [[COPY15]](s32) + ; GCN: $vgpr16 = COPY [[COPY16]](s32) + ; GCN: $vgpr17 = COPY [[COPY17]](s32) + ; GCN: $vgpr18 = COPY [[COPY18]](s32) + ; GCN: $vgpr19 = COPY [[COPY19]](s32) + ; GCN: $vgpr20 = COPY [[COPY20]](s32) + ; GCN: $vgpr21 = COPY [[COPY21]](s32) + ; GCN: $vgpr22 = COPY [[COPY22]](s32) + ; GCN: $vgpr23 = COPY [[COPY23]](s32) + ; GCN: $vgpr24 = COPY [[COPY24]](s32) + ; GCN: $vgpr25 = COPY [[COPY25]](s32) + ; GCN: $vgpr26 = COPY [[COPY26]](s32) + ; GCN: $vgpr27 = COPY [[COPY27]](s32) + ; GCN: $vgpr28 = COPY [[COPY28]](s32) + ; GCN: $vgpr29 = COPY [[COPY29]](s32) + ; GCN: $vgpr30 = COPY [[COPY30]](s32) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) ; GCN: 
[[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN: $sgpr12 = COPY [[COPY44]](s32) - ; GCN: $sgpr13 = COPY [[COPY45]](s32) - ; GCN: $sgpr14 = COPY [[COPY46]](s32) - ; GCN: $vgpr31 = COPY [[COPY47]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 @@ -741,31 +514,15 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-LABEL: name: no_sibling_call_callee_more_stack_space ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 
+ ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 - ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) ; GCN: $vgpr2 = COPY [[C]](s32) ; GCN: $vgpr3 = COPY [[C]](s32) ; GCN: $vgpr4 = COPY [[C]](s32) @@ -795,32 +552,24 @@ ; GCN: $vgpr28 = COPY [[C]](s32) ; GCN: $vgpr29 = COPY [[C]](s32) ; GCN: $vgpr30 = COPY [[C]](s32) - ; GCN: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C2]](s32) + ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) + ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) - ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN: $sgpr12 = COPY [[COPY15]](s32) - ; GCN: $sgpr13 = COPY [[COPY16]](s32) - ; GCN: $sgpr14 = COPY [[COPY17]](s32) - ; GCN: $vgpr31 = COPY [[COPY18]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = 
COPY [[COPY4]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 12, implicit-def $scc - ; GCN: $vgpr0 = COPY [[COPY21]](s32) - ; GCN: [[COPY22:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; GCN: S_SETPC_B64_return [[COPY22]], implicit $vgpr0 + ; GCN: $vgpr0 = COPY [[COPY5]](s32) + ; GCN: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN: S_SETPC_B64_return [[COPY6]], implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) ret i32 %ret @@ -830,67 +579,27 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 - ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN: $sgpr12 = COPY [[COPY16]](s32) - ; GCN: $sgpr13 = COPY [[COPY17]](s32) - ; GCN: $sgpr14 = COPY [[COPY18]](s32) - ; GCN: $vgpr31 = COPY 
[[COPY19]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 - ; GCN: [[COPY22:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY23:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY24:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY25:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: $vgpr2 = COPY [[COPY21]](s32) - ; GCN: [[COPY30:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY30]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY22]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY23]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY24]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY25]](s64) - ; GCN: $sgpr12 = COPY [[COPY26]](s32) - ; GCN: $sgpr13 = COPY [[COPY27]](s32) - ; GCN: $sgpr14 = COPY [[COPY28]](s32) - ; GCN: $vgpr31 = COPY [[COPY29]](s32) - ; GCN: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: $vgpr2 = COPY [[COPY5]](s32) + ; GCN: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) %ret = tail call fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %other.call) @@ -902,115 +611,91 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32 ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: 
[[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: 
[[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.5, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 - ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) - ; GCN: $vgpr2 = COPY [[COPY10]](s32) - ; GCN: $vgpr3 = COPY [[COPY11]](s32) - ; GCN: $vgpr4 = COPY [[COPY12]](s32) - ; GCN: $vgpr5 = COPY [[COPY13]](s32) - ; GCN: $vgpr6 = COPY [[COPY14]](s32) - ; GCN: $vgpr7 = COPY [[COPY15]](s32) - ; GCN: $vgpr8 = COPY [[COPY16]](s32) - ; GCN: $vgpr9 = COPY [[COPY17]](s32) - ; GCN: $vgpr10 = COPY [[COPY18]](s32) - ; GCN: $vgpr11 = COPY [[COPY19]](s32) - ; GCN: $vgpr12 = COPY [[COPY20]](s32) - ; GCN: $vgpr13 = COPY [[COPY21]](s32) - ; GCN: $vgpr14 = COPY [[COPY22]](s32) - ; GCN: $vgpr15 = COPY [[COPY23]](s32) - ; GCN: $vgpr16 = COPY [[COPY24]](s32) - ; GCN: $vgpr17 = COPY [[COPY25]](s32) - ; GCN: $vgpr18 = COPY [[COPY26]](s32) - ; GCN: $vgpr19 = COPY [[COPY27]](s32) - ; GCN: $vgpr20 = COPY [[COPY28]](s32) - ; GCN: $vgpr21 = COPY [[COPY29]](s32) - ; GCN: $vgpr22 = COPY [[COPY30]](s32) - ; GCN: $vgpr23 = COPY [[COPY31]](s32) - ; GCN: $vgpr24 = COPY [[COPY32]](s32) - ; GCN: $vgpr25 = COPY [[COPY33]](s32) - ; GCN: $vgpr26 = COPY [[COPY34]](s32) - ; GCN: $vgpr27 = COPY [[COPY35]](s32) - ; GCN: $vgpr28 = COPY [[COPY36]](s32) - ; GCN: $vgpr29 = COPY [[COPY37]](s32) - ; GCN: $vgpr30 = COPY [[COPY38]](s32) + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) + ; GCN: $vgpr2 = COPY [[COPY2]](s32) + ; GCN: $vgpr3 = COPY [[COPY3]](s32) + ; GCN: $vgpr4 = COPY [[COPY4]](s32) + ; GCN: $vgpr5 = COPY [[COPY5]](s32) + ; GCN: $vgpr6 = COPY [[COPY6]](s32) + ; GCN: $vgpr7 = COPY [[COPY7]](s32) + ; GCN: $vgpr8 = COPY [[COPY8]](s32) + ; GCN: $vgpr9 = COPY [[COPY9]](s32) + ; GCN: $vgpr10 = COPY [[COPY10]](s32) + ; GCN: $vgpr11 = COPY [[COPY11]](s32) + ; GCN: $vgpr12 = COPY [[COPY12]](s32) + ; GCN: $vgpr13 = COPY [[COPY13]](s32) + ; GCN: 
$vgpr14 = COPY [[COPY14]](s32) + ; GCN: $vgpr15 = COPY [[COPY15]](s32) + ; GCN: $vgpr16 = COPY [[COPY16]](s32) + ; GCN: $vgpr17 = COPY [[COPY17]](s32) + ; GCN: $vgpr18 = COPY [[COPY18]](s32) + ; GCN: $vgpr19 = COPY [[COPY19]](s32) + ; GCN: $vgpr20 = COPY [[COPY20]](s32) + ; GCN: $vgpr21 = COPY [[COPY21]](s32) + ; GCN: $vgpr22 = COPY [[COPY22]](s32) + ; GCN: $vgpr23 = COPY [[COPY23]](s32) + ; GCN: $vgpr24 = COPY [[COPY24]](s32) + ; GCN: $vgpr25 = COPY [[COPY25]](s32) + ; GCN: $vgpr26 = COPY [[COPY26]](s32) + ; GCN: $vgpr27 = COPY [[COPY27]](s32) + ; GCN: $vgpr28 = COPY [[COPY28]](s32) + ; GCN: $vgpr29 = COPY [[COPY29]](s32) + ; GCN: $vgpr30 = COPY [[COPY30]](s32) ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN: $sgpr12 = COPY [[COPY44]](s32) - ; GCN: $sgpr13 = COPY [[COPY45]](s32) - ; GCN: $sgpr14 = COPY [[COPY46]](s32) - ; GCN: $vgpr31 = COPY [[COPY47]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 @@ -1022,46 +707,38 @@ define fastcc i32 
@sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area ; GCN: bb.1.entry: - ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 - ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 - ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 - ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 - ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 - ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 - ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 - ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 - ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 - ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 - ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 - ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 - ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; 
GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.9, align 16, addrspace 5) ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 @@ -1076,7 +753,7 @@ ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -1084,16 +761,8 @@ ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32) ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 - ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN: $vgpr0 = COPY [[COPY8]](s32) - ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr0 = COPY [[COPY]](s32) + ; GCN: $vgpr1 = COPY [[COPY1]](s32) ; GCN: $vgpr2 = COPY [[C1]](s32) ; GCN: $vgpr3 = COPY [[C1]](s32) ; GCN: $vgpr4 = COPY [[C1]](s32) @@ -1129,17 +798,9 @@ ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store (s32) into %fixed-stack.1, addrspace 5) ; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store (s32) into %fixed-stack.0, align 8, addrspace 5) - ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN: $sgpr12 = COPY [[COPY44]](s32) - ; GCN: $sgpr13 = COPY [[COPY45]](s32) - ; GCN: $sgpr14 = COPY [[COPY46]](s32) - ; GCN: $vgpr31 = COPY [[COPY47]](s32) - ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; GCN: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 Index: llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -0,0 +1,562 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=0 < %s | FileCheck -check-prefix=VARABI %s +; RUN: llc -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s +; RUN: llc -global-isel -amdhsa-code-object-version=3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-fixed-function-abi=1 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s + +; Test with gfx803 so that +; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require +; the queue ptr. Test with code object v3 so that llvm.trap and +; llvm.debugtrap, which also require the queue ptr, are exercised. + + +declare hidden void @requires_all_inputs() + +; This function is incorrectly marked with hints saying that the callee +; does not require the implicit arguments to the function. Make sure +; we do not crash.
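+; (These hints are the "amdgpu-no-*" function attributes; a minimal +; illustration, using a hypothetical attribute list rather than the one +; this test applies: +; attributes #0 = { "amdgpu-no-queue-ptr" "amdgpu-no-workitem-id-x" } +; The actual #0 list used by this test is defined later in the file.)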
+define void @parent_func_missing_inputs() #0 { +; VARABI-LABEL: parent_func_missing_inputs: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: s_or_saveexec_b64 s[4:5], -1 +; VARABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; VARABI-NEXT: s_mov_b64 exec, s[4:5] +; VARABI-NEXT: v_writelane_b32 v40, s33, 2 +; VARABI-NEXT: v_writelane_b32 v40, s30, 0 +; VARABI-NEXT: s_mov_b32 s33, s32 +; VARABI-NEXT: s_addk_i32 s32, 0x400 +; VARABI-NEXT: s_getpc_b64 s[4:5] +; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4 +; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12 +; VARABI-NEXT: v_writelane_b32 v40, s31, 1 +; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5] +; VARABI-NEXT: v_readlane_b32 s4, v40, 0 +; VARABI-NEXT: v_readlane_b32 s5, v40, 1 +; VARABI-NEXT: s_addk_i32 s32, 0xfc00 +; VARABI-NEXT: v_readlane_b32 s33, v40, 2 +; VARABI-NEXT: s_or_saveexec_b64 s[6:7], -1 +; VARABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; VARABI-NEXT: s_mov_b64 exec, s[6:7] +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_setpc_b64 s[4:5] +; +; FIXEDABI-LABEL: parent_func_missing_inputs: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-NEXT: s_or_saveexec_b64 s[16:17], -1 +; FIXEDABI-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; FIXEDABI-NEXT: s_mov_b64 exec, s[16:17] +; FIXEDABI-NEXT: v_writelane_b32 v40, s33, 2 +; FIXEDABI-NEXT: v_writelane_b32 v40, s30, 0 +; FIXEDABI-NEXT: s_mov_b32 s33, s32 +; FIXEDABI-NEXT: s_addk_i32 s32, 0x400 +; FIXEDABI-NEXT: s_getpc_b64 s[16:17] +; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 +; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 +; FIXEDABI-NEXT: v_writelane_b32 v40, s31, 1 +; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] +; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 0 +; FIXEDABI-NEXT: v_readlane_b32 s5, v40, 1 +; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00 +; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2 +; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 +; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7] +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: s_setpc_b64 s[4:5] + call void @requires_all_inputs() + ret void +} + +define amdgpu_kernel void @parent_kernel_missing_inputs() #0 { +; VARABI-LABEL: parent_kernel_missing_inputs: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_add_i32 s4, s4, s9 +; VARABI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 +; VARABI-NEXT: s_add_u32 s0, s0, s9 +; VARABI-NEXT: s_addc_u32 s1, s1, 0 +; VARABI-NEXT: s_mov_b32 flat_scratch_lo, s5 +; VARABI-NEXT: s_getpc_b64 s[4:5] +; VARABI-NEXT: s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4 +; VARABI-NEXT: s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12 +; VARABI-NEXT: s_mov_b32 s32, 0 +; VARABI-NEXT: s_swappc_b64 s[30:31], s[4:5] +; VARABI-NEXT: s_endpgm +; +; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs: +; FIXEDABI-SDAG: ; %bb.0: +; FIXEDABI-SDAG-NEXT: s_add_i32 s10, s10, s15 +; FIXEDABI-SDAG-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8 +; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; FIXEDABI-SDAG-NEXT: s_add_u32 s0, s0, s15 +; FIXEDABI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 +; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 +; FIXEDABI-SDAG-NEXT: s_mov_b32 flat_scratch_lo, s11 +; FIXEDABI-SDAG-NEXT: s_addc_u32 s1, s1, 0 +; FIXEDABI-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9] +; FIXEDABI-SDAG-NEXT: v_or_b32_e32 v31, v0, v2 
+; FIXEDABI-SDAG-NEXT: s_mov_b64 s[8:9], 0 +; FIXEDABI-SDAG-NEXT: s_getpc_b64 s[16:17] +; FIXEDABI-SDAG-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 +; FIXEDABI-SDAG-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 +; FIXEDABI-SDAG-NEXT: s_mov_b32 s32, 0 +; FIXEDABI-SDAG-NEXT: s_swappc_b64 s[30:31], s[16:17] +; FIXEDABI-SDAG-NEXT: s_endpgm +; +; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs: +; FIXEDABI-GISEL: ; %bb.0: +; FIXEDABI-GISEL-NEXT: s_add_i32 s10, s10, s15 +; FIXEDABI-GISEL-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8 +; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; FIXEDABI-GISEL-NEXT: s_add_u32 s0, s0, s15 +; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; FIXEDABI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 20, v2 +; FIXEDABI-GISEL-NEXT: s_mov_b32 flat_scratch_lo, s11 +; FIXEDABI-GISEL-NEXT: s_addc_u32 s1, s1, 0 +; FIXEDABI-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] +; FIXEDABI-GISEL-NEXT: v_or_b32_e32 v31, v0, v1 +; FIXEDABI-GISEL-NEXT: s_mov_b64 s[8:9], 0 +; FIXEDABI-GISEL-NEXT: s_getpc_b64 s[16:17] +; FIXEDABI-GISEL-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 +; FIXEDABI-GISEL-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 +; FIXEDABI-GISEL-NEXT: s_mov_b32 s32, 0 +; FIXEDABI-GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] +; FIXEDABI-GISEL-NEXT: s_endpgm + call void @requires_all_inputs() + ret void +} + +; Function is marked with amdgpu-no-workitem-id-* but uses them anyway +define void @marked_func_use_workitem_id(i32 addrspace(1)* %ptr) #0 { +; VARABI-LABEL: marked_func_use_workitem_id: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: v_and_b32_e32 v3, 0x3ff, v2 +; VARABI-NEXT: flat_store_dword v[0:1], v3 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: v_bfe_u32 v3, v2, 10, 10 +; VARABI-NEXT: v_bfe_u32 v2, v2, 20, 10 +; VARABI-NEXT: flat_store_dword v[0:1], v3 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id: +; FIXEDABI-SDAG: ; %bb.0: +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: v_and_b32_e32 v2, 0x3ff, v31 +; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-SDAG-NEXT: v_bfe_u32 v2, v31, 10, 10 +; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-SDAG-NEXT: v_bfe_u32 v2, v31, 20, 10 +; FIXEDABI-SDAG-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-GISEL-LABEL: marked_func_use_workitem_id: +; FIXEDABI-GISEL: ; %bb.0: +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: v_and_b32_e32 v2, 0x3ff, v31 +; FIXEDABI-GISEL-NEXT: v_bfe_u32 v3, v31, 10, 10 +; FIXEDABI-GISEL-NEXT: v_bfe_u32 v4, v31, 20, 10 +; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v3 +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v4 +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31] + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %id.y = call i32 @llvm.amdgcn.workitem.id.y() + %id.z = call i32 @llvm.amdgcn.workitem.id.z() + store volatile i32 %id.x, i32 addrspace(1)* %ptr + store volatile i32 %id.y, i32 addrspace(1)* %ptr + store 
volatile i32 %id.z, i32 addrspace(1)* %ptr + ret void +} + +; Function is marked with amdgpu-no-workitem-id-* but uses them anyway +define amdgpu_kernel void @marked_kernel_use_workitem_id(i32 addrspace(1)* %ptr) #0 { +; VARABI-LABEL: marked_kernel_use_workitem_id: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; VARABI-NEXT: s_waitcnt lgkmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v4, s1 +; VARABI-NEXT: v_mov_b32_e32 v3, s0 +; VARABI-NEXT: flat_store_dword v[3:4], v0 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: flat_store_dword v[3:4], v1 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: flat_store_dword v[3:4], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_endpgm +; +; FIXEDABI-LABEL: marked_kernel_use_workitem_id: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v4, s1 +; FIXEDABI-NEXT: v_mov_b32_e32 v3, s0 +; FIXEDABI-NEXT: flat_store_dword v[3:4], v0 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: flat_store_dword v[3:4], v1 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: flat_store_dword v[3:4], v2 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: s_endpgm + %id.x = call i32 @llvm.amdgcn.workitem.id.x() + %id.y = call i32 @llvm.amdgcn.workitem.id.y() + %id.z = call i32 @llvm.amdgcn.workitem.id.z() + store volatile i32 %id.x, i32 addrspace(1)* %ptr + store volatile i32 %id.y, i32 addrspace(1)* %ptr + store volatile i32 %id.z, i32 addrspace(1)* %ptr + ret void +} + +define void @marked_func_use_workgroup_id(i32 addrspace(1)* %ptr) #0 { +; VARABI-LABEL: marked_func_use_workgroup_id: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v2, s4 +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v2, s5 +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v2, s6 +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-LABEL: marked_func_use_workgroup_id: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s12 +; FIXEDABI-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s13 +; FIXEDABI-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s14 +; FIXEDABI-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: s_setpc_b64 s[30:31] + %id.x = call i32 @llvm.amdgcn.workgroup.id.x() + %id.y = call i32 @llvm.amdgcn.workgroup.id.y() + %id.z = call i32 @llvm.amdgcn.workgroup.id.z() + store volatile i32 %id.x, i32 addrspace(1)* %ptr + store volatile i32 %id.y, i32 addrspace(1)* %ptr + store volatile i32 %id.z, i32 addrspace(1)* %ptr + ret void +} + +define amdgpu_kernel void @marked_kernel_use_workgroup_id(i32 addrspace(1)* %ptr) #0 { +; VARABI-LABEL: marked_kernel_use_workgroup_id: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; VARABI-NEXT: v_mov_b32_e32 v2, s6 +; VARABI-NEXT: s_waitcnt lgkmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v0, s0 +; VARABI-NEXT: v_mov_b32_e32 v1, s1 +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v2, s7 +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: 
v_mov_b32_e32 v2, s8 +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_endpgm +; +; FIXEDABI-LABEL: marked_kernel_use_workgroup_id: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s6 +; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v0, s0 +; FIXEDABI-NEXT: v_mov_b32_e32 v1, s1 +; FIXEDABI-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s7 +; FIXEDABI-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s8 +; FIXEDABI-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: s_endpgm + %id.x = call i32 @llvm.amdgcn.workgroup.id.x() + %id.y = call i32 @llvm.amdgcn.workgroup.id.y() + %id.z = call i32 @llvm.amdgcn.workgroup.id.z() + store volatile i32 %id.x, i32 addrspace(1)* %ptr + store volatile i32 %id.y, i32 addrspace(1)* %ptr + store volatile i32 %id.z, i32 addrspace(1)* %ptr + ret void +} + +define void @marked_func_use_other_sgpr(i64 addrspace(1)* %ptr) #0 { +; VARABI-LABEL: marked_func_use_other_sgpr: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-LABEL: marked_func_use_other_sgpr: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s6 +; FIXEDABI-NEXT: v_mov_b32_e32 v3, s7 +; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s8 +; FIXEDABI-NEXT: v_mov_b32_e32 v3, s9 +; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s4 +; FIXEDABI-NEXT: v_mov_b32_e32 v3, s5 +; FIXEDABI-NEXT: flat_load_ubyte v2, v[2:3] glc +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v2, s10 +; FIXEDABI-NEXT: v_mov_b32_e32 v3, s11 +; FIXEDABI-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: s_setpc_b64 s[30:31] + %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() + %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %dispatch.id = call i64 @llvm.amdgcn.dispatch.id() + %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr + %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr + %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr + store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr + ret void +} + +define amdgpu_kernel void @marked_kernel_use_other_sgpr(i64 addrspace(1)* %ptr) #0 { +; VARABI-LABEL: marked_kernel_use_other_sgpr: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_add_u32 s0, s4, 8 +; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; VARABI-NEXT: s_addc_u32 s1, s5, 0 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v0, s0 +; VARABI-NEXT: v_mov_b32_e32 v1, s1 +; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; VARABI-NEXT: s_endpgm +; +; FIXEDABI-LABEL: marked_kernel_use_other_sgpr: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: 
s_add_u32 s0, s4, 8 +; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; FIXEDABI-NEXT: s_addc_u32 s1, s5, 0 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: v_mov_b32_e32 v0, s0 +; FIXEDABI-NEXT: v_mov_b32_e32 v1, s1 +; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; FIXEDABI-NEXT: s_endpgm + %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() + %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %dispatch.id = call i64 @llvm.amdgcn.dispatch.id() + %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr + %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr + %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr + store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr + ret void +} + +define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 { +; VARABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr: +; VARABI: ; %bb.0: +; VARABI-NEXT: v_mov_b32_e32 v0, 0 +; VARABI-NEXT: v_mov_b32_e32 v1, 0 +; VARABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; VARABI-NEXT: s_endpgm +; +; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: v_mov_b32_e32 v0, 0 +; FIXEDABI-NEXT: v_mov_b32_e32 v1, 0 +; FIXEDABI-NEXT: flat_load_ubyte v0, v[0:1] glc +; FIXEDABI-NEXT: s_endpgm + %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() + %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr + ret void +} + +; On gfx8, the queue ptr is required for this addrspacecast. +define void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) #0 { +; VARABI-LABEL: addrspacecast_requires_queue_ptr: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; VARABI-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc +; VARABI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; VARABI-NEXT: v_mov_b32_e32 v3, 0 +; VARABI-NEXT: v_mov_b32_e32 v4, 1 +; VARABI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VARABI-NEXT: flat_store_dword v[2:3], v4 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: v_mov_b32_e32 v1, v3 +; VARABI-NEXT: v_mov_b32_e32 v2, 2 +; VARABI-NEXT: flat_store_dword v[0:1], v2 +; VARABI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; VARABI-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr: +; FIXEDABI-SDAG: ; %bb.0: +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: s_load_dword s4, s[6:7], 0x40 +; FIXEDABI-SDAG-NEXT: s_load_dword s5, s[6:7], 0x44 +; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; FIXEDABI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v2, s5 +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc +; FIXEDABI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v0, vcc +; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 1 +; FIXEDABI-SDAG-NEXT: flat_store_dword v[2:3], v0 +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; FIXEDABI-SDAG-NEXT: v_mov_b32_e32 v0, 2 +; FIXEDABI-SDAG-NEXT: flat_store_dword v[4:5], v0 +; FIXEDABI-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; FIXEDABI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-GISEL-LABEL: 
addrspacecast_requires_queue_ptr: +; FIXEDABI-GISEL: ; %bb.0: +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: s_load_dword s4, s[6:7], 0x44 +; FIXEDABI-GISEL-NEXT: s_load_dword s5, s[6:7], 0x40 +; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc +; FIXEDABI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v3, s4 +; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; FIXEDABI-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, s5 +; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; FIXEDABI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc +; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v4, 1 +; FIXEDABI-GISEL-NEXT: flat_store_dword v[2:3], v4 +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-GISEL-NEXT: v_mov_b32_e32 v2, 2 +; FIXEDABI-GISEL-NEXT: flat_store_dword v[0:1], v2 +; FIXEDABI-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; FIXEDABI-GISEL-NEXT: s_setpc_b64 s[30:31] + %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32* + %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32* + store volatile i32 1, i32* %flat.private + store volatile i32 2, i32* %flat.local + ret void +} + +define void @is_shared_requires_queue_ptr(i8* %ptr) #0 { +; VARABI-LABEL: is_shared_requires_queue_ptr: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; VARABI-NEXT: flat_store_dword v[0:1], v0 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-LABEL: is_shared_requires_queue_ptr: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x40 +; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 +; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; FIXEDABI-NEXT: flat_store_dword v[0:1], v0 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: s_setpc_b64 s[30:31] + %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %zext = zext i1 %is.shared to i32 + store volatile i32 %zext, i32 addrspace(1)* undef + ret void +} + +define void @is_private_requires_queue_ptr(i8* %ptr) #0 { +; VARABI-LABEL: is_private_requires_queue_ptr: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; VARABI-NEXT: flat_store_dword v[0:1], v0 +; VARABI-NEXT: s_waitcnt vmcnt(0) +; VARABI-NEXT: s_setpc_b64 s[30:31] +; +; FIXEDABI-LABEL: is_private_requires_queue_ptr: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-NEXT: s_load_dword s4, s[6:7], 0x44 +; FIXEDABI-NEXT: s_waitcnt lgkmcnt(0) +; FIXEDABI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1 +; FIXEDABI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; FIXEDABI-NEXT: flat_store_dword v[0:1], v0 +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) +; FIXEDABI-NEXT: s_setpc_b64 s[30:31] + %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr) + %zext = zext i1 %is.private to i32 + store volatile i32 %zext, i32 addrspace(1)* undef + ret void +} + +define void @trap_requires_queue() #0 { +; VARABI-LABEL: trap_requires_queue: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: s_mov_b64 s[0:1], 0 +; VARABI-NEXT: s_trap 2 +; +; FIXEDABI-LABEL: trap_requires_queue: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-NEXT: s_mov_b64 s[0:1], s[6:7] 
+; FIXEDABI-NEXT: s_trap 2 + call void @llvm.trap() + unreachable +} + +define void @debugtrap_requires_queue() #0 { +; VARABI-LABEL: debugtrap_requires_queue: +; VARABI: ; %bb.0: +; VARABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VARABI-NEXT: s_trap 3 +; +; FIXEDABI-LABEL: debugtrap_requires_queue: +; FIXEDABI: ; %bb.0: +; FIXEDABI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FIXEDABI-NEXT: s_trap 3 + call void @llvm.debugtrap() + unreachable +} + +declare i32 @llvm.amdgcn.workitem.id.x() +declare i32 @llvm.amdgcn.workitem.id.y() +declare i32 @llvm.amdgcn.workitem.id.z() +declare i32 @llvm.amdgcn.workgroup.id.x() +declare i32 @llvm.amdgcn.workgroup.id.y() +declare i32 @llvm.amdgcn.workgroup.id.z() +declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() +declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +declare i64 @llvm.amdgcn.dispatch.id() +declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() +declare i1 @llvm.amdgcn.is.shared(i8*) +declare i1 @llvm.amdgcn.is.private(i8*) +declare void @llvm.trap() +declare void @llvm.debugtrap() + +attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } Index: llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -144,17 +144,21 @@ ret void } +; The argument is already in the right place. We are free to clobber +; other SGPR arguments. ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN-NOT: s12 +; GCN-NOT: s13 +; GCN-NOT: s14 define hidden void @func_indirect_use_workgroup_id_y() #1 { call void @use_workgroup_id_y() ret void } ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z: -; GCN-NOT: s4 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN-NOT: s12 +; GCN-NOT: s13 +; GCN-NOT: s14 define hidden void @func_indirect_use_workgroup_id_z() #1 { call void @use_workgroup_id_z() ret void } Index: llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -131,10 +131,11 @@ ; VARABI: enable_vgpr_workitem_id = 0 ; FIXEDABI: enable_vgpr_workitem_id = 2 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v31 +; FIXEDABI: v_mov_b32_e32 v31, v0{{$}} +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v31 ; VARABI-NOT: v31 ; GCN: s_swappc_b64 @@ -148,20 +149,18 @@ ; VARABI: enable_vgpr_workitem_id = 1 ; FIXEDABI: enable_vgpr_workitem_id = 2 -; FIXEDABI-NOT: v0 -; FIXEDABI-NOT: v1 ; VARABI-NOT: v31 ; VARABI: v_lshlrev_b32_e32 v0, 10, v1 - -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] - ; FIXEDABI-NOT: v0 ; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 +; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1 +; FIXEDABI-NOT: 
v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 + ; VARABI-NOT: v31 ; GCN: s_swappc_b64 @@ -179,10 +178,11 @@ ; VARABI-NOT: v0 ; VARABI-NOT: v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2 +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 { @@ -198,10 +198,14 @@ ; VARABI-NOT: v0 ; VARABI-NOT: v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 +; FIXEDABI: v_lshlrev_b32_e32 v1, 10, v1 +; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1 +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 { @@ -218,10 +222,14 @@ ; VARABI-NOT: v2 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 +; FIXEDABI: v_lshlrev_b32_e32 v1, 20, v2 +; FIXEDABI-NEXT: v_or_b32_e32 v31, v0, v1 +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 { @@ -238,11 +246,15 @@ ; VARABI-NOT: v1 ; VARABI-NOT: v2 - -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 +; FIXEDABI: v_lshlrev_b32_e32 v0, 20, v2 +; FIXEDABI-NEXT: v_lshlrev_b32_e32 v1, 10, v1 +; FIXEDABI-NEXT: v_or_b32_e32 v31, v1, v0 +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 { @@ -348,10 +360,9 @@ ; VARABI: v_mov_b32_e32 v1, v0 ; VARABI: v_mov_b32_e32 v0, 0x22b -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] +; FIXEDABI-NOT: v0 +; FIXEDABI: v_mov_b32_e32 v31, v0 +; FIXEDABI: v_mov_b32_e32 v0, 0x22b ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 { @@ -371,10 +382,12 @@ ; VARABI-NOT: v0 ; FIXEDABI: enable_vgpr_workitem_id = 2 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] + +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 +; FIXEDABI: v_lshlrev_b32_e32 v31, 10, v1 +; FIXEDABI: v_mov_b32_e32 v0, 0x22b define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 { call void @other_arg_use_workitem_id_y(i32 555) ret void @@ -388,11 +401,11 @@ ; VARABI: s_swappc_b64 ; VARABI-NOT: v0 - -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: 
v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 +; FIXEDABI: v_lshlrev_b32_e32 v31, 20, v2 +; FIXEDABI: v_mov_b32_e32 v0, 0x22b define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { call void @other_arg_use_workitem_id_z(i32 555) ret void @@ -462,13 +475,13 @@ ; FIXEDABI: enable_vgpr_workitem_id = 2 +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 ; FIXEDABI-DAG: s_mov_b32 s32, 0 ; FIXEDABI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x140{{$}} -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI-DAG: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] -; FIXEDABI: buffer_store_dword [[K]], off, s[0:3], s32{{$}} +; FIXEDABI-DAG: buffer_store_dword [[K]], off, s[0:3], s32{{$}} +; FIXEDABI-DAG: v_mov_b32_e32 v31, v0 ; FIXEDABI: s_swappc_b64 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 { @@ -622,6 +635,10 @@ ; VARABI: s_swappc_b64 +; FIXEDABI-NOT: v0 +; FIXEDABI-NOT: v1 +; FIXEDABI-NOT: v2 +; FIXEDABI: v_mov_b32_e32 v31, v0 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7 ; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], 0 offset:4{{$}} ; FIXEDABI: s_movk_i32 s32, 0x400{{$}} @@ -632,11 +649,6 @@ ; FIXME: Why this reload? ; FIXEDABI: buffer_load_dword [[RELOAD:v[0-9]+]], off, s[0:3], 0 offset:4{{$}} -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP1:v[0-9]+]], 10, v1 -; FIXEDABI-DAG: v_lshlrev_b32_e32 [[TMP0:v[0-9]+]], 20, v2 -; FIXEDABI-DAG: v_or_b32_e32 [[TMP2:v[0-9]+]], v0, [[TMP1]] -; FIXEDABI: v_or_b32_e32 v31, [[TMP2]], [[TMP0]] - ; FIXEDABI-NOT: s32 ; FIXEDABI: buffer_store_dword [[RELOAD]], off, s[0:3], s32 offset:4 ; FIXEDABI: s_swappc_b64 @@ -885,9 +897,53 @@ ret void } +declare hidden void @extern_hint(i32) #2 + +; Workitem IDs should not be passed due to the attribute +; GCN-LABEL: {{^}}kern_call_no_workitem_id_hints: +; GCN-NOT: v30 +; GCN-NOT: v31 +; GCN: v_mov_b32_e32 v0, 9 +; GCN-NOT: v0 +; GCN-NOT: v31 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_call_no_workitem_id_hints() #2 { + call void @extern_hint(i32 9) + ret void +} + +; GCN-LABEL: {{^}}func_call_no_workitem_id_hints: +; GCN-NOT: v30 +; GCN-NOT: v31 +; GCN: v_mov_b32_e32 v0, 9 +; GCN-NOT: v0 +; GCN-NOT: v31 +; GCN: s_swappc_b64 +define void @func_call_no_workitem_id_hints() #2 { + call void @extern_hint(i32 9) + ret void +} + +declare hidden void @extern_nohint(i32) + +; Check that the hint is respected on the callsite, not the function +; declaration +; GCN-LABEL: {{^}}kern_callsite_workitem_id_hints: +; GCN-NOT: v30 +; GCN-NOT: v31 +; GCN: v_mov_b32_e32 v0, 9 +; GCN-NOT: v0 +; GCN-NOT: v31 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 { + call void @extern_nohint(i32 9) #2 + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #0 declare i32 @llvm.amdgcn.workitem.id.y() #0 declare i32 @llvm.amdgcn.workitem.id.z() #0 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind noinline } +attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
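+ +; Note: kern_callsite_workitem_id_hints above applies the hints via +; attribute group #2 directly on the call site; assuming the inline +; string-attribute spelling, an equivalent form would be: +; call void @extern_nohint(i32 9) "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z"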