Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -19,7 +19,9 @@ namespace llvm { class AMDGPUTargetLowering; +class GCNSubtarget; class MachineInstrBuilder; +class SIMachineFunctionInfo; class AMDGPUCallLowering final : public CallLowering { void lowerParameterPtr(Register DstReg, MachineIRBuilder &B, Type *ParamTy, @@ -54,6 +56,29 @@ SmallVectorImpl> &ArgRegs, CallLoweringInfo &Info) const; + bool + doCallerAndCalleePassArgsTheSameWay(CallLoweringInfo &Info, + MachineFunction &MF, + SmallVectorImpl &InArgs) const; + + bool + areCalleeOutgoingArgsTailCallable(CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &OutArgs) const; + + /// Returns true if the call can be lowered as a tail call. + bool + isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder, + CallLoweringInfo &Info, + SmallVectorImpl &InArgs, + SmallVectorImpl &OutArgs) const; + + void handleImplicitCallArguments( + MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, + const GCNSubtarget &ST, const SIMachineFunctionInfo &MFI, + ArrayRef> ImplicitArgRegs) const; + + bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &OutArgs) const; bool lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const override; Index: llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -202,7 +202,11 @@ const LLT S32 = LLT::scalar(32); if (IsTailCall) { - llvm_unreachable("implement me"); + Offset += FPDiff; + int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true); + auto FIReg = MIRBuilder.buildFrameIndex(PtrTy, FI); + MPO = MachinePointerInfo::getFixedStack(MF, FI); + return FIReg.getReg(0); } const 
SIMachineFunctionInfo *MFI = MF.getInfo(); @@ -721,6 +725,8 @@ if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler)) return false; + uint64_t StackOffset = Handler.StackUsed; + if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) { // Special inputs come after user arguments. TLI.allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info); @@ -735,6 +741,12 @@ TLI.allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info); } + // When we tail call, we need to check if the callee's arguments will fit on + // the caller's stack. So, whenever we lower formal arguments, we should keep + // track of this information, since we might lower a tail call in this + // function later. + Info->setBytesInStackArgArea(StackOffset); + // Move back to the end of the basic block. B.setMBB(MBB); @@ -897,7 +909,7 @@ static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall) { - return AMDGPU::SI_CALL; + return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::SI_CALL; } // Add operands to call instruction to track the callee. @@ -921,6 +933,365 @@ return true; } +bool AMDGPUCallLowering::doCallerAndCalleePassArgsTheSameWay( + CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &InArgs) const { + const Function &CallerF = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = CallerF.getCallingConv(); + + // If the calling conventions match, then everything must be the same. + if (CalleeCC == CallerCC) + return true; + + // Check if the caller and callee will handle arguments in the same way. 
+ const SITargetLowering &TLI = *getTLI(); + CCAssignFn *CalleeAssignFnFixed; + CCAssignFn *CalleeAssignFnVarArg; + std::tie(CalleeAssignFnFixed, CalleeAssignFnVarArg) = + getAssignFnsForCC(CalleeCC, TLI); + + CCAssignFn *CallerAssignFnFixed; + CCAssignFn *CallerAssignFnVarArg; + std::tie(CallerAssignFnFixed, CallerAssignFnVarArg) = + getAssignFnsForCC(CallerCC, TLI); + + if (!resultsCompatible(Info, MF, InArgs, *CalleeAssignFnFixed, + *CalleeAssignFnVarArg, *CallerAssignFnFixed, + *CallerAssignFnVarArg)) + return false; + + const GCNSubtarget &ST = MF.getSubtarget(); + + // Make sure that the caller and callee preserve all of the same registers. + auto TRI = ST.getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); + return TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved); +} + +bool AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable( + CallLoweringInfo &Info, MachineFunction &MF, + SmallVectorImpl &OutArgs) const { + // If there are no outgoing arguments, then we are done. + if (OutArgs.empty()) + return true; + + const Function &CallerF = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = CallerF.getCallingConv(); + const SITargetLowering &TLI = *getTLI(); + + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + + // We have outgoing arguments. Make sure that we can tail call with them. + SmallVector OutLocs; + CCState OutInfo(CalleeCC, false, MF, OutLocs, CallerF.getContext()); + + if (!analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg)) { + LLVM_DEBUG(dbgs() << "... Could not analyze call operands.\n"); + return false; + } + + // Make sure that they can fit on the caller's stack. 
+ const SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) { + LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n"); + return false; + } + + // Verify that the parameters in callee-saved registers match. + // TODO: Port this over to CallLowering as general code once swiftself is + // supported. + const GCNSubtarget &ST = MF.getSubtarget(); + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const uint32_t *CallerPreservedMask = TRI->getCallPreservedMask(MF, CallerCC); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (unsigned i = 0; i < OutLocs.size(); ++i) { + auto &ArgLoc = OutLocs[i]; + // If it's not a register, it's fine. + if (!ArgLoc.isRegLoc()) { + if (Info.IsVarArg) { + // Be conservative and disallow variadic memory operands to match SDAG's + // behaviour. + // FIXME: If the caller's calling convention is C, then we can + // potentially use its argument area. However, for cases like fastcc, + // we can't do anything. + LLVM_DEBUG( + dbgs() + << "... Cannot tail call vararg function with stack arguments\n"); + return false; + } + continue; + } + + Register Reg = ArgLoc.getLocReg(); + + // Only look at callee-saved registers. + if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) + continue; + + LLVM_DEBUG( + dbgs() + << "... Call has an argument passed in a callee-saved register.\n"); + + // Check if it was copied from. + ArgInfo &OutInfo = OutArgs[i]; + + if (OutInfo.Regs.size() > 1) { + LLVM_DEBUG( + dbgs() << "... Cannot handle arguments in multiple registers.\n"); + return false; + } + + // Check if we copy the register, walking through copies from virtual + // registers. Note that getDefIgnoringCopies does not ignore copies from + // physical registers. + MachineInstr *RegDef = getDefIgnoringCopies(OutInfo.Regs[0], MRI); + if (!RegDef || RegDef->getOpcode() != TargetOpcode::COPY) { + LLVM_DEBUG( + dbgs() + << "... 
Parameter was not copied into a VReg, cannot tail call.\n"); + return false; + } + + // Got a copy. Verify that it's the same as the register we want. + Register CopyRHS = RegDef->getOperand(1).getReg(); + if (CopyRHS != Reg) { + LLVM_DEBUG(dbgs() << "... Callee-saved register was not copied into " + "VReg, cannot tail call.\n"); + return false; + } + } + + return true; +} + +/// Return true if the calling convention is one that we can guarantee TCO for. +static bool canGuaranteeTCO(CallingConv::ID CC) { + return CC == CallingConv::Fast; +} + +/// Return true if we might ever do TCO for calls with this calling convention. +static bool mayTailCallThisCC(CallingConv::ID CC) { + switch (CC) { + case CallingConv::C: + case CallingConv::AMDGPU_Gfx: + return true; + default: + return canGuaranteeTCO(CC); + } +} + +bool AMDGPUCallLowering::isEligibleForTailCallOptimization( + MachineIRBuilder &B, CallLoweringInfo &Info, + SmallVectorImpl &InArgs, + SmallVectorImpl &OutArgs) const { + // Must pass all target-independent checks in order to tail call optimize. + if (!Info.IsTailCall) + return false; + + MachineFunction &MF = B.getMF(); + const Function &CallerF = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = CallerF.getCallingConv(); + + const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); + // Kernels aren't callable, and don't have a live in return address so it + // doesn't make sense to do a tail call with entry functions. + if (!CallerPreserved) + return false; + + if (!mayTailCallThisCC(CalleeCC)) { + LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n"); + return false; + } + + if (any_of(CallerF.args(), [](const Argument &A) { + return A.hasByValAttr() || A.hasSwiftErrorAttr(); + })) { + LLVM_DEBUG(dbgs() << "... 
Cannot tail call from callers with byval " + "or swifterror arguments\n"); + return false; + } + + // If we have -tailcallopt, then we're done. + if (MF.getTarget().Options.GuaranteedTailCallOpt) + return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv(); + + // Verify that the incoming and outgoing arguments from the callee are + // safe to tail call. + if (!doCallerAndCalleePassArgsTheSameWay(Info, MF, InArgs)) { + LLVM_DEBUG( + dbgs() + << "... Caller and callee have incompatible calling conventions.\n"); + return false; + } + + if (!areCalleeOutgoingArgsTailCallable(Info, MF, OutArgs)) + return false; + + LLVM_DEBUG( + dbgs() << "... Call is eligible for tail call optimization.\n"); + return true; +} + +// Insert outgoing implicit arguments for a call, by inserting copies to the +// implicit argument registers and adding the necessary implicit uses to the +// call instruction. +void AMDGPUCallLowering::handleImplicitCallArguments( + MachineIRBuilder &MIRBuilder, MachineInstrBuilder &CallInst, + const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo, + ArrayRef> ImplicitArgRegs) const { + if (!ST.enableFlatScratch()) { + // Insert copies for the SRD. In the HSA case, this should be an identity + // copy. 
+ auto ScratchRSrcReg = + MIRBuilder.buildCopy(LLT::vector(4, 32), FuncInfo.getScratchRSrcReg()); + MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg); + CallInst.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit); + } + + for (std::pair ArgReg : ImplicitArgRegs) { + MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second); + CallInst.addReg(ArgReg.first, RegState::Implicit); + } +} + +bool AMDGPUCallLowering::lowerTailCall( + MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info, + SmallVectorImpl &OutArgs) const { + MachineFunction &MF = MIRBuilder.getMF(); + const GCNSubtarget &ST = MF.getSubtarget(); + SIMachineFunctionInfo *FuncInfo = MF.getInfo(); + const Function &F = MF.getFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const SITargetLowering &TLI = *getTLI(); + + // True when we're tail calling, but without -tailcallopt. + bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt; + + // Find out which ABI gets to decide where things go. + CallingConv::ID CalleeCC = Info.CallConv; + CCAssignFn *AssignFnFixed; + CCAssignFn *AssignFnVarArg; + std::tie(AssignFnFixed, AssignFnVarArg) = getAssignFnsForCC(CalleeCC, TLI); + + MachineInstrBuilder CallSeqStart; + if (!IsSibCall) + CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP); + + unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true); + auto MIB = MIRBuilder.buildInstrNoInsert(Opc); + if (!addCallTargetOperands(MIB, MIRBuilder, Info)) + return false; + + // Byte offset for the tail call. When we are sibcalling, this will always + // be 0. + MIB.addImm(0); + + // Tell the call which registers are clobbered. + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC); + MIB.addRegMask(Mask); + + // FPDiff is the byte offset of the call's argument area from the callee's. + // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. 
In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. + int FPDiff = 0; + + // This will be 0 for sibcalls, potentially nonzero for tail calls produced + // by -tailcallopt. For sibcalls, the memory operands for the call are + // already available in the caller's incoming argument space. + unsigned NumBytes = 0; + if (!IsSibCall) { + // We aren't sibcalling, so we need to compute FPDiff. We need to do this + // before handling assignments, because FPDiff must be known for memory + // arguments. + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + SmallVector OutLocs; + CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext()); + analyzeArgInfo(OutInfo, OutArgs, *AssignFnFixed, *AssignFnVarArg); + + // The callee will pop the argument stack as a tail call. Thus, we must + // keep it 16-byte aligned. + NumBytes = alignTo(OutInfo.getNextStackOffset(), 16); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // actually shrink the stack. + FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. + assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + } + + SmallVector ArgLocs; + CCState CCInfo(Info.CallConv, Info.IsVarArg, MF, ArgLocs, F.getContext()); + + // We could pass MIB and directly add the implicit uses to the call + // now. However, as an aesthetic choice, place implicit argument operands + // after the ordinary user argument registers. 
+ SmallVector, 12> ImplicitArgRegs; + + if (AMDGPUTargetMachine::EnableFixedFunctionABI && + Info.CallConv != CallingConv::AMDGPU_Gfx) { + // With a fixed ABI, allocate fixed registers before user arguments. + if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info)) + return false; + } + + // Do the actual argument marshalling. + AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed, + AssignFnVarArg, true, FPDiff); + if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, OutArgs, Handler)) + return false; + + handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs); + + // If we have -tailcallopt, we need to adjust the stack. We'll do the call + // sequence start and end here. + if (!IsSibCall) { + MIB->getOperand(1).setImm(FPDiff); + CallSeqStart.addImm(NumBytes).addImm(0); + // End the call sequence *before* emitting the call. Normally, we would + // tidy the frame up after the call. However, here, we've laid out the + // parameters so that when SP is reset, they will be in the correct + // location. + MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKDOWN).addImm(NumBytes).addImm(0); + } + + // Now we can add the actual call instruction to the correct basic block. + MIRBuilder.insertInstr(MIB); + + // If Callee is a reg, since it is used by a target specific + // instruction, it must have a register class matching the + // constraint of that instruction. + + // FIXME: We should define regbankselectable call instructions to handle + // divergent call targets. 
+ if (MIB->getOperand(0).isReg()) { + MIB->getOperand(0).setReg(constrainOperandRegClass( + MF, *TRI, MRI, *ST.getInstrInfo(), + *ST.getRegBankInfo(), *MIB, MIB->getDesc(), MIB->getOperand(0), + 0)); + } + + MF.getFrameInfo().setHasTailCall(); + Info.LoweredTailCall = true; + return true; +} + bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const { if (Info.IsVarArg) { @@ -958,7 +1329,8 @@ splitToValueTypes(Info.OrigRet, InArgs, DL, Info.CallConv); // If we can lower as a tail call, do that instead. - bool CanTailCallOpt = false; + bool CanTailCallOpt = + isEligibleForTailCallOptimization(MIRBuilder, Info, InArgs, OutArgs); // We must emit a tail call if we have musttail. if (Info.IsMustTailCall && !CanTailCallOpt) { @@ -966,6 +1338,9 @@ return false; } + if (CanTailCallOpt) + return lowerTailCall(MIRBuilder, Info, OutArgs); + // Find out which ABI gets to decide where things go. CCAssignFn *AssignFnFixed; CCAssignFn *AssignFnVarArg; @@ -1014,19 +1389,7 @@ const SIMachineFunctionInfo *MFI = MF.getInfo(); - if (!ST.enableFlatScratch()) { - // Insert copies for the SRD. In the HSA case, this should be an identity - // copy. - auto ScratchRSrcReg = MIRBuilder.buildCopy(LLT::vector(4, 32), - MFI->getScratchRSrcReg()); - MIRBuilder.buildCopy(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg); - MIB.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Implicit); - } - - for (std::pair ArgReg : ImplicitArgRegs) { - MIRBuilder.buildCopy((Register)ArgReg.first, ArgReg.second); - MIB.addReg(ArgReg.first, RegState::Implicit); - } + handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs); // Get a count of how many bytes are to be pushed on the stack. 
unsigned NumBytes = CCInfo.getNextStackOffset(); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -0,0 +1,1509 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; This is a copy of sibling-call.ll, but stops after the IRTranslator. + +define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { + ; GCN-LABEL: name: i32_fastcc_i32_i32 + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; GCN: $vgpr0 = COPY [[ADD]](s32) + ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %add0 = add i32 %arg0, %arg1 + ret i32 %add0 +} + +define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { + ; GCN-LABEL: name: i32_fastcc_i32_i32_stack_object + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; GCN: $vgpr0 
= COPY [[ADD]](s32) + ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 + store volatile i32 9, i32 addrspace(5)* %gep + %add0 = add i32 %arg0, %arg1 + ret i32 %add0 +} + +define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32 + ; GCN: frameInfo: + ; GCN: hasCalls: false + ; GCN: hasTailCall: true +; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = 
COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) + ; GCN: $sgpr12 = COPY [[COPY16]](s32) + ; GCN: $sgpr13 = COPY [[COPY17]](s32) + ; GCN: $sgpr14 = COPY [[COPY18]](s32) + ; GCN: $vgpr31 = COPY [[COPY19]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) + ret i32 %ret +} + +define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GCN: 
[[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) + ; GCN: $sgpr12 = COPY [[COPY16]](s32) + ; GCN: $sgpr13 = COPY [[COPY17]](s32) + ; GCN: $sgpr14 = COPY [[COPY18]](s32) + ; GCN: $vgpr31 = COPY [[COPY19]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 + store volatile i32 9, i32 addrspace(5)* %gep + %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) + ret i32 %ret +} + +define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object + ; GCN: bb.1.entry: + ; 
GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN: $sgpr10_sgpr11 = COPY 
[[COPY15]](s64) + ; GCN: $sgpr12 = COPY [[COPY16]](s32) + ; GCN: $sgpr13 = COPY [[COPY17]](s32) + ; GCN: $sgpr14 = COPY [[COPY18]](s32) + ; GCN: $vgpr31 = COPY [[COPY19]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 + store volatile i32 9, i32 addrspace(5)* %gep + %ret = tail call fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b) + ret i32 %ret +} + +define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: 
[[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) + ; GCN: $sgpr12 = COPY [[COPY16]](s32) + ; GCN: $sgpr13 = COPY [[COPY17]](s32) + ; GCN: $sgpr14 = COPY [[COPY18]](s32) + ; GCN: $vgpr31 = COPY [[COPY19]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) + ret void +} + +; It doesn't make sense to do a tail from a kernel +define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 { + ; GCN-LABEL: name: kernel_call_i32_fastcc_i32_i32_unused_result + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2 + ; GCN: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr16 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr15 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + 
; GCN: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GCN: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) + ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load 8 from %ir.0, align 16, addrspace 4) + ; GCN: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32) + ; GCN: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32) + ; GCN: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64) + ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 + ; GCN: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) + ; GCN: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; GCN: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) + ; GCN: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) + ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GCN: $vgpr0 = COPY [[EVEC]](s32) + ; GCN: $vgpr1 = COPY [[EVEC1]](s32) + ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN: $sgpr12 = COPY [[COPY14]](s32) + ; GCN: $sgpr13 = COPY [[COPY15]](s32) + ; GCN: $sgpr14 = COPY [[COPY16]](s32) + ; GCN: $vgpr31 = COPY [[OR1]](s32) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; GCN: S_ENDPGM 0 +entry: + %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) + ret void +} + +define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 { + ; GCN-LABEL: name: i32_fastcc_i32_byval_i32 + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $vgpr0, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; GCN: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 4 from %ir.arg1, addrspace 5) + ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] + ; GCN: $vgpr0 = COPY [[ADD]](s32) + ; GCN: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 + %add0 = add i32 %arg0, %arg1.load + ret i32 %add0 +} + +; Tail call disallowed with byval in parent. 
+define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: [[COPY9:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: [[COPY20:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C]](s32) + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: G_MEMCPY [[PTR_ADD]](p5), [[COPY9]](p5), [[C1]](s32), 0 :: (dereferenceable store 4 into stack, addrspace 5), 
(dereferenceable load 4 from %ir.b.byval, addrspace 5) + ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) + ; GCN: $sgpr12 = COPY [[COPY16]](s32) + ; GCN: $sgpr13 = COPY [[COPY17]](s32) + ; GCN: $sgpr14 = COPY [[COPY18]](s32) + ; GCN: $vgpr31 = COPY [[COPY19]](s32) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: ADJCALLSTACKDOWN 0, 4, implicit-def $scc + ; GCN: $vgpr0 = COPY [[COPY22]](s32) + ; GCN: [[COPY23:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY11]] + ; GCN: S_SETPC_B64_return [[COPY23]], implicit $vgpr0 +entry: + %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval) + ret i32 %ret +} + +; Tail call is disallowed with byval in the parent, but not with byval +; only in the callee. The stack usage of the outgoing (callee) arguments +; must be <= the stack area of the incoming (caller) arguments. 
+define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32 + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: 
[[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) + ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 + ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store 4 into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load 4 from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, 
addrspace 5) + ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN: $sgpr12 = COPY [[COPY44]](s32) + ; GCN: $sgpr13 = COPY [[COPY45]](s32) + ; GCN: $sgpr14 = COPY [[COPY46]](s32) + ; GCN: $vgpr31 = COPY [[COPY47]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*)) + ret i32 %ret +} + +define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 { + ; GCN-LABEL: name: i32_fastcc_i32_i32_a32i32 + ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 
+ ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.1, addrspace 5) + ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5) + ; GCN: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; GCN: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]] + ; GCN: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]] + ; GCN: $vgpr0 = COPY [[ADD2]](s32) + ; GCN: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; GCN: S_SETPC_B64_return [[COPY32]], implicit $vgpr0 + %val_firststack = extractvalue [32 x i32] %large, 30 + %val_laststack = 
extractvalue [32 x i32] %large, 31 + %add0 = add i32 %arg0, %arg1 + %add1 = add i32 %add0, %val_firststack + %add2 = add i32 %add1, %val_laststack + ret i32 %add2 +} + +define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32 + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY 
$vgpr16 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 + ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr2 = COPY [[COPY10]](s32) + ; GCN: $vgpr3 = COPY 
[[COPY11]](s32) + ; GCN: $vgpr4 = COPY [[COPY12]](s32) + ; GCN: $vgpr5 = COPY [[COPY13]](s32) + ; GCN: $vgpr6 = COPY [[COPY14]](s32) + ; GCN: $vgpr7 = COPY [[COPY15]](s32) + ; GCN: $vgpr8 = COPY [[COPY16]](s32) + ; GCN: $vgpr9 = COPY [[COPY17]](s32) + ; GCN: $vgpr10 = COPY [[COPY18]](s32) + ; GCN: $vgpr11 = COPY [[COPY19]](s32) + ; GCN: $vgpr12 = COPY [[COPY20]](s32) + ; GCN: $vgpr13 = COPY [[COPY21]](s32) + ; GCN: $vgpr14 = COPY [[COPY22]](s32) + ; GCN: $vgpr15 = COPY [[COPY23]](s32) + ; GCN: $vgpr16 = COPY [[COPY24]](s32) + ; GCN: $vgpr17 = COPY [[COPY25]](s32) + ; GCN: $vgpr18 = COPY [[COPY26]](s32) + ; GCN: $vgpr19 = COPY [[COPY27]](s32) + ; GCN: $vgpr20 = COPY [[COPY28]](s32) + ; GCN: $vgpr21 = COPY [[COPY29]](s32) + ; GCN: $vgpr22 = COPY [[COPY30]](s32) + ; GCN: $vgpr23 = COPY [[COPY31]](s32) + ; GCN: $vgpr24 = COPY [[COPY32]](s32) + ; GCN: $vgpr25 = COPY [[COPY33]](s32) + ; GCN: $vgpr26 = COPY [[COPY34]](s32) + ; GCN: $vgpr27 = COPY [[COPY35]](s32) + ; GCN: $vgpr28 = COPY [[COPY36]](s32) + ; GCN: $vgpr29 = COPY [[COPY37]](s32) + ; GCN: $vgpr30 = COPY [[COPY38]](s32) + ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX4]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX5]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN: $sgpr12 = COPY [[COPY44]](s32) + ; GCN: $sgpr13 = COPY 
[[COPY45]](s32) + ; GCN: $sgpr14 = COPY [[COPY46]](s32) + ; GCN: $vgpr31 = COPY [[COPY47]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) + ret i32 %ret +} + +define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = 
COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + 
; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 + ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr2 = COPY [[COPY10]](s32) + ; GCN: $vgpr3 = COPY [[COPY11]](s32) + ; GCN: $vgpr4 = COPY [[COPY12]](s32) + ; GCN: $vgpr5 = COPY [[COPY13]](s32) + ; GCN: $vgpr6 = COPY [[COPY14]](s32) + ; GCN: $vgpr7 = COPY [[COPY15]](s32) + ; GCN: $vgpr8 = COPY [[COPY16]](s32) + ; GCN: $vgpr9 = COPY [[COPY17]](s32) + ; GCN: $vgpr10 = COPY [[COPY18]](s32) + ; GCN: $vgpr11 = COPY [[COPY19]](s32) + ; GCN: $vgpr12 = COPY [[COPY20]](s32) + ; GCN: $vgpr13 = COPY [[COPY21]](s32) + ; GCN: $vgpr14 = COPY [[COPY22]](s32) + ; GCN: $vgpr15 = COPY [[COPY23]](s32) + ; GCN: $vgpr16 = COPY [[COPY24]](s32) + ; GCN: $vgpr17 = COPY [[COPY25]](s32) + ; GCN: $vgpr18 = COPY [[COPY26]](s32) + ; GCN: $vgpr19 = COPY [[COPY27]](s32) + ; GCN: $vgpr20 = COPY [[COPY28]](s32) + ; GCN: $vgpr21 = COPY [[COPY29]](s32) + ; GCN: $vgpr22 = COPY 
[[COPY30]](s32) + ; GCN: $vgpr23 = COPY [[COPY31]](s32) + ; GCN: $vgpr24 = COPY [[COPY32]](s32) + ; GCN: $vgpr25 = COPY [[COPY33]](s32) + ; GCN: $vgpr26 = COPY [[COPY34]](s32) + ; GCN: $vgpr27 = COPY [[COPY35]](s32) + ; GCN: $vgpr28 = COPY [[COPY36]](s32) + ; GCN: $vgpr29 = COPY [[COPY37]](s32) + ; GCN: $vgpr30 = COPY [[COPY38]](s32) + ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN: $sgpr12 = COPY [[COPY44]](s32) + ; GCN: $sgpr13 = COPY [[COPY45]](s32) + ; GCN: $sgpr14 = COPY [[COPY46]](s32) + ; GCN: $vgpr31 = COPY [[COPY47]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit 
$sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 + store volatile i32 9, i32 addrspace(5)* %gep + %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) + ret i32 %ret +} + +; If the callee requires more stack argument space than the caller, +; don't do a tail call. +; TODO: Do we really need this restriction? +define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { + ; GCN-LABEL: name: no_sibling_call_callee_more_stack_space + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 + ; GCN: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY 
[[COPY2]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr2 = COPY [[C]](s32) + ; GCN: $vgpr3 = COPY [[C]](s32) + ; GCN: $vgpr4 = COPY [[C]](s32) + ; GCN: $vgpr5 = COPY [[C]](s32) + ; GCN: $vgpr6 = COPY [[C]](s32) + ; GCN: $vgpr7 = COPY [[C]](s32) + ; GCN: $vgpr8 = COPY [[C]](s32) + ; GCN: $vgpr9 = COPY [[C]](s32) + ; GCN: $vgpr10 = COPY [[C]](s32) + ; GCN: $vgpr11 = COPY [[C]](s32) + ; GCN: $vgpr12 = COPY [[C]](s32) + ; GCN: $vgpr13 = COPY [[C]](s32) + ; GCN: $vgpr14 = COPY [[C]](s32) + ; GCN: $vgpr15 = COPY [[C]](s32) + ; GCN: $vgpr16 = COPY [[C]](s32) + ; GCN: $vgpr17 = COPY [[C]](s32) + ; GCN: $vgpr18 = COPY [[C]](s32) + ; GCN: $vgpr19 = COPY [[C]](s32) + ; GCN: $vgpr20 = COPY [[C]](s32) + ; GCN: $vgpr21 = COPY [[C]](s32) + ; GCN: $vgpr22 = COPY [[C]](s32) + ; GCN: $vgpr23 = COPY [[C]](s32) + ; GCN: $vgpr24 = COPY [[C]](s32) + ; GCN: $vgpr25 = COPY [[C]](s32) + ; GCN: $vgpr26 = COPY [[C]](s32) + ; GCN: $vgpr27 = COPY [[C]](s32) + ; GCN: $vgpr28 = COPY [[C]](s32) + ; GCN: $vgpr29 = COPY [[C]](s32) + ; GCN: $vgpr30 = COPY [[C]](s32) + ; GCN: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C2]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store 4 into stack + 4, addrspace 5) + ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store 4 into stack + 8, align 8, addrspace 5) + ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = 
COPY [[COPY20]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN: $sgpr12 = COPY [[COPY15]](s32) + ; GCN: $sgpr13 = COPY [[COPY16]](s32) + ; GCN: $sgpr14 = COPY [[COPY17]](s32) + ; GCN: $vgpr31 = COPY [[COPY18]](s32) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: ADJCALLSTACKDOWN 0, 12, implicit-def $scc + ; GCN: $vgpr0 = COPY [[COPY21]](s32) + ; GCN: [[COPY22:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; GCN: S_SETPC_B64_return [[COPY22]], implicit $vgpr0 +entry: + %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) + ret i32 %ret +} + +; Have another non-tail call in the function +define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { + ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: 
[[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 + ; GCN: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY15]](s64) + ; GCN: $sgpr12 = COPY [[COPY16]](s32) + ; GCN: $sgpr13 = COPY [[COPY17]](s32) + ; GCN: $sgpr14 = COPY [[COPY18]](s32) + ; GCN: $vgpr31 = COPY [[COPY19]](s32) + ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, 
implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; GCN: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 + ; GCN: [[COPY22:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY23:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY24:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY25:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr2 = COPY [[COPY21]](s32) + ; GCN: [[COPY30:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY30]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY22]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY23]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY24]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY25]](s64) + ; GCN: $sgpr12 = COPY [[COPY26]](s32) + ; GCN: $sgpr13 = COPY [[COPY27]](s32) + ; GCN: $sgpr14 = COPY [[COPY28]](s32) + ; GCN: $vgpr31 = COPY [[COPY29]](s32) + ; GCN: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) + %ret = tail call fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %other.call) + ret i32 %ret +} + +; Have a stack object in the caller and stack-passed arguments. SP should be +; in the same place at function exit.
+define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { + ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32 + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = 
COPY $vgpr19 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 + ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: 
[[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr2 = COPY [[COPY10]](s32) + ; GCN: $vgpr3 = COPY [[COPY11]](s32) + ; GCN: $vgpr4 = COPY [[COPY12]](s32) + ; GCN: $vgpr5 = COPY [[COPY13]](s32) + ; GCN: $vgpr6 = COPY [[COPY14]](s32) + ; GCN: $vgpr7 = COPY [[COPY15]](s32) + ; GCN: $vgpr8 = COPY [[COPY16]](s32) + ; GCN: $vgpr9 = COPY [[COPY17]](s32) + ; GCN: $vgpr10 = COPY [[COPY18]](s32) + ; GCN: $vgpr11 = COPY [[COPY19]](s32) + ; GCN: $vgpr12 = COPY [[COPY20]](s32) + ; GCN: $vgpr13 = COPY [[COPY21]](s32) + ; GCN: $vgpr14 = COPY [[COPY22]](s32) + ; GCN: $vgpr15 = COPY [[COPY23]](s32) + ; GCN: $vgpr16 = COPY [[COPY24]](s32) + ; GCN: $vgpr17 = COPY [[COPY25]](s32) + ; GCN: $vgpr18 = COPY [[COPY26]](s32) + ; GCN: $vgpr19 = COPY [[COPY27]](s32) + ; GCN: $vgpr20 = COPY [[COPY28]](s32) + ; GCN: $vgpr21 = COPY [[COPY29]](s32) + ; GCN: $vgpr22 = COPY [[COPY30]](s32) + ; GCN: $vgpr23 = COPY [[COPY31]](s32) + ; GCN: $vgpr24 = COPY [[COPY32]](s32) + ; GCN: $vgpr25 = COPY [[COPY33]](s32) + ; GCN: $vgpr26 = COPY [[COPY34]](s32) + ; GCN: $vgpr27 = COPY [[COPY35]](s32) + ; GCN: $vgpr28 = COPY [[COPY36]](s32) + ; GCN: $vgpr29 = COPY [[COPY37]](s32) + ; GCN: $vgpr30 = COPY [[COPY38]](s32) + ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: G_STORE [[LOAD1]](s32), [[FRAME_INDEX5]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: G_STORE [[LOAD2]](s32), [[FRAME_INDEX6]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN: 
$sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN: $sgpr12 = COPY [[COPY44]](s32) + ; GCN: $sgpr13 = COPY [[COPY45]](s32) + ; GCN: $sgpr14 = COPY [[COPY46]](s32) + ; GCN: $vgpr31 = COPY [[COPY47]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 + store volatile i32 9, i32 addrspace(5)* %gep + %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) + ret i32 %ret +} + +define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 { + ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, 
$sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: 
[[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.9, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) + ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.7, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.5, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.3, align 8, addrspace 5) + ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca + ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store 4 into %ir.gep, addrspace 5) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 + ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: 
[[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: $vgpr1 = COPY [[COPY9]](s32) + ; GCN: $vgpr2 = COPY [[C1]](s32) + ; GCN: $vgpr3 = COPY [[C1]](s32) + ; GCN: $vgpr4 = COPY [[C1]](s32) + ; GCN: $vgpr5 = COPY [[C1]](s32) + ; GCN: $vgpr6 = COPY [[C1]](s32) + ; GCN: $vgpr7 = COPY [[C1]](s32) + ; GCN: $vgpr8 = COPY [[C1]](s32) + ; GCN: $vgpr9 = COPY [[C1]](s32) + ; GCN: $vgpr10 = COPY [[C1]](s32) + ; GCN: $vgpr11 = COPY [[C1]](s32) + ; GCN: $vgpr12 = COPY [[C1]](s32) + ; GCN: $vgpr13 = COPY [[C1]](s32) + ; GCN: $vgpr14 = COPY [[C1]](s32) + ; GCN: $vgpr15 = COPY [[C1]](s32) + ; GCN: $vgpr16 = COPY [[C1]](s32) + ; GCN: $vgpr17 = COPY [[C1]](s32) + ; GCN: $vgpr18 = COPY [[C1]](s32) + ; GCN: $vgpr19 = COPY [[C1]](s32) + ; GCN: $vgpr20 = COPY [[C1]](s32) + ; GCN: $vgpr21 = COPY [[C1]](s32) + ; GCN: $vgpr22 = COPY [[C1]](s32) + ; GCN: $vgpr23 = COPY [[C1]](s32) + ; GCN: $vgpr24 = COPY [[C1]](s32) + ; GCN: $vgpr25 = COPY [[C1]](s32) + ; GCN: $vgpr26 = COPY [[C1]](s32) + ; GCN: $vgpr27 = COPY [[C1]](s32) + ; GCN: $vgpr28 = COPY [[C1]](s32) + ; GCN: $vgpr29 = COPY [[C1]](s32) + ; GCN: $vgpr30 = COPY [[C1]](s32) + ; GCN: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store 4 into %fixed-stack.2, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX9]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX10]](p5) :: (store 4 into %fixed-stack.0, align 8, addrspace 5) + ; 
GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN: $sgpr12 = COPY [[COPY44]](s32) + ; GCN: $sgpr13 = COPY [[COPY45]](s32) + ; GCN: $sgpr14 = COPY [[COPY46]](s32) + ; GCN: $vgpr31 = COPY [[COPY47]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %alloca = alloca [16 x i32], align 4, addrspace(5) + %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 + store volatile i32 9, i32 addrspace(5)* %gep + %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) + ret i32 %ret +} + +declare hidden void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byval([3 x i32]) align 16, [2 x i64] addrspace(5)* byval([2 x i64])) + +define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { + ; GCN-LABEL: name: sibling_call_fastcc_multi_byval + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: 
[[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.35, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.34, addrspace 5) + ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33 + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.33, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 + ; GCN: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.32, addrspace 5) + ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 + ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.31, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 + ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.30, addrspace 5) + ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 + ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.29, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 + ; GCN: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.28, addrspace 5) + ; GCN: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 + ; GCN: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) 
:: (invariant load 4 from %fixed-stack.27, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 + ; GCN: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.26, addrspace 5) + ; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 + ; GCN: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.25, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 + ; GCN: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.24, addrspace 5) + ; GCN: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 + ; GCN: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.23, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 + ; GCN: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.22, addrspace 5) + ; GCN: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 + ; GCN: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.21, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 + ; GCN: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.20, addrspace 5) + ; GCN: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 + ; GCN: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.19, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 + ; GCN: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.18, addrspace 5) + ; GCN: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 + ; GCN: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: 
(invariant load 4 from %fixed-stack.17, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 + ; GCN: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.16, addrspace 5) + ; GCN: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; GCN: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.15, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; GCN: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.14, addrspace 5) + ; GCN: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; GCN: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.13, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; GCN: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.12, addrspace 5) + ; GCN: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; GCN: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.11, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; GCN: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.10, addrspace 5) + ; GCN: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; GCN: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.9, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; GCN: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.8, addrspace 5) + ; GCN: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; GCN: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: 
(invariant load 4 from %fixed-stack.7, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; GCN: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.6, addrspace 5) + ; GCN: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; GCN: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.5, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; GCN: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.4, addrspace 5) + ; GCN: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; GCN: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load 4 from %fixed-stack.3, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load 4 from %fixed-stack.2, addrspace 5) + ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GCN: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0 + ; GCN: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.alloca1 + ; GCN: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store 4 into %ir.alloca0, addrspace 5) + ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store 4 into %ir.alloca0 + 4, addrspace 5) + ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store 4 into %ir.alloca0 + 8, addrspace 5) + ; GCN: G_STORE [[C1]](s64), [[FRAME_INDEX35]](p5) :: (store 8 into %ir.alloca1, addrspace 5) + ; GCN: 
[[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) + ; GCN: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store 8 into %ir.alloca1 + 8, addrspace 5) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval + ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: $vgpr0 = COPY [[COPY8]](s32) + ; GCN: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GCN: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store 12 into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca0, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store 16 into %fixed-stack.0, addrspace 5), (dereferenceable load 16 from %ir.alloca1, align 8, addrspace 5) + ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN: $sgpr12 = COPY [[COPY44]](s32) + ; GCN: $sgpr13 = COPY [[COPY45]](s32) + ; GCN: $sgpr14 = COPY [[COPY46]](s32) + ; GCN: $vgpr31 = COPY [[COPY47]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
$sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %alloca0 = alloca [3 x i32], align 16, addrspace(5) + %alloca1 = alloca [2 x i64], align 8, addrspace(5) + store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca0 + store [2 x i64] zeroinitializer, [2 x i64] addrspace(5)* %alloca1 + tail call fastcc void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byval([3 x i32]) %alloca0, [2 x i64] addrspace(5)* byval([2 x i64]) %alloca1) + ret void +} + +declare hidden void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* byval([3 x i32]) align 16, [32 x i32], i32) + +; Callee has both a byval and a non-byval stack-passed argument +define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { + ; GCN-LABEL: name: sibling_call_byval_and_stack_passed + ; GCN: bb.1.entry: + ; GCN: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; GCN: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; GCN: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; GCN: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; GCN: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GCN: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GCN: [[COPY12:%[0-9]+]]:_(s32) =
COPY $vgpr4 + ; GCN: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GCN: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GCN: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GCN: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GCN: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GCN: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GCN: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GCN: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GCN: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GCN: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GCN: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GCN: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GCN: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GCN: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GCN: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GCN: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GCN: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GCN: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GCN: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GCN: [[COPY32:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; GCN: [[COPY33:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; GCN: [[COPY34:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; GCN: [[COPY35:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; GCN: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; GCN: [[COPY37:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; GCN: [[COPY38:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; GCN: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.36 + ; GCN: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.36, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.35 + ; GCN: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.35, addrspace 5) + ; GCN: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.34 + ; GCN: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load 4 from %fixed-stack.34, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.33 + ; GCN: 
[[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load 4 from %fixed-stack.33, addrspace 5) + ; GCN: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.32 + ; GCN: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load 4 from %fixed-stack.32, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX5:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.31 + ; GCN: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load 4 from %fixed-stack.31, addrspace 5) + ; GCN: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.30 + ; GCN: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load 4 from %fixed-stack.30, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.29 + ; GCN: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p5) :: (invariant load 4 from %fixed-stack.29, addrspace 5) + ; GCN: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.28 + ; GCN: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load 4 from %fixed-stack.28, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX9:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.27 + ; GCN: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p5) :: (invariant load 4 from %fixed-stack.27, addrspace 5) + ; GCN: [[FRAME_INDEX10:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.26 + ; GCN: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p5) :: (invariant load 4 from %fixed-stack.26, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX11:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.25 + ; GCN: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p5) :: (invariant load 4 from %fixed-stack.25, addrspace 5) + ; GCN: [[FRAME_INDEX12:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.24 + ; GCN: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p5) :: (invariant load 4 from %fixed-stack.24, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX13:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.23 + ; GCN: 
[[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p5) :: (invariant load 4 from %fixed-stack.23, addrspace 5) + ; GCN: [[FRAME_INDEX14:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.22 + ; GCN: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p5) :: (invariant load 4 from %fixed-stack.22, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX15:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.21 + ; GCN: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p5) :: (invariant load 4 from %fixed-stack.21, addrspace 5) + ; GCN: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.20 + ; GCN: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load 4 from %fixed-stack.20, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX17:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.19 + ; GCN: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX17]](p5) :: (invariant load 4 from %fixed-stack.19, addrspace 5) + ; GCN: [[FRAME_INDEX18:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.18 + ; GCN: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX18]](p5) :: (invariant load 4 from %fixed-stack.18, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX19:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.17 + ; GCN: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX19]](p5) :: (invariant load 4 from %fixed-stack.17, addrspace 5) + ; GCN: [[FRAME_INDEX20:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.16 + ; GCN: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX20]](p5) :: (invariant load 4 from %fixed-stack.16, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX21:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.15 + ; GCN: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX21]](p5) :: (invariant load 4 from %fixed-stack.15, addrspace 5) + ; GCN: [[FRAME_INDEX22:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.14 + ; GCN: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX22]](p5) :: (invariant load 4 from %fixed-stack.14, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX23:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.13 + ; GCN: 
[[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX23]](p5) :: (invariant load 4 from %fixed-stack.13, addrspace 5) + ; GCN: [[FRAME_INDEX24:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.12 + ; GCN: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX24]](p5) :: (invariant load 4 from %fixed-stack.12, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX25:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.11 + ; GCN: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX25]](p5) :: (invariant load 4 from %fixed-stack.11, addrspace 5) + ; GCN: [[FRAME_INDEX26:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.10 + ; GCN: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX26]](p5) :: (invariant load 4 from %fixed-stack.10, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX27:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.9 + ; GCN: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX27]](p5) :: (invariant load 4 from %fixed-stack.9, addrspace 5) + ; GCN: [[FRAME_INDEX28:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.8 + ; GCN: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX28]](p5) :: (invariant load 4 from %fixed-stack.8, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX29:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.7 + ; GCN: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX29]](p5) :: (invariant load 4 from %fixed-stack.7, addrspace 5) + ; GCN: [[FRAME_INDEX30:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.6 + ; GCN: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX30]](p5) :: (invariant load 4 from %fixed-stack.6, align 8, addrspace 5) + ; GCN: [[FRAME_INDEX31:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.5 + ; GCN: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX31]](p5) :: (invariant load 4 from %fixed-stack.5, addrspace 5) + ; GCN: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.4 + ; GCN: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load 4 from %fixed-stack.4, align 16, addrspace 5) + ; GCN: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 + ; GCN: 
[[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load 4 from %fixed-stack.3, addrspace 5) + ; GCN: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca + ; GCN: G_STORE [[C]](s32), [[FRAME_INDEX34]](p5) :: (store 4 into %ir.alloca, addrspace 5) + ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; GCN: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C2]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store 4 into %ir.alloca + 4, addrspace 5) + ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) + ; GCN: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store 4 into %ir.alloca + 8, addrspace 5) + ; GCN: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed + ; GCN: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 + ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; GCN: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store 12 into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load 12 from %ir.alloca, align 16, addrspace 5) + ; GCN: $vgpr0 = COPY [[C1]](s32) + ; GCN: $vgpr1 = COPY [[C1]](s32) + ; GCN: $vgpr2 = COPY [[C1]](s32) + ; GCN: $vgpr3 = COPY [[C1]](s32) + ; GCN: $vgpr4 = COPY [[C1]](s32) + ; GCN: $vgpr5 = COPY [[C1]](s32) + ; GCN: $vgpr6 = COPY [[C1]](s32) + ; GCN: $vgpr7 = COPY 
[[C1]](s32) + ; GCN: $vgpr8 = COPY [[C1]](s32) + ; GCN: $vgpr9 = COPY [[C1]](s32) + ; GCN: $vgpr10 = COPY [[C1]](s32) + ; GCN: $vgpr11 = COPY [[C1]](s32) + ; GCN: $vgpr12 = COPY [[C1]](s32) + ; GCN: $vgpr13 = COPY [[C1]](s32) + ; GCN: $vgpr14 = COPY [[C1]](s32) + ; GCN: $vgpr15 = COPY [[C1]](s32) + ; GCN: $vgpr16 = COPY [[C1]](s32) + ; GCN: $vgpr17 = COPY [[C1]](s32) + ; GCN: $vgpr18 = COPY [[C1]](s32) + ; GCN: $vgpr19 = COPY [[C1]](s32) + ; GCN: $vgpr20 = COPY [[C1]](s32) + ; GCN: $vgpr21 = COPY [[C1]](s32) + ; GCN: $vgpr22 = COPY [[C1]](s32) + ; GCN: $vgpr23 = COPY [[C1]](s32) + ; GCN: $vgpr24 = COPY [[C1]](s32) + ; GCN: $vgpr25 = COPY [[C1]](s32) + ; GCN: $vgpr26 = COPY [[C1]](s32) + ; GCN: $vgpr27 = COPY [[C1]](s32) + ; GCN: $vgpr28 = COPY [[C1]](s32) + ; GCN: $vgpr29 = COPY [[C1]](s32) + ; GCN: $vgpr30 = COPY [[C1]](s32) + ; GCN: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 + ; GCN: G_STORE [[C1]](s32), [[FRAME_INDEX36]](p5) :: (store 4 into %fixed-stack.1, addrspace 5) + ; GCN: [[FRAME_INDEX37:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 + ; GCN: G_STORE [[COPY8]](s32), [[FRAME_INDEX37]](p5) :: (store 4 into %fixed-stack.0, align 16, addrspace 5) + ; GCN: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN: $sgpr12 = COPY [[COPY44]](s32) + ; GCN: $sgpr13 = COPY [[COPY45]](s32) + ; GCN: $sgpr14 = COPY [[COPY46]](s32) + ; GCN: $vgpr31 = COPY [[COPY47]](s32) + ; GCN: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit 
$vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 +entry: + %alloca = alloca [3 x i32], align 16, addrspace(5) + store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca + tail call fastcc void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* byval([3 x i32]) %alloca, [32 x i32] zeroinitializer, i32 %stack.out.arg) + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind noinline } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -global-isel -amdgpu-fixed-function-abi -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s + +declare hidden void @external_void_func_void() + +define void @tail_call_void_func_void() { + ; CHECK-LABEL: name: tail_call_void_func_void + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 + ; CHECK: [[COPY5:%[0-9]+]]:sgpr_64 = 
COPY $sgpr8_sgpr9 + ; CHECK: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 + ; CHECK: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; CHECK: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void + ; CHECK: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; CHECK: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK: S_SETPC_B64_return [[COPY18]] + tail call void @external_void_func_void() + ret void +} Index: llvm/test/CodeGen/AMDGPU/call-constant.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/call-constant.ll +++ llvm/test/CodeGen/AMDGPU/call-constant.ll @@ -19,7 +19,7 @@ ; SDAG: s_waitcnt ; 
SDAG-NEXT: .Lfunc_end -; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +; GISEL: s_setpc_b64 s{{\[[0-9]+:[0-9]+\]}} define i32 @test_tail_call_undef() #0 { %call = tail call i32 undef(i32 1) ret i32 %call @@ -43,7 +43,7 @@ ; SDAG: s_waitcnt ; SDAG-NEXT: .Lfunc_end -; GISEL: s_swappc_b64 s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GISEL: s_setpc_b64 s{{\[[0-9]+:[0-9]+\]$}} define i32 @test_tail_call_null() #0 { %call = tail call i32 null(i32 1) ret i32 %call Index: llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll +++ llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN -enable-var-scope %s +; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN -enable-var-scope %s ; Callee with SGPR and VGPR arguments define hidden amdgpu_gfx float @callee(float %v.arg0, float inreg %s.arg1) {