diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -593,6 +593,47 @@
   llvm_unreachable("Invalid TargetStackID::Value");
 }
 
+// Activate all lanes, returns saved exec.
+static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
+                                     MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     bool IsProlog) {
+  Register ScratchExecCopy;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  DebugLoc DL;
+
+  if (LiveRegs.empty()) {
+    if (IsProlog) {
+      LiveRegs.init(TRI);
+      LiveRegs.addLiveIns(MBB);
+      if (FuncInfo->SGPRForFPSaveRestoreCopy)
+        LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
+    } else {
+      // In epilog.
+      LiveRegs.init(*ST.getRegisterInfo());
+      LiveRegs.addLiveOuts(MBB);
+      LiveRegs.stepBackward(*MBBI);
+    }
+  }
+
+  ScratchExecCopy = findScratchNonCalleeSaveRegister(
+      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
+
+  if (!IsProlog)
+    LiveRegs.removeReg(ScratchExecCopy);
+
+  const unsigned OrSaveExec =
+      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
+
+  return ScratchExecCopy;
+}
+
 void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
@@ -621,6 +662,15 @@
   // turn on all lanes before doing the spill to memory.
   Register ScratchExecCopy;
 
+  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+  bool SpillFPToMemory = false;
+  // A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
+  // Otherwise we are spilling the FP to memory.
+  if (HasFPSaveIndex)
+    SpillFPToMemory =
+        MFI.getStackID(FuncInfo->FramePointerSaveIndex.getValue()) !=
+        TargetStackID::SGPRSpill;
+
   // Emit the copy if we need an FP, and are using a free SGPR to save it.
   if (FuncInfo->SGPRForFPSaveRestoreCopy) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
             FuncInfo->SGPRForFPSaveRestoreCopy)
@@ -636,25 +686,8 @@
     if (!Reg.FI.hasValue())
       continue;
 
-    if (!ScratchExecCopy) {
-      if (LiveRegs.empty()) {
-        LiveRegs.init(TRI);
-        LiveRegs.addLiveIns(MBB);
-        if (FuncInfo->SGPRForFPSaveRestoreCopy)
-          LiveRegs.removeReg(FuncInfo->SGPRForFPSaveRestoreCopy);
-      }
-
-      ScratchExecCopy
-        = findScratchNonCalleeSaveRegister(MRI, LiveRegs,
-                                           *TRI.getWaveMaskRegClass());
-      assert(FuncInfo->SGPRForFPSaveRestoreCopy != ScratchExecCopy);
-
-      const unsigned OrSaveExec = ST.isWave32() ?
-        AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec),
-              ScratchExecCopy)
-        .addImm(-1);
-    }
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
 
     buildPrologSpill(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
                      FuncInfo->getScratchRSrcReg(),
@@ -662,30 +695,50 @@
                      Reg.FI.getValue());
   }
 
+  if (HasFPSaveIndex && SpillFPToMemory) {
+    const int FI = FuncInfo->FramePointerSaveIndex.getValue();
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
+
+    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
+        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
+        .addReg(FramePtrReg);
+
+    buildPrologSpill(LiveRegs, MBB, MBBI, TII, TmpVGPR,
+                     FuncInfo->getScratchRSrcReg(), StackPtrReg,
+                     FuncInfo->FramePointerSaveIndex.getValue());
+  }
+
   if (ScratchExecCopy) {
     // FIXME: Split block and make terminator.
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
     MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
     BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
-      .addReg(ScratchExecCopy, RegState::Kill);
+        .addReg(ScratchExecCopy, RegState::Kill);
     LiveRegs.addReg(ScratchExecCopy);
   }
 
-  if (FuncInfo->FramePointerSaveIndex) {
+  // In this case, spill the FP to a reserved VGPR.
+  if (HasFPSaveIndex && !SpillFPToMemory) {
     const int FI = FuncInfo->FramePointerSaveIndex.getValue();
-    assert(!MFI.isDeadObjectIndex(FI) &&
-           MFI.getStackID(FI) == TargetStackID::SGPRSpill);
-    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
-      = FuncInfo->getSGPRToVGPRSpills(FI);
+    assert(!MFI.isDeadObjectIndex(FI));
+
+    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+        FuncInfo->getSGPRToVGPRSpills(FI);
     assert(Spill.size() == 1);
 
     // Save FP before setting it up.
     // FIXME: This should respect spillSGPRToVGPR;
     BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
             Spill[0].VGPR)
-      .addReg(FramePtrReg)
-      .addImm(Spill[0].Lane)
-      .addReg(Spill[0].VGPR, RegState::Undef);
+        .addReg(FramePtrReg)
+        .addImm(Spill[0].Lane)
+        .addReg(Spill[0].VGPR, RegState::Undef);
   }
 
   if (TRI.needsStackRealignment(MF)) {
@@ -706,13 +759,13 @@
     // s_add_u32 tmp_reg, s32, NumBytes
     // s_and_b32 s32, tmp_reg, 0b111...0000
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), ScratchSPReg)
-      .addReg(StackPtrReg)
-      .addImm((Alignment - 1) * ST.getWavefrontSize())
-      .setMIFlag(MachineInstr::FrameSetup);
+        .addReg(StackPtrReg)
+        .addImm((Alignment - 1) * ST.getWavefrontSize())
+        .setMIFlag(MachineInstr::FrameSetup);
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
-      .addReg(ScratchSPReg, RegState::Kill)
-      .addImm(-Alignment * ST.getWavefrontSize())
-      .setMIFlag(MachineInstr::FrameSetup);
+        .addReg(ScratchSPReg, RegState::Kill)
+        .addImm(-Alignment * ST.getWavefrontSize())
+        .setMIFlag(MachineInstr::FrameSetup);
     FuncInfo->setIsStackRealigned(true);
   } else if ((HasFP = hasFP(MF))) {
     // If we need a base pointer, set it up here. It's whatever the value of
@@ -720,15 +773,15 @@
     // allocated after this, so we can still use the base pointer to reference
     // locals.
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
-      .addReg(StackPtrReg)
-      .setMIFlag(MachineInstr::FrameSetup);
+        .addReg(StackPtrReg)
+        .setMIFlag(MachineInstr::FrameSetup);
   }
 
   if (HasFP && RoundedSize != 0) {
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
-      .addReg(StackPtrReg)
-      .addImm(RoundedSize * ST.getWavefrontSize())
-      .setMIFlag(MachineInstr::FrameSetup);
+        .addReg(StackPtrReg)
+        .addImm(RoundedSize * ST.getWavefrontSize())
+        .setMIFlag(MachineInstr::FrameSetup);
   }
 
   assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
@@ -758,9 +811,17 @@
   uint32_t RoundedSize = FuncInfo->isStackRealigned()
                              ? NumBytes + MFI.getMaxAlign().value()
                              : NumBytes;
+  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
+  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
+
+  bool HasFPSaveIndex = FuncInfo->FramePointerSaveIndex.hasValue();
+  bool SpillFPToMemory = false;
+  if (HasFPSaveIndex)
+    SpillFPToMemory =
+        MFI.getStackID(FuncInfo->FramePointerSaveIndex.getValue()) !=
+        TargetStackID::SGPRSpill;
 
   if (RoundedSize != 0 && hasFP(MF)) {
-    const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
       .addReg(StackPtrReg)
       .addImm(RoundedSize * ST.getWavefrontSize())
@@ -768,55 +829,49 @@
   }
 
   if (FuncInfo->SGPRForFPSaveRestoreCopy) {
-    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FuncInfo->getFrameOffsetReg())
-      .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
-      .setMIFlag(MachineInstr::FrameSetup);
+    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
+        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
+        .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (FuncInfo->FramePointerSaveIndex) {
+  Register ScratchExecCopy;
+  if (HasFPSaveIndex) {
     const int FI = FuncInfo->FramePointerSaveIndex.getValue();
-
-    assert(!MF.getFrameInfo().isDeadObjectIndex(FI) &&
-           MF.getFrameInfo().getStackID(FI) == TargetStackID::SGPRSpill);
-
-    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill
-      = FuncInfo->getSGPRToVGPRSpills(FI);
-    assert(Spill.size() == 1);
-    BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
-            FuncInfo->getFrameOffsetReg())
-      .addReg(Spill[0].VGPR)
-      .addImm(Spill[0].Lane);
+    assert(!MFI.isDeadObjectIndex(FI));
+    if (SpillFPToMemory) {
+      if (!ScratchExecCopy)
+        ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
+
+      MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
+          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
+      buildEpilogReload(LiveRegs, MBB, MBBI, TII, TempVGPR,
+                        FuncInfo->getScratchRSrcReg(), StackPtrReg, FI);
+      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
+          .addReg(TempVGPR, RegState::Kill);
+    } else {
+      // Reload from VGPR spill.
+      assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
+      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
+          FuncInfo->getSGPRToVGPRSpills(FI);
+      assert(Spill.size() == 1);
+      BuildMI(MBB, MBBI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+              FramePtrReg)
+          .addReg(Spill[0].VGPR)
+          .addImm(Spill[0].Lane);
+    }
   }
 
-  Register ScratchExecCopy;
-  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg
-         : FuncInfo->getSGPRSpillVGPRs()) {
+  for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg :
+       FuncInfo->getSGPRSpillVGPRs()) {
     if (!Reg.FI.hasValue())
       continue;
 
-    const SIRegisterInfo &TRI = TII->getRegisterInfo();
-    if (!ScratchExecCopy) {
-      // See emitPrologue
-      if (LiveRegs.empty()) {
-        LiveRegs.init(*ST.getRegisterInfo());
-        LiveRegs.addLiveOuts(MBB);
-        LiveRegs.stepBackward(*MBBI);
-      }
-
-      ScratchExecCopy = findScratchNonCalleeSaveRegister(
-          MRI, LiveRegs, *TRI.getWaveMaskRegClass());
-      LiveRegs.removeReg(ScratchExecCopy);
-
-      const unsigned OrSaveExec =
-          ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
-
-      BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
-        .addImm(-1);
-    }
+    if (!ScratchExecCopy)
+      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
 
     buildEpilogReload(LiveRegs, MBB, MBBI, TII, Reg.VGPR,
-                      FuncInfo->getScratchRSrcReg(),
-                      FuncInfo->getStackPtrOffsetReg(), Reg.FI.getValue());
+                      FuncInfo->getScratchRSrcReg(), StackPtrReg,
+                      Reg.FI.getValue());
   }
 
   if (ScratchExecCopy) {
@@ -824,7 +879,7 @@
     unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
     MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
     BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
-      .addReg(ScratchExecCopy, RegState::Kill);
+        .addReg(ScratchExecCopy, RegState::Kill);
   }
 }
 
@@ -906,7 +961,7 @@
   if (MFI->isEntryFunction())
     return;
 
-  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
 
@@ -934,12 +989,14 @@
   if (!HasFP)
     return;
 
+  // We need to save and restore the current FP.
+
+  // 1: If there is already a VGPR with free lanes, use it. We
+  // may already have to pay the penalty for spilling a CSR VGPR.
   if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
     int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr,
                                                     TargetStackID::SGPRSpill);
 
-    // If there is already a VGPR with free lanes, use it. We may already have
-    // to pay the penalty for spilling a CSR VGPR.
    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
       llvm_unreachable("allocate SGPR spill should have worked");
 
@@ -952,16 +1009,22 @@
     return;
   }
 
+  // 2: Next, try to save the FP in an unused SGPR.
   MFI->SGPRForFPSaveRestoreCopy = findUnusedSGPRNonCalleeSaved(MF.getRegInfo());
 
   if (!MFI->SGPRForFPSaveRestoreCopy) {
-    // There's no free lane to spill, and no free register to save FP, so we're
-    // forced to spill another VGPR to use for the spill.
int NewFI = MF.getFrameInfo().CreateStackObject(4, 4, true, nullptr, TargetStackID::SGPRSpill); - if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI)) - llvm_unreachable("allocate SGPR spill should have worked"); - MFI->FramePointerSaveIndex = NewFI; + + if (MFI->allocateSGPRSpillToVGPR(MF, NewFI)) { + // 3: There's no free lane to spill, and no free register to save FP, so + // we're forced to spill another VGPR to use for the spill. + MFI->FramePointerSaveIndex = NewFI; + } else { + // 4: If all else fails, spill the FP to memory. + MFI->FramePointerSaveIndex = + FrameInfo.CreateSpillStackObject(4, Align(4)); + } LLVM_DEBUG( auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front(); diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -459,6 +459,115 @@ ret void } +; With no free registers, we must spill the FP to memory. +; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory: +; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33 +; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]] +; GCN: s_mov_b64 exec, [[COPY_EXEC1]] +; GCN: s_mov_b32 s33, s32 +; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]] +; GCN: s_waitcnt vmcnt(0) +; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]] +; GCN: s_mov_b64 exec, [[COPY_EXEC2]] +; GCN: s_setpc_b64 +define void @callee_need_to_spill_fp_to_memory() #1 { + call void asm sideeffect "; clobber nonpreserved SGPRs", + "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} + ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19} + ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} + ,~{vcc}"() + + call void asm sideeffect "; clobber all VGPRs", + 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39} + ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49} + ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59} + ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69} + ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79} + ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89} + ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99} + ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109} + ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119} + ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129} + ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139} + ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149} + ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159} + ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169} + ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179} + ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189} + ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199} + ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209} + ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219} + ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229} + ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239} + ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249} + 
,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"() + ret void +} + +; If we have a reserved VGPR that can be used for SGPR spills, we may still +; need to spill the FP to memory if there are no free lanes in the reserved +; VGPR. +; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory_full_reserved_vgpr: +; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33 +; GCN: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]] +; GCN: s_mov_b64 exec, [[COPY_EXEC1]] +; GCN-NOT: v_writelane_b32 v40, s33 +; GCN: s_mov_b32 s33, s32 +; GCN-NOT: v_readlane_b32 s33, v40 +; GCN: s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]] +; GCN: v_readfirstlane_b32 s33, [[TMP_VGPR2]] +; GCN: s_mov_b64 exec, [[COPY_EXEC2]] +; GCN: s_setpc_b64 +define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #1 { + call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs", + "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} + ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19} + ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} + ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49} + ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59} + ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69} + ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79} + ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89} + ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99} + ,~{s100},~{s101},~{s102},~{s39},~{vcc}"() + + call void asm sideeffect "; clobber all VGPRs except CSR v40", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + 
,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39} + ,~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49} + ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59} + ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69} + ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79} + ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89} + ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99} + ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109} + ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119} + ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129} + ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139} + ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149} + ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159} + ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169} + ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179} + ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189} + ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199} + ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209} + ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219} + ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229} + ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239} + ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249} + ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"() + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind "frame-pointer"="all" } attributes #2 = { nounwind "frame-pointer"="non-leaf" }