Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -613,30 +613,36 @@ .setMIFlag(MachineInstr::FrameSetup); } - if (!FuncInfo->getSGPRSpillVGPRs().empty()) { - if (LiveRegs.empty()) { - LiveRegs.init(TRI); - LiveRegs.addLiveIns(MBB); - } + // To avoid clobbering VGPRs in lanes that weren't active on function entry, + // turn on all lanes before doing the spill to memory. + unsigned ScratchExecCopy = AMDGPU::NoRegister; - // To avoid clobbering VGPRs in lanes that weren't active on function entry, - // turn on all lanes before doing the spill to memory. - unsigned ScratchExecCopy - = findScratchNonCalleeSaveRegister(MF, LiveRegs, - AMDGPU::SReg_64_XEXECRegClass); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy) - .addImm(-1); - - for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg - : FuncInfo->getSGPRSpillVGPRs()) { - if (!Reg.FI.hasValue()) - continue; - TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true, - Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, - &TII->getRegisterInfo()); + for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg + : FuncInfo->getSGPRSpillVGPRs()) { + if (!Reg.FI.hasValue()) + continue; + + if (ScratchExecCopy == AMDGPU::NoRegister) { + if (LiveRegs.empty()) { + LiveRegs.init(TRI); + LiveRegs.addLiveIns(MBB); + } + + ScratchExecCopy + = findScratchNonCalleeSaveRegister(MF, LiveRegs, + AMDGPU::SReg_64_XEXECRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), + ScratchExecCopy) + .addImm(-1); } + TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true, + Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, + &TII->getRegisterInfo()); + } + + if (ScratchExecCopy != AMDGPU::NoRegister) { // FIXME: Split block and make terminator. BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(ScratchExecCopy); @@ -654,27 +660,31 @@ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc DL; - if (!FuncInfo->getSGPRSpillVGPRs().empty()) { - // See emitPrologue - LivePhysRegs LiveRegs(*ST.getRegisterInfo()); - LiveRegs.addLiveIns(MBB); + unsigned ScratchExecCopy = AMDGPU::NoRegister; + for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg + : FuncInfo->getSGPRSpillVGPRs()) { + if (!Reg.FI.hasValue()) + continue; - unsigned ScratchExecCopy - = findScratchNonCalleeSaveRegister(MF, LiveRegs, - AMDGPU::SReg_64_XEXECRegClass); - - BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy) - .addImm(-1); - - for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg - : FuncInfo->getSGPRSpillVGPRs()) { - if (!Reg.FI.hasValue()) - continue; - TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR, - Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, - &TII->getRegisterInfo()); + if (ScratchExecCopy == AMDGPU::NoRegister) { + // See emitPrologue + LivePhysRegs LiveRegs(*ST.getRegisterInfo()); + LiveRegs.addLiveIns(MBB); + + ScratchExecCopy + = findScratchNonCalleeSaveRegister(MF, LiveRegs, + AMDGPU::SReg_64_XEXECRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), ScratchExecCopy) + .addImm(-1); } + TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR, + Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, + &TII->getRegisterInfo()); + } + + if (ScratchExecCopy != AMDGPU::NoRegister) { // FIXME: Split block and make terminator. BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) .addReg(ScratchExecCopy); Index: test/CodeGen/AMDGPU/callee-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/callee-frame-setup.ll +++ test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -135,5 +135,21 @@ ret void } +; Has no spilled CSR VGPRs used for SGPR spilling, so no need to +; enable all lanes and restore. + +; GCN-LABEL: {{^}}spill_only_csr_sgpr: +; GCN: s_waitcnt +; GCN-NEXT: v_writelane_b32 v0, s42, 0 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; clobber s42 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s42, v0, 0 +; GCN-NEXT: s_setpc_b64 +define void @spill_only_csr_sgpr() { + call void asm sideeffect "; clobber s42", "~{s42}"() + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind "no-frame-pointer-elim"="true" }