Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -147,6 +147,7 @@
   bool selectBVHIntrinsic(MachineInstr &I) const;
   bool selectSMFMACIntrin(MachineInstr &I) const;
   bool selectWaveAddress(MachineInstr &I) const;
+  bool selectStackRestore(MachineInstr &MI) const;
 
   std::pair selectVOP3ModsImpl(MachineOperand &Root,
                                bool IsCanonicalizing = true,
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -72,6 +72,13 @@
   InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
 }
 
+// Return the wave level SGPR base address if this is a wave address.
+static Register getWaveAddress(const MachineInstr *Def) {
+  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
+             ? Def->getOperand(1).getReg() // Operand 1 of the wave address.
+             : Register(); // Not a wave address: default-constructed Register.
+}
+
 bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                       const MachineRegisterInfo &MRI) const {
   // The verifier is oblivious to s1 being a valid value for wavesize registers.
@@ -3365,6 +3372,33 @@
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
+  Register SrcReg = MI.getOperand(0).getReg(); // Value to restore SP from.
+  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
+    return false; // Source could not be constrained to SReg_32.
+
+  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+  Register SP =
+      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
+  Register WaveAddr = getWaveAddress(DefMI); // Null if not a wave address.
+  MachineBasicBlock *MBB = MI.getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+  // Source is not a G_AMDGPU_WAVE_ADDRESS: shift it right by log2(wave size).
+  if (!WaveAddr) {
+    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)
+      .addReg(SrcReg)
+      .addImm(Subtarget->getWavefrontSizeLog2())
+      .setOperandDead(3); // Dead scc
+  }
+  // Copy the resulting value into the stack pointer register.
+  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), SP)
+    .addReg(WaveAddr);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::select(MachineInstr &I) {
   if (I.isPHI())
     return selectPHI(I);
@@ -3501,6 +3535,8 @@
     return true;
   case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
     return selectWaveAddress(I);
+  case AMDGPU::G_STACKRESTORE:
+    return selectStackRestore(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -4362,13 +4398,6 @@
   return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
 }
 
-// Return the wave level SGPR base address if this is a wave address.
-static Register getWaveAddress(const MachineInstr *Def) {
-  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
-             ? Def->getOperand(1).getReg()
-             : Register();
-}
-
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectMUBUFScratchOffset(
     MachineOperand &Root) const {
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -201,6 +201,7 @@
   bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;
 
   bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
+  bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;
 
   bool legalizeImageIntrinsic(
       MachineInstr &MI, MachineIRBuilder &B,
Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -843,6 +843,11 @@
   getActionDefinitionsBuilder(G_DYN_STACKALLOC)
     .legalFor({{PrivatePtr, S32}});
 
+  getActionDefinitionsBuilder(G_STACKSAVE)
+    .customFor({PrivatePtr}); // Custom-lowered by legalizeStackSave.
+  getActionDefinitionsBuilder(G_STACKRESTORE)
+    .legalFor({PrivatePtr}); // Left for the instruction selector.
+
   getActionDefinitionsBuilder(G_GLOBAL_VALUE)
     .customIf(typeIsNot(0, PrivatePtr));
 
@@ -2038,6 +2043,8 @@
     return legalizeCTLZ_CTTZ(MI, MRI, B);
   case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
     return legalizeFPTruncRound(MI, B);
+  case TargetOpcode::G_STACKSAVE:
+    return legalizeStackSave(MI, B);
   default:
     return false;
   }
@@ -6519,6 +6526,16 @@
   return true;
 }
 
+bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
+                                            MachineIRBuilder &B) const {
+  const SITargetLowering *TLI = ST.getTargetLowering();
+  Register StackPtr = TLI->getStackPointerRegisterToSaveRestore();
+  Register DstReg = MI.getOperand(0).getReg(); // Result of the G_STACKSAVE.
+  B.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {DstReg}, {StackPtr});
+  MI.eraseFromParent(); // G_STACKSAVE is replaced by the wave address above.
+  return true;
+}
+
 bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                             MachineInstr &MI) const {
   MachineIRBuilder &B = Helper.MIRBuilder;
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3229,6 +3229,11 @@
   case AMDGPU::G_DYN_STACKALLOC:
     applyMappingDynStackAlloc(MI, OpdMapper, MRI);
     return;
+  case AMDGPU::G_STACKRESTORE: {
+    applyDefaultMapping(OpdMapper);
+    constrainOpWithReadfirstlane(MI, MRI, 0); // NOTE(review): presumably forces operand 0 into an SGPR; confirm.
+    return;
+  }
   case AMDGPU::G_SBFX:
     applyMappingBFE(OpdMapper, /*Signed*/ true);
     return;
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.invalid.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.invalid.mir
@@ -0,0 +1,14 @@
+# RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+# G_STACKRESTORE of a VGPR-bank value is expected to fail instruction selection.
+# ERR: LLVM ERROR: cannot select: G_STACKRESTORE %{{[0-9]+}}:vgpr(p5) (in function: stackrestore_waveaddress_vgpr)
+
+---
+name: stackrestore_waveaddress_vgpr
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    %0:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    G_STACKRESTORE %0
+
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-stacksave-stackrestore.mir
@@ -0,0 +1,71 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX10-WAVE32 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX10-WAVE64 %s
+# Test that a G_AMDGPU_WAVE_ADDRESS source is looked through: no shift is emitted.
+---
+name: stackrestore_waveaddress_sgpr
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    ; GFX10-WAVE32-LABEL: name: stackrestore_waveaddress_sgpr
+    ; GFX10-WAVE32: $sgpr32 = COPY $sgpr32
+    ;
+    ; GFX10-WAVE64-LABEL: name: stackrestore_waveaddress_sgpr
+    ; GFX10-WAVE64: $sgpr32 = COPY $sgpr32
+    %0:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    G_STACKRESTORE %0
+
+...
+
+# Test that a direct copy of $sgpr32 is not special-cased; the shift is still emitted.
+---
+name: stackrestore_direct_sp_sgpr
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr10
+    ; GFX10-WAVE32-LABEL: name: stackrestore_direct_sp_sgpr
+    ; GFX10-WAVE32: liveins: $sgpr10
+    ; GFX10-WAVE32-NEXT: {{ $}}
+    ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr32
+    ; GFX10-WAVE32-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 5, implicit-def dead $scc
+    ; GFX10-WAVE32-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
+    ;
+    ; GFX10-WAVE64-LABEL: name: stackrestore_direct_sp_sgpr
+    ; GFX10-WAVE64: liveins: $sgpr10
+    ; GFX10-WAVE64-NEXT: {{ $}}
+    ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr32
+    ; GFX10-WAVE64-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 6, implicit-def dead $scc
+    ; GFX10-WAVE64-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
+    %0:sgpr(p5) = COPY $sgpr32
+    G_STACKRESTORE %0
+
+...
+# Test restoring from an arbitrary SGPR ($sgpr10): shifted by 5 (wave32) or 6 (wave64).
+---
+name: stackrestore_any_sgpr
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr10
+    ; GFX10-WAVE32-LABEL: name: stackrestore_any_sgpr
+    ; GFX10-WAVE32: liveins: $sgpr10
+    ; GFX10-WAVE32-NEXT: {{ $}}
+    ; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr10
+    ; GFX10-WAVE32-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 5, implicit-def dead $scc
+    ; GFX10-WAVE32-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
+    ;
+    ; GFX10-WAVE64-LABEL: name: stackrestore_any_sgpr
+    ; GFX10-WAVE64: liveins: $sgpr10
+    ; GFX10-WAVE64-NEXT: {{ $}}
+    ; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr10
+    ; GFX10-WAVE64-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 6, implicit-def dead $scc
+    ; GFX10-WAVE64-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
+    %0:sgpr(p5) = COPY $sgpr10
+    G_STACKRESTORE %0
+
+...
+
Index: llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll
+++ llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll
@@ -1,6 +1,9 @@
 ; RUN: split-file %s %t
-; RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stacksave-error.ll 2>&1 | FileCheck -check-prefix=ERR-SAVE %s
-; RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stackrestore-error.ll 2>&1 | FileCheck -check-prefix=ERR-RESTORE %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stacksave-error.ll 2>&1 | FileCheck -check-prefix=ERR-SAVE-SDAG %s
+; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stackrestore-error.ll 2>&1 | FileCheck -check-prefix=ERR-RESTORE-SDAG %s
+; The same inputs with -global-isel=1 are expected to fail with "unable to legalize instruction".
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stacksave-error.ll 2>&1 | FileCheck -check-prefix=ERR-SAVE-GISEL %s
+; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stackrestore-error.ll 2>&1 | FileCheck -check-prefix=ERR-RESTORE-GISEL %s
 
 ; Test that an error is produced if stacksave/stackrestore are used
 ; with the wrong (default) address space.
@@ -9,7 +12,8 @@
 
 declare ptr @llvm.stacksave.p0()
 
-; ERR-SAVE: LLVM ERROR: Cannot select: {{.+}}: i64,ch = stacksave
+; ERR-SAVE-SDAG: LLVM ERROR: Cannot select: {{.+}}: i64,ch = stacksave
+; ERR-SAVE-GISEL: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p0) = G_STACKSAVE (in function: func_store_stacksave)
 define void @func_store_stacksave() {
   %stacksave = call ptr @llvm.stacksave.p0()
   call void asm sideeffect "; use $0", "s"(ptr %stacksave)
@@ -20,7 +24,8 @@
 
 declare void @llvm.stackrestore.p0(ptr)
 
-; ERR-RESTORE: LLVM ERROR: Cannot select: {{.+}}: ch = stackrestore {{.+}}, {{.+}}
+; ERR-RESTORE-SDAG: LLVM ERROR: Cannot select: {{.+}}: ch = stackrestore {{.+}}, {{.+}}
+; ERR-RESTORE-GISEL: LLVM ERROR: unable to legalize instruction: G_STACKRESTORE %{{[0-9]+}}:_(p0) (in function: func_stacksave_sgpr)
 define amdgpu_gfx void @func_stacksave_sgpr(ptr inreg %stack) {
   call void @llvm.stackrestore.p0(ptr %stack)
   ret void