Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1027,6 +1027,13 @@ } void fixImplicitOperands(MachineInstr &MI) const; + + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, + ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, + int FrameIndex, + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; }; /// \brief Returns true if a reg:subreg pair P has a TRC class Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -1062,6 +1062,7 @@ if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); + assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled"); // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling SGPRs. @@ -1190,6 +1191,7 @@ if (RI.isSGPRClass(RC)) { MFI->setHasSpilledSGPRs(); + assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into"); // FIXME: Maybe this should not include a memoperand because it will be // lowered to non-memory instructions. @@ -6558,3 +6560,36 @@ } bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } + +MachineInstr *SIInstrInfo::foldMemoryOperandImpl( + MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, + MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { + // This is a bit of a hack (copied from AArch64). Consider this instruction: + // + // %0:sreg_32 = COPY $m0 + // + // We explicitly chose SReg_32 for the virtual register so such a copy might + // be eliminated by RegisterCoalescer. However, that may not be possible, and + // %0 may even spill. 
We can't spill $m0 normally (it would require copying to + // a numbered SGPR anyway), and since it is in the SReg_32 register class, + // TargetInstrInfo::foldMemoryOperand() is going to try. + // + // To prevent that, constrain the %0 register class here. + if (MI.isFullCopy()) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + + if (DstReg == AMDGPU::M0 && SrcReg.isVirtual()) { + MF.getRegInfo().constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); + return nullptr; + } + + if (SrcReg == AMDGPU::M0 && DstReg.isVirtual()) { + MF.getRegInfo().constrainRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass); + return nullptr; + } + } + + return nullptr; +} Index: llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir @@ -0,0 +1,58 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -stress-regalloc=2 -start-before=greedy -stop-after=virtregmap -o - %s | FileCheck %s + +# Test that a spill of a copy of m0 is not folded to be a spill of m0 directly. 
+ +--- + +name: merge_sgpr_spill_into_copy_from_m0 +tracksRegLiveness: true +body: | + bb.0: + + ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0 + ; CHECK: liveins: $vgpr0 + ; CHECK: S_WAITCNT 0 + ; CHECK: S_NOP 0, implicit-def $m0 + ; CHECK: $sgpr0 = S_MOV_B32 $m0 + ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $m0 = S_MOV_B32 killed $sgpr0 + ; CHECK: S_NOP 0 + ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec + S_NOP 0, implicit-def $m0 + %0:sreg_32 = COPY $m0 + S_NOP 0, implicit-def %1:sreg_32, implicit-def %2:sreg_32, implicit %0 + $m0 = COPY %0 + S_SENDMSG 0, implicit $m0, implicit $exec + +... + +# Test that a reload into a copy of m0 is not folded to be a reload of m0 directly. + +--- + +name: reload_sgpr_spill_into_copy_to_m0 +tracksRegLiveness: true +body: | + bb.0: + + ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0 + ; CHECK: liveins: $vgpr0 + ; CHECK: S_WAITCNT 0 + ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $m0 + ; CHECK: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr0, 0, undef $vgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32_vi $vgpr0, 0 + ; CHECK: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 + ; CHECK: $sgpr0 = V_READLANE_B32_vi killed $vgpr0, 0 + ; CHECK: $m0 = S_MOV_B32 killed $sgpr0 + ; CHECK: S_NOP 0 + ; CHECK: S_SENDMSG 0, implicit $m0, implicit $exec + S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $m0 + S_NOP 0, implicit %0, implicit-def %3:sreg_32, implicit-def %4:sreg_32 + $m0 = COPY %0 + S_SENDMSG 0, implicit $m0, implicit $exec + +...