diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -933,12 +933,12 @@
     } else {
       SavedExecReg =
           getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart]),
-                              AMDGPU::sub0, &AMDGPU::SGPR_64RegClass);
+                              AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
       // If src/dst is an odd size it is possible subreg0 is not aligned.
       if (!SavedExecReg && NumSubRegs > 2)
         SavedExecReg =
             getMatchingSuperReg(getSubReg(SuperReg, SplitParts[FirstPart + 1]),
-                                AMDGPU::sub0, &AMDGPU::SGPR_64RegClass);
+                                AMDGPU::sub0, &AMDGPU::SReg_64_XEXECRegClass);
     }
 
     assert(SavedExecReg);
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -0,0 +1,117 @@
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK %s
+# Runs only the prologepilog pass over 64-bit SGPR spill pseudos whose operand is $vcc or $exec; both invocations share one set of expectations.
+--- |
+  define amdgpu_kernel void @check_vcc() #0 {
+    ret void
+  }
+
+  define amdgpu_kernel void @check_exec() #0 {
+    ret void
+  }
+
+  attributes #0 = { "frame-pointer"="all" }
+...
+# First save of $vcc (not marked killed): the expected output keeps a copy of $exec in lanes 32 and 33 of $vgpr0 and reads it back after the store.
+# CHECK-LABEL: name: check_vcc
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $vcc_lo, 0, undef $vgpr0
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $vcc_hi, 1, $vgpr0
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $exec_lo, 32, $vgpr0
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $exec_hi, 33, $vgpr0
+# CHECK: $exec = S_MOV_B64 3
+# CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: $exec_lo = V_READLANE_B32_{{(vi|gfx10)}} $vgpr0, 32
+# CHECK: $exec_hi = V_READLANE_B32_{{(vi|gfx10)}} killed $vgpr0, 33
+# Second save of $vcc (marked killed): the expected output copies $exec into $vcc before the store and restores $exec from $vcc afterwards.
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $vcc_lo, 0, undef $vgpr0
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $vcc_hi, 1, $vgpr0
+# CHECK: $vcc = S_MOV_B64 $exec
+# CHECK: $exec = S_MOV_B64 3
+# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: $exec = S_MOV_B64 killed $vcc
+
+# Restore into $vcc: the expected output holds $exec in $vcc around the load, then fills $vcc_lo/$vcc_hi from lanes 0 and 1 of $vgpr0.
+# CHECK: $vcc = S_MOV_B64 $exec
+# CHECK: $exec = S_MOV_B64 3
+# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: $exec = S_MOV_B64 killed $vcc
+# CHECK: $vcc_lo = V_READLANE_B32_{{(vi|gfx10)}} $vgpr0, 0
+# CHECK: $vcc_hi = V_READLANE_B32_{{(vi|gfx10)}} killed $vgpr0, 1
+---
+name: check_vcc
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr4_sgpr5' }
+  - { reg: '$sgpr6_sgpr7' }
+  - { reg: '$sgpr8' }
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  isEntryFunction: true
+  waveLimiter: true
+  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr: { reg: '$sgpr4_sgpr5' }
+    kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
+    workGroupIDX: { reg: '$sgpr8' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr9' }
+body: |
+  bb.0:
+    liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
+
+    $vcc = IMPLICIT_DEF
+    SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+
+    $vcc = IMPLICIT_DEF
+    SI_SPILL_S64_SAVE killed $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+
+    $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+
+# Save of $exec itself: the expected output writes $exec_lo/$exec_hi to lanes 0 and 1 of $vgpr0 and reads $exec back from those lanes after the store.
+# CHECK-LABEL: name: check_exec
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $exec_lo, 0, undef $vgpr0
+# CHECK: $vgpr0 = V_WRITELANE_B32_{{(vi|gfx10)}} $exec_hi, 1, $vgpr0
+# CHECK: $exec = S_MOV_B64 3
+# CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: $exec_lo = V_READLANE_B32_{{(vi|gfx10)}} $vgpr0, 0
+# CHECK: $exec_hi = V_READLANE_B32_{{(vi|gfx10)}} killed $vgpr0, 1
+# Restore into $exec: the expected output sets $exec to 3 for the load and then fills $exec_lo/$exec_hi from lanes 0 and 1 of $vgpr0.
+# CHECK: $exec = S_MOV_B64 3
+# CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET ${{(sgpr[0-9_]+)*}}, $sgpr33, 4
+# CHECK: $exec_lo = V_READLANE_B32_{{(vi|gfx10)}} $vgpr0, 0
+# CHECK: $exec_hi = V_READLANE_B32_{{(vi|gfx10)}} killed $vgpr0, 1
+---
+name: check_exec
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr4_sgpr5' }
+  - { reg: '$sgpr6_sgpr7' }
+  - { reg: '$sgpr8' }
+frameInfo:
+  maxAlignment: 4
+stack:
+  - { id: 0, type: spill-slot, size: 8, alignment: 4 }
+machineFunctionInfo:
+  isEntryFunction: true
+  waveLimiter: true
+  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr: { reg: '$sgpr4_sgpr5' }
+    kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
+    workGroupIDX: { reg: '$sgpr8' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr9' }
+body: |
+  bb.0:
+    liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7
+
+    SI_SPILL_S64_SAVE $exec, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
+
+    $exec = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32