diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -287,16 +287,19 @@
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 
   unsigned Size = FrameInfo.getObjectSize(FI);
-  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
-  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+  unsigned NumLanes = Size / 4;
 
-  int NumLanes = Size / 4;
+  if (NumLanes > WaveSize)
+    return false;
+
+  assert(Size >= 4 && "invalid sgpr spill size");
+  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
 
   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
 
   // Make sure to handle the case where a wide SGPR spill may span between two
   // VGPRs.
-  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
     Register LaneVGPR;
     unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
diff --git a/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
--- a/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-wide-sgpr.ll
@@ -193,18 +193,236 @@
   ret void
 }
 
-; FIXME: x16 inlineasm seems broken
-; define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
-;   %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
-;   %cmp = icmp eq i32 %in, 0
-;   br i1 %cmp, label %bb0, label %ret
-
-; bb0:
-;   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
-;   br label %ret
-
-; ret:
-;   ret void
-; }
+; ALL-LABEL: {{^}}spill_sgpr_x16:
+
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
+; VGPR: s_cbranch_scc1
+
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
+
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x16(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.sgpr = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+
+bb0:
+  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr) #0
+  br label %ret
+
+ret:
+  ret void
+}
+
+; ALL-LABEL: {{^}}spill_sgpr_x32:
+
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 0
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 1
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 2
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 3
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 4
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 5
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 6
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 7
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 8
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 9
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 10
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 11
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 12
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 13
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 14
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 15
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 16
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 17
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 18
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 19
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 20
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 21
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 22
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 23
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 24
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 25
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 26
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 27
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 28
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 29
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 30
+; VGPR: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 31
+; VGPR: s_cbranch_scc1
+
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 0
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 1
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 2
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 3
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 4
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 5
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 6
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 7
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 8
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 9
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 10
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 11
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 12
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 13
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 14
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 15
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 16
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 17
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 18
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 19
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 20
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 21
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 22
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 23
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 24
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 25
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 26
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 27
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 28
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 29
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 30
+; VGPR: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 31
+
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: buffer_store_dword
+; VMEM: s_cbranch_scc1
+
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+; VMEM: buffer_load_dword
+define amdgpu_kernel void @spill_sgpr_x32(i32 addrspace(1)* %out, i32 %in) #0 {
+  %wide.sgpr = call <32 x i32> asm sideeffect "; def $0", "=s" () #0
+  %cmp = icmp eq i32 %in, 0
+  br i1 %cmp, label %bb0, label %ret
+
+bb0:
+  call void asm sideeffect "; use $0", "s"(<32 x i32> %wide.sgpr) #0
+  br label %ret
+
+ret:
+  ret void
+}
 
 attributes #0 = { nounwind }