diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -6067,7 +6067,7 @@ +++++++++++++ If the kernel needs a frame pointer for the reasons defined in -``SIFrameLowering`` then SGPR34 is used and is always set to ``0`` in the +``SIFrameLowering`` then SGPR33 is used and is always set to ``0`` in the kernel prolog. If a frame pointer is not required then all uses of the frame pointer are replaced with immediate ``0`` offsets. @@ -8897,7 +8897,7 @@ The following is not part of the AMDGPU function calling convention but describes how the AMDGPU implements function calls: -1. SGPR34 is used as a frame pointer (FP) if necessary. Like the SP it is an +1. SGPR33 is used as a frame pointer (FP) if necessary. Like the SP it is an unswizzled scratch address. It is only needed if runtime sized ``alloca`` are used, or for the reasons defined in ``SIFrameLowering``. 2. Runtime stack alignment is not currently supported. diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1950,7 +1950,7 @@ // finalized, because it does not rely on the known stack size, only // properties like whether variable sized objects are present. if (ST.getFrameLowering()->hasFP(MF)) { - Info.setFrameOffsetReg(AMDGPU::SGPR34); + Info.setFrameOffsetReg(AMDGPU::SGPR33); } } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -81,7 +81,7 @@ ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; // TODO: Pick a high register, and shift down, similar to a kernel. - FrameOffsetReg = AMDGPU::SGPR34; + FrameOffsetReg = AMDGPU::SGPR33; StackPtrOffsetReg = AMDGPU::SGPR32; ArgInfo.PrivateSegmentBuffer = diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -13,14 +13,14 @@ } ; GCN-LABEL: {{^}}indirect_use_vcc: -; GCN: v_writelane_b32 v32, s34, 2 +; GCN: v_writelane_b32 v32, s33, 2 ; GCN: v_writelane_b32 v32, s30, 0 ; GCN: v_writelane_b32 v32, s31, 1 ; GCN: s_swappc_b64 ; GCN: v_readlane_b32 s4, v32, 0 ; GCN: v_readlane_b32 s5, v32, 1 -; GCN: v_readlane_b32 s34, v32, 2 -; GCN: ; NumSgprs: 37 +; GCN: v_readlane_b32 s33, v32, 2 +; GCN: ; NumSgprs: 36 ; GCN: ; NumVgprs: 33 define void @indirect_use_vcc() #1 { call void @use_vcc() @@ -29,8 +29,8 @@ ; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel: ; GCN: is_dynamic_callstack = 0 -; CI: ; NumSgprs: 39 -; VI-NOBUG: ; NumSgprs: 41 +; CI: ; NumSgprs: 38 +; VI-NOBUG: ; NumSgprs: 40 ; VI-BUG: ; NumSgprs: 96 ; GCN: ; NumVgprs: 33 define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 { @@ -48,8 +48,8 @@ } ; GCN-LABEL: {{^}}indirect_use_flat_scratch: -; CI: ; NumSgprs: 39 -; VI: ; NumSgprs: 41 +; CI: ; NumSgprs: 38 +; VI: ; NumSgprs: 40 ; GCN: ; NumVgprs: 33 define void @indirect_use_flat_scratch() #1 { call void @use_flat_scratch() @@ -58,8 +58,8 @@ ; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel: ; GCN: is_dynamic_callstack = 0 -; CI: ; NumSgprs: 39 -; VI-NOBUG: ; NumSgprs: 41 +; CI: ; NumSgprs: 38 +; VI-NOBUG: ; NumSgprs: 40 ; VI-BUG: ; NumSgprs: 96 ; GCN: ; NumVgprs: 33 define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -23,9 +23,9 @@ ; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: ; GCN: buffer_store_dword -; GCN: v_writelane_b32 v32, s34, 4 -; GCN: v_writelane_b32 v32, s36, 0 -; GCN: v_writelane_b32 v32, s37, 1 +; GCN: v_writelane_b32 v32, s33, 4 +; GCN: v_writelane_b32 v32, s34, 0 +; GCN: v_writelane_b32 v32, s35, 1 ; GCN: v_writelane_b32 v32, s30, 2 ; GCN: v_writelane_b32 v32, s31, 3 @@ -35,10 +35,10 @@ ; GCN-NEXT: s_swappc_b64 ; GCN-DAG: v_readlane_b32 s4, v32, 2 ; GCN-DAG: v_readlane_b32 s5, v32, 3 -; GCN: v_readlane_b32 s37, v32, 1 -; GCN: v_readlane_b32 s36, v32, 0 +; GCN: v_readlane_b32 s35, v32, 1 +; GCN: v_readlane_b32 s34, v32, 0 -; GCN: v_readlane_b32 s34, v32, 4 +; GCN: v_readlane_b32 s33, v32, 4 ; GCN: buffer_load_dword ; GCN: s_setpc_b64 define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 { @@ -50,14 +50,14 @@ ; GCN-LABEL: {{^}}test_func_call_external_void_funcx2: ; GCN: buffer_store_dword v32 -; GCN: v_writelane_b32 v32, s34, 4 +; GCN: v_writelane_b32 v32, s33, 4 -; GCN: s_mov_b32 s34, s32 +; GCN: s_mov_b32 s33, s32 ; GCN: s_add_u32 s32, s32, 0x400 ; GCN: s_swappc_b64 ; GCN-NEXT: s_swappc_b64 -; GCN: v_readlane_b32 s34, v32, 4 +; GCN: v_readlane_b32 s33, v32, 4 ; GCN: buffer_load_dword v32, define void @test_func_call_external_void_funcx2() #0 { call void @external_void_func_void() @@ -125,6 +125,8 @@ ret void } +; FIXME: What is the expected behavior for reserved registers here? + ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33: ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -146,8 +148,6 @@ ret void } -; FIXME: What is the expected behavior for reserved registers here? - ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}} ; GCN-NOT: s34 diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -12,9 +12,9 @@ ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt -; GCN-NEXT: s_mov_b32 s4, s34 -; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: s_mov_b32 s34, s4 +; GCN-NEXT: s_mov_b32 s4, s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_setpc_b64 define void @callee_no_stack_no_fp_elim_all() #1 { ret void @@ -46,13 +46,13 @@ ; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt -; GCN-NEXT: s_mov_b32 s4, s34 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s4, s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x200 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}} -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4{{$}} +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4{{$}} ; GCN-NEXT: s_sub_u32 s32, s32, 0x200 -; GCN-NEXT: s_mov_b32 s34, s4 +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @callee_with_stack_no_fp_elim_all() #1 { @@ -80,14 +80,14 @@ ; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN: v_writelane_b32 [[CSR_VGPR]], s34, 2 -; GCN-DAG: s_mov_b32 s34, s32 +; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2 +; GCN-DAG: s_mov_b32 s33, s32 ; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} ; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}} ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, -; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s34{{$}} +; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}} ; GCN: s_swappc_b64 @@ -95,7 +95,7 @@ ; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]] ; GCN: s_sub_u32 s32, s32, 0x400{{$}} -; GCN-NEXT: v_readlane_b32 s34, [[CSR_VGPR]], 2 +; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] @@ -121,7 +121,7 @@ ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-DAG: s_add_u32 s32, s32, 0x400 -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s34, [[FP_SPILL_LANE:[0-9]+]] +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]] ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 @@ -131,7 +131,7 @@ ; GCN-DAG: v_readlane_b32 s5, v32, 1 ; GCN: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s34, [[CSR_VGPR]], [[FP_SPILL_LANE]] +; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]] ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] @@ -204,20 +204,20 @@ ; TODO: Can the SP inc/deec be remvoed? ; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_csr_vgpr: ; GCN: s_waitcnt -; GCN-NEXT:s_mov_b32 [[FP_COPY:s[0-9]+]], s34 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT:s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; GCN-DAG: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s34 offset:8 +; GCN-DAG: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:8 ; GCN: ;;#ASMSTART ; GCN-NEXT: ; clobber v33 ; GCN-NEXT: ;;#ASMEND -; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload +; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300 -; GCN-NEXT: s_mov_b32 s34, s4 +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 { @@ -230,17 +230,17 @@ ; Use a copy to a free SGPR instead of introducing a second CSR VGPR. ; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr: ; GCN: s_waitcnt -; GCN-NEXT: v_writelane_b32 v1, s34, 63 -; GCN-NEXT: s_mov_b32 s34, s32 -; GCN: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill +; GCN-NEXT: v_writelane_b32 v1, s33, 63 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-COUNT-63: v_writelane_b32 v1 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset:8 +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:8 ; GCN: ;;#ASMSTART ; GCN-COUNT-63: v_readlane_b32 s{{[0-9]+}}, v1 ; GCN: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300 -; GCN-NEXT: v_readlane_b32 s34, v1, 63 +; GCN-NEXT: v_readlane_b32 s33, v1, 63 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @last_lane_vgpr_for_fp_csr() #1 { @@ -262,19 +262,19 @@ ; Use a copy to a free SGPR instead of introducing a second CSR VGPR. ; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr: ; GCN: s_waitcnt -; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s34 -; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-COUNT-64: v_writelane_b32 v1, ; GCN: buffer_store_dword ; GCN: ;;#ASMSTART ; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1 -; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload +; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN: s_add_u32 s32, s32, 0x300 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300 -; GCN-NEXT: s_mov_b32 s34, [[FP_COPY]] +; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @no_new_vgpr_for_fp_csr() #1 { @@ -296,13 +296,13 @@ ; GCN-LABEL: {{^}}realign_stack_no_fp_elim: ; GCN: s_waitcnt ; GCN-NEXT: s_add_u32 [[SCRATCH:s[0-9]+]], s32, 0x7ffc0 -; GCN-NEXT: s_mov_b32 s4, s34 -; GCN-NEXT: s_and_b32 s34, [[SCRATCH]], 0xfff80000 +; GCN-NEXT: s_mov_b32 s4, s33 +; GCN-NEXT: s_and_b32 s33, [[SCRATCH]], 0xfff80000 ; GCN-NEXT: s_add_u32 s32, s32, 0x100000 ; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; GCN-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s34 +; GCN-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s33 ; GCN-NEXT: s_sub_u32 s32, s32, 0x100000 -; GCN-NEXT: s_mov_b32 s34, s4 +; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 define void @realign_stack_no_fp_elim() #1 { @@ -313,18 +313,18 @@ ; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp: ; GCN: s_waitcnt -; GCN-NEXT: v_writelane_b32 v1, s34, 2 +; GCN-NEXT: v_writelane_b32 v1, s33, 2 ; GCN-NEXT: v_writelane_b32 v1, s30, 0 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; GCN: v_writelane_b32 v1, s31, 1 -; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s34 offset:4 +; GCN: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4 ; GCN: ;;#ASMSTART ; GCN: v_readlane_b32 s4, v1, 0 ; GCN-NEXT: s_add_u32 s32, s32, 0x200 ; GCN-NEXT: v_readlane_b32 s5, v1, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x200 -; GCN-NEXT: v_readlane_b32 s34, v1, 2 +; GCN-NEXT: v_readlane_b32 s33, v1, 2 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[4:5] define void @no_unused_non_csr_sgpr_for_fp() #1 { @@ -347,9 +347,9 @@ ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v32, s34, 2 +; GCN-NEXT: v_writelane_b32 v32, s33, 2 ; GCN-NEXT: v_writelane_b32 v32, s30, 0 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-DAG: v_writelane_b32 v32, s31, 1 ; GCN-DAG: buffer_store_dword @@ -360,7 +360,7 @@ ; GCN: v_readlane_b32 s4, v32, 0 ; GCN-NEXT: v_readlane_b32 s5, v32, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x300{{$}} -; GCN-NEXT: v_readlane_b32 s34, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v32, 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] @@ -394,9 +394,9 @@ ; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008 ; GCN-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v32, s34, 2 +; GCN-NEXT: v_writelane_b32 v32, s33, 2 ; GCN-NEXT: v_writelane_b32 v32, s30, 0 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-DAG: v_writelane_b32 v32, s31, 1 ; GCN-DAG: s_add_u32 s32, s32, 0x40300{{$}} ; GCN-DAG: buffer_store_dword @@ -406,7 +406,7 @@ ; GCN: v_readlane_b32 s4, v32, 0 ; GCN-NEXT: v_readlane_b32 s5, v32, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x40300{{$}} -; GCN-NEXT: v_readlane_b32 s34, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v32, 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008 ; GCN-NEXT: buffer_load_dword [[CSR_VGPR]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Reload @@ -444,13 +444,13 @@ ; An FP is needed, despite not needing any spills ; TODO: Ccould see callee does not use stack and omit FP. ; GCN-LABEL: {{^}}ipra_call_with_stack: -; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s34 -; GCN: s_mov_b32 s34, s32 +; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN: s_mov_b32 s33, s32 ; GCN: s_add_u32 s32, s32, 0x400 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34{{$}} +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}} ; GCN: s_swappc_b64 ; GCN: s_sub_u32 s32, s32, 0x400 -; GCN: s_mov_b32 s34, [[FP_COPY:s[0-9]+]] +; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]] define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll @@ -530,7 +530,7 @@ } ; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill: -; GCN-DAG: s_mov_b32 s34, s32 +; GCN-DAG: s_mov_b32 s33, s32 ; GCN-DAG: s_add_u32 s32, s32, 0x400 ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[4:5] ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[6:7] @@ -550,7 +550,7 @@ ; GCN: s_swappc_b64 -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO1:[0-9]+]], s[[LO_X]] ; GCN-DAG: v_mov_b32_e32 v[[HI1:[0-9]+]], s[[HI_X]] ; GCN-DAG: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO1]]:[[HI1]]{{\]}} diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -486,7 +486,7 @@ } ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x: -; VARABI: s_mov_b32 s34, s32 +; VARABI: s_mov_b32 s33, s32 ; VARABI: buffer_store_dword v1, off, s[0:3], s32{{$}} ; Touching the workitem id register is not necessary. @@ -514,14 +514,14 @@ ; Requires loading and storing to stack slot. ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: ; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} -; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_load_dword v32, off, s[0:3], s34{{$}} +; GCN-DAG: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}} ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} ; GCN: s_swappc_b64 -; GCN: buffer_load_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_sub_u32 s32, s32, 0x400{{$}} ; GCN: s_setpc_b64 define void @too_many_args_call_too_many_args_use_workitem_id_x( @@ -664,8 +664,8 @@ ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval: ; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} -; VARABI: buffer_store_dword [[K]], off, s[0:3], s34{{$}} -; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}} +; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}} +; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}} ; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}} ; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]], @@ -674,11 +674,11 @@ ; FIXED-ABI-NOT: v31 ; FIXEDABI: v_mov_b32_e32 [[K0:v[0-9]+]], 0x3e7{{$}} -; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s34{{$}} +; FIXEDABI: buffer_store_dword [[K0]], off, s[0:3], s33{{$}} ; FIXEDABI: v_mov_b32_e32 [[K1:v[0-9]+]], 0x140{{$}} ; FIXEDABI: buffer_store_dword [[K1]], off, s[0:3], s32{{$}} -; FIXEDABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s34{{$}} +; FIXEDABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}} ; FIXED-ABI-NOT: v31 ; FIXEDABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -166,17 +166,17 @@ define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr #2 { ; GFX803-LABEL: test_force_fp_kern_empty: ; GFX803: ; %bb.0: ; %entry -; GFX803-NEXT: s_mov_b32 s34, 0 +; GFX803-NEXT: s_mov_b32 s33, 0 ; GFX803-NEXT: s_endpgm ; ; GFX900-LABEL: test_force_fp_kern_empty: ; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_mov_b32 s34, 0 +; GFX900-NEXT: s_mov_b32 s33, 0 ; GFX900-NEXT: s_endpgm ; ; GFX1010-LABEL: test_force_fp_kern_empty: ; GFX1010: ; %bb.0: ; %entry -; GFX1010-NEXT: s_mov_b32 s34, 0 +; GFX1010-NEXT: s_mov_b32 s33, 0 ; GFX1010-NEXT: s_endpgm entry: ret void @@ -188,11 +188,11 @@ ; GFX803-NEXT: s_add_u32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 -; GFX803-NEXT: s_mov_b32 s34, 0 +; GFX803-NEXT: s_mov_b32 s33, 0 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 ; GFX803-NEXT: v_mov_b32_e32 v0, 0 ; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s5 -; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4 +; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX803-NEXT: s_endpgm ; ; GFX900-LABEL: test_force_fp_kern_stack: @@ -200,16 +200,16 @@ ; GFX900-NEXT: s_add_u32 flat_scratch_lo, s4, s7 ; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 ; GFX900-NEXT: s_add_u32 s0, s0, s7 -; GFX900-NEXT: s_mov_b32 s34, 0 +; GFX900-NEXT: s_mov_b32 s33, 0 ; GFX900-NEXT: s_addc_u32 s1, s1, 0 ; GFX900-NEXT: v_mov_b32_e32 v0, 0 -; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX900-NEXT: s_endpgm ; ; GFX1010-LABEL: test_force_fp_kern_stack: ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_add_u32 s4, s4, s7 -; GFX1010-NEXT: s_mov_b32 s34, 0 +; GFX1010-NEXT: s_mov_b32 s33, 0 ; GFX1010-NEXT: s_addc_u32 s5, s5, 0 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 @@ -217,7 +217,7 @@ ; GFX1010-NEXT: s_addc_u32 s1, s1, 0 ; GFX1010-NEXT: v_mov_b32_e32 v0, 0 ; GFX1010-NEXT: ; implicit-def: $vcc_hi -; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4 +; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX1010-NEXT: s_endpgm entry: %x = alloca i32, align 4, addrspace(5) @@ -237,7 +237,7 @@ ; GFX803-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4 ; GFX803-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4 ; GFX803-NEXT: s_mov_b32 s32, 0 -; GFX803-NEXT: s_mov_b32 s34, 0 +; GFX803-NEXT: s_mov_b32 s33, 0 ; GFX803-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX803-NEXT: s_endpgm ; @@ -251,7 +251,7 @@ ; GFX900-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4 ; GFX900-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4 ; GFX900-NEXT: s_mov_b32 s32, 0 -; GFX900-NEXT: s_mov_b32 s34, 0 +; GFX900-NEXT: s_mov_b32 s33, 0 ; GFX900-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX900-NEXT: s_endpgm ; @@ -259,7 +259,7 @@ ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_add_u32 s4, s4, s7 ; GFX1010-NEXT: s_mov_b32 s32, 0 -; GFX1010-NEXT: s_mov_b32 s34, 0 +; GFX1010-NEXT: s_mov_b32 s33, 0 ; GFX1010-NEXT: s_addc_u32 s5, s5, 0 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 @@ -282,7 +282,7 @@ ; GFX803-NEXT: s_add_u32 s4, s4, s7 ; GFX803-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8 ; GFX803-NEXT: s_add_u32 s0, s0, s7 -; GFX803-NEXT: s_mov_b32 s34, 0 +; GFX803-NEXT: s_mov_b32 s33, 0 ; GFX803-NEXT: s_addc_u32 s1, s1, 0 ; GFX803-NEXT: v_mov_b32_e32 v0, 0 ; GFX803-NEXT: s_mov_b32 flat_scratch_lo, s5 @@ -290,7 +290,7 @@ ; GFX803-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4 ; GFX803-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4 ; GFX803-NEXT: s_movk_i32 s32, 0x400 -; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4 +; GFX803-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX803-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX803-NEXT: s_endpgm ; @@ -300,13 +300,13 @@ ; GFX900-NEXT: s_addc_u32 flat_scratch_hi, s5, 0 ; GFX900-NEXT: s_add_u32 s0, s0, s7 ; GFX900-NEXT: s_addc_u32 s1, s1, 0 -; GFX900-NEXT: s_mov_b32 s34, 0 +; GFX900-NEXT: s_mov_b32 s33, 0 ; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: s_getpc_b64 s[4:5] ; GFX900-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4 ; GFX900-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4 ; GFX900-NEXT: s_movk_i32 s32, 0x400 -; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX900-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX900-NEXT: s_endpgm ; @@ -314,7 +314,7 @@ ; GFX1010: ; %bb.0: ; %entry ; GFX1010-NEXT: s_add_u32 s4, s4, s7 ; GFX1010-NEXT: s_movk_i32 s32, 0x200 -; GFX1010-NEXT: s_mov_b32 s34, 0 +; GFX1010-NEXT: s_mov_b32 s33, 0 ; GFX1010-NEXT: s_addc_u32 s5, s5, 0 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4 ; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5 @@ -325,7 +325,7 @@ ; GFX1010-NEXT: s_add_u32 s4, s4, ex@rel32@lo+4 ; GFX1010-NEXT: s_addc_u32 s5, s5, ex@rel32@hi+4 ; GFX1010-NEXT: ; implicit-def: $vcc_hi -; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s34 offset:4 +; GFX1010-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX1010-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX1010-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -30,9 +30,9 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s34, 2 +; GCN-NEXT: v_writelane_b32 v32, s33, 2 ; GCN-NEXT: v_writelane_b32 v32, s30, 0 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v2f32@rel32@lo+4 @@ -42,7 +42,7 @@ ; GCN-NEXT: v_readlane_b32 s4, v32, 0 ; GCN-NEXT: v_readlane_b32 s5, v32, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s34, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v32, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] @@ -64,9 +64,9 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s34, 2 +; GCN-NEXT: v_writelane_b32 v32, s33, 2 ; GCN-NEXT: v_writelane_b32 v32, s30, 0 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v3f32@rel32@lo+4 @@ -76,7 +76,7 @@ ; GCN-NEXT: v_readlane_b32 s4, v32, 0 ; GCN-NEXT: v_readlane_b32 s5, v32, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s34, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v32, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] @@ -98,9 +98,9 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s34, 2 +; GCN-NEXT: v_writelane_b32 v32, s33, 2 ; GCN-NEXT: v_writelane_b32 v32, s30, 0 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_v4f16@rel32@lo+4 @@ -110,7 +110,7 @@ ; GCN-NEXT: v_readlane_b32 s4, v32, 0 ; GCN-NEXT: v_readlane_b32 s5, v32, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s34, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v32, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] @@ -132,9 +132,9 @@ ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v32, s34, 2 +; GCN-NEXT: v_writelane_b32 v32, s33, 2 ; GCN-NEXT: v_writelane_b32 v32, s30, 0 -; GCN-NEXT: s_mov_b32 s34, s32 +; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, func_struct@rel32@lo+4 @@ -145,7 +145,7 @@ ; GCN-NEXT: v_readlane_b32 s5, v32, 1 ; GCN-NEXT: v_mov_b32_e32 v1, v4 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s34, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v32, 2 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -219,10 +219,10 @@ ; GCN-LABEL: {{^}}undefined_stack_store_reg: ; GCN: s_and_saveexec_b64 -; GCN: buffer_store_dword v0, off, s[0:3], s34 offset: -; GCN: buffer_store_dword v0, off, s[0:3], s34 offset: -; GCN: buffer_store_dword v0, off, s[0:3], s34 offset: -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset: +; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: +; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: +; GCN: buffer_store_dword v0, off, s[0:3], s33 offset: +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset: define void @undefined_stack_store_reg(float %arg, i32 %arg1) #0 { bb: %tmp = alloca <4 x float>, align 16, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir --- a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir @@ -30,7 +30,7 @@ waveLimiter: true scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' stackPtrOffsetReg: '$sgpr32' - frameOffsetReg: '$sgpr34' + frameOffsetReg: '$sgpr33' argumentInfo: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } dispatchPtr: { reg: '$sgpr4_sgpr5' } diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -189,18 +189,18 @@ ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v35, s34, 4 -; GFX9-NEXT: s_mov_b32 s34, s32 +; GFX9-NEXT: v_writelane_b32 v35, s33, 4 +; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_add_u32 s32, s32, 0x800 -; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s34 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s34 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s34 ; 4-byte Folded Spill -; GFX9-NEXT: v_writelane_b32 v35, s36, 0 +; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v34, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-NEXT: v_writelane_b32 v35, s34, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+4 -; GFX9-NEXT: v_writelane_b32 v35, s37, 1 -; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 +; GFX9-NEXT: v_writelane_b32 v35, s35, 1 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v32, v1 ; GFX9-NEXT: v_mov_b32_e32 v33, v0 ; GFX9-NEXT: v_writelane_b32 v35, s30, 2 @@ -208,21 +208,21 @@ ; GFX9-NEXT: v_writelane_b32 v35, s31, 3 ; GFX9-NEXT: v_and_b32_e32 v34, 0xffffff, v32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_mad_u32_u24 v32, v33, v32, v34 ; GFX9-NEXT: v_mov_b32_e32 v0, v32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_add_u32_e32 v0, v32, v34 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s4, v35, 2 ; GFX9-NEXT: v_readlane_b32 s5, v35, 3 -; GFX9-NEXT: v_readlane_b32 s37, v35, 1 -; GFX9-NEXT: v_readlane_b32 s36, v35, 0 -; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s34 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s34 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s34 offset:8 ; 4-byte Folded Reload +; GFX9-NEXT: v_readlane_b32 s35, v35, 1 +; GFX9-NEXT: v_readlane_b32 s34, v35, 0 +; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: s_sub_u32 s32, s32, 0x800 -; GFX9-NEXT: v_readlane_b32 s34, v35, 4 +; GFX9-NEXT: v_readlane_b32 s33, v35, 4 ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -14,8 +14,8 @@ ; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-DAG: v_writelane_b32 v32, s34, 2 -; GCN-DAG: s_mov_b32 s34, s32 +; GCN-DAG: v_writelane_b32 v32, s33, 2 +; GCN-DAG: s_mov_b32 s33, s32 ; GCN-DAG: s_add_u32 s32, s32, 0x400 ; GCN-DAG: v_writelane_b32 v32, s30, 0 ; GCN-DAG: v_writelane_b32 v32, s31, 1 @@ -26,7 +26,7 @@ ; GCN: v_readlane_b32 s5, v32, 1 ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s34, v32, 2 +; GCN-NEXT: v_readlane_b32 s33, v32, 2 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] @@ -39,9 +39,9 @@ ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use: ; GCN: s_waitcnt -; GCN: s_mov_b32 s34, s32 +; GCN: s_mov_b32 s33, s32 ; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}} -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s34 offset: +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset: ; GCN: s_swappc_b64 ; GCN: s_sub_u32 s32, s32, 0x1400{{$}} ; GCN: s_setpc_b64 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -19,7 +19,7 @@ machineFunctionInfo: isEntryFunction: false scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 - frameOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 stackPtrOffsetReg: $sgpr32 body: | @@ -28,19 +28,19 @@ ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs ; CHECK: liveins: $vgpr1 - ; CHECK: $sgpr27 = frame-setup COPY $sgpr34 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; CHECK: $sgpr34 = S_LSHR_B32 $sgpr34, 6, implicit-def $scc - ; CHECK: $sgpr34 = S_ADD_U32 killed $sgpr34, 8192, implicit-def $scc - ; CHECK: $vgpr2 = COPY killed $sgpr34 - ; CHECK: $sgpr34 = S_SUB_U32 killed $sgpr34, 8192, implicit-def $scc - ; CHECK: $sgpr34 = S_LSHL_B32 $sgpr34, 6, implicit-def $scc + ; CHECK: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc + ; CHECK: $sgpr33 = S_ADD_U32 killed $sgpr33, 8192, implicit-def $scc + ; CHECK: $vgpr2 = COPY killed $sgpr33 + ; CHECK: $sgpr33 = S_SUB_U32 killed $sgpr33, 8192, implicit-def $scc + ; CHECK: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup COPY $sgpr27 + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 ; CHECK: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 @@ -61,7 +61,7 @@ machineFunctionInfo: isEntryFunction: false scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 - frameOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 stackPtrOffsetReg: $sgpr32 body: | @@ -70,17 +70,17 @@ ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr ; CHECK: liveins: $vgpr1 - ; CHECK: $sgpr27 = frame-setup COPY $sgpr34 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; CHECK: $sgpr29 = S_LSHR_B32 $sgpr34, 6, implicit-def $scc + ; CHECK: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc ; CHECK: $sgpr29 = S_ADD_U32 killed $sgpr29, 8192, implicit-def $scc ; CHECK: $vgpr2 = COPY killed $sgpr29 ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31 ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup COPY $sgpr27 + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 ; CHECK: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr28, implicit $sgpr31 @@ -101,7 +101,7 @@ machineFunctionInfo: isEntryFunction: false scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 - frameOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 stackPtrOffsetReg: $sgpr32 body: | @@ -110,17 +110,17 @@ ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr_64 ; CHECK: liveins: $vgpr1 - ; CHECK: $sgpr27 = frame-setup COPY $sgpr34 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr34, implicit $exec + ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $sgpr28 = S_MOV_B32 8192 ; CHECK: $vgpr2, dead $sgpr28_sgpr29 = V_ADD_I32_e64 killed $sgpr28, killed $vgpr3, 0, implicit $exec ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup COPY $sgpr27 + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 ; CHECK: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 @@ -140,7 +140,7 @@ machineFunctionInfo: isEntryFunction: false scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 - frameOffsetReg: $sgpr34 + frameOffsetReg: $sgpr33 stackPtrOffsetReg: $sgpr32 body: | @@ -149,17 +149,17 @@ ; CHECK-LABEL: name: scavenge_sgpr_pei_prefer_vcc ; CHECK: liveins: $vgpr1 - ; CHECK: $sgpr27 = frame-setup COPY $sgpr34 + ; CHECK: $sgpr27 = frame-setup COPY $sgpr33 ; CHECK: $sgpr4 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc + ; CHECK: $sgpr33 = frame-setup S_AND_B32 killed $sgpr4, 4294443008, implicit-def $scc ; CHECK: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc ; CHECK: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31 - ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr34, implicit $exec + ; CHECK: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; CHECK: $vcc_lo = S_MOV_B32 8192 ; CHECK: $vgpr2, dead $vcc = V_ADD_I32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec ; CHECK: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 ; CHECK: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc - ; CHECK: $sgpr34 = frame-setup COPY $sgpr27 + ; CHECK: $sgpr33 = frame-setup COPY $sgpr27 ; CHECK: S_ENDPGM 0 S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr17, implicit-def $sgpr30, implicit-def $sgpr31 $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr17, implicit $sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -205,13 +205,13 @@ ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: s_mov_b32 s34, s32 +; GCN: s_mov_b32 s33, s32 ; GCN-DAG: s_add_u32 s32, s32, 0x400 -; GCN-DAG: buffer_store_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword v33, off, s[0:3], s34 ; 4-byte Folded Spill -; GCN-DAG: v_writelane_b32 v34, s36, 0 -; GCN-DAG: v_writelane_b32 v34, s37, 1 +; GCN-DAG: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v33, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: v_writelane_b32 v34, s34, 0 +; GCN-DAG: v_writelane_b32 v34, s35, 1 ; GCN-DAG: s_getpc_b64 s[4:5] ; GCN-DAG: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 @@ -220,18 +220,18 @@ ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s36, v34, 0 -; GCN-DAG: v_readlane_b32 s37, v34, 1 +; GCN-DAG: v_readlane_b32 s34, v34, 0 +; GCN-DAG: v_readlane_b32 s35, v34, 1 -; GCN: buffer_load_dword v33, off, s[0:3], s34 ; 4-byte Folded Reload -; GCN: buffer_load_dword v32, off, s[0:3], s34 offset:4 ; 4-byte Folded Reload +; GCN: buffer_load_dword v33, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN: buffer_load_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+4 ; GCN: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: v_readlane_b32 s34, +; GCN-NEXT: v_readlane_b32 s33, ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll --- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll @@ -4,13 +4,13 @@ ; GCN: s_or_saveexec_b64 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: v_writelane_b32 v32, s34, 2 +; GCN: v_writelane_b32 v32, s33, 2 ; GCN: s_swappc_b64 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9 -; GCN: buffer_store_dword [[K]], off, s[0:3], s34{{$}} +; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}} -; GCN: v_readlane_b32 s34, v32, 2 +; GCN: v_readlane_b32 s33, v32, 2 ; GCN: s_or_saveexec_b64 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN: s_mov_b64 exec diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -33,7 +33,7 @@ ; GCN-LABEL: {{^}}needs_align16_stack_align4: ; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}} -; GCN: s_and_b32 s34, [[SCRATCH_REG]], 0xfffffc00 +; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffffc00 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: v_or_b32_e32 v{{[0-9]+}}, 12 @@ -54,7 +54,7 @@ ; GCN-LABEL: {{^}}needs_align32: ; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}} -; GCN: s_and_b32 s34, [[SCRATCH_REG]], 0xfffff800 +; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffff800 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen ; GCN: v_or_b32_e32 v{{[0-9]+}}, 12 @@ -75,7 +75,7 @@ ; GCN-LABEL: {{^}}force_realign4: ; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}} -; GCN: s_and_b32 s34, [[SCRATCH_REG]], 0xffffff00 +; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffffff00 ; GCN: s_add_u32 s32, s32, 0xd00{{$}} ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen @@ -125,13 +125,13 @@ ; GCN-LABEL: {{^}}default_realign_align128: ; GCN: s_add_u32 [[TMP:s[0-9]+]], s32, 0x1fc0 -; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s34 -; GCN-NEXT: s_and_b32 s34, [[TMP]], 0xffffe000 +; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN-NEXT: s_and_b32 s33, [[TMP]], 0xffffe000 ; GCN-NEXT: s_add_u32 s32, s32, 0x4000 -; GCN-NOT: s34 -; GCN: buffer_store_dword v0, off, s[0:3], s34{{$}} +; GCN-NOT: s33 +; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}} ; GCN: s_sub_u32 s32, s32, 0x4000 -; GCN: s_mov_b32 s34, [[FP_COPY]] +; GCN: s_mov_b32 s33, [[FP_COPY]] define void @default_realign_align128(i32 %idx) #0 { %alloca.align = alloca i32, align 128, addrspace(5) store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1063,8 +1063,8 @@ ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v32, s34, 2 -; GCN: s_mov_b32 s34, s32 +; GCN-NEXT: v_writelane_b32 v32, s33, 2 +; GCN: s_mov_b32 s33, s32 ; GFX1064: s_add_u32 s32, s32, 0x400 ; GFX1032: s_add_u32 s32, s32, 0x200 @@ -1078,7 +1078,7 @@ ; GFX1064: s_sub_u32 s32, s32, 0x400 ; GFX1032: s_sub_u32 s32, s32, 0x200 -; GCN: v_readlane_b32 s34, v32, 2 +; GCN: v_readlane_b32 s33, v32, 2 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll @@ -77,7 +77,7 @@ ; CHECK-NEXT: memoryBound: false ; CHECK-NEXT: waveLimiter: false ; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' -; CHECK-NEXT: frameOffsetReg: '$sgpr34' +; CHECK-NEXT: frameOffsetReg: '$sgpr33' ; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32' ; CHECK-NEXT: argumentInfo: ; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } @@ -104,7 +104,7 @@ ; CHECK-NEXT: memoryBound: false ; CHECK-NEXT: waveLimiter: false ; CHECK-NEXT: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' -; CHECK-NEXT: frameOffsetReg: '$sgpr34' +; CHECK-NEXT: frameOffsetReg: '$sgpr33' ; CHECK-NEXT: stackPtrOffsetReg: '$sgpr32' ; CHECK-NEXT: argumentInfo: ; CHECK-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }