Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -2153,7 +2153,7 @@ // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) { - Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); + Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); unsigned OffsetReg = Info->getScratchWaveOffsetReg(); @@ -2357,8 +2357,8 @@ InFlag = Chain.getValue(1); } - uint64_t CalleePopBytes = 0; - Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32), + uint64_t CalleePopBytes = NumBytes; + Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(0, DL, MVT::i32), DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32), InFlag, DL); if (!Ins.empty()) Index: test/CodeGen/AMDGPU/byval-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/byval-frame-setup.ll +++ test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -74,7 +74,6 @@ ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}} ; GCN-DAG: v_writelane_b32 -; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 @@ -86,6 +85,7 @@ ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20 +; GCN-NOT: s_add_u32 s32, s32, 0x800 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}} ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8 @@ -107,8 +107,9 @@ ; GCN: v_readlane_b32 ; GCN-NOT: v_readlane_b32 s32 -; GCN: s_sub_u32 s32, s32, 0x800{{$}} -; GCN-NEXT: s_sub_u32 s32, s32, 0xc00{{$}} +; GCN-NOT: s_sub_u32 s32, s32, 0x800 + +; GCN: s_sub_u32 s32, s32, 0xc00{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @call_void_func_byval_struct_func() #0 { @@ -138,7 +139,7 @@ ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 -; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} +; GCN-NOT: s_add_u32 s32, s32, 0x800 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 @@ -162,9 +163,8 @@ ; GCN: s_swappc_b64 -; FIXME: Dead SP modfication -; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}} -; GCN-NEXT: s_endpgm +; GCN-NOT: s_sub_u32 s32 +; GCN: s_endpgm define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 { entry: %arg0 = alloca %struct.ByValStruct, align 4 Index: test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-argument-types.ll +++ test/CodeGen/AMDGPU/call-argument-types.ll @@ -385,10 +385,10 @@ ; GCN-LABEL: {{^}}test_call_external_void_func_v32i32_i32: ; HSA-DAG: s_mov_b32 s33, s9 -; HSA-DAG: s_add_u32 [[SP_REG:s[0-9]+]], s33, 0x100{{$}} +; HSA-NOT: s_add_u32 s32 ; MESA-DAG: s_mov_b32 s33, s3{{$}} -; MESA-DAG: s_add_u32 [[SP_REG:s[0-9]+]], s33, 0x100{{$}} +; MESA-NOT: s_add_u32 s32 ; GCN-DAG: buffer_load_dword [[VAL1:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} ; GCN-DAG: buffer_load_dwordx4 v[0:3], off @@ -400,7 +400,7 @@ ; GCN-DAG: buffer_load_dwordx4 v[24:27], off ; GCN-DAG: buffer_load_dwordx4 v[28:31], off -; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SP_REG]] offset:4{{$}} +; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], s32 offset:4{{$}} ; GCN: s_waitcnt ; GCN-NEXT: s_swappc_b64 ; GCN-NEXT: s_endpgm @@ -447,7 +447,7 @@ ; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], s33 offset:8 ; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], s33 offset:12 -; GCN: s_add_u32 [[SP]], [[SP]], 0x200 +; GCN-NOT: s_add_u32 [[SP]], ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12 @@ -463,7 +463,7 @@ ; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8 ; GCN-NEXT: s_swappc_b64 -; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200 +; GCN-NOT: [[SP]] define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { %val = alloca { i8, i32 }, align 4 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 0 @@ -486,13 +486,13 @@ ; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:8 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12 -; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200 +; GCN-NOT: s_add_u32 [[SP]] ; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 ; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8 ; GCN-NEXT: s_swappc_b64 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20 -; GCN: s_sub_u32 [[SP]], [[SP]], 0x200 +; GCN-NOT: s_sub_u32 [[SP]] ; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off ; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off Index: test/CodeGen/AMDGPU/callee-special-input-vgprs.ll =================================================================== --- test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -425,7 +425,7 @@ ; GCN: s_mov_b32 s33, s7 ; GCN: s_add_u32 s32, s33, 0x200{{$}} -; GCN-DAG: s_add_u32 s32, s32, 0x100{{$}} +; GCN-NOT: s32 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}} ; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12