diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1086,6 +1086,18 @@ // to determine the end of the prologue. DebugLoc DL; + if (FuncInfo->isChainFunction()) { + // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but + // are free to set one up if they need it. + bool UseSP = MFI.hasCalls() || MFI.hasStackObjects(); + if (UseSP) { + assert(StackPtrReg != AMDGPU::SP_REG); + + BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg) + .addImm(0); + } + } + bool HasFP = false; bool HasBP = false; uint32_t NumBytes = MFI.getStackSize(); diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll @@ -31,8 +31,6 @@ ret void } -; FIXME: Setup s32. - define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, <4 x i32> %vgpr) { ; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: ; GISEL-GFX11: ; %bb.0: @@ -45,6 +43,7 @@ ; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX11-NEXT: s_endpgm @@ -66,6 +65,7 @@ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 ; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] ; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GISEL-GFX10-NEXT: s_endpgm @@ -81,6 +81,7 @@ ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) ; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX11-NEXT: s_endpgm @@ -102,6 +103,7 @@ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 ; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] ; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) ; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; DAGISEL-GFX10-NEXT: s_endpgm @@ -109,12 +111,12 @@ ret void } -; FIXME: Setup s32. - define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 x i32> %vgprs) { ; GISEL-GFX11-LABEL: amdgpu_cs_chain_spill: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 ; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24 @@ -145,9 +147,8 @@ ; GISEL-GFX11-NEXT: s_getpc_b64 s[24:25] ; GISEL-GFX11-NEXT: s_add_u32 s24, s24, use@gotpcrel32@lo+4 ; GISEL-GFX11-NEXT: s_addc_u32 s25, s25, use@gotpcrel32@hi+12 -; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 -; GISEL-GFX11-NEXT: s_load_b64 s[24:25], s[24:25], 0x0 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 +; GISEL-GFX11-NEXT: s_load_b64 s[24:25], s[24:25], 0x0 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 ; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15 ; GISEL-GFX11-NEXT: s_add_u32 s26, s32, 56 @@ -189,6 +190,7 @@ ; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14 ; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15 +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4 ; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 @@ -246,6 +248,8 @@ ; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_spill: ; DAGISEL-GFX11: ; %bb.0: ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 ; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24 @@ -276,9 +280,8 @@ ; DAGISEL-GFX11-NEXT: s_getpc_b64 s[24:25] ; DAGISEL-GFX11-NEXT: s_add_u32 s24, s24, use@gotpcrel32@lo+4 ; DAGISEL-GFX11-NEXT: s_addc_u32 s25, s25, use@gotpcrel32@hi+12 -; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 -; DAGISEL-GFX11-NEXT: s_load_b64 s[24:25], s[24:25], 0x0 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12 +; DAGISEL-GFX11-NEXT: s_load_b64 s[24:25], s[24:25], 0x0 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10 ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8 ; DAGISEL-GFX11-NEXT: s_add_i32 s26, s32, 8 @@ -320,6 +323,7 @@ ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8 +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4 ; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll @@ -201,13 +201,12 @@ unreachable } -; FIXME: Setup s32. - define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { ; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; GISEL-GFX11-NEXT: ;;#ASMSTART @@ -228,6 +227,7 @@ ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; GISEL-GFX10-NEXT: ;;#ASMSTART @@ -252,6 +252,7 @@ ; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12 ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 ; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; DAGISEL-GFX11-NEXT: ;;#ASMSTART @@ -272,6 +273,7 @@ ; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee@gotpcrel32@hi+12 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 ; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; DAGISEL-GFX10-NEXT: ;;#ASMSTART @@ -293,6 +295,7 @@ ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; GISEL-GFX11-NEXT: ;;#ASMSTART @@ -313,6 +316,7 @@ ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; GISEL-GFX10-NEXT: ;;#ASMSTART @@ -337,6 +341,7 @@ ; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12 ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 ; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; DAGISEL-GFX11-NEXT: ;;#ASMSTART @@ -357,6 +362,7 @@ ; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 ; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; DAGISEL-GFX10-NEXT: ;;#ASMSTART @@ -377,8 +383,9 @@ ; GISEL-GFX11-LABEL: chain_preserve_to_chain_wwm: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 3 ; GISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 4 @@ -402,8 +409,9 @@ ; GISEL-GFX10-LABEL: chain_preserve_to_chain_wwm: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 +; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 3 ; GISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 4 @@ -429,8 +437,9 @@ ; DAGISEL-GFX11-NEXT: s_getpc_b64 s[4:5] ; DAGISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4 ; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12 -; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, 3 ; DAGISEL-GFX11-NEXT: s_not_b32 exec_lo, exec_lo @@ -454,8 +463,9 @@ ; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5] ; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4 ; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12 -; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, 3 ; DAGISEL-GFX10-NEXT: s_not_b32 exec_lo, exec_lo @@ -483,11 +493,12 @@ ; GISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7: ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX11-NEXT: s_clause 0x1 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v11, s32 offset:4 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 -; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX11-NEXT: ;;#ASMSTART ; GISEL-GFX11-NEXT: s_nop ; GISEL-GFX11-NEXT: ;;#ASMEND @@ -508,10 +519,11 @@ ; GISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7: ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 +; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], s32 offset:4 ; 4-byte Folded Spill ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 -; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; GISEL-GFX10-NEXT: ;;#ASMSTART ; GISEL-GFX10-NEXT: s_nop ; GISEL-GFX10-NEXT: ;;#ASMEND @@ -534,10 +546,11 @@ ; DAGISEL-GFX11-NEXT: s_getpc_b64 s[4:5] ; DAGISEL-GFX11-NEXT: s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4 ; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12 +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 +; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; DAGISEL-GFX11-NEXT: s_clause 0x1 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v11, s32 offset:4 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 -; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX11-NEXT: ;;#ASMSTART @@ -559,9 +572,10 @@ ; DAGISEL-GFX10-NEXT: s_getpc_b64 s[4:5] ; DAGISEL-GFX10-NEXT: s_add_u32 s4, s4, chain_callee@gotpcrel32@lo+4 ; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_callee@gotpcrel32@hi+12 +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 +; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; DAGISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], s32 offset:4 ; 4-byte Folded Spill ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill -; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 ; DAGISEL-GFX10-NEXT: ;;#ASMSTART @@ -585,6 +599,7 @@ ; GISEL-GFX11: ; %bb.0: ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX11-NEXT: s_mov_b32 s2, s0 ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; GISEL-GFX11-NEXT: ;;#ASMSTART @@ -605,6 +620,7 @@ ; GISEL-GFX10: ; %bb.0: ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 +; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; GISEL-GFX10-NEXT: s_mov_b32 s2, s0 ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; GISEL-GFX10-NEXT: ;;#ASMSTART @@ -629,6 +645,7 @@ ; DAGISEL-GFX11-NEXT: s_addc_u32 s5, s5, chain_preserve_callee_2@gotpcrel32@hi+12 ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 ; DAGISEL-GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0 ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 ; 4-byte Folded Spill ; DAGISEL-GFX11-NEXT: ;;#ASMSTART @@ -649,6 +666,7 @@ ; DAGISEL-GFX10-NEXT: s_addc_u32 s5, s5, chain_preserve_callee_2@gotpcrel32@hi+12 ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 ; DAGISEL-GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 ; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0 ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 ; 4-byte Folded Spill ; DAGISEL-GFX10-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir @@ -36,6 +36,7 @@ ; GCN-LABEL: name: preserve_active_lanes_above_args ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr32 = S_MOV_B32 0 ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr10, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10 @@ -71,6 +72,7 @@ ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr32 = S_MOV_B32 0 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr10, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr11, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) @@ -147,6 +149,7 @@ ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr32 = S_MOV_B32 0 ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr8, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr9, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir @@ -37,6 +37,7 @@ ; GCN-LABEL: name: preserve_inactive_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr32 = S_MOV_B32 0 ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr8, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr9, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) @@ -73,6 +74,7 @@ ; GCN-LABEL: name: preserve_inactive_detected_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr32 = S_MOV_B32 0 ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr8, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr9, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)