Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -66,7 +66,11 @@ CC == CallingConv::SPIR_KERNEL; if (IsKernel) { - if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0) + if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0 || + // FIXME: Mesa is still using implicit arguments placed before user + // kernel arguments, and we do not check whether we can optimize them + // out. These should be consolidated into the same implicit arg section. + ST.getExplicitKernelArgOffset(F) != 0) KernargSegmentPtr = true; WorkGroupIDX = true; WorkItemIDX = true; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -56,11 +56,11 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX9-LABEL: store_load_vindex_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4 ; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 ; GFX9-NEXT: v_mov_b32_e32 v3, 15 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 @@ -73,10 +73,10 @@ ; ; GFX10-LABEL: store_load_vindex_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; 
GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 4 @@ -233,8 +233,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX9-LABEL: store_load_vindex_small_offset_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -253,10 +253,10 @@ ; ; GFX10-LABEL: store_load_vindex_small_offset_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x104 @@ -405,8 +405,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX9-LABEL: store_load_vindex_large_offset_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -425,10 +425,10 @@ ; ; GFX10-LABEL: store_load_vindex_large_offset_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, 
s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x4004 @@ -519,8 +519,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-LABEL: store_load_large_imm_offset_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: s_movk_i32 s0, 0x3e80 @@ -536,10 +536,10 @@ ; ; GFX10-LABEL: store_load_large_imm_offset_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 Index: llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll @@ -53,6 +53,21 @@ ret void } +; FUNC-LABEL: {{^}}ngroups_z_no_kernargs: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] + +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].Z +define amdgpu_kernel void @ngroups_z_no_kernargs() { +entry: + %ngroups.z = call i32 @llvm.r600.read.ngroups.z() #0 + store volatile i32 %ngroups.z, i32 addrspace(1)* null + ret void +} + ; FUNC-LABEL: {{^}}global_size_x: ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc Index: llvm/test/CodeGen/AMDGPU/flat-scratch.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -8,9 +8,9 @@ define amdgpu_kernel void @zero_init_kernel() { ; GFX9-LABEL: zero_init_kernel: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: s_mov_b32 s2, s0 ; GFX9-NEXT: s_mov_b32 s3, s0 @@ -30,10 +30,10 @@ ; ; GFX10-LABEL: zero_init_kernel: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: s_mov_b32 s0, 0 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 @@ -409,10 +409,10 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX9-LABEL: store_load_vindex_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: v_add_u32_e32 v2, v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v3, 
15 ; GFX9-NEXT: scratch_store_dword v2, v3, off @@ -424,10 +424,10 @@ ; ; GFX10-LABEL: store_load_vindex_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: v_mov_b32_e32 v1, 4 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 @@ -608,8 +608,8 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX9-LABEL: zero_init_small_offset_kernel: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -633,10 +633,10 @@ ; ; GFX10-LABEL: zero_init_small_offset_kernel: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 @@ -1115,8 +1115,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX9-LABEL: store_load_vindex_small_offset_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: 
s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -1133,10 +1133,10 @@ ; ; GFX10-LABEL: store_load_vindex_small_offset_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x104 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 @@ -1328,8 +1328,8 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX9-LABEL: zero_init_large_offset_kernel: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:16 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -1353,10 +1353,10 @@ ; ; GFX10-LABEL: zero_init_large_offset_kernel: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: scratch_load_dword v0, off, off offset:16 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 @@ -1887,8 +1887,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX9-LABEL: store_load_vindex_large_offset_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: 
s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -1905,10 +1905,10 @@ ; ; GFX10-LABEL: store_load_vindex_large_offset_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4004 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 @@ -2100,8 +2100,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-LABEL: store_load_large_imm_offset_kernel: ; GFX9: ; %bb.0: ; %bb -; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: s_movk_i32 s0, 0x3000 @@ -2117,10 +2117,10 @@ ; ; GFX10-LABEL: store_load_large_imm_offset_kernel: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_add_u32 s0, s0, s3 -; GFX10-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s2, s2, s5 +; GFX10-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3800 Index: llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll +++ llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll @@ -1,16 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; Spill an SGPR to scratch without having spare SGPRs available to save exec define amdgpu_kernel void @test() #1 { ; GFX10-LABEL: test: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s10, -1 -; GFX10-NEXT: s_mov_b32 s11, 0x31e16000 -; GFX10-NEXT: s_add_u32 s8, s8, s1 +; GFX10-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX10-NEXT: s_mov_b64 s[8:9], s[0:1] +; GFX10-NEXT: s_add_u32 s8, s8, s5 ; GFX10-NEXT: s_addc_u32 s9, s9, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s[0:7]