diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -216,16 +216,6 @@ std::vector KernelUsedVariables = AMDGPU::findLDSVariablesToLower(M, &F); - // Replace all constant uses with instructions if they belong to the - // current kernel. Unnecessary, removing will cause test churn. - for (GlobalVariable *GV : KernelUsedVariables) { - for (User *U : make_early_inc_range(GV->users())) { - if (ConstantExpr *C = dyn_cast(U)) - AMDGPU::replaceConstantUsesInFunction(C, &F); - } - GV->removeDeadConstantUsers(); - } - if (!KernelUsedVariables.empty()) { std::string VarName = (Twine("llvm.amdgcn.kernel.") + F.getName() + ".lds").str(); diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1002,7 +1002,7 @@ ; CI: ; %bb.0: ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_read_b128 v[0:3], v0 +; CI-NEXT: ds_read2_b64 v[0:3], v0 offset1:1 ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 @@ -1012,16 +1012,27 @@ ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: load_misaligned64_constant_offsets: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: ds_read_b128 v[0:3], v4 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] -; GFX9-NEXT: s_endpgm +; GFX9-ALIGNED-LABEL: load_misaligned64_constant_offsets: +; GFX9-ALIGNED: ; %bb.0: +; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-ALIGNED-NEXT: ds_read2_b64 v[0:3], v4 offset1:1 +; GFX9-ALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-ALIGNED-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-ALIGNED-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-ALIGNED-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-ALIGNED-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX9-ALIGNED-NEXT: s_endpgm +; +; GFX9-UNALIGNED-LABEL: load_misaligned64_constant_offsets: +; GFX9-UNALIGNED: ; %bb.0: +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-UNALIGNED-NEXT: ds_read_b128 v[0:3], v4 +; GFX9-UNALIGNED-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-UNALIGNED-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-UNALIGNED-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-UNALIGNED-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-UNALIGNED-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] +; GFX9-UNALIGNED-NEXT: s_endpgm %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 %sum = add i64 %val0, %val1 diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -816,22 +816,31 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() { ; CI-LABEL: store_misaligned64_constant_offsets: ; CI: ; %bb.0: -; CI-NEXT: v_mov_b32_e32 v0, 0x7b -; CI-NEXT: v_mov_b32_e32 v1, 0 -; CI-NEXT: v_mov_b32_e32 v2, v0 -; CI-NEXT: v_mov_b32_e32 v3, v1 +; CI-NEXT: s_mov_b64 s[0:1], 0x7b +; CI-NEXT: v_mov_b32_e32 v0, s0 +; CI-NEXT: v_mov_b32_e32 v2, 0 +; CI-NEXT: v_mov_b32_e32 v1, s1 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_write_b128 v1, v[0:3] +; CI-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:1 ; CI-NEXT: s_endpgm ; -; GFX9-LABEL: store_misaligned64_constant_offsets: -; GFX9: ; %bb.0: -; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-NEXT: ds_write_b128 v1, v[0:3] -; GFX9-NEXT: s_endpgm +; GFX9-ALIGNED-LABEL: store_misaligned64_constant_offsets: +; GFX9-ALIGNED: ; %bb.0: +; GFX9-ALIGNED-NEXT: s_mov_b64 s[0:1], 0x7b +; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-ALIGNED-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-ALIGNED-NEXT: ds_write2_b64 v2, v[0:1], v[0:1] offset1:1 +; GFX9-ALIGNED-NEXT: s_endpgm +; +; GFX9-UNALIGNED-LABEL: store_misaligned64_constant_offsets: +; GFX9-UNALIGNED: ; %bb.0: +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-UNALIGNED-NEXT: ds_write_b128 v1, v[0:3] +; GFX9-UNALIGNED-NEXT: s_endpgm store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 ret void diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll --- a/llvm/test/CodeGen/AMDGPU/loop_break.ll +++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll @@ -209,7 +209,11 @@ ; GCN-NEXT: ; implicit-def: $sgpr6 ; GCN-NEXT: .LBB2_1: ; %bb1 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: s_or_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_cmp_lg_u32 lds@abs32@lo, 4 +; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec +; GCN-NEXT: s_and_b64 s[8:9], s[8:9], exec +; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GCN-NEXT: s_cmp_gt_i32 s6, -1 ; GCN-NEXT: s_cbranch_scc1 .LBB2_3 ; GCN-NEXT: ; %bb.2: ; %bb4