Index: test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll =================================================================== --- test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll +++ test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll @@ -25,7 +25,7 @@ %gptr0.phi = phi i32 addrspace(1)* [ %gep0, %entry ], [ %gep0.inc, %for.body ] %gptr1.phi = phi i32 addrspace(1)* [ %gep1, %entry ], [ %gep1.inc, %for.body ] %lptr0.phi = phi i32 addrspace(3)* [ %gep2, %entry ], [ %gep2.inc, %for.body ] - %lptr1 = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 1 + %lptr1 = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 2 %val0 = load i32, i32 addrspace(1)* %gep0 store i32 %val0, i32 addrspace(3)* %lptr0.phi %val1 = load i32, i32 addrspace(1)* %gep1 Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll =================================================================== --- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -10,17 +10,17 @@ ; CHECK: BB0_1: ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] -; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]] -; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]] +; SI-DAG: v_add_i32_e32 [[VADDR8:v[0-9]+]], vcc, 8, [[VADDR]] +; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR8]] ; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]] -; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]] -; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]] +; SI-DAG: v_add_i32_e32 [[VADDR0x88:v[0-9]+]], vcc, 0x88, [[VADDR]] +; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x88]] ; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] -; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1 -; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33 +; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:2 +; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:34 ; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256 ; CHECK: s_endpgm define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 { @@ -36,13 +36,13 @@ tail call void @llvm.amdgcn.s.barrier() #1 %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02 %tmp = load float, float addrspace(3)* %arrayidx, align 4 - %add1 = add nsw i32 %offset.02, 1 + %add1 = add nsw i32 %offset.02, 2 %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add1 %tmp1 = load float, float addrspace(3)* %arrayidx2, align 4 %add3 = add nsw i32 %offset.02, 32 %arrayidx4 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add3 %tmp2 = load float, float addrspace(3)* %arrayidx4, align 4 - %add5 = add nsw i32 %offset.02, 33 + %add5 = add nsw i32 %offset.02, 34 %arrayidx6 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add5 %tmp3 = load float, float addrspace(3)* %arrayidx6, align 4 %add7 = add nsw i32 %offset.02, 64 Index: test/CodeGen/AMDGPU/madak.ll =================================================================== --- test/CodeGen/AMDGPU/madak.ll +++ test/CodeGen/AMDGPU/madak.ll @@ -47,17 +47,17 @@ %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 - %a = load float, float addrspace(1)* %in.gep.0, align 4 - %b = load float, float addrspace(1)* %in.gep.1, align 4 - %c = load float, float addrspace(1)* %in.gep.2, align 4 + %a = load volatile float, float addrspace(1)* %in.gep.0, align 4 + %b = load volatile float, float addrspace(1)* %in.gep.1, align 4 + %c = load volatile float, float addrspace(1)* %in.gep.2, align 4 %mul0 = fmul float %a, %b %mul1 = fmul float %a, %c %madak0 = fadd float %mul0, 10.0 %madak1 = fadd float %mul1, 10.0 - store float %madak0, float addrspace(1)* %out.gep.0, align 4 - store float %madak1, float addrspace(1)* %out.gep.1, align 4 + store volatile float %madak0, float addrspace(1)* %out.gep.0, align 4 + store volatile float %madak1, float addrspace(1)* %out.gep.1, align 4 ret void } Index: test/CodeGen/AMDGPU/schedule-global-loads.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-global-loads.ll +++ test/CodeGen/AMDGPU/schedule-global-loads.ll @@ -8,12 +8,12 @@ ; FUNC-LABEL: {{^}}cluster_global_arg_loads: ; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 +; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; SI: buffer_store_dword [[REG0]] ; SI: buffer_store_dword [[REG1]] define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 { %load0 = load i32, i32 addrspace(1)* %ptr, align 4 - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 1 + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 2 %load1 = load i32, i32 addrspace(1)* %gep, align 4 store i32 %load0, i32 addrspace(1)* %out0, align 4 store i32 %load1, i32 addrspace(1)* %out1, align 4 Index: test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll =================================================================== --- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -11,13 +11,13 @@ ; FUNC-LABEL: @reorder_local_load_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 ; CI: buffer_store_dword define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2 + %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 store i32 99, i32 addrspace(1)* %gptr, align 4 @@ -32,12 +32,12 @@ ; FUNC-LABEL: @no_reorder_local_load_volatile_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 ; CI: buffer_store_dword -; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2 + %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 store volatile i32 99, i32 addrspace(1)* %gptr, align 4 @@ -51,13 +51,13 @@ ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4 -; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8 +; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 ; CI: buffer_store_dword define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4 %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2 + %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4 store i32 99, i32 addrspace(1)* %gptr, align 4 @@ -75,13 +75,13 @@ ; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} ; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} ; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1 -; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2 +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3 ; CI: buffer_store_dword define void @reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 { %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8 %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2 + %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3 %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4 store i32 99, i32 addrspace(1)* %gptr, align 4 @@ -97,14 +97,14 @@ ; CI: v_readfirstlane_b32 s[[PTR_LO:[0-9]+]], v{{[0-9]+}} ; CI: v_readfirstlane_b32 s[[PTR_HI:[0-9]+]], v{{[0-9]+}} ; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x1 -; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x2 +; CI-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x3 ; CI: ds_write_b32 ; CI: buffer_store_dword define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 { %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8 %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2 + %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 3 %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4 store i32 99, i32 addrspace(3)* %lptr, align 4 @@ -143,7 +143,7 @@ ; CI: buffer_store_dword define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 { %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1 - %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 2 + %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 3 %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4 store i32 99, i32 addrspace(3)* %lptr, align 4 @@ -157,16 +157,15 @@ ; FUNC-LABEL: @reorder_local_offsets ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400 -; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404 -; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12 +; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 ; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400 -; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404 +; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408 ; CI: buffer_store_dword ; CI: s_endpgm define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 { %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3 %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 100 - %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 101 + %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 102 store i32 123, i32 addrspace(3)* %ptr1, align 4 %tmp1 = load i32, i32 addrspace(3)* %ptr2, align 4 @@ -184,16 +183,15 @@ ; FUNC-LABEL: @reorder_global_offsets ; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 ; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 -; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 +; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 ; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 -; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 -; CI: buffer_store_dword +; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408 ; CI: s_endpgm define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 { %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3 %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 100 - %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 101 + %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 102 store i32 123, i32 addrspace(1)* %ptr1, align 4 %tmp1 = load i32, i32 addrspace(1)* %ptr2, align 4