Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14357,14 +14357,12 @@ SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the - // chain until we find two aliases. If we've seen two aliases, assume we'll - // find more and revert to original chain since the xform is unlikely to be - // profitable. + // chain until we reach the depth limit. // // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. - if (Depth > 6 || Aliases.size() == 2) { + if (Depth > 6) { Aliases.clear(); Aliases.push_back(OriginalChain); return; Index: test/CodeGen/AMDGPU/merge-stores.ll =================================================================== --- test/CodeGen/AMDGPU/merge-stores.ll +++ test/CodeGen/AMDGPU/merge-stores.ll @@ -1,5 +1,8 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s + +; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s ; Run with devices with different unaligned load restrictions. @@ -151,11 +154,15 @@ ; FIXME: Should be able to merge this ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: -; XGCN: buffer_store_dwordx4 -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword -; GCN: buffer_store_dword +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v + +; GCN-AA: buffer_store_dwordx2 +; GCN-AA: buffer_store_dword v +; GCN-AA: buffer_store_dword v + ; GCN: s_endpgm define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 @@ -484,11 +491,15 @@ ; This works once AA is enabled on the subtarget ; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; XGCN: buffer_store_dwordx4 [[LOAD]] -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v + +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v + +; GCN-AA: buffer_store_dwordx4 [[LOAD]] + +; GCN: s_endpgm define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 @@ -606,9 +617,18 @@ ret void } +; FIXME: This should do 2 dwordx4 loads ; GCN-LABEL: {{^}}merge_global_store_8_constants_i32: -; GCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx4 + +; GCN-NOAA: buffer_store_dwordx4 +; GCN-NOAA: buffer_store_dwordx4 + +; GCN-AA: buffer_store_dwordx4 +; GCN-AA: buffer_store_dwordx2 +; GCN-AA: buffer_store_dwordx2 + +; GCN: s_endpgm + define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1