Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -846,9 +846,9 @@ return SDValue(); } if (N1.hasOneUse()) { - // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff y+c1 has one // use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); if (!OpNode.getNode()) return SDValue(); AddToWorklist(OpNode.getNode()); Index: test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; Test for a bug where DAGCombiner::ReassociateOps() was creating adds +; with offset in the first operand and base pointers in the second. 
+ +; CHECK-LABEL: {{^}}store_same_base_ptr: +; CHECK: buffer_store_dword v{{[0-9]+}}, [[VADDR:v\[[0-9]+:[0-9]+\]]], [[SADDR:s\[[0-9]+:[0-9]+\]]] +; CHECK: buffer_store_dword v{{[0-9]+}}, [[VADDR]], [[SADDR]] +; CHECK: buffer_store_dword v{{[0-9]+}}, [[VADDR]], [[SADDR]] +; CHECK: buffer_store_dword v{{[0-9]+}}, [[VADDR]], [[SADDR]] + +define void @store_same_base_ptr(i32 addrspace(1)* %out) { +entry: + %id = call i32 @llvm.amdgcn.workitem.id.x() #0 + %offset = sext i32 %id to i64 + %offset0 = add i64 %offset, 1027 + %ptr0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset0 + store i32 3, i32 addrspace(1)* %ptr0 + %offset1 = add i64 %offset, 1026 + %ptr1 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset1 + store i32 2, i32 addrspace(1)* %ptr1 + %offset2 = add i64 %offset, 1025 + %ptr2 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset2 + store i32 1, i32 addrspace(1)* %ptr2 + %offset3 = add i64 %offset, 1024 + %ptr3 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset3 + store i32 0, i32 addrspace(1)* %ptr3 + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/shl_add_constant.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_constant.ll +++ test/CodeGen/AMDGPU/shl_add_constant.ll @@ -73,7 +73,7 @@ ; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc ; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3 -; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]] +; SI: s_add_i32 [[TMP:s[0-9]+]], [[Y]], [[SHL3]] ; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8 ; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]] ; SI: buffer_store_dword [[VRESULT]] Index: test/CodeGen/X86/add-nsw-sext.ll =================================================================== --- test/CodeGen/X86/add-nsw-sext.ll +++ test/CodeGen/X86/add-nsw-sext.ll @@ -25,7 +25,7 @@ ; CHECK-LABEL: 
add_nsw_sext_add: ; CHECK: # BB#0: ; CHECK-NEXT: movslq %edi, %rax -; CHECK-NEXT: leaq 5(%rax,%rsi), %rax +; CHECK-NEXT: leaq 5(%rsi,%rax), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 @@ -72,7 +72,7 @@ ; CHECK-LABEL: gep8: ; CHECK: # BB#0: ; CHECK-NEXT: movslq %edi, %rax -; CHECK-NEXT: leaq 5(%rax,%rsi), %rax +; CHECK-NEXT: leaq 5(%rsi,%rax), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 @@ -127,7 +127,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movslq %edi, %rax ; CHECK-NEXT: shlq $4, %rax -; CHECK-NEXT: leaq 80(%rax,%rsi), %rax +; CHECK-NEXT: leaq 80(%rsi,%rax), %rax ; CHECK-NEXT: retq %add = add nsw i32 %i, 5 Index: test/CodeGen/X86/combine-multiplies.ll =================================================================== --- test/CodeGen/X86/combine-multiplies.ll +++ test/CodeGen/X86/combine-multiplies.ll @@ -31,10 +31,10 @@ ; ; CHECK-LABEL: testCombineMultiplies ; CHECK: imull $400, [[ARG1:%[a-z]+]], [[MUL:%[a-z]+]] # imm = 0x190 -; CHECK-NEXT: leal ([[MUL]],[[ARG2:%[a-z]+]]), [[LEA:%[a-z]+]] +; CHECK-NEXT: leal ([[ARG2:%[a-z]+]],[[MUL]]), [[LEA:%[a-z]+]] ; CHECK-NEXT: movl $11, {{[0-9]+}}([[LEA]],[[ARG1]],4) -; CHECK-NEXT: movl $22, {{[0-9]+}}([[MUL]],[[ARG2]]) -; CHECK-NEXT: movl $33, {{[0-9]+}}([[MUL]],[[ARG2]]) +; CHECK-NEXT: movl $22, {{[0-9]+}}([[ARG2]],[[MUL]]) +; CHECK-NEXT: movl $33, {{[0-9]+}}([[ARG2]],[[MUL]]) ; CHECK: retl ; @@ -109,7 +109,7 @@ ; CHECK-NEXT: movdqa [[C242]], v2 ; CHECK-NEXT: [[C726]], v3 ; CHECK-NEXT: [[C11]], x -; CHECK-NEXT: retl +; CHECK-NEXT: retl @v2 = common global <4 x i32> zeroinitializer, align 16 @v3 = common global <4 x i32> zeroinitializer, align 16 @@ -148,7 +148,7 @@ ; CHECK-NEXT: movdqa [[C242]], v2 ; CHECK-NEXT: [[C726]], v3 ; CHECK-NEXT: [[C11]], x -; CHECK-NEXT: retl +; CHECK-NEXT: retl ; Function Attrs: nounwind define void @testCombineMultiplies_non_splat(<4 x i32> %v1) { entry: