Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1517,28 +1517,6 @@
   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
 }
 
-static
-SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
-                              SelectionDAG &DAG) {
-  EVT VT = N0.getValueType();
-  SDValue N00 = N0.getOperand(0);
-  SDValue N01 = N0.getOperand(1);
-  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
-
-  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
-      isa<ConstantSDNode>(N00.getOperand(1))) {
-    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
-    N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
-                     DAG.getNode(ISD::SHL, SDLoc(N00), VT,
-                                 N00.getOperand(0), N01),
-                     DAG.getNode(ISD::SHL, SDLoc(N01), VT,
-                                 N00.getOperand(1), N01));
-    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
-  }
-
-  return SDValue();
-}
-
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -1655,16 +1633,6 @@
     }
   }
 
-  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
-  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
-    SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
-    if (Result.getNode()) return Result;
-  }
-  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
-    SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
-    if (Result.getNode()) return Result;
-  }
-
   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
   if (N1.getOpcode() == ISD::SHL &&
       N1.getOperand(0).getOpcode() == ISD::SUB)
@@ -4169,6 +4137,18 @@
                        HiBitsMask);
   }
 
+  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // Variant of version done on multiply, except mul by a power of 2 is turned
+  // into a shift.
+  APInt Val;
+  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+      (isa<ConstantSDNode>(N0.getOperand(1)) ||
+       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+  }
+
   if (N1C) {
     SDValue NewSHL = visitShiftByConstant(N, N1C);
     if (NewSHL.getNode())
Index: test/CodeGen/AArch64/arm64-shifted-sext.ll
===================================================================
--- test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -166,8 +166,8 @@
 define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: extendedLeftShiftshortTointBy16:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: lsl w0, [[REG]], #16
+; CHECK: lsl [[REG:w[0-9]+]], w0, #16
+; CHECK: add w0, [[REG]], #16, lsl #12
   %inc = add i16 %a, 1
   %conv2 = zext i16 %inc to i32
   %shl = shl nuw i32 %conv2, 16
Index: test/CodeGen/R600/shl_add_constant.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/shl_add_constant.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Test with inline immediate
+
+; FUNC-LABEL: @shl_2_add_9_i32
+; SI: V_LSHLREV_B32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: V_ADD_I32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %add = add i32 %val, 9
+  %result = shl i32 %add, 2
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @shl_2_add_9_i32_2_add_uses
+; SI-DAG: V_ADD_I32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: V_LSHLREV_B32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-DAG: BUFFER_STORE_DWORD [[ADDREG]]
+; SI-DAG: BUFFER_STORE_DWORD [[SHLREG]]
+; SI: S_ENDPGM
+define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %add = add i32 %val, 9
+  %result = shl i32 %add, 2
+  store i32 %result, i32 addrspace(1)* %out0, align 4
+  store i32 %add, i32 addrspace(1)* %out1, align 4
+  ret void
+}
+
+; Test with add literal constant
+
+; FUNC-LABEL: @shl_2_add_999_i32
+; SI: V_LSHLREV_B32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: V_ADD_I32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %shl = add i32 %val, 999
+  %result = shl i32 %shl, 2
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_add_shl_add_constant
+; SI-DAG: S_LOAD_DWORD [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: S_LSHL_B32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: S_ADD_I32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: S_ADD_I32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]]
+define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %shl, %y
+  store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_add_shl_add_constant_inv
+; SI-DAG: S_LOAD_DWORD [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: S_LSHL_B32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: S_ADD_I32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: S_ADD_I32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]]
+
+define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %y, %shl
+  store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/R600/shl_add_ptr.ll
===================================================================
--- test/CodeGen/R600/shl_add_ptr.ll
+++ test/CodeGen/R600/shl_add_ptr.ll
@@ -1,7 +1,4 @@
-; XFAIL: *
-; Enable when patch to perform shl + add constant generic DAG combiner patch is in.
-
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; Test that doing a shift of a pointer with a constant add will be
 ; folded into the constant offset addressing mode even if the add has
Index: test/CodeGen/R600/trunc.ll
===================================================================
--- test/CodeGen/R600/trunc.ll
+++ test/CodeGen/R600/trunc.ll
@@ -31,10 +31,10 @@
 
 ; SI-LABEL: @trunc_shl_i64:
 ; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: S_ADD_U32 s[[LO_SREG2:[0-9]+]], s[[LO_SREG]],
+; SI: S_LSHL_B64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
+; SI: S_ADD_U32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
 ; SI: S_ADDC_U32
-; SI: S_LSHL_B64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG2]]:{{[0-9]+\]}}, 2
-; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SHL]]
+; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
 ; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
 define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
   %aa = add i64 %a, 234 ; Prevent shrinking store.
Index: test/CodeGen/X86/add_shl_constant.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/add_shl_constant.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK-LABEL: add_shl_add_constant_1_i32
+; CHECK: leal 984(%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+define i32 @add_shl_add_constant_1_i32(i32 %x, i32 %y) nounwind {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %shl, %y
+  ret i32 %add.1
+}
+
+; CHECK-LABEL: add_shl_add_constant_2_i32
+; CHECK: leal 984(%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+define i32 @add_shl_add_constant_2_i32(i32 %x, i32 %y) nounwind {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %y, %shl
+  ret i32 %add.1
+}
+
+; CHECK: LCPI2_0:
+; CHECK: .long 984
+; CHECK: _add_shl_add_constant_1_v4i32
+; CHECK: pslld $3, %[[REG:xmm[0-9]+]]
+; CHECK: paddd %xmm1, %[[REG]]
+; CHECK: paddd LCPI2_0(%rip), %[[REG:xmm[0-9]+]]
+; CHECK: retq
+define <4 x i32> @add_shl_add_constant_1_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+  %add.0 = add <4 x i32> %x, <i32 123, i32 123, i32 123, i32 123>
+  %shl = shl <4 x i32> %add.0, <i32 3, i32 3, i32 3, i32 3>
+  %add.1 = add <4 x i32> %shl, %y
+  ret <4 x i32> %add.1
+}
+
+; CHECK: LCPI3_0:
+; CHECK: .long 984
+; CHECK: _add_shl_add_constant_2_v4i32
+; CHECK: pslld $3, %[[REG:xmm[0-9]+]]
+; CHECK: paddd %xmm1, %[[REG]]
+; CHECK: paddd LCPI3_0(%rip), %[[REG:xmm[0-9]+]]
+; CHECK: retq
+define <4 x i32> @add_shl_add_constant_2_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+  %add.0 = add <4 x i32> %x, <i32 123, i32 123, i32 123, i32 123>
+  %shl = shl <4 x i32> %add.0, <i32 3, i32 3, i32 3, i32 3>
+  %add.1 = add <4 x i32> %y, %shl
+  ret <4 x i32> %add.1
+}
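Note (illustrative only, not part of the patch): the combine relies on the identity (x + c1) << c2 == (x << c2) + (c1 << c2) in wrap-around integer arithmetic, which is where the folded constants in the CHECK lines above come from: 9 << 2 = 36, 999 << 2 = 0xf9c, 123 << 3 = 984 (0x3d8), and 1 << 16 written as "#16, lsl #12" on AArch64. A minimal standalone C++ sketch checking those values:

#include <cassert>
#include <cstdint>

int main() {
  // (x + c1) << c2 == (x << c2) + (c1 << c2) holds modulo 2^32 for any x.
  for (uint32_t x = 0; x < 1000; ++x) {
    assert(((x + 9u) << 2) == ((x << 2) + 36u));       // shl_2_add_9_i32
    assert(((x + 999u) << 2) == ((x << 2) + 0xf9cu));  // shl_2_add_999_i32
    assert(((x + 123u) << 3) == ((x << 3) + 984u));    // 0x3d8 / leal 984(...)
    assert(((x + 1u) << 16) == ((x << 16) + 65536u));  // AArch64 #16, lsl #12
  }
  return 0;
}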