Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1517,28 +1517,6 @@
   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
 }
 
-static
-SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
-                              SelectionDAG &DAG) {
-  EVT VT = N0.getValueType();
-  SDValue N00 = N0.getOperand(0);
-  SDValue N01 = N0.getOperand(1);
-  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
-
-  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
-      isa<ConstantSDNode>(N00.getOperand(1))) {
-    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
-    N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
-                     DAG.getNode(ISD::SHL, SDLoc(N00), VT,
-                                 N00.getOperand(0), N01),
-                     DAG.getNode(ISD::SHL, SDLoc(N01), VT,
-                                 N00.getOperand(1), N01));
-    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
-  }
-
-  return SDValue();
-}
-
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -1655,16 +1633,6 @@
     }
   }
 
-  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
-  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
-    SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
-    if (Result.getNode()) return Result;
-  }
-  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
-    SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
-    if (Result.getNode()) return Result;
-  }
-
   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
   if (N1.getOpcode() == ISD::SHL &&
       N1.getOperand(0).getOpcode() == ISD::SUB)
@@ -4169,6 +4137,18 @@
                        HiBitsMask);
   }
 
+  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // Variant of version done on multiply, except mul by a power of 2 is turned
+  // into a shift.
+  APInt Val;
+  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+      (isa<ConstantSDNode>(N0.getOperand(1)) ||
+       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+  }
+
   if (N1C) {
     SDValue NewSHL = visitShiftByConstant(N, N1C);
     if (NewSHL.getNode())
Index: test/CodeGen/AArch64/arm64-shifted-sext.ll
===================================================================
--- test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -166,8 +166,8 @@
 define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: extendedLeftShiftshortTointBy16:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: lsl w0, [[REG]], #16
+; CHECK: lsl [[REG:w[0-9]+]], w0, #16
+; CHECK: add w0, [[REG]], #16, lsl #12
   %inc = add i16 %a, 1
   %conv2 = zext i16 %inc to i32
   %shl = shl nuw i32 %conv2, 16
Index: test/CodeGen/R600/shl_add_constant.ll
===================================================================
--- /dev/null
+++ test/CodeGen/R600/shl_add_constant.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Test with inline immediate
+
+; FUNC-LABEL: @shl_2_add_9_i32
+; SI: V_LSHLREV_B32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: V_ADD_I32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %add = add i32 %val, 9
+  %result = shl i32 %add, 2
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @shl_2_add_9_i32_2_add_uses
+; SI-DAG: V_ADD_I32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: V_LSHLREV_B32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-DAG: BUFFER_STORE_DWORD [[ADDREG]]
+; SI-DAG: BUFFER_STORE_DWORD [[SHLREG]]
+; SI: S_ENDPGM
+define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %add = add i32 %val, 9
+  %result = shl i32 %add, 2
+  store i32 %result, i32 addrspace(1)* %out0, align 4
+  store i32 %add, i32 addrspace(1)* %out1, align 4
+  ret void
+}
+
+; Test with add literal constant
+
+; FUNC-LABEL: @shl_2_add_999_i32
+; SI: V_LSHLREV_B32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: V_ADD_I32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI: S_ENDPGM
+define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %shl = add i32 %val, 999
+  %result = shl i32 %shl, 2
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_add_shl_add_constant
+; SI-DAG: S_LOAD_DWORD [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: S_LSHL_B32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: S_ADD_I32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: S_ADD_I32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]]
+define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %shl, %y
+  store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_add_shl_add_constant_inv
+; SI-DAG: S_LOAD_DWORD [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: S_LOAD_DWORD [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: S_LSHL_B32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: S_ADD_I32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: S_ADD_I32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: BUFFER_STORE_DWORD [[VRESULT]]
+
+define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %y, %shl
+  store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Index: test/CodeGen/R600/shl_add_ptr.ll
===================================================================
--- test/CodeGen/R600/shl_add_ptr.ll
+++ test/CodeGen/R600/shl_add_ptr.ll
@@ -1,7 +1,4 @@
-; XFAIL: *
-; Enable when patch to perform shl + add constant generic DAG combiner patch is in.
-
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; Test that doing a shift of a pointer with a constant add will be
 ; folded into the constant offset addressing mode even if the add has
Index: test/CodeGen/R600/trunc.ll
===================================================================
--- test/CodeGen/R600/trunc.ll
+++ test/CodeGen/R600/trunc.ll
@@ -31,10 +31,10 @@
 
 ; SI-LABEL: @trunc_shl_i64:
 ; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: S_ADD_U32 s[[LO_SREG2:[0-9]+]], s[[LO_SREG]],
+; SI: S_LSHL_B64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
+; SI: S_ADD_U32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
 ; SI: S_ADDC_U32
-; SI: S_LSHL_B64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG2]]:{{[0-9]+\]}}, 2
-; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SHL]]
+; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
 ; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
 define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
   %aa = add i64 %a, 234 ; Prevent shrinking store.
Index: test/CodeGen/X86/add_shl_constant.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/add_shl_constant.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; CHECK-LABEL: add_shl_add_constant_1_i32
+; CHECK: leal 984(%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+define i32 @add_shl_add_constant_1_i32(i32 %x, i32 %y) nounwind {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %shl, %y
+  ret i32 %add.1
+}
+
+; CHECK-LABEL: add_shl_add_constant_2_i32
+; CHECK: leal 984(%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+define i32 @add_shl_add_constant_2_i32(i32 %x, i32 %y) nounwind {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %y, %shl
+  ret i32 %add.1
+}
+
+; CHECK: LCPI2_0:
+; CHECK: .long 984
+; CHECK: _add_shl_add_constant_1_v4i32
+; CHECK: pslld $3, %[[REG:xmm[0-9]+]]
+; CHECK: paddd %xmm1, %[[REG]]
+; CHECK: paddd LCPI2_0(%rip), %[[REG:xmm[0-9]+]]
+; CHECK: retq
+define <4 x i32> @add_shl_add_constant_1_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+  %add.0 = add <4 x i32> %x, <i32 123, i32 123, i32 123, i32 123>
+  %shl = shl <4 x i32> %add.0, <i32 3, i32 3, i32 3, i32 3>
+  %add.1 = add <4 x i32> %shl, %y
+  ret <4 x i32> %add.1
+}
+
+; CHECK: LCPI3_0:
+; CHECK: .long 984
+; CHECK: _add_shl_add_constant_2_v4i32
+; CHECK: pslld $3, %[[REG:xmm[0-9]+]]
+; CHECK: paddd %xmm1, %[[REG]]
+; CHECK: paddd LCPI3_0(%rip), %[[REG:xmm[0-9]+]]
+; CHECK: retq
+define <4 x i32> @add_shl_add_constant_2_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+  %add.0 = add <4 x i32> %x, <i32 123, i32 123, i32 123, i32 123>
+  %shl = shl <4 x i32> %add.0, <i32 3, i32 3, i32 3, i32 3>
+  %add.1 = add <4 x i32> %y, %shl
+  ret <4 x i32> %add.1
+}
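Note (illustrative only, not part of the patch): the combine relies on the identity (x + c1) << c2 == (x << c2) + (c1 << c2) in wrap-around integer arithmetic, which is where the folded constants in the CHECK lines above come from: 9 << 2 = 36, 999 << 2 = 0xf9c, 123 << 3 = 984 (0x3d8), and 1 << 16 written as "#16, lsl #12" on AArch64. A minimal standalone C++ sketch checking those values:

#include <cassert>
#include <cstdint>

int main() {
  // (x + c1) << c2 == (x << c2) + (c1 << c2) holds modulo 2^32 for any x.
  for (uint32_t x = 0; x < 1000; ++x) {
    assert(((x + 9u) << 2) == ((x << 2) + 36u));       // shl_2_add_9_i32
    assert(((x + 999u) << 2) == ((x << 2) + 0xf9cu));  // shl_2_add_999_i32
    assert(((x + 123u) << 3) == ((x << 3) + 984u));    // 0x3d8 / leal 984(...)
    assert(((x + 1u) << 16) == ((x << 16) + 65536u));  // AArch64 #16, lsl #12
  }
  return 0;
}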