Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7204,6 +7204,69 @@
   return SDValue();
 }
 
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand, we may be able to convert that into 2 independent
+/// shifts followed by the logic op. This is a throughput improvement.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+  // Match a one-use bitwise logic op.
+  SDValue LogicOp = Shift->getOperand(0);
+  if (!LogicOp.hasOneUse())
+    return SDValue();
+
+  unsigned LogicOpcode = LogicOp.getOpcode();
+  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+      LogicOpcode != ISD::XOR)
+    return SDValue();
+
+  // Find a matching one-use shift by constant.
+  // Logic ops are commutative, so check each operand for a match.
+  EVT VT = Shift->getValueType(0);
+  unsigned ShiftOpcode = Shift->getOpcode();
+  ConstantSDNode *ShiftAmtC = isConstOrConstSplat(Shift->getOperand(1));
+  assert(ShiftAmtC && "Expected a shift with constant operand");
+  const APInt &C1 = ShiftAmtC->getAPIntValue();
+  auto matchFirstShift = [&](SDValue V, const APInt *&C) {
+    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+      return false;
+
+    // There are no guarantees about the types of shift operands, so check that
+    // our constants are the same width.
+    ConstantSDNode *ShiftC = isConstOrConstSplat(V.getOperand(1));
+    if (!ShiftC || ShiftC->getAPIntValue().getBitWidth() != C1.getBitWidth())
+      return false;
+
+    // The fold is not valid if the sum of the shift values exceeds bitwidth.
+    APInt ShiftSum = ShiftC->getAPIntValue() + C1;
+    unsigned BitWidth = VT.getScalarSizeInBits();
+    if (ShiftSum.getLimitedValue(BitWidth) >= BitWidth)
+      return false;
+
+    // Match complete - save the first shift amount.
+    C = &ShiftC->getAPIntValue();
+    return true;
+  };
+
+  SDValue X, Y;
+  const APInt *C0;
+  if (matchFirstShift(LogicOp.getOperand(0), C0)) {
+    X = LogicOp.getOperand(0).getOperand(0);
+    Y = LogicOp.getOperand(1);
+  } else if (matchFirstShift(LogicOp.getOperand(1), C0)) {
+    X = LogicOp.getOperand(1).getOperand(0);
+    Y = LogicOp.getOperand(0);
+  } else {
+    return SDValue();
+  }
+
+  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+  SDLoc DL(Shift);
+  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+  SDValue ShiftSum = DAG.getConstant(*C0 + C1, DL, ShiftAmtVT);
+  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSum);
+  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, Shift->getOperand(1));
+  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
 /// Handle transforms common to the three shifts, when the shift amount is a
 /// constant.
 /// We are looking for: (shift being one of shl/sra/srl)
@@ -7222,6 +7285,14 @@
   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
     return SDValue();
 
+  // TODO: This is limited to early combining because it may reveal regressions
+  // otherwise. But since we just checked a target hook to see if this is
+  // desirable, that should have filtered out cases where this interferes
+  // with some other pattern matching.
+  if (!LegalTypes)
+    if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+      return R;
+
   // We want to pull some binops through shifts, so that we have (and (shift))
   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   // thing happens with address calculations, so it's important to canonicalize
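The combine above relies on shifts distributing over bitwise logic when the summed shift amount stays below the bit width. As a sanity check, here is a minimal standalone C++ sketch of that identity; it is not part of the patch, and the constants 5 and 7 are chosen to mirror the tests below:

#include <cassert>
#include <cstdint>

int main() {
  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
  const unsigned C0 = 5, C1 = 7; // C0 + C1 = 12 < 32, so the fold is valid
  const uint32_t Vals[] = {0u, 1u, 0x12345678u, 0x80000001u, 0xFFFFFFFFu};
  for (uint32_t X : Vals) {
    for (uint32_t Y : Vals) {
      // shl distributes over and/or/xor, so the two shifts of X merge.
      assert((((X << C0) & Y) << C1) == ((X << (C0 + C1)) & (Y << C1)));
      assert((((X << C0) | Y) << C1) == ((X << (C0 + C1)) | (Y << C1)));
      assert((((X << C0) ^ Y) << C1) == ((X << (C0 + C1)) ^ (Y << C1)));
      // Logical shift right behaves the same way.
      assert((((X >> C0) & Y) >> C1) == ((X >> (C0 + C1)) & (Y >> C1)));
    }
  }
  return 0;
}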
Index: llvm/test/CodeGen/AArch64/bitfield-insert.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -265,12 +265,12 @@
 define i32 @test_nouseful_bits(i8 %a, i32 %b) {
 ; CHECK-LABEL: test_nouseful_bits:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    bfi w8, w8, #8, #24
-; CHECK-NEXT:    mov w9, w0
-; CHECK-NEXT:    bfi w9, w8, #8, #24
-; CHECK-NEXT:    bfi w0, w9, #8, #24
-; CHECK-NEXT:    lsl w0, w0, #8
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    lsl w8, w8, #8
+; CHECK-NEXT:    mov w9, w8
+; CHECK-NEXT:    bfxil w9, w0, #0, #8
+; CHECK-NEXT:    bfi w8, w9, #16, #16
+; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
   %conv = zext i8 %a to i32     ;  0  0  0  A
   %shl = shl i32 %b, 8          ; B2 B1 B0  0
Index: llvm/test/CodeGen/AArch64/shift-logic.ll
===================================================================
--- llvm/test/CodeGen/AArch64/shift-logic.ll
+++ llvm/test/CodeGen/AArch64/shift-logic.ll
@@ -4,8 +4,8 @@
 define i32 @shl_and(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: shl_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, w0, lsl #5
-; CHECK-NEXT:    lsl w0, w8, #7
+; CHECK-NEXT:    lsl w8, w0, #12
+; CHECK-NEXT:    and w0, w8, w1, lsl #7
 ; CHECK-NEXT:    ret
   %sh0 = shl i32 %x, 5
   %r = and i32 %sh0, %y
@@ -16,8 +16,8 @@
 define i32 @shl_or(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: shl_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0, lsl #5
-; CHECK-NEXT:    lsl w0, w8, #7
+; CHECK-NEXT:    lsl w8, w0, #12
+; CHECK-NEXT:    orr w0, w8, w1, lsl #7
 ; CHECK-NEXT:    ret
   %sh0 = shl i32 %x, 5
   %r = or i32 %y, %sh0
@@ -28,8 +28,8 @@
 define i32 @shl_xor(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: shl_xor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w1, w0, lsl #5
-; CHECK-NEXT:    lsl w0, w8, #7
+; CHECK-NEXT:    lsl w8, w0, #12
+; CHECK-NEXT:    eor w0, w8, w1, lsl #7
 ; CHECK-NEXT:    ret
   %sh0 = shl i32 %x, 5
   %r = xor i32 %sh0, %y
@@ -40,8 +40,8 @@
 define i32 @lshr_and(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: lshr_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, w0, lsr #5
-; CHECK-NEXT:    lsr w0, w8, #7
+; CHECK-NEXT:    lsr w8, w0, #12
+; CHECK-NEXT:    and w0, w8, w1, lsr #7
 ; CHECK-NEXT:    ret
   %sh0 = lshr i32 %x, 5
   %r = and i32 %y, %sh0
@@ -52,8 +52,8 @@
 define i32 @lshr_or(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: lshr_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0, lsr #5
-; CHECK-NEXT:    lsr w0, w8, #7
+; CHECK-NEXT:    lsr w8, w0, #12
+; CHECK-NEXT:    orr w0, w8, w1, lsr #7
 ; CHECK-NEXT:    ret
   %sh0 = lshr i32 %x, 5
   %r = or i32 %sh0, %y
@@ -64,8 +64,8 @@
 define i32 @lshr_xor(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: lshr_xor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w1, w0, lsr #5
-; CHECK-NEXT:    lsr w0, w8, #7
+; CHECK-NEXT:    lsr w8, w0, #12
+; CHECK-NEXT:    eor w0, w8, w1, lsr #7
 ; CHECK-NEXT:    ret
   %sh0 = lshr i32 %x, 5
   %r = xor i32 %y, %sh0
@@ -77,8 +77,8 @@
 define i32 @ashr_and(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ashr_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and w8, w1, w0, asr #5
-; CHECK-NEXT:    asr w0, w8, #7
+; CHECK-NEXT:    asr w8, w0, #12
+; CHECK-NEXT:    and w0, w8, w1, asr #7
 ; CHECK-NEXT:    ret
   %sh0 = ashr i32 %x, 5
   %r = and i32 %y, %sh0
@@ -89,8 +89,8 @@
 define i32 @ashr_or(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ashr_or:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr w8, w1, w0, asr #5
-; CHECK-NEXT:    asr w0, w8, #7
+; CHECK-NEXT:    asr w8, w0, #12
+; CHECK-NEXT:    orr w0, w8, w1, asr #7
 ; CHECK-NEXT:    ret
   %sh0 = ashr i32 %x, 5
   %r = or i32 %sh0, %y
@@ -101,8 +101,8 @@
 define i32 @ashr_xor(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: ashr_xor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w1, w0, asr #5
-; CHECK-NEXT:    asr w0, w8, #7
+; CHECK-NEXT:    asr w8, w0, #12
+; CHECK-NEXT:    eor w0, w8, w1, asr #7
 ; CHECK-NEXT:    ret
   %sh0 = ashr i32 %x, 5
   %r = xor i32 %y, %sh0
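Both the old and new AArch64 sequences are two instructions, because the logical instructions fold one shift into their shifted-register operand; the difference is that the new form applies the summed shift (5 + 7 = 12) to x in a single step. At the source level, the shl_and test above corresponds to something like this (illustrative C++, not taken from the patch):

// ((x << 5) & y) << 7 is rewritten as (x << 12) & (y << 7)
unsigned shl_and(unsigned x, unsigned y) {
  return ((x << 5) & y) << 7;
}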
Index: llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -15,10 +15,10 @@
 
 ; Make sure the cmp is not scheduled before the InlineAsm that clobbers cc.
 ; CHECK: bl _f2
-; CHECK: cmp {{r[0-9]+}}, #0
-; CHECK-NEXT: it eq
-; CHECK-NEXT: addeq {{r[0-9]+}}, #1
-; CHECK-NEXT: lsls
+; CHECK: clz {{r[0-9]+}}
+; CHECK-DAG: lsrs {{r[0-9]+}}
+; CHECK-DAG: lsls {{r[0-9]+}}
+; CHECK-NEXT: orr.w {{r[0-9]+}}
 ; CHECK-NEXT: InlineAsm Start
 define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
 entry:
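On x86 below, which has no shifted-operand form, the instruction count stays at three, but the rewrite shortens the critical path: the two shifts become independent and can issue in parallel, which is the throughput improvement the new function comment refers to. A rough C++ illustration of the two dependency shapes (the function and temporary names are hypothetical, assuming single-cycle shift and logic ops):

unsigned before(unsigned x, unsigned y) {
  unsigned t0 = x << 5; // each op waits on the previous one:
  unsigned t1 = t0 & y; // a serial chain of three ops
  return t1 << 7;
}

unsigned after(unsigned x, unsigned y) {
  unsigned a = x << 12; // independent of b; both shifts can issue together
  unsigned b = y << 7;
  return a & b;         // chain height drops to two ops
}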
Index: llvm/test/CodeGen/X86/shift-logic.ll
===================================================================
--- llvm/test/CodeGen/X86/shift-logic.ll
+++ llvm/test/CodeGen/X86/shift-logic.ll
@@ -5,9 +5,9 @@
 ; CHECK-LABEL: shl_and:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shll $5, %eax
+; CHECK-NEXT:    shll $7, %esi
+; CHECK-NEXT:    shll $12, %eax
 ; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    shll $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = shl i32 %x, 5
   %r = and i32 %sh0, %y
@@ -19,9 +19,9 @@
 ; CHECK-LABEL: shl_or:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shll $5, %eax
+; CHECK-NEXT:    shll $7, %esi
+; CHECK-NEXT:    shll $12, %eax
 ; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    shll $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = shl i32 %x, 5
   %r = or i32 %y, %sh0
@@ -33,9 +33,9 @@
 ; CHECK-LABEL: shl_xor:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shll $5, %eax
+; CHECK-NEXT:    shll $7, %esi
+; CHECK-NEXT:    shll $12, %eax
 ; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    shll $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = shl i32 %x, 5
   %r = xor i32 %sh0, %y
@@ -47,9 +47,9 @@
 ; CHECK-LABEL: lshr_and:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shrl $5, %eax
+; CHECK-NEXT:    shrl $7, %esi
+; CHECK-NEXT:    shrl $12, %eax
 ; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    shrl $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = lshr i32 %x, 5
   %r = and i32 %y, %sh0
@@ -61,9 +61,9 @@
 ; CHECK-LABEL: lshr_or:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shrl $5, %eax
+; CHECK-NEXT:    shrl $7, %esi
+; CHECK-NEXT:    shrl $12, %eax
 ; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    shrl $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = lshr i32 %x, 5
   %r = or i32 %sh0, %y
@@ -75,9 +75,9 @@
 ; CHECK-LABEL: lshr_xor:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shrl $5, %eax
+; CHECK-NEXT:    shrl $7, %esi
+; CHECK-NEXT:    shrl $12, %eax
 ; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    shrl $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = lshr i32 %x, 5
   %r = xor i32 %y, %sh0
@@ -90,9 +90,9 @@
 ; CHECK-LABEL: ashr_and:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    sarl $5, %eax
+; CHECK-NEXT:    sarl $7, %esi
+; CHECK-NEXT:    sarl $12, %eax
 ; CHECK-NEXT:    andl %esi, %eax
-; CHECK-NEXT:    sarl $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = ashr i32 %x, 5
   %r = and i32 %y, %sh0
@@ -104,9 +104,9 @@
 ; CHECK-LABEL: ashr_or:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    sarl $5, %eax
+; CHECK-NEXT:    sarl $7, %esi
+; CHECK-NEXT:    sarl $12, %eax
 ; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    sarl $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = ashr i32 %x, 5
   %r = or i32 %sh0, %y
@@ -118,9 +118,9 @@
 ; CHECK-LABEL: ashr_xor:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    sarl $5, %eax
+; CHECK-NEXT:    sarl $7, %esi
+; CHECK-NEXT:    sarl $12, %eax
 ; CHECK-NEXT:    xorl %esi, %eax
-; CHECK-NEXT:    sarl $7, %eax
 ; CHECK-NEXT:    retq
   %sh0 = ashr i32 %x, 5
   %r = xor i32 %y, %sh0
@@ -171,4 +171,3 @@
   %sh1 = lshr i32 %r, 7
   ret i32 %sh1
 }
-
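Finally, a note on the bit-width guard in matchFirstShift: for i32 with C0 = 20 and C1 = 15, the original expression is well defined (in fact constant zero, because every bit contributed by x is shifted out), while the folded form would need a shift by 35, and a DAG shift node whose amount reaches the bit width is undefined. A standalone C++ check of the defined half, with illustrative constants:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C0 = 20, C1 = 15; // C0 + C1 = 35 >= 32: the fold is rejected
  const uint32_t Vals[] = {0u, 0xFFFu, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals)
      // (X << 20) has bits only in positions 20..31; shifting the masked
      // value left by 15 pushes them all out, so the result is always 0.
      assert((((X << C0) & Y) << C1) == 0u);
  // (X << (C0 + C1)) would shift by 35 -- undefined for a 32-bit shift node
  // in the DAG, and undefined behavior in C++ as well, so it is not written.
  return 0;
}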