Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -3083,6 +3083,15 @@
     return true;
   }
 
+  /// Return true if it is profitable to fold a pair of shifts into a mask.
+  /// This is usually true on most targets. But some targets, like Thumb1,
+  /// have immediate shift instructions, but no immediate "and" instruction;
+  /// this makes the fold unprofitable.
+  virtual bool shouldFoldShiftPairToMask(const SDNode *N,
+                                         CombineLevel Level) const {
+    return true;
+  }
+
   // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
   // to a shuffle and a truncate.
   // Example of such a combine:
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6513,7 +6513,8 @@
   //                               (and (srl x, (sub c1, c2), MASK)
   // Only fold this if the inner shift has no other uses -- if it does, folding
   // this will increase the total number of instructions.
-  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
+      TLI.shouldFoldShiftPairToMask(N, Level)) {
     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
       uint64_t c1 = N0C1->getZExtValue();
       if (c1 < OpSizeInBits) {
@@ -6819,7 +6820,8 @@
 
   // fold (srl (shl x, c), c) -> (and x, cst2)
   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
-      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
+      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+      TLI.shouldFoldShiftPairToMask(N, Level)) {
     SDLoc DL(N);
     SDValue Mask =
         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -593,6 +593,8 @@
     bool isDesirableToCommuteWithShift(const SDNode *N,
                                        CombineLevel Level) const override;
 
+    bool shouldFoldShiftPairToMask(const SDNode *N,
+                                   CombineLevel Level) const override;
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -10431,6 +10431,18 @@
   return false;
 }
 
+bool
+ARMTargetLowering::shouldFoldShiftPairToMask(const SDNode *N,
+                                             CombineLevel Level) const {
+  if (!Subtarget->isThumb1Only())
+    return true;
+
+  if (Level == BeforeLegalizeTypes)
+    return true;
+
+  return false;
+}
+
 static SDValue PerformSHLSimplify(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *ST) {
@@ -10734,6 +10746,12 @@
   if (!C2 || C2 >= 32)
     return SDValue();
 
+  // Clear irrelevant bits in the mask.
+  if (LeftShift)
+    C1 &= (-1U << C2);
+  else
+    C1 &= (-1U >> C2);
+
   SelectionDAG &DAG = DCI.DAG;
   SDLoc DL(N);
 
@@ -10741,9 +10759,7 @@
   // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
   // transform to a pair of shifts, to save materializing c1.
 
-  // First pattern: right shift, and c1+1 is a power of two.
-  // FIXME: Also check reversed pattern (left shift, and ~c1+1 is a power
-  // of two).
+  // First pattern: right shift, then mask off leading bits.
   // FIXME: Use demanded bits?
   if (!LeftShift && isMask_32(C1)) {
     uint32_t C3 = countLeadingZeros(C1);
@@ -10755,13 +10771,23 @@
     }
   }
 
-  // Second pattern: left shift, and (c1>>c2)+1 is a power of two.
-  // FIXME: Also check reversed pattern (right shift, and ~(c1<<c2)+1
-  // is a power of two).
+  // First pattern, reversed: left shift, then mask off trailing bits.
+  if (LeftShift && isMask_32(~C1)) {
+    uint32_t C3 = countTrailingZeros(C1);
+    if (C2 < C3) {
+      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+                                DAG.getConstant(C3 - C2, DL, MVT::i32));
+      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+                         DAG.getConstant(C3, DL, MVT::i32));
+    }
+  }
+
+  // Second pattern: left shift, then mask off leading bits.
   // FIXME: Use demanded bits?
   if (LeftShift && isShiftedMask_32(C1)) {
+    uint32_t Trailing = countTrailingZeros(C1);
     uint32_t C3 = countLeadingZeros(C1);
-    if (C2 + C3 < 32 && C1 == ((-1U << (C2 + C3)) >> C3)) {
+    if (Trailing == C2 && C2 + C3 < 32) {
       SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
                                 DAG.getConstant(C2 + C3, DL, MVT::i32));
       return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
@@ -10769,6 +10795,19 @@
     }
   }
 
+  // Second pattern, reversed: right shift, then mask off trailing bits.
+  // FIXME: Handle other patterns of known/demanded bits.
+  if (!LeftShift && isShiftedMask_32(C1)) {
+    uint32_t Leading = countLeadingZeros(C1);
+    uint32_t C3 = countTrailingZeros(C1);
+    if (Leading == C2 && C2 + C3 < 32) {
+      SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
+                                DAG.getConstant(C2 + C3, DL, MVT::i32));
+      return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
+                         DAG.getConstant(C3, DL, MVT::i32));
+    }
+  }
+
   // FIXME: Transform "(and (shl x, c2) c1)" ->
   // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
   // c1.
Index: test/CodeGen/Thumb/shift-and.ll
===================================================================
--- test/CodeGen/Thumb/shift-and.ll
+++ test/CodeGen/Thumb/shift-and.ll
@@ -45,9 +45,8 @@
 define i32 @test4(i32 %x) {
 ; CHECK-LABEL: test4:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    lsls r0, r0, #4
-; CHECK-NEXT:    movs r1, #112
-; CHECK-NEXT:    bics r0, r1
+; CHECK-NEXT:    lsrs r0, r0, #3
+; CHECK-NEXT:    lsls r0, r0, #7
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = shl i32 %x, 4
@@ -84,9 +83,8 @@
 define i32 @test7(i32 %x) {
 ; CHECK-LABEL: test7:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    lsrs r1, r0, #29
-; CHECK-NEXT:    movs r0, #4
-; CHECK-NEXT:    ands r0, r1
+; CHECK-NEXT:    lsrs r0, r0, #31
+; CHECK-NEXT:    lsls r0, r0, #2
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = lshr i32 %x, 29
@@ -110,9 +108,8 @@
 define i32 @test9(i32 %x) {
 ; CHECK-LABEL: test9:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    lsrs r0, r0, #2
-; CHECK-NEXT:    movs r1, #1
-; CHECK-NEXT:    bics r0, r1
+; CHECK-NEXT:    lsrs r0, r0, #3
+; CHECK-NEXT:    lsls r0, r0, #1
 ; CHECK-NEXT:    bx lr
 entry:
   %and = lshr i32 %x, 2
@@ -131,3 +128,63 @@
   %shr = and i32 %0, 255
   ret i32 %shr
 }
+
+define i32 @test11(i32 %x) {
+; CHECK-LABEL: test11:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r0, r0, #24
+; CHECK-NEXT:    lsls r0, r0, #2
+; CHECK-NEXT:    bx lr
+entry:
+  %shl = lshr i32 %x, 22
+  %and = and i32 %shl, 1020
+  ret i32 %and
+}
+
+define i32 @test12(i32 %x) {
+; CHECK-LABEL: test12:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r0, r0, #3
+; CHECK-NEXT:    lsls r0, r0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = shl i32 %x, 1
+  %shr = and i32 %0, -16
+  ret i32 %shr
+}
+
+define i32 @test13(i32 %x) {
+; CHECK-LABEL: test13:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r0, r0, #3
+; CHECK-NEXT:    lsls r0, r0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %shr = lshr i32 %x, 3
+  %shl = shl i32 %shr, 4
+  ret i32 %shl
+}
+
+define i32 @test14(i32 %x) {
+; CHECK-LABEL: test14:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r0, r0, #6
+; CHECK-NEXT:    lsls r0, r0, #10
+; CHECK-NEXT:    bx lr
+entry:
+  %shl = shl i32 %x, 4
+  %and = and i32 %shl, -1024
+  ret i32 %and
+}
+
+define i32 @test15(i32 %x) {
+; CHECK-LABEL: test15:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    lsrs r0, r0, #4
+; CHECK-NEXT:    lsls r0, r0, #3
+; CHECK-NEXT:    bx lr
+entry:
+  %shr = lshr i32 %x, 4
+  %shl = shl i32 %shr, 3
+  ret i32 %shl
+}