Index: lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1507,9 +1507,31 @@
   ReplaceNode(N, St);
 }

+static bool isShiftedMask(uint64_t Mask, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return isShiftedMask_32(Mask);
+  return isShiftedMask_64(Mask);
+}
+
+static unsigned countTrailingZeros(uint64_t Val, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return countTrailingZeros<uint32_t>(Val);
+  return countTrailingZeros<uint64_t>(Val);
+}
+
+static unsigned countTrailingOnes(uint64_t Val, EVT VT) {
+  assert(VT == MVT::i32 || VT == MVT::i64);
+  if (VT == MVT::i32)
+    return countTrailingOnes<uint32_t>(Val);
+  return countTrailingOnes<uint64_t>(Val);
+}
+
 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                        unsigned &Opc, SDValue &Opd0,
                                        unsigned &LSB, unsigned &MSB,
+                                       unsigned &DstLSB,
                                        unsigned NumberOfIgnoredLowBits,
                                        bool BiggerPattern) {
   assert(N->getOpcode() == ISD::AND &&
@@ -1541,8 +1563,8 @@
   // simplified. Try to undo that
   AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

-  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
-  if (AndImm & (AndImm + 1))
+  // The immediate should be a mask, potentially shifted.
+  if (!isShiftedMask(AndImm, VT))
     return false;

   bool ClampMSB = false;
@@ -1584,10 +1606,19 @@
     return false;
   }

-  LSB = SrlImm;
-  MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
-                                 : countTrailingOnes<uint64_t>(AndImm)) -
-        1;
+  // If the mask is shifted left, then by how much?
+  DstLSB = countTrailingZeros(AndImm, VT);
+  // Pretend that the mask is not shifted so that cto works.
+  AndImm >>= DstLSB;
+
+  // How many bits are we extracting?
+  unsigned NBits = countTrailingOnes(AndImm, VT);
+
+  // Starting from which bit? (accounting for the shifted mask)
+  LSB = SrlImm + DstLSB;
+  // And the last bit to extract is?
+  MSB = LSB + NBits - 1;
+
   if (ClampMSB)
     // Since we're moving the extend before the right shift operation, we need
     // to clamp the MSB to make sure we don't shift in undefined bits instead of
@@ -1771,18 +1802,21 @@
 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
+                                unsigned &DstLSB,
                                 unsigned NumberOfIgnoredLowBits = 0,
                                 bool BiggerPattern = false) {
   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
     return false;

+  DstLSB = 0;
+
   switch (N->getOpcode()) {
   default:
     if (!N->isMachineOpcode())
       return false;
     break;

   case ISD::AND:
-    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
+    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, DstLSB,
                                       NumberOfIgnoredLowBits, BiggerPattern);
   case ISD::SRL:
   case ISD::SRA:
@@ -1810,31 +1844,40 @@
   return false;
 }

+static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount);
+
 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
-  unsigned Opc, Immr, Imms;
+  unsigned Opc, Immr, Imms, DstLSB;
   SDValue Opd0;
-  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
+  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms, DstLSB))
     return false;

   EVT VT = N->getValueType(0);
   SDLoc dl(N);

-  // If the bit extract operation is 64bit but the original type is 32bit, we
-  // need to add one EXTRACT_SUBREG.
-  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
-    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
-                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
+  // In what value type should we perform the extraction?
+  EVT BFMVT = VT;
+  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32)
+    BFMVT = MVT::i64;

-    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
-    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
-    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
-                                          MVT::i32, SDValue(BFM, 0), SubReg));
-    return true;
+  SDValue BFMOps[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, BFMVT),
+                      CurDAG->getTargetConstant(Imms, dl, BFMVT)};
+  SDNode *BFM = CurDAG->getMachineNode(Opc, dl, BFMVT, BFMOps);
+
+  // If the bit extract operation is 64bit but the original type is 32bit,
+  // we will need to add one EXTRACT_SUBREG.
+  if (BFMVT != VT) {
+    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, VT);
+    SDValue ExtractOps[] = {SDValue(BFM, 0), SubReg};
+    BFM = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
+                                 ExtractOps);
   }

-  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
-                   CurDAG->getTargetConstant(Imms, dl, VT)};
-  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+  // The extracted bits may be shifted left afterwards.
+  if (DstLSB)
+    BFM = getLeftShift(CurDAG, SDValue(BFM, 0), DstLSB).getNode();
+
+  ReplaceNode(N, BFM);
   return true;
 }

@@ -2107,9 +2150,9 @@
 /// Does this tree qualify as an attempt to move a bitfield into position,
 /// essentially "(and (shl VAL, N), Mask)".
 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
-                                    bool BiggerPattern,
-                                    SDValue &Src, int &ShiftAmount,
-                                    int &MaskWidth) {
+                                    bool BiggerPattern, SDValue &Src,
+                                    unsigned &ShiftAmount,
+                                    unsigned &MaskWidth) {
   EVT VT = Op.getValueType();
   unsigned BitWidth = VT.getSizeInBits();
   (void)BitWidth;
@@ -2157,13 +2200,6 @@
   return true;
 }

-static bool isShiftedMask(uint64_t Mask, EVT VT) {
-  assert(VT == MVT::i32 || VT == MVT::i64);
-  if (VT == MVT::i32)
-    return isShiftedMask_32(Mask);
-  return isShiftedMask_64(Mask);
-}
-
 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
 // inserted only sets known zero bits.
 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
@@ -2299,9 +2335,11 @@
     SDNode *OrOpd1 = OrOpd1Val.getNode();

     unsigned BFXOpc;
-    int DstLSB, Width;
-    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
+    unsigned DstLSB, Width;
+    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, DstLSB,
                             NumberOfIgnoredLowBits, BiggerPattern)) {
+      if (DstLSB != 0)
+        continue; // BFXIL inserts starting with bit 0, always.
       // Check that the returned opcode is compatible with the pattern,
       // i.e., same type and zero extended (U and not S)
       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
@@ -2309,7 +2347,6 @@
         continue;

       // Compute the width of the bitfield insertion
-      DstLSB = 0;
       Width = ImmS - ImmR + 1;
       // FIXME: This constraint is to catch bitfield insertion we may
       // want to widen the pattern if we want to grab general bitfied
@@ -2319,9 +2356,8 @@
       // If the mask on the insertee is correct, we have a BFXIL operation. We
       // can share the ImmR and ImmS values from the already-computed UBFM.
-    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
-                                       BiggerPattern,
-                                       Src, DstLSB, Width)) {
+    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, BiggerPattern, Src,
+                                       DstLSB, Width)) {
       ImmR = (BitWidth - DstLSB) % BitWidth;
       ImmS = Width - 1;
     } else
@@ -2445,7 +2481,7 @@
     return false;

   SDValue Op0;
-  int DstLSB, Width;
+  unsigned DstLSB, Width;
   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
                                Op0, DstLSB, Width))
     return false;
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -372,10 +372,6 @@

   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

-  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
-  bool isDesirableToCommuteWithShift(const SDNode *N,
-                                     CombineLevel Level) const override;
-
   /// Returns true if it is beneficial to convert a load of a constant
   /// to just the constant itself.
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8840,24 +8840,6 @@
   return ScratchRegs;
 }

-bool
-AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
-                                                     CombineLevel Level) const {
-  N = N->getOperand(0).getNode();
-  EVT VT = N->getValueType(0);
-  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
-  // it with shift to let it be lowered to UBFX.
-  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
-      isa<ConstantSDNode>(N->getOperand(1))) {
-    uint64_t TruncMask = N->getConstantOperandVal(1);
-    if (isMask_64(TruncMask) &&
-        N->getOperand(0).getOpcode() == ISD::SRL &&
-        isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
-      return false;
-  }
-  return true;
-}
-
 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                               Type *Ty) const {
   assert(Ty->isIntegerTy());
Index: test/CodeGen/AArch64/arm64-bitfield-extract.ll
===================================================================
--- test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -969,10 +969,11 @@
 define i64 @fct21(i64 %x) {
 ; LLC-LABEL: fct21:
 ; LLC:       // %bb.0: // %entry
-; LLC-NEXT:    adrp x9, arr
 ; LLC-NEXT:    ubfx x8, x0, #4, #4
+; LLC-NEXT:    adrp x9, arr
+; LLC-NEXT:    lsl x8, x8, #3
 ; LLC-NEXT:    add x9, x9, :lo12:arr
-; LLC-NEXT:    ldr x0, [x9, x8, lsl #3]
+; LLC-NEXT:    ldr x0, [x9, x8]
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct21(
 ; OPT-NEXT:  entry:
Index: test/CodeGen/AArch64/bitfield-insert.ll
===================================================================
--- test/CodeGen/AArch64/bitfield-insert.ll
+++ test/CodeGen/AArch64/bitfield-insert.ll
@@ -221,7 +221,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    ldr w9, [x1]
-; CHECK-NEXT:    lsr w9, w9, #14
+; CHECK-NEXT:    lsl w9, w9, #12
+; CHECK-NEXT:    lsr w9, w9, #26
 ; CHECK-NEXT:    bfi w8, w9, #26, #5
 ; CHECK-NEXT:    str w8, [x0]
 ; CHECK-NEXT:    ret
Index: test/CodeGen/AArch64/extract-bits.ll
===================================================================
--- test/CodeGen/AArch64/extract-bits.ll
+++ test/CodeGen/AArch64/extract-bits.ll
@@ -1002,8 +1002,8 @@
 define i32 @c1_i32(i32 %arg) nounwind {
 ; CHECK-LABEL: c1_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #19
-; CHECK-NEXT:    and w0, w8, #0xffc
+; CHECK-NEXT:    ubfx w8, w0, #21, #10
+; CHECK-NEXT:    lsl w0, w8, #2
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 4092
@@ -1027,8 +1027,8 @@
 define i32 @c4_i32_bad(i32 %arg) nounwind {
 ; CHECK-LABEL: c4_i32_bad:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #19
-; CHECK-NEXT:    and w0, w8, #0x1ffe
+; CHECK-NEXT:    lsr w8, w0, #20
+; CHECK-NEXT:    lsl w0, w8, #1
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i32 %arg, 19
   %tmp1 = and i32 %tmp0, 16382
@@ -1052,8 +1052,8 @@
 define i64 @c1_i64(i64 %arg) nounwind {
 ; CHECK-LABEL: c1_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #51
-; CHECK-NEXT:    and x0, x8, #0xffc
+; CHECK-NEXT:    ubfx x8, x0, #53, #10
+; CHECK-NEXT:    lsl x0, x8, #2
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 4092
@@ -1077,8 +1077,8 @@
 define i64 @c4_i64_bad(i64 %arg) nounwind {
 ; CHECK-LABEL: c4_i64_bad:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr x8, x0, #51
-; CHECK-NEXT:    and x0, x8, #0x1ffe
+; CHECK-NEXT:    lsr x8, x0, #52
+; CHECK-NEXT:    lsl x0, x8, #1
 ; CHECK-NEXT:    ret
   %tmp0 = lshr i64 %arg, 51
   %tmp1 = and i64 %tmp0, 16382
Index: test/CodeGen/AArch64/pull-binop-through-shift.ll
===================================================================
--- test/CodeGen/AArch64/pull-binop-through-shift.ll
+++ test/CodeGen/AArch64/pull-binop-through-shift.ll
@@ -108,8 +108,8 @@
 define i32 @and_signbit_lshr(i32 %x, i32* %dst) {
 ; CHECK-LABEL: and_signbit_lshr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
-; CHECK-NEXT:    and w0, w8, #0xffff00
+; CHECK-NEXT:    lsr w8, w0, #16
+; CHECK-NEXT:    lsl w0, w8, #8
 ; CHECK-NEXT:    str w0, [x1]
 ; CHECK-NEXT:    ret
   %t0 = and i32 %x, 4294901760 ; 0xFFFF0000
@@ -120,8 +120,8 @@
 define i32 @and_nosignbit_lshr(i32 %x, i32* %dst) {
 ; CHECK-LABEL: and_nosignbit_lshr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
-; CHECK-NEXT:    and w0, w8, #0x7fff00
+; CHECK-NEXT:    ubfx w8, w0, #16, #15
+; CHECK-NEXT:    lsl w0, w8, #8
 ; CHECK-NEXT:    str w0, [x1]
 ; CHECK-NEXT:    ret
   %t0 = and i32 %x, 2147418112 ; 0x7FFF0000
@@ -223,8 +223,8 @@
 define i32 @and_nosignbit_ashr(i32 %x, i32* %dst) {
 ; CHECK-LABEL: and_nosignbit_ashr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #8
-; CHECK-NEXT:    and w0, w8, #0x7fff00
+; CHECK-NEXT:    ubfx w8, w0, #16, #15
+; CHECK-NEXT:    lsl w0, w8, #8
 ; CHECK-NEXT:    str w0, [x1]
 ; CHECK-NEXT:    ret
   %t0 = and i32 %x, 2147418112 ; 0x7FFF0000
Index: test/CodeGen/AArch64/rotate-extract.ll
===================================================================
--- test/CodeGen/AArch64/rotate-extract.ll
+++ test/CodeGen/AArch64/rotate-extract.ll
@@ -97,9 +97,10 @@
 define i32 @no_extract_shrl(i32 %i) nounwind {
 ; CHECK-LABEL: no_extract_shrl:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #3
-; CHECK-NEXT:    lsr w0, w0, #9
-; CHECK-NEXT:    bfi w0, w8, #28, #4
+; CHECK-NEXT:    lsl w8, w0, #25
+; CHECK-NEXT:    and w8, w8, #0xf0000000
+; CHECK-NEXT:    bfxil w8, w0, #9, #23
+; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
   %lhs_div = lshr i32 %i, 3
   %rhs_div = lshr i32 %i, 9
Index: test/CodeGen/AArch64/selectcc-to-shiftand.ll
===================================================================
--- test/CodeGen/AArch64/selectcc-to-shiftand.ll
+++ test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -19,8 +19,8 @@
 define i32 @neg_sel_special_constant(i32 %a) {
 ; CHECK-LABEL: neg_sel_special_constant:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsr w8, w0, #22
-; CHECK-NEXT:    and w0, w8, #0x200
+; CHECK-NEXT:    lsr w8, w0, #31
+; CHECK-NEXT:    lsl w0, w8, #9
 ; CHECK-NEXT:    ret
   %tmp.1 = icmp slt i32 %a, 0
   %retval = select i1 %tmp.1, i32 512, i32 0
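
Note: what follows is a minimal standalone sketch, not an excerpt from the patch. It illustrates the arithmetic that the updated isBitfieldExtractOpFromAnd performs when the AND immediate is a shifted mask: derive the UBFX field start (LSB) and width, plus the residual left shift (DstLSB) that tryBitfieldExtractOp now materializes via getLeftShift. The helper name extractViaUbfxLsl and the GCC/Clang builtins are assumptions local to this sketch.

// Standalone sketch; assumes a GCC/Clang-style compiler for the builtins.
#include <cassert>
#include <cstdint>
#include <iostream>

// Emulates "(X >> SrlImm) & AndImm" by decomposing it into
// UBFX(X, LSB, NBits) << DstLSB, mirroring the patch's logic.
static uint64_t extractViaUbfxLsl(uint64_t X, unsigned SrlImm, uint64_t AndImm) {
  assert(AndImm != 0 && "expected a non-empty shifted mask");
  unsigned DstLSB = __builtin_ctzll(AndImm);    // how far the mask is shifted left
  uint64_t Mask = AndImm >> DstLSB;             // un-shift so trailing-ones counting works
  assert((Mask & (Mask + 1)) == 0 && "AndImm must be a shifted mask");
  unsigned NBits = __builtin_popcountll(Mask);  // contiguous ones => field width
  unsigned LSB = SrlImm + DstLSB;               // first extracted bit of X
  uint64_t FieldMask = (NBits == 64) ? ~0ULL : ((1ULL << NBits) - 1);
  uint64_t Field = (X >> LSB) & FieldMask;      // this is the UBFX
  return Field << DstLSB;                       // this is the extra LSL the patch emits
}

int main() {
  // Same shape as the c1_i32 test: (x >> 19) & 0xffc  ==>  ubfx #21, #10 ; lsl #2.
  uint64_t X = 0x12345678u;
  uint64_t Direct = (X >> 19) & 0xffc;
  uint64_t Decomposed = extractViaUbfxLsl(X, /*SrlImm=*/19, /*AndImm=*/0xffc);
  std::cout << std::hex << Direct << " == " << Decomposed << '\n';
  assert(Direct == Decomposed);
  return 0;
}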