diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -433,6 +433,15 @@
   return isIntImmediate(N.getNode(), Imm);
 }
 
+// isIntImmediateEq - This method tests to see if N is a constant operand that
+// is equal to 'ImmExpected'.
+static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
+  uint64_t Imm;
+  if (!isIntImmediate(N.getNode(), Imm))
+    return false;
+  return Imm == ImmExpected;
+}
+
 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 // opcode and that it has a immediate integer right operand.
 // If so Imm will receive the 32 bit value.
@@ -2544,6 +2553,48 @@
   return true;
 }
 
+// For the node (shl (and val, mask), ShlImm), returns true if the node is
+// equivalent to UBFIZ.
+static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDNode *N,
+                                              SDValue &Src, int &DstLSB,
+                                              int &Width) {
+  // The caller should have verified that N is a left shift with a constant
+  // shift amount; the asserts check that.
+  assert(N->getOpcode() == ISD::SHL &&
+         "N should be a SHL node to call this function");
+  assert(isIntImmediateEq(N->getOperand(1), ShlImm) &&
+         "N should shift ShlImm to call this function");
+
+  uint64_t AndImm = 0;
+  SDValue Op0 = N->getOperand(0);
+  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
+    return false;
+
+  // AndImm might be a mask or a shifted mask, optionally prefixed with random
+  // bits (e.g., xyz111110000, where {x, y, z} are arbitrary bits, 0 or 1).
+  // Any random bits that the SHL shifts out of the AND result do not matter
+  // here.
+  //
+  // Note that a mask is a special case of a shifted mask (i.e.,
+  // isShiftedMask_64(Mask) returns true for it).
+  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
+  if (isShiftedMask_64(ShiftedAndImm)) {
+    const uint64_t NumTrailingZeroInRestoredMask =
+        countTrailingZeros(ShiftedAndImm);
+
+    // Width is the number of bits to extract (starting from the LSB) from the
+    // source operand of the AND. There is no need to extract the remaining
+    // bits, since they are either masked out (ANDed with zero, which UBFIZ
+    // also produces) or shifted out.
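+    //
+    // Worked example (matching the test updates below): for AndImm == 0x7 and
+    // ShlImm == 1, ShiftedAndImm == 0x7, NumTrailingZeroInRestoredMask == 0,
+    // so Width == 3 and DstLSB == 1, i.e. the "bfi ..., #1, #3" now produced
+    // in arm64-indexed-vector-ldst.ll and arm64-neon-copy.ll.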
+    Width = NumTrailingZeroInRestoredMask +
+            countTrailingOnes(ShiftedAndImm >> NumTrailingZeroInRestoredMask);
+    DstLSB = ShlImm;
+    Src = Op0.getNode()->getOperand(0);
+    return true;
+  }
+  return false;
+}
+
 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                            bool BiggerPattern,
                                            const uint64_t NonZeroBits,
@@ -2556,6 +2607,9 @@
   if (!isOpcWithIntImmediate(N, ISD::SHL, ShlImm))
     return false;
 
+  if (isSeveralBitsPositioningOpFromShl(ShlImm, N, Src, DstLSB, Width))
+    return true;
+
   EVT VT = N->getValueType(0);
   assert((VT == MVT::i32 || VT == MVT::i64) &&
          "Caller guarantees that type is i32 or i64");
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -9114,13 +9114,12 @@
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    and x8, x1, #0x7
+; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    bfi x9, x8, #1, #3
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    bfi x8, x1, #1, #3
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    ldrh w0, [x9]
+; CHECK-NEXT:    ldrh w0, [x8]
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
   %lv = load <8 x i16>, <8 x i16>* %A
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1222,12 +1222,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    and x8, x0, #0x7
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    bfi x9, x8, #1, #3
-; CHECK-NEXT:    ldr h1, [x9]
+; CHECK-NEXT:    bfi x8, x0, #1, #3
+; CHECK-NEXT:    ldr h1, [x8]
 ; CHECK-NEXT:    mov v1.h[1], v0.h[1]
 ; CHECK-NEXT:    mov v1.h[2], v0.h[2]
 ; CHECK-NEXT:    mov v1.h[3], v0.h[3]
@@ -1250,11 +1249,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    add x8, sp, #8
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT:    and x8, x0, #0x3
-; CHECK-NEXT:    add x9, sp, #8
-; CHECK-NEXT:    bfi x9, x8, #1, #2
-; CHECK-NEXT:    str h0, [x9]
+; CHECK-NEXT:    bfi x8, x0, #1, #2
+; CHECK-NEXT:    str h0, [x8]
 ; CHECK-NEXT:    ldr d1, [sp, #8]
 ; CHECK-NEXT:    mov v1.h[1], v0.h[1]
 ; CHECK-NEXT:    mov v1.h[2], v0.h[2]
diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
--- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -6,13 +6,12 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    and x8, x1, #0x3
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    fmov.2d v0, #2.00000000
-; CHECK-NEXT:    bfi x9, x8, #2, #2
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    ldr s0, [x9]
+; CHECK-NEXT:    ldr s0, [x8]
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
@@ -27,13 +26,12 @@
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT:    and x8, x1, #0x3
-; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    movi.16b v0, #63
-; CHECK-NEXT:    bfi x9, x8, #2, #2
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
-; CHECK-NEXT:    ldr s0, [x9]
+; CHECK-NEXT:    ldr s0, [x8]
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -266,7 +266,7 @@
 ; CHECK-LABEL: test_nouseful_bits:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
-; CHECK-NEXT:    bfi w0, w8, #8, #8
+; CHECK-NEXT:    bfi w0, w0, #8, #8
 ; CHECK-NEXT:    lsl w8, w8, #8
 ; CHECK-NEXT:    bfi w8, w0, #16, #16
 ; CHECK-NEXT:    mov w0, w8
diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
--- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
@@ -250,19 +250,17 @@
 ; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
 ; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    umov w9, v0.h[2]
-; CHECK-NEXT:    umov w10, v0.h[0]
+; CHECK-NEXT:    umov w9, v0.h[0]
+; CHECK-NEXT:    umov w10, v0.h[2]
 ; CHECK-NEXT:    umov w11, v0.h[3]
 ; CHECK-NEXT:    and v1.8b, v0.8b, v2.8b
 ; CHECK-NEXT:    cmeq v0.4h, v1.4h, v0.4h
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
+; CHECK-NEXT:    bfi w9, w8, #1, #1
+; CHECK-NEXT:    bfi w9, w10, #2, #1
 ; CHECK-NEXT:    mvn v0.8b, v0.8b
+; CHECK-NEXT:    bfi w9, w11, #3, #29
+; CHECK-NEXT:    and w8, w9, #0xf
 ; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    bfi w10, w8, #1, #1
-; CHECK-NEXT:    bfi w10, w9, #2, #1
-; CHECK-NEXT:    bfi w10, w11, #3, #29
-; CHECK-NEXT:    and w8, w10, #0xf
 ; CHECK-NEXT:    strb w8, [x0]
 ; CHECK-NEXT:    ret
   %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -300,15 +300,13 @@
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    and v1.8b, v2.8b, v1.8b
 ; CHECK-NEXT:    umov w8, v1.h[1]
-; CHECK-NEXT:    umov w9, v1.h[2]
-; CHECK-NEXT:    umov w10, v1.h[0]
+; CHECK-NEXT:    umov w9, v1.h[0]
+; CHECK-NEXT:    umov w10, v1.h[2]
 ; CHECK-NEXT:    umov w11, v1.h[3]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    bfi w10, w8, #1, #1
-; CHECK-NEXT:    bfi w10, w9, #2, #1
-; CHECK-NEXT:    bfi w10, w11, #3, #29
-; CHECK-NEXT:    and w8, w10, #0xf
+; CHECK-NEXT:    bfi w9, w8, #1, #1
+; CHECK-NEXT:    bfi w9, w10, #2, #1
+; CHECK-NEXT:    bfi w9, w11, #3, #29
+; CHECK-NEXT:    and w8, w9, #0xf
 ; CHECK-NEXT:    strb w8, [x0]
 ; CHECK-NEXT:    ret
   %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1)
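
The following standalone sketch is not part of the patch; it mirrors the Width/DstLSB computation that isSeveralBitsPositioningOpFromShl performs for (shl (and val, AndImm), ShlImm), and checks it against the values the updated tests expect. isShiftedMask64 and computeUbfizParams are local stand-ins (using C++20 <bit>), not LLVM APIs.

// Standalone sketch of the Width/DstLSB computation for
// (shl (and val, AndImm), ShlImm); not part of the patch.
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Local equivalent of llvm::isShiftedMask_64: one contiguous run of set bits.
static bool isShiftedMask64(uint64_t V) {
  if (V == 0)
    return false;
  const uint64_t M = V >> std::countr_zero(V); // drop trailing zeros
  return (M & (M + 1)) == 0;                   // remainder is a low mask
}

static bool computeUbfizParams(uint64_t AndImm, unsigned ShlImm, int &DstLSB,
                               int &Width) {
  // Bits of AndImm that the SHL would shift out are irrelevant.
  const uint64_t ShiftedAndImm = (AndImm << ShlImm) >> ShlImm;
  if (!isShiftedMask64(ShiftedAndImm))
    return false;
  const int TrailingZeros = std::countr_zero(ShiftedAndImm);
  Width = TrailingZeros + std::countr_one(ShiftedAndImm >> TrailingZeros);
  DstLSB = static_cast<int>(ShlImm);
  return true;
}

int main() {
  int DstLSB = 0, Width = 0;
  // arm64-indexed-vector-ldst.ll: (shl (and x1, 0x7), 1) -> bfi ..., #1, #3.
  assert(computeUbfizParams(0x7, 1, DstLSB, Width) && DstLSB == 1 && Width == 3);
  // arm64-nvcast.ll: (shl (and x1, 0x3), 2) -> bfi ..., #2, #2.
  assert(computeUbfizParams(0x3, 2, DstLSB, Width) && DstLSB == 2 && Width == 2);
  std::printf("DstLSB=%d Width=%d\n", DstLSB, Width);
}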