diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9939,30 +9939,34 @@
     Swap = true;
   }
 
+  // If the V2 source is undef or zero then we can use a tbl1, as tbl1 fills
+  // out-of-range lanes with 0s. Indices that select from V2 are forced to 255
+  // so they stay out of range even once a v8i8 V1 is widened to v16i8.
+  bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
+  MVT IndexVT = MVT::v8i8;
+  unsigned IndexLen = 8;
+  if (Op.getValueSizeInBits() == 128) {
+    IndexVT = MVT::v16i8;
+    IndexLen = 16;
+  }
+
   SmallVector<SDValue, 8> TBLMask;
   for (int Val : ShuffleMask) {
     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
       unsigned Offset = Byte + Val * BytesPerElt;
       if (Swap)
-        Offset = Offset < 16 ? Offset + 16 : Offset - 16;
+        Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
+      if (IsUndefOrZero && Offset >= IndexLen)
+        Offset = 255;
       TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
     }
   }
 
-  MVT IndexVT = MVT::v8i8;
-  unsigned IndexLen = 8;
-  if (Op.getValueSizeInBits() == 128) {
-    IndexVT = MVT::v16i8;
-    IndexLen = 16;
-  }
-
   SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
   SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
 
   SDValue Shuffle;
-  // If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
-  // out of range values with 0s.
-  if (V2.isUndef() || isZerosVector(V2.getNode())) {
+  if (IsUndefOrZero) {
     if (IndexLen == 8)
       V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
     Shuffle = DAG.getNode(
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -909,9 +909,9 @@
 
 ; CHECK-LABEL: .LCPI90_0:
 ; CHECK-NEXT: .byte   0
-; CHECK-NEXT: .byte   8
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   2
-; CHECK-NEXT: .byte   9
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   4
 ; CHECK-NEXT: .byte   5
 ; CHECK-NEXT: .byte   6
@@ -930,14 +930,14 @@
 }
 
 ; CHECK-LABEL: .LCPI91_0:
-; CHECK-NEXT: .byte   24
-; CHECK-NEXT: .byte   16
-; CHECK-NEXT: .byte   26
-; CHECK-NEXT: .byte   17
-; CHECK-NEXT: .byte   28
-; CHECK-NEXT: .byte   29
-; CHECK-NEXT: .byte   30
-; CHECK-NEXT: .byte   31
+; CHECK-NEXT: .byte   0
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   2
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   4
+; CHECK-NEXT: .byte   5
+; CHECK-NEXT: .byte   6
+; CHECK-NEXT: .byte   7
 define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap:
 ; CHECK:       // %bb.0:
@@ -984,12 +984,12 @@
 ; CHECK-LABEL: .LCPI93_0:
 ; CHECK-NEXT: .byte   0
 ; CHECK-NEXT: .byte   1
-; CHECK-NEXT: .byte   16
-; CHECK-NEXT: .byte   17
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   4
 ; CHECK-NEXT: .byte   5
-; CHECK-NEXT: .byte   18
-; CHECK-NEXT: .byte   19
+; CHECK-NEXT: .byte   255
+; CHECK-NEXT: .byte   255
 ; CHECK-NEXT: .byte   8
 ; CHECK-NEXT: .byte   9
 ; CHECK-NEXT: .byte   10
@@ -1011,12 +1011,12 @@
 
 ; CHECK: .byte   0
 ; CHECK: .byte   1
-; CHECK: .byte   16
-; CHECK: .byte   17
+; CHECK: .byte   255
+; CHECK: .byte   255
 ; CHECK: .byte   4
 ; CHECK: .byte   5
-; CHECK: .byte   18
-; CHECK: .byte   19
+; CHECK: .byte   255
+; CHECK: .byte   255
 ; CHECK: .byte   8
 ; CHECK: .byte   9
 ; CHECK: .byte   10