Index: lib/Analysis/InstructionSimplify.cpp
===================================================================
--- lib/Analysis/InstructionSimplify.cpp
+++ lib/Analysis/InstructionSimplify.cpp
@@ -1852,6 +1852,38 @@
                                       MaxRecurse))
       return V;
 
+  // Assuming the effective width of Y is not larger than A, i.e. all bits
+  // from X and Y are disjoint in (X << A) | Y,
+  // if the mask of this AND op covers all bits of X or Y, while it covers
+  // no bits from the other, we can bypass this AND op. E.g.,
+  // ((X << A) | Y) & Mask -> Y,
+  //     if Mask = ((1 << effective_width_of(Y)) - 1)
+  // ((X << A) | Y) & Mask -> X << A,
+  //     if Mask = ((1 << effective_width_of(X)) - 1) << A
+  Value *Y, *XShifted;
+  if (match(Op1, m_APInt(Mask)) &&
+      match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
+                                     m_Value(XShifted)),
+                        m_Value(Y)))) {
+    const unsigned Width = Op0->getType()->getScalarSizeInBits();
+    // Clamp to Width so an oversized shift amount cannot trip APInt asserts.
+    const unsigned ShiftCnt = ShAmt->getLimitedValue(Width);
+    const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+    const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+    if (EffWidthY <= ShiftCnt) {
+      const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
+                                                Q.DT);
+      const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+      const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
+      const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShiftCnt;
+      // The mask keeps one operand intact and drops the other: skip the AND.
+      if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
+        return Y;
+      if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
+        return XShifted;
+    }
+  }
+
   return nullptr;
 }
 
Index: test/Transforms/InstSimplify/AndOrXor.ll
===================================================================
--- test/Transforms/InstSimplify/AndOrXor.ll
+++ test/Transforms/InstSimplify/AndOrXor.ll
@@ -967,12 +967,8 @@
 
 define i64 @shl_or_and1(i32 %a, i1 %b) {
 ; CHECK-LABEL: @shl_or_and1(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 1
-; CHECK-NEXT:    ret i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %tmp1 = zext i32 %a to i64
   %tmp2 = zext i1 %b to i64
@@ -985,11 +981,8 @@
 define i64 @shl_or_and2(i32 %a, i1 %b) {
 ; CHECK-LABEL: @shl_or_and2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 4294967296
-; CHECK-NEXT:    ret i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 32
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %tmp1 = zext i1 %b to i64
   %tmp2 = zext i32 %a to i64
@@ -999,35 +992,26 @@
   ret i64 %tmp5
 }
 
-define i32 @shl_or_and3(i32 %a, i32 %b) {
+define i64 @shl_or_and3(i32 %a, i32 %b) {
 ; concatinate two 32-bit integers and extract lower 32-bit
 ; CHECK-LABEL: @shl_or_and3(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT:    ret i32 [[TMP6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %tmp1 = zext i32 %a to i64
   %tmp2 = zext i32 %b to i64
   %tmp3 = shl nuw i64 %tmp1, 32
   %tmp4 = or i64 %tmp2, %tmp3
   %tmp5 = and i64 %tmp4, 4294967295
-  %tmp6 = trunc i64 %tmp5 to i32
-  ret i32 %tmp6
+  ret i64 %tmp5
 }
 
 define i32 @shl_or_and4(i16 %a, i16 %b) {
 ; concatinate two 16-bit integers and extract higher 16-bit
 ; CHECK-LABEL: @shl_or_and4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16
-; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], -65536
-; CHECK-NEXT:    ret i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 16
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %tmp1 = zext i16 %a to i32
   %tmp2 = zext i16 %b to i32
@@ -1037,23 +1021,17 @@
   ret i32 %tmp5
 }
 
-define i64 @shl_or_and5(i64 %a, i1 %b) {
+define i128 @shl_or_and5(i64 %a, i1 %b) {
 ; CHECK-LABEL: @shl_or_and5(
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i64 [[A:%.*]] to i128
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i1 [[B:%.*]] to i128
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i128 [[TMP1]], 64
-; CHECK-NEXT:    [[TMP4:%.*]] = or i128 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and i128 [[TMP4]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT:    ret i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[B:%.*]] to i128
+; CHECK-NEXT:    ret i128 [[TMP1]]
 ;
   %tmp1 = zext i64 %a to i128
   %tmp2 = zext i1 %b to i128
   %tmp3 = shl nuw i128 %tmp1, 64
   %tmp4 = or i128 %tmp2, %tmp3
   %tmp5 = and i128 %tmp4, 1
-  %tmp6 = trunc i128 %tmp5 to i64
-  ret i64 %tmp6
+  ret i128 %tmp5
 }
 
 define i32 @shl_or_and6(i16 %a, i16 %b) {
@@ -1109,3 +1087,48 @@
   %tmp5 = and i32 %tmp4, 131071 ; mask with 0x1FFFF
   ret i32 %tmp5
 }
+
+define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
+; CHECK-LABEL: @shl_or_and1v(
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
+  %tmp1 = zext <2 x i32> %a to <2 x i64>
+  %tmp2 = zext <2 x i1> %b to <2 x i64>
+  %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+  %tmp4 = or <2 x i64> %tmp2, %tmp3
+  %tmp5 = and <2 x i64> %tmp4, <i64 1, i64 1>
+  ret <2 x i64> %tmp5
+}
+
+define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) {
+; CHECK-LABEL: @shl_or_and2v(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %tmp1 = zext <2 x i1> %b to <2 x i64>
+  %tmp2 = zext <2 x i32> %a to <2 x i64>
+  %tmp3 = shl nuw <2 x i64> %tmp1, <i64 32, i64 32>
+  %tmp4 = or <2 x i64> %tmp2, %tmp3
+  %tmp5 = and <2 x i64> %tmp4, <i64 4294967296, i64 4294967296>
+  ret <2 x i64> %tmp5
+}
+
+define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) {
+; A variation of the above test case that is not simplified: the mask also keeps bit 0 of %b
+; CHECK-LABEL: @shl_or_and3v(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <2 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i32> [[TMP4]], <i32 -65535, i32 -65535>
+; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+;
+  %tmp1 = zext <2 x i16> %a to <2 x i32>
+  %tmp2 = zext <2 x i16> %b to <2 x i32>
+  %tmp3 = shl nuw <2 x i32> %tmp1, <i32 16, i32 16>
+  %tmp4 = or <2 x i32> %tmp2, %tmp3
+  %tmp5 = and <2 x i32> %tmp4, <i32 4294901761, i32 4294901761> ; mask with 0xFFFF0001
+  ret <2 x i32> %tmp5
+}
Index: test/Transforms/InstSimplify/shift.ll
===================================================================
--- test/Transforms/InstSimplify/shift.ll
+++ test/Transforms/InstSimplify/shift.ll
@@ -206,9 +206,9 @@
   ret i64 %tmp5
 }
 
-define <2 x i64> @shl_or_shr1v(<2 x i32> %a, <2 x i32> %b) {
+define <2 x i64> @shl_or_shr1_vec(<2 x i32> %a, <2 x i32> %b) {
 ; Unit test for vector integer
-; CHECK-LABEL: @shl_or_shr1v(
+; CHECK-LABEL: @shl_or_shr1_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> %a to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
@@ -220,9 +220,9 @@
   ret <2 x i64> %tmp5
 }
 
-define <2 x i64> @shl_or_shr2v(<2 x i32> %a, <2 x i32> %b) {
+define <2 x i64> @shl_or_shr2_vec(<2 x i32> %a, <2 x i32> %b) {
 ; Negative unit test for vector integer
-; CHECK-LABEL: @shl_or_shr2v(
+; CHECK-LABEL: @shl_or_shr2_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> %a to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> %b to <2 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP1]], <i64 31, i64 31>
Index: test/Transforms/NewGVN/pair_jumpthread.ll
===================================================================
--- test/Transforms/NewGVN/pair_jumpthread.ll
+++ test/Transforms/NewGVN/pair_jumpthread.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -newgvn -S | FileCheck %s
 ; RUN: opt < %s -newgvn -jump-threading -S | FileCheck --check-prefix=CHECK-JT %s
-; This test is expected to fail until the transformation is committed.
-; XFAIL: *
 
 define signext i32 @testBI(i32 signext %v) {
 ; Test with std::pair<bool, int>