Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2014,6 +2014,51 @@
   return nullptr;
 }
 
+// Fold redundant masking.
+// Let C3 = C1 shft C2
+// (X & C1) | ((X shft C2) & C3) -> (X & C1) | ((X & C1) shft C2)
+// TODO: If ANDs are canonicalized to appear before any type of shift,
+// this function can be removed.
+static bool foldRedundantMask(BinaryOperator &I,
+                              InstCombiner::BuilderTy &Builder) {
+  const APInt *AndC, *ShAndC, *ShftAmt;
+  Value *X;
+  BinaryOperator *And, *Shift, *DeadAnd;
+  // 1st: Check for the desired pattern.
+  if (!match(
+          &I,
+          m_c_Or(m_CombineAnd(
+                     m_And(m_OneUse(m_CombineAnd(m_Shift(m_Value(X), m_APInt(ShftAmt)),
+                                                 m_BinOp(Shift))),
+                           m_APInt(ShAndC)), m_BinOp(DeadAnd)),
+                 m_CombineAnd(m_And(m_Deferred(X), m_APInt(AndC)), m_BinOp(And)))))
+    return false;
+
+  const Instruction::BinaryOps Opcode = Shift->getOpcode();
+  const auto DoShift = ShftAmt->getZExtValue();
+
+  // 2nd: Check that the masks match.
+  if (Opcode == Instruction::Shl) {
+    if (!(*ShAndC == AndC->shl(DoShift) || *AndC == ShAndC->lshr(DoShift)))
+      return false;
+
+  } else if (Opcode == Instruction::LShr) {
+    if (!(*ShAndC == AndC->lshr(DoShift) || *AndC == ShAndC->shl(DoShift)))
+      return false;
+
+  } else if (*ShAndC != AndC->ashr(DoShift)) {
+    // TODO: If X is known to have a clear sign bit, ashr behaves as lshr.
+    if (ShAndC->countLeadingZeros() < DoShift || *AndC != ShAndC->shl(DoShift))
+      return false;
+  }
+
+  // 3rd: Shift the result of the AND instead of re-masking the shifted value.
+  Value *NewShift =
+      Builder.CreateBinOp(Opcode, And, ConstantInt::get(I.getType(), *ShftAmt));
+  DeadAnd->replaceAllUsesWith(NewShift);
+  return true;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -2156,6 +2201,9 @@
         return replaceInstUsesWith(I, V);
       if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder))
         return replaceInstUsesWith(I, V);
+
+      if (C1 && C2 && foldRedundantMask(I, Builder))
+        return &I;
     }
   }
 
Index: test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; Fold redundant masking operations of a shifted value.
+; In a case where
+;   x1 = a & 0xFF00
+;   x2 = (a >> 8) & 0xFF
+; we can see x2 as
+;   x2 = (a >> 8) & (0xFF00 >> 8)
+; which can be rewritten as
+;   x2 = (a & 0xFF00) >> 8
+; that is
+;   x2 = x1 >> 8
+
+
+define i32 @shl(i16 %a) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], 172
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i16 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP42:%.*]] = or i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP42]] to i32
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = shl i32 %tmp0, 8
+  %tmp2 = and i32 %tmp0, 172
+  %tmp3 = and i32 %tmp1, 44032
+  %tmp4 = or i32 %tmp2, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @lshr(i16 %a) {
+; CHECK-LABEL: @lshr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = lshr i32 %tmp0, 8
+  %tmp2 = and i32 %tmp1, 172
+  %tmp3 = and i32 %tmp0, 44032
+  %tmp4 = or i32 %tmp2, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @ashr(i16 %a) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp3 = and i32 %tmp0, 44032
+  %tmp2 = ashr i32 %tmp0, 8
+  %tmp1 = and i32 %tmp2, 172
+  %tmp4 = or i32 %tmp3, %tmp1
+  ret i32 %tmp4
+}
+
+define i32 @ashr2(i16 %a) {
+; CHECK-LABEL: @ashr2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -256
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp3 = and i32 %tmp0, 65280
+  %tmp2 = ashr i32 %tmp0, 8
+  %tmp1 = and i32 %tmp2, 255
+  %tmp4 = or i32 %tmp3, %tmp1
+  ret i32 %tmp4
+}
+
+define i16 @ashr3(i16 %a) {
+; CHECK-LABEL: @ashr3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[A:%.*]], -256
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i16 [[A]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i16 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[TMP4]]
+;
+entry:
+  %tmp1 = and i16 %a, 65280
+  %tmp2 = ashr i16 %a, 8
+  %tmp3 = and i16 %tmp2, 255
+  %tmp4 = or i16 %tmp1, %tmp3
+  ret i16 %tmp4
+}
+
+define i32 @ashr_nogood(i16 %a) {
+; CHECK-LABEL: @ashr_nogood(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 65280
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 65535
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = and i32 %tmp0, 65280
+  %tmp2 = ashr i32 %tmp0, 8
+  %tmp3 = and i32 %tmp2, 65535
+  %tmp4 = or i32 %tmp1, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @shl_nogood(i16 %a) {
+; CHECK-LABEL: @shl_nogood(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 172
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 44032
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = and i32 %tmp0, 172
+  %tmp2 = shl i32 %tmp0, %tmp1
+  %tmp3 = and i32 %tmp2, 44032
+  %tmp4 = or i32 %tmp1, %tmp3
+  ret i32 %tmp4
+}
+
+define void @foo(i16* %a) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMPA:%.*]] = load i16, i16* [[A:%.*]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[TMPA]], -256
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[TMPA]], 8
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store i16 [[OR]], i16* [[A]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmpA = load i16, i16* %a, align 2
+  %conv = sext i16 %tmpA to i32
+  %and = and i32 %conv, 65280
+  %tmp1 = lshr i32 %conv, 8
+  %and3 = and i32 %tmp1, 255
+  %or = or i32 %and3, %and
+  %conv4 = trunc i32 %or to i16
+  store i16 %conv4, i16* %a, align 2
+  ret void
+}
+
+; Check that all uses of x2 are replaced
+define i32 @mulUses(i32 %a) {
+; CHECK-LABEL: @mulUses(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[A:%.*]], 44032
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP0]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], [[TMP0]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+entry:
+  %tmp1 = lshr i32 %a, 8
+  %x2 = and i32 %tmp1, 172
+  %tmp3 = and i32 %a, 44032
+  %tmp4 = or i32 %x2, %tmp3
+  %tmp5 = add i32 %tmp4, %x2
+  %tmp6 = mul i32 %tmp5, %x2
+  ret i32 %tmp6
+}
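Reviewer note, not part of the patch: a minimal C++ sketch of source code that lowers to the pattern the new fold targets. The function and variable names are made up for illustration; with Clang the signed right shift lowers to ashr, so this has essentially the same shape as the @ashr2 test above (sext i16 to i32, and 65280, ashr 8, and 255, or).

// Illustration only; names are hypothetical and not part of the patch.
int mergeBytes(short a) {
  int x = a;                // sext i16 %a to i32
  int hi = x & 0xFF00;      // x1 = a & 0xFF00
  int lo = (x >> 8) & 0xFF; // x2 = (a >> 8) & 0xFF, foldable to x1 >> 8
  return hi | lo;           // the 'or' that foldRedundantMask rewrites
}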