Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -198,6 +198,7 @@
     SDNode *Select(SDNode *N) override;
     SDNode *SelectGather(SDNode *N, unsigned Opc);
     SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
+    SDNode *SelectAndWithSExtImmediate(SDNode *Node, MVT NVT);
 
     bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
     bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -2208,6 +2209,57 @@
   return ResNode;
 }
 
+// Try to shrink the encoding of an AND by setting additional bits in the mask.
+// It is only correct to do so if we know a priori that the other operand of the
+// AND already has those bits set to zero.
+SDNode *X86DAGToDAGISel::SelectAndWithSExtImmediate(SDNode *Node, MVT NVT) {
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+
+  if (NVT != MVT::i32 && NVT != MVT::i64)
+    return nullptr;
+
+  auto *Cst = dyn_cast<ConstantSDNode>(N1);
+  if (!Cst)
+    return nullptr;
+
+  // As a heuristic, skip over negative constants.  It turns out not to be
+  // productive to widen the mask.
+  int64_t Val = Cst->getSExtValue();
+  if (Val <= 0)
+    return nullptr;
+
+  // Limit ourselves to constants which already have sign bits to save on
+  // compile time.
+  if ((int8_t)Val >= 0)
+    return nullptr;
+
+  unsigned Opc;
+  switch (NVT.SimpleTy) {
+  default:
+    llvm_unreachable("Unsupported VT!");
+  case MVT::i32:
+    Opc = X86::AND32ri8;
+    break;
+  case MVT::i64:
+    Opc = X86::AND64ri8;
+    break;
+  }
+
+  APInt Op0Zero, Op0One;
+  CurDAG->computeKnownBits(N0, Op0Zero, Op0One);
+  // Grow the mask using the known zero bits.
+  Op0Zero |= Val;
+  // See if the mask can be efficiently encoded using at most NumBits.
+  if (!Op0Zero.isSignedIntN(8))
+    return nullptr;
+
+  SDLoc DL(Node);
+  SDValue NewCst =
+      CurDAG->getTargetConstant(Op0Zero.getSExtValue(), DL, MVT::i8);
+  return CurDAG->getMachineNode(Opc, DL, NVT, N0, NewCst);
+}
+
 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   MVT NVT = Node->getSimpleValueType(0);
   unsigned Opc, MOpc;
@@ -2223,7 +2275,8 @@
   }
 
   switch (Opcode) {
-  default: break;
+  default:
+    break;
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {
@@ -2298,7 +2351,13 @@
       return RetVal;
     break;
   }
-  case ISD::AND:
+  case ISD::AND: {
+    if (SDNode *NewNode = SelectAndWithSExtImmediate(Node, NVT)) {
+      ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0));
+      return nullptr;
+    }
+    // FALLTHROUGH
+  }
   case ISD::OR:
   case ISD::XOR: {
     // For operations of the form (x << C1) op C2, check if we can use a smaller
Index: llvm/trunk/test/CodeGen/X86/shift-pair.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/shift-pair.ll
+++ llvm/trunk/test/CodeGen/X86/shift-pair.ll
@@ -3,7 +3,7 @@
 define i64 @test(i64 %A) {
 ; CHECK: @test
 ; CHECK: shrq $54
-; CHECK: andl $1020
+; CHECK: andq $-4
 ; CHECK: ret
     %B = lshr i64 %A, 56
     %C = shl i64 %B, 2
Index: llvm/trunk/test/CodeGen/X86/win64_frame.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/win64_frame.ll
+++ llvm/trunk/test/CodeGen/X86/win64_frame.ll
@@ -100,9 +100,8 @@
 
   alloca i32, i32 %a
   ; CHECK:        movl    %ecx, %eax
-  ; CHECK:        leaq    15(,%rax,4), %rcx
-  ; CHECK:        movabsq $34359738352, %rax
-  ; CHECK:        andq    %rcx, %rax
+  ; CHECK:        leaq    15(,%rax,4), %rax
+  ; CHECK:        andq    $-16, %rax
   ; CHECK:        callq   __chkstk
   ; CHECK:        subq    %rax, %rsp
 
Index: llvm/trunk/test/CodeGen/X86/zext-fold.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/zext-fold.ll
+++ llvm/trunk/test/CodeGen/X86/zext-fold.ll
@@ -8,7 +8,7 @@
 }
 ; CHECK: test1
 ; CHECK: movzbl
-; CHECK-NEXT: andl {{.*}}224
+; CHECK-NEXT: andl {{.*}}-32
 
 ;; Multiple uses of %x but easily extensible.
 define i32 @test2(i8 %x) nounwind readnone {
@@ -21,7 +21,7 @@
 }
 ; CHECK: test2
 ; CHECK: movzbl
-; CHECK: andl $224
+; CHECK: andl $-32
 ; CHECK: orl $63
 
 declare void @use(i32, i8)
@@ -36,6 +36,6 @@
 ; CHECK: test3
 ; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
 ; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
-; CHECK-NEXT: andl $224, [[REGISTER]]
+; CHECK-NEXT: andl $-32, [[REGISTER]]
 ; CHECK-NEXT: movl [[REGISTER]], (%esp)
 ; CHECK-NEXT: call{{.*}}use