Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -305,6 +305,21 @@
     return MaskAndBranchFoldingIsLegal;
   }
 
+  /// Return true if the target should transform:
+  ///   (X & Y) == Y
+  /// into:
+  ///   (~X & Y) == 0
+  ///
+  /// If all bits of X that are masked by Y are set, then all bits in the
+  /// bitwise-not of X that are masked by Y are not set.
+  ///
+  /// This may be profitable if the target has a bitwise and-not operation
+  /// that sets comparison flags. A target may want to limit the
+  /// transformation based on the type of Y or on whether Y is a constant.
+  virtual bool useAndNotCompare(const DataLayout &DL, Value *Y) const {
+    return false;
+  }
+
   /// \brief Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -189,6 +189,7 @@
     bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
     bool optimizeSwitchInst(SwitchInst *CI);
     bool optimizeExtractElementInst(Instruction *Inst);
+    bool optimizeICmpInst(ICmpInst *ICmp);
    bool dupRetToEnableTailCallOpts(BasicBlock *BB);
    bool placeDbgValues(Function &F);
    bool sinkAndCmp(Function &F);
@@ -5161,6 +5162,45 @@
   return false;
 }
 
+bool CodeGenPrepare::optimizeICmpInst(ICmpInst *ICmp) {
+  if (!TLI || !DL || !ICmp->isEquality())
+    return false;
+
+  // Match this pattern in any of its permutations:
+  //   (X & Y) == Y
+  Value *Op0 = ICmp->getOperand(0), *Op1 = ICmp->getOperand(1);
+  Value *X = nullptr, *Y = nullptr;
+  ICmpInst::Predicate P;
+  if (!match(ICmp, m_ICmp(P, m_And(m_Value(X), m_Specific(Op1)), m_Value(Y))) &&
+      !match(ICmp, m_ICmp(P, m_And(m_Specific(Op1), m_Value(X)), m_Value(Y))) &&
+      !match(ICmp, m_ICmp(P, m_Value(Y), m_And(m_Value(X), m_Specific(Op0)))) &&
+      !match(ICmp, m_ICmp(P, m_Value(Y), m_And(m_Specific(Op0), m_Value(X)))))
+    return false;
+
+  // Bail out if the 'and' has a use besides the compare or if the compare
+  // operand that we want to turn into a zero is already a zero (otherwise,
+  // infinite loop). Finally, make sure that the target wants to do this.
+  int AndOpIndex = ICmp->getOperand(0) == Y ? 1 : 0;
+  auto *And = cast<BinaryOperator>(ICmp->getOperand(AndOpIndex));
+  auto *YConst = dyn_cast<Constant>(Y);
+  if (!And->hasOneUse() || (YConst && YConst->isNullValue()) ||
+      !TLI->useAndNotCompare(*DL, Y))
+    return false;
+
+  // Turn the original code into: (~X & Y) == 0.
+  //   %And = and i32 %X, %Y
+  //   %ICmp = icmp eq i32 %And, %Y
+  //   -->
+  //   %NotX = xor i32 %X, -1
+  //   %And = and i32 %NotX, %Y
+  //   %ICmp = icmp eq i32 %And, 0
+
+  BinaryOperator *NotX = BinaryOperator::CreateNot(X, "not", And);
+  And->replaceUsesOfWith(X, NotX);
+  ICmp->replaceUsesOfWith(Y, Constant::getNullValue(And->getType()));
+  return true;
+}
+
 bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -5209,9 +5249,16 @@
     return false;
   }
 
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
+  if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+    bool Modified = false;
+    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(I))
+      Modified = optimizeICmpInst(ICmp);
+
     if (!TLI || !TLI->hasMultipleConditionRegisters())
-      return OptimizeCmpExpression(CI, TLI);
+      Modified |= OptimizeCmpExpression(CI, TLI);
+
+    return Modified;
+  }
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     stripInvariantGroupMetadata(*LI);
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -442,6 +442,8 @@
       return true;
     }
 
+    bool useAndNotCompare(const DataLayout &DL, Value *V) const override;
+
     bool supportSplitCSR(MachineFunction *MF) const override {
       return
       MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -9382,6 +9383,18 @@
 //  Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
+bool PPCTargetLowering::useAndNotCompare(const DataLayout &DL, Value *V) const {
+  // Only scalar integer operations will benefit.
+  if (getValueType(DL, V->getType()).isVector())
+    return false;
+
+  // A power-of-2 operand can be lowered using 'rlwinm', so don't use 'andc'.
+  if (isKnownToBeAPowerOfTwo(V, DL, true))
+    return false;
+
+  return true;
+}
+
 static std::string getRecipOp(const char *Base, EVT VT) {
   std::string RecipOp(Base);
   if (VT.getScalarType() == MVT::f64)
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -755,6 +755,8 @@
     bool isCheapToSpeculateCtlz() const override;
 
+    bool useAndNotCompare(const DataLayout &DL, Value *V) const override;
+
     /// Return the value type to use for ISD::SETCC.
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -4275,6 +4276,22 @@
   return Subtarget.hasLZCNT();
 }
 
+bool X86TargetLowering::useAndNotCompare(const DataLayout &DL, Value *V) const {
+  if (!Subtarget.hasBMI())
+    return false;
+
+  // There are only 32-bit and 64-bit forms for 'andn'.
+  EVT VT = getValueType(DL, V->getType());
+  if (!(VT == MVT::i32 || VT == MVT::i64))
+    return false;
+
+  // A power-of-2 operand can be lowered using 'bt', so don't use 'andn'.
+  if (isKnownToBeAPowerOfTwo(V, DL, true))
+    return false;
+
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size is undef.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
Index: test/CodeGen/PowerPC/andc.ll
===================================================================
--- test/CodeGen/PowerPC/andc.ll
+++ test/CodeGen/PowerPC/andc.ll
@@ -1,16 +1,11 @@
 ; RUN: llc < %s -mtriple=powerpc64-apple-darwin | FileCheck %s
 
-; TODO: These could use 'andc'.
-
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK:       ; BB#0:
-; CHECK-NEXT:    and r2, r3, r4
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    cmpw cr0, r2, r4
-; CHECK-NEXT:    bclr 12, 2, 0
-; CHECK-NEXT:  ; BB#1:
-; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    andc r2, r4, r3
+; CHECK-NEXT:    cntlzw r2, r2
+; CHECK-NEXT:    rlwinm r3, r2, 27, 31, 31
 ; CHECK-NEXT:    blr
 ;
   %and = and i32 %x, %y
@@ -21,12 +16,10 @@
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       ; BB#0:
-; CHECK-NEXT:    andi. r2, r3, 43
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    cmpwi r2, 43
-; CHECK-NEXT:    bclr 12, 2, 0
-; CHECK-NEXT:  ; BB#1:
-; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    li r2, 43
+; CHECK-NEXT:    andc r2, r2, r3
+; CHECK-NEXT:    cntlzw r2, r2
+; CHECK-NEXT:    rlwinm r3, r2, 27, 31, 31
 ; CHECK-NEXT:    blr
 ;
   %and = and i32 %x, 43
@@ -34,3 +27,16 @@
   ret i1 %cmp
 }
 
+; Don't use an 'andc' if we're masking off a single bit.
+
+define i1 @and_cmp_const_power_of_2(i32 %x) {
+; CHECK-LABEL: and_cmp_const_power_of_2:
+; CHECK:       ; BB#0:
+; CHECK-NEXT:    rlwinm r3, r3, 27, 31, 31
+; CHECK-NEXT:    blr
+;
+  %and = and i32 %x, 32
+  %cmp = icmp eq i32 %and, 32
+  ret i1 %cmp
+}
+
Index: test/CodeGen/X86/bmi.ll
===================================================================
--- test/CodeGen/X86/bmi.ll
+++ test/CodeGen/X86/bmi.ll
@@ -150,12 +150,11 @@
   ret i1 %cmp
 }
 
-; TODO: Recognize a disguised andn in the following 4 tests.
+; Recognize a disguised andn in the following 4 tests.
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 ;
@@ -167,8 +166,7 @@
 define i1 @and_cmp2(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
 ;
@@ -180,8 +178,7 @@
 define i1 @and_cmp3(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp3:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 ;
@@ -193,8 +190,7 @@
 define i1 @and_cmp4(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp4:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
 ;
@@ -208,8 +204,8 @@
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl $43, %edi
-; CHECK-NEXT:    cmpl $43, %edi
+; CHECK-NEXT:    movl $43, %eax
+; CHECK-NEXT:    andnl %eax, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 ;
@@ -218,6 +214,38 @@
   ret i1 %cmp
 }
 
+; But don't use 'andn' if the mask is a power-of-two.
+define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp_const_power_of_two:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    btl %esi, %edi
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+;
+  %shl = shl i32 1, %y
+  %and = and i32 %x, %shl
+  %cmp = icmp ne i32 %and, %shl
+  ret i1 %cmp
+}
+
+; Don't transform to 'andn' if there's another use of the 'and'.
+define i32 @and_cmp_not_one_use(i32 %x) {
+; CHECK-LABEL: and_cmp_not_one_use:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl $37, %edi
+; CHECK-NEXT:    cmpl $37, %edi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    retq
+;
+  %and = and i32 %x, 37
+  %cmp = icmp eq i32 %and, 37
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %and, %ext
+  ret i32 %add
+}
+
 ; Don't choose a 'test' if an 'andn' can be used.
 define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
 ; CHECK-LABEL: andn_cmp_swap_ops:
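
A note on the transform itself (not part of the patch): here is the IR-level
rewrite that optimizeICmpInst performs, written out as a standalone sketch.
The value names follow the comment in the patch; the 'not' name comes from
the BinaryOperator::CreateNot call.

  ; Before: (x & y) == y
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y

  ; After: (~x & y) == 0
  %not = xor i32 %x, -1
  %and = and i32 %not, %y
  %cmp = icmp eq i32 %and, 0

The two forms are equivalent because (x & y) == y holds exactly when every
set bit of y is also set in x, which is the same as ~x and y sharing no set
bits. For example, with x = 0b1101 and y = 0b0101: x & y = 0b0101 == y, and
~x & y = 0b0010 & 0b0101 = 0.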