Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -569,37 +569,6 @@
     }
     return false;   // Don't fall through, will infinitely loop.
   case ISD::AND:
-    // If the RHS is a constant, check to see if the LHS would be zero without
-    // using the bits from the RHS.  Below, we use knowledge about the RHS to
-    // simplify the LHS, here we're using information from the LHS to simplify
-    // the RHS.
-    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-      SDValue Op0 = Op.getOperand(0);
-      APInt LHSZero, LHSOne;
-      // Do not increment Depth here; that can cause an infinite loop.
-      TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth);
-      // If the LHS already has zeros where RHSC does, this and is dead.
-      if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
-        return TLO.CombineTo(Op, Op0);
-
-      // If any of the set bits in the RHS are known zero on the LHS, shrink
-      // the constant.
-      if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
-        return true;
-
-      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
-      // constant, but if this 'and' is only clearing bits that were just set by
-      // the xor, then this 'and' can be eliminated by shrinking the mask of
-      // the xor. For example, for a 32-bit X:
-      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
-      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
-          LHSOne == ~RHSC->getAPIntValue()) {
-        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
-                                      Op0.getOperand(0), Op.getOperand(1));
-        return TLO.CombineTo(Op, Xor);
-      }
-    }
-
     if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
                              KnownOne, TLO, Depth+1))
       return true;
@@ -625,6 +594,21 @@
     if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
+    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      SDValue Op0 = Op.getOperand(0);
+      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
+      // constant, but if this 'and' is only clearing bits that were just set by
+      // the xor, then this 'and' can be eliminated by shrinking the mask of
+      // the xor. For example, for a 32-bit X:
+      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
+      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
+          KnownOne2 == ~RHSC->getAPIntValue()) {
+        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
+                                      Op0.getOperand(0), Op.getOperand(1));
+        return TLO.CombineTo(Op, Xor);
+      }
+    }
+
     // Output known-1 bits are only known if set in both the LHS & RHS.
     KnownOne &= KnownOne2;
     // Output known-0 are known to be clear if zero in either the LHS | RHS.
Index: test/CodeGen/AArch64/fast-isel-select.ll
===================================================================
--- test/CodeGen/AArch64/fast-isel-select.ll
+++ test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SLOWISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FASTISEL
 
 ; First test the different supported value types for select.
 define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -294,9 +294,14 @@
 }
 
 define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt2
-; CHECK:       eor [[REG:w[0-9]+]], w0, #0x1
-; CHECK:       orr {{w[0-9]+}}, [[REG]], w1
+; SLOWISEL-LABEL: select_opt2
+; SLOWISEL:       orn [[REG:w[0-9]+]], w1, w0
+; SLOWISEL:       and {{w[0-9]+}}, [[REG]], #0x1
+;
+; FASTISEL-LABEL: select_opt2
+; FASTISEL:       eor [[REG:w[0-9]+]], w0, #0x1
+; FASTISEL:       orr [[REG2:w[0-9]+]], [[REG]], w1
+; FASTISEL:       and {{w[0-9]+}}, [[REG2]], #0x1
   %1 = select i1 %c, i1 %a, i1 true
   ret i1 %1
 }
Index: test/CodeGen/AMDGPU/fneg.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg.f16.ll
+++ test/CodeGen/AMDGPU/fneg.f16.ll
@@ -64,8 +64,9 @@
 ; GCN-LABEL: {{^}}s_fneg_v2f16:
 ; CI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
 ; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
-; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
+; CI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
 ; CI: v_xor_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
+; CI: v_lshlrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
 ; CI: v_or_b32_e32
 
 ; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x8000{{$}}
Index: test/CodeGen/PowerPC/rlwimi-and.ll
===================================================================
--- test/CodeGen/PowerPC/rlwimi-and.ll
+++ test/CodeGen/PowerPC/rlwimi-and.ll
@@ -29,8 +29,8 @@
   unreachable
 
 ; CHECK: @test
-; CHECK: clrlwi [[R1:[0-9]+]], {{[0-9]+}}, 31
-; CHECK: rlwimi [[R1]], {{[0-9]+}}, 8, 23, 23
+; CHECK: rlwimi [[R1:[0-9]+]], {{[0-9]+}}, 8, 16, 23
+; CHECK: andi. {{[0-9]+}}, [[R1]], 257
 
 codeRepl29:                                       ; preds = %codeRepl1
   unreachable