Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -860,6 +860,12 @@
   return false;
 }
 
+static SDValue peekThroughBitcast(SDValue V) {
+  while (V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
+  return V;
+}
+
 // Returns the SDNode if it is a constant float BuildVector
 // or constant float.
 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
@@ -894,6 +900,7 @@
 // constant null integer (with no undefs).
 // Build vector implicit truncation is not an issue for null values.
 static bool isNullConstantOrNullSplatConstant(SDValue N) {
+  // TODO: may want to use peekThroughBitcast() here.
   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     return Splat->isNullValue();
   return false;
@@ -903,6 +910,7 @@
 // constant integer of one (with no undefs).
 // Do not permit build vector implicit truncation.
 static bool isOneConstantOrOneSplatConstant(SDValue N) {
+  // TODO: may want to use peekThroughBitcast() here.
   unsigned BitWidth = N.getScalarValueSizeInBits();
   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
@@ -913,6 +921,7 @@
 // constant integer of all ones (with no undefs).
 // Do not permit build vector implicit truncation.
 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
+  N = peekThroughBitcast(N);
   unsigned BitWidth = N.getScalarValueSizeInBits();
   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     return Splat->isAllOnesValue() &&
@@ -13055,12 +13064,6 @@
   return false;
 }
 
-static SDValue peekThroughBitcast(SDValue V) {
-  while (V.getOpcode() == ISD::BITCAST)
-    V = V.getOperand(0);
-  return V;
-}
-
 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                          unsigned NumStores) {
   SmallVector<SDValue, 8> Chains;
Index: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -58,20 +58,18 @@
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_mone:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
+; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
 ; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    pand (%rdi), %xmm0
-; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pandn (%rdx), %xmm0
+; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-XOP-LABEL: in_constant_varx_mone:
 ; CHECK-XOP:       # %bb.0:
-; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
+; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
 ; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
-; CHECK-XOP-NEXT:    vpand (%rdi), %xmm0, %xmm0
-; CHECK-XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; CHECK-XOP-NEXT:    vpandn (%rdx), %xmm0, %xmm0
+; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; CHECK-XOP-NEXT:    retq
   %x = load <4 x i32>, <4 x i32> *%px, align 16
   %y = load <4 x i32>, <4 x i32> *%py, align 16
@@ -134,22 +132,21 @@
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm1
-; CHECK-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm2
+; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
 ; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    pandn (%rdi), %xmm1
-; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-XOP:       # %bb.0:
-; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
+; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
 ; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-XOP-NEXT:    vpandn (%rdi), %xmm0, %xmm2
-; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm2
+; CHECK-XOP-NEXT:    vpandn %xmm2, %xmm0, %xmm0
 ; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
-; CHECK-XOP-NEXT:    vpor %xmm0, %xmm2, %xmm0
 ; CHECK-XOP-NEXT:    retq
   %x = load <4 x i32>, <4 x i32> *%px, align 16
   %y = load <4 x i32>, <4 x i32> *%py, align 16