Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -913,6 +913,8 @@ // constant integer of all ones (with no undefs). // Do not permit build vector implicit truncation. static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { + if (N.getOpcode() == ISD::BITCAST) + return isAllOnesConstantOrAllOnesSplatConstant(N.getOperand(0)); unsigned BitWidth = N.getScalarValueSizeInBits(); if (ConstantSDNode *Splat = isConstOrConstSplat(N)) return Splat->isAllOnesValue() && Index: test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll =================================================================== --- test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll +++ test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll @@ -58,20 +58,18 @@ ; ; CHECK-SSE2-LABEL: in_constant_varx_mone: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0 +; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1 -; CHECK-SSE2-NEXT: pand (%rdi), %xmm0 -; CHECK-SSE2-NEXT: por %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pandn (%rdx), %xmm0 +; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_varx_mone: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0 +; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1 -; CHECK-XOP-NEXT: vpand (%rdi), %xmm0, %xmm0 -; CHECK-XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 +; CHECK-XOP-NEXT: vpandn (%rdx), %xmm0, %xmm0 +; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16 @@ -134,22 +132,21 @@ ; ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm1 -; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-SSE2-NEXT: pxor %xmm0, %xmm0 +; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2 +; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pandn (%rdi), %xmm1 -; CHECK-SSE2-NEXT: por %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_varx_mone_invmask: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0 +; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-XOP-NEXT: vpandn (%rdi), %xmm0, %xmm2 -; CHECK-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2 +; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0 ; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; CHECK-XOP-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16