Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -860,6 +860,12 @@ return false; } +static SDValue peekThroughBitcast(SDValue V) { + while (V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + return V; +} + // Returns the SDNode if it is a constant float BuildVector // or constant float. static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { @@ -894,6 +900,7 @@ // constant null integer (with no undefs). // Build vector implicit truncation is not an issue for null values. static bool isNullConstantOrNullSplatConstant(SDValue N) { + // TODO: may want to use peekThroughBitcast() here. if (ConstantSDNode *Splat = isConstOrConstSplat(N)) return Splat->isNullValue(); return false; @@ -903,6 +910,7 @@ // constant integer of one (with no undefs). // Do not permit build vector implicit truncation. static bool isOneConstantOrOneSplatConstant(SDValue N) { + // TODO: may want to use peekThroughBitcast() here. unsigned BitWidth = N.getScalarValueSizeInBits(); if (ConstantSDNode *Splat = isConstOrConstSplat(N)) return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; @@ -913,6 +921,7 @@ // constant integer of all ones (with no undefs). // Do not permit build vector implicit truncation. static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { + N = peekThroughBitcast(N); unsigned BitWidth = N.getScalarValueSizeInBits(); if (ConstantSDNode *Splat = isConstOrConstSplat(N)) return Splat->isAllOnesValue() && @@ -13055,12 +13064,6 @@ return false; } -static SDValue peekThroughBitcast(SDValue V) { - while (V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); - return V; -} - SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl &StoreNodes, unsigned NumStores) { SmallVector Chains; Index: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll +++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll @@ -58,20 +58,18 @@ ; ; CHECK-SSE2-LABEL: in_constant_varx_mone: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0 +; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1 -; CHECK-SSE2-NEXT: pand (%rdi), %xmm0 -; CHECK-SSE2-NEXT: por %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pandn (%rdx), %xmm0 +; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_varx_mone: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0 +; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1 -; CHECK-XOP-NEXT: vpand (%rdi), %xmm0, %xmm0 -; CHECK-XOP-NEXT: vpor %xmm1, %xmm0, %xmm0 +; CHECK-XOP-NEXT: vpandn (%rdx), %xmm0, %xmm0 +; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16 @@ -134,22 +132,21 @@ ; ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm1 -; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-SSE2-NEXT: pxor %xmm0, %xmm0 +; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2 +; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2 +; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pandn (%rdi), %xmm1 -; CHECK-SSE2-NEXT: por %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_varx_mone_invmask: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0 +; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-XOP-NEXT: vpandn (%rdi), %xmm0, %xmm2 -; CHECK-XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2 +; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0 ; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; CHECK-XOP-NEXT: vpor %xmm0, %xmm2, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, <4 x i32> *%px, align 16 %y = load <4 x i32>, <4 x i32> *%py, align 16