Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -11973,53 +11973,6 @@
   llvm_unreachable("Unimplemented!");
 }
 
-// This function assumes its argument is a BUILD_VECTOR of constants or
-// undef SDNodes, i.e., ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
-// true.
-static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
-                                    unsigned &MaskValue) {
-  MaskValue = 0;
-  unsigned NumElems = BuildVector->getNumOperands();
-
-  // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
-  // We don't handle the >2 lanes case right now.
-  unsigned NumLanes = (NumElems - 1) / 8 + 1;
-  if (NumLanes > 2)
-    return false;
-
-  unsigned NumElemsInLane = NumElems / NumLanes;
-
-  // Blend for v16i16 should be symmetric for both lanes.
-  for (unsigned i = 0; i < NumElemsInLane; ++i) {
-    SDValue EltCond = BuildVector->getOperand(i);
-    SDValue SndLaneEltCond =
-        (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
-
-    int Lane1Cond = -1, Lane2Cond = -1;
-    if (isa<ConstantSDNode>(EltCond))
-      Lane1Cond = !isNullConstant(EltCond);
-    if (isa<ConstantSDNode>(SndLaneEltCond))
-      Lane2Cond = !isNullConstant(SndLaneEltCond);
-
-    unsigned LaneMask = 0;
-    if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
-      // Lane1Cond != 0 means we want the first argument.
-      // Lane1Cond == 0 means we want the second argument.
-      // The encoding of this argument is 0 for the first argument, 1
-      // for the second. Therefore, invert the condition.
-      LaneMask = !Lane1Cond << i;
-    else if (Lane1Cond < 0)
-      LaneMask = !Lane2Cond << i;
-    else
-      return false;
-
-    MaskValue |= LaneMask;
-    if (NumLanes == 2)
-      MaskValue |= LaneMask << NumElemsInLane;
-  }
-  return true;
-}
-
 /// \brief Try to lower a VSELECT instruction to a vector shuffle.
 static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
                                            const X86Subtarget &Subtarget,
@@ -25539,50 +25492,6 @@
   return SDValue();
 }
 
-static SDValue
-transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget &Subtarget) {
-  SDLoc dl(N);
-  SDValue Cond = N->getOperand(0);
-  SDValue LHS = N->getOperand(1);
-  SDValue RHS = N->getOperand(2);
-
-  if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
-    SDValue CondSrc = Cond->getOperand(0);
-    if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
-      Cond = CondSrc->getOperand(0);
-  }
-
-  if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
-    return SDValue();
-
-  // A vselect where all conditions and data are constants can be optimized
-  // into a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
-  if (ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) &&
-      ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()))
-    return SDValue();
-
-  unsigned MaskValue = 0;
-  if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
-    return SDValue();
-
-  MVT VT = N->getSimpleValueType(0);
-  unsigned NumElems = VT.getVectorNumElements();
-  SmallVector<int, 8> ShuffleMask(NumElems, -1);
-  for (unsigned i = 0; i < NumElems; ++i) {
-    // Be sure we emit undef where we can.
-    if (Cond.getOperand(i)->isUndef())
-      ShuffleMask[i] = -1;
-    else
-      ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
-  }
-
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  if (!TLI.isShuffleMaskLegal(ShuffleMask, VT))
-    return SDValue();
-  return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
-}
-
 /// Do target-specific dag combines on SELECT and VSELECT nodes.
 static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
                              TargetLowering::DAGCombinerInfo &DCI,
@@ -25996,25 +25905,6 @@
     }
   }
 
-  // We should generate an X86ISD::BLENDI from a vselect if its argument
-  // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
-  // constants. This specific pattern gets generated when we split a
-  // selector for a 512-bit vector in a machine without AVX512 (but with
-  // 256-bit vectors), during legalization:
-  //
-  // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
-  //
-  // Iff we find this pattern and the build_vectors are built from
-  // constants, we translate the vselect into a shuffle_vector that we
-  // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
-  if ((N->getOpcode() == ISD::VSELECT ||
-       N->getOpcode() == X86ISD::SHRUNKBLEND) &&
-      !DCI.isBeforeLegalize() && !VT.is512BitVector()) {
-    if (SDValue Shuffle =
-            transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget))
-      return Shuffle;
-  }
-
   // If this is a *dynamic* select (non-constant condition) and we can match
   // this node with one of the variable blend instructions, restructure the
   // condition so that the blends can use the high bit of each element and use
Index: test/CodeGen/X86/vector-blend.ll
===================================================================
--- test/CodeGen/X86/vector-blend.ll
+++ test/CodeGen/X86/vector-blend.ll
@@ -273,15 +273,15 @@
 ; SSE41-LABEL: vsel_i8:
 ; SSE41:       # BB#0: # %entry
 ; SSE41-NEXT:    movdqa %xmm0, %xmm2
-; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; SSE41-NEXT:    pblendvb %xmm1, %xmm2
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: vsel_i8:
 ; AVX:       # BB#0: # %entry
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
@@ -652,10 +652,11 @@
 ; SSE41-LABEL: constant_pblendvb_avx2:
 ; SSE41:       # BB#0: # %entry
 ; SSE41-NEXT:    movdqa %xmm0, %xmm4
-; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSE41-NEXT:    pblendvb %xmm2, %xmm4
-; SSE41-NEXT:    pblendvb %xmm3, %xmm1
-; SSE41-NEXT:    movdqa %xmm4, %xmm0
+; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE41-NEXT:    pblendvb %xmm4, %xmm2
+; SSE41-NEXT:    pblendvb %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    movdqa %xmm3, %xmm1
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: constant_pblendvb_avx2:
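
Note for reviewers: the deleted BUILD_VECTORtoBlendMask encoded a constant
vselect condition as a blend immediate, inverting each condition bit (a 0 bit
in the immediate selects the first operand, a 1 bit the second) and requiring
the two 128-bit lanes to agree; transformVSELECTtoBlendVECTOR_SHUFFLE then
expanded those bits into a shuffle mask. A minimal standalone sketch of that
encoding, outside the DAG (plain C++; encodeBlendMask and all names here are
hypothetical and not part of this patch):

#include <cstdio>
#include <optional>
#include <vector>

// Encode a constant blend condition the way the removed
// BUILD_VECTORtoBlendMask did. Cond holds one entry per element:
// 1 = take the first operand, 0 = take the second, -1 = undef.
// Returns std::nullopt when the pattern is not a lane-symmetric blend.
static std::optional<unsigned> encodeBlendMask(const std::vector<int> &Cond) {
  unsigned NumElems = Cond.size();
  // One 128-bit lane for up to 8 elements, two lanes for up to 16; the
  // immediate cannot describe more than two lanes.
  unsigned NumLanes = (NumElems - 1) / 8 + 1;
  if (NumLanes > 2)
    return std::nullopt;
  unsigned NumElemsInLane = NumElems / NumLanes;

  unsigned Mask = 0;
  for (unsigned i = 0; i != NumElemsInLane; ++i) {
    int Lane1 = Cond[i];
    int Lane2 = (NumLanes == 2) ? Cond[i + NumElemsInLane] : Lane1;

    unsigned Bit;
    if (Lane1 == Lane2 || Lane2 < 0)
      Bit = !Lane1; // invert: immediate bit 0 picks the first operand
    else if (Lane1 < 0)
      Bit = !Lane2;
    else
      return std::nullopt; // lanes disagree: not a symmetric blend

    // Set the bit in both lanes, as the removed code did.
    Mask |= Bit << i;
    if (NumLanes == 2)
      Mask |= Bit << (i + NumElemsInLane);
  }
  return Mask;
}

int main() {
  // The vsel_i8 condition: <i1 true, i1 false, i1 false, i1 false> x 4,
  // i.e. elements 0, 4, 8 and 12 come from the first operand.
  std::vector<int> Cond(16);
  for (unsigned i = 0; i != 16; ++i)
    Cond[i] = (i % 4 == 0) ? 1 : 0;

  if (auto M = encodeBlendMask(Cond)) {
    std::printf("mask = %#x\n", *M); // 0xeeee: inverted, lane-symmetric
    // The removed combine then built a shuffle mask from the bits:
    // bit == 0 -> element i of LHS, bit == 1 -> element i + 16 of RHS.
    for (unsigned i = 0; i != 16; ++i)
      std::printf("%u ", i + 16 * ((*M >> i) & 1));
    std::printf("\n");
  }
  return 0;
}

This also appears to explain the vector-blend.ll churn: without the early
shuffle transform, these selects now reach the generic VSELECT lowering, which
uses the condition directly as the pblendvb mask (255 where the condition is
true) with the blend operands in the opposite order, rather than the inverted
immediate-style mask produced via the shuffle path.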