Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1433,6 +1433,48 @@
   return Simplified;
 }
 
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+                                         const APInt &UndefOp0,
+                                         const APInt &UndefOp1) {
+  EVT VT = BO.getValueType();
+  assert(ISD::isBinaryOp(BO.getNode()) && VT.isVector() && "Vector binop only");
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(UndefOp0.getBitWidth() == NumElts &&
+         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+                                   const APInt &UndefVals) {
+    if (UndefVals[Index])
+      return DAG.getUNDEF(EltVT);
+
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      SDValue BVOp = BV->getOperand(Index);
+      if (isa<ConstantSDNode>(BVOp) || isa<ConstantFPSDNode>(BVOp) ||
+          BVOp.isUndef())
+        return BVOp;
+    }
+
+    return SDValue();
+  };
+
+  APInt KnownUndef = APInt::getNullValue(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    // If both inputs for this element are either constant or undef and match
+    // the element type, compute the constant/undef result for this element of
+    // the vector.
+    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+        KnownUndef.setBit(i);
+  }
+  return KnownUndef;
+}
+
 bool TargetLowering::SimplifyDemandedVectorElts(
     SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
@@ -1814,15 +1856,17 @@
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM: {
-    APInt SrcUndef, SrcZero;
-    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
-                                   SrcZero, TLO, Depth + 1))
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
       return true;
-    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
-                                   KnownZero, TLO, Depth + 1))
+    APInt UndefLHS, ZeroLHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
       return true;
-    KnownZero &= SrcZero;
-    KnownUndef &= SrcUndef;
+
+    KnownZero = ZeroLHS & ZeroRHS;
+    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
     break;
   }
   case ISD::AND: {
Index: test/CodeGen/X86/vector-partial-undef.ll
===================================================================
--- test/CodeGen/X86/vector-partial-undef.ll
+++ test/CodeGen/X86/vector-partial-undef.ll
@@ -51,10 +51,6 @@
 ;
 ; AVX-LABEL: add_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,0,5,4,3,2,1,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = add <8 x i32> %extend,
@@ -71,11 +67,6 @@
 ;
 ; AVX-LABEL: sub_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 =
-; AVX-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,5,4,3,2,6,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = sub <8 x i32> , %extend
@@ -130,24 +121,10 @@
 define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
 ; SSE-LABEL: xor_undef_elts:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm2
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: xor_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,2]
-; AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; AVX-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
-; AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = xor <8 x i32> %extend,
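
Note on the analysis above: a result lane is marked known-undef only when both input
lanes are constant-or-undef and constant folding the scalar opcode yields undef, so
lanes where just one operand is undef (e.g. "add undef, C", which the DAG folds to
undef) are now caught, whereas the previous "KnownUndef &= SrcUndef" intersection
required undef in both operands. Below is a minimal standalone sketch of that
per-lane rule in plain C++17 with no LLVM dependencies; Lane, foldAdd, and the
sample vectors are hypothetical stand-ins for build-vector elements and the DAG's
scalar constant folder, not the patch's actual API.

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <vector>

    // A vector lane is either a known integer constant or undef (empty optional).
    using Lane = std::optional<int32_t>;

    // Scalar fold for ADD, mirroring how the DAG folds `add x, undef` to undef:
    // if either operand lane is undef, the result lane is undef.
    static Lane foldAdd(Lane A, Lane B) {
      if (!A || !B)
        return std::nullopt;
      return *A + *B;
    }

    int main() {
      // These play the role of UndefOp0/UndefOp1 plus constant elements: only
      // lane 1 of the LHS and lane 2 of the RHS are undef.
      std::vector<Lane> LHS = {1, std::nullopt, 3, 4};
      std::vector<Lane> RHS = {42, 42, std::nullopt, 42};

      // KnownUndef: set when folding the two known lanes yields undef. Lanes 1
      // and 2 are undef even though only ONE input lane is undef there -- the
      // cases the old KnownUndef &= SrcUndef intersection missed.
      std::vector<bool> KnownUndef(LHS.size());
      for (size_t i = 0; i != LHS.size(); ++i)
        KnownUndef[i] = !foldAdd(LHS[i], RHS[i]).has_value();

      for (size_t i = 0; i != KnownUndef.size(); ++i)
        std::cout << "lane " << i << (KnownUndef[i] ? ": undef\n" : ": defined\n");
      return 0;
    }

The undef-propagation rule is opcode-specific (for AND/OR, for instance, the DAG
folds an undef operand toward zero/all-ones rather than undef), which is why the
real code defers the per-lane fold to DAG.getNode() on scalar elements instead of
hard-coding propagation as this sketch does for ADD.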