Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1433,6 +1433,53 @@
   return Simplified;
 }
 
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+                                         const APInt &UndefOp0,
+                                         const APInt &UndefOp1) {
+  EVT VT = BO.getValueType();
+  assert(ISD::isBinaryOp(BO.getNode()) && VT.isVector() && "Vector binop only");
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(UndefOp0.getBitWidth() == NumElts &&
+         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+                                   const APInt &UndefVals) {
+    if (UndefVals[Index])
+      return DAG.getUNDEF(EltVT);
+
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      // Try hard to make sure that the getNode() call is not creating temporary
+      // nodes. Ignore opaque integers because they do not constant fold.
+      SDValue Elt = BV->getOperand(Index);
+      auto *C = dyn_cast<ConstantSDNode>(Elt);
+      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+        return Elt;
+    }
+
+    return SDValue();
+  };
+
+  APInt KnownUndef = APInt::getNullValue(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    // If both inputs for this element are either constant or undef and match
+    // the element type, compute the constant/undef result for this element of
+    // the vector.
+    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+    // not handle FP constants. The code within getNode() should be refactored
+    // to avoid the danger of creating a bogus temporary node here.
+    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+        KnownUndef.setBit(i);
+  }
+  return KnownUndef;
+}
+
 bool TargetLowering::SimplifyDemandedVectorElts(
     SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
@@ -1805,6 +1852,9 @@
     }
     break;
   }
+
+  // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+  // MAX, saturated math, etc.
   case ISD::OR:
   case ISD::XOR:
   case ISD::ADD:
@@ -1814,15 +1864,17 @@
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM: {
-    APInt SrcUndef, SrcZero;
-    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
-                                   SrcZero, TLO, Depth + 1))
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
       return true;
-    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
-                                   KnownZero, TLO, Depth + 1))
+    APInt UndefLHS, ZeroLHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
       return true;
-    KnownZero &= SrcZero;
-    KnownUndef &= SrcUndef;
+
+    KnownZero = ZeroLHS & ZeroRHS;
+    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
     break;
   }
   case ISD::AND: {
@@ -1836,6 +1888,8 @@
     // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
+    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+    // and then handle 'and' nodes with the rest of the binop opcodes.
     KnownZero |= SrcZero;
     KnownUndef &= SrcUndef;
     KnownUndef &= ~KnownZero;
     break;
Index: llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll
+++ llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll
@@ -51,10 +51,6 @@
 ;
 ; AVX-LABEL: add_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,0,5,4,3,2,1,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = add <8 x i32> %extend,
@@ -71,11 +67,6 @@
 ;
 ; AVX-LABEL: sub_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 =
-; AVX-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,5,4,3,2,6,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = sub <8 x i32> , %extend
@@ -130,24 +121,10 @@
 define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
 ; SSE-LABEL: xor_undef_elts:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm2
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: xor_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,2]
-; AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; AVX-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
-; AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = xor <8 x i32> %extend,
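
To illustrate the per-element analysis that getKnownUndefForVectorBinop() performs, here is a minimal standalone C++ sketch for an integer add. It is not LLVM API: Lane, foldAdd, and knownUndefForAdd are hypothetical names used only for this illustration, and the scalar fold assumes the DAG rule that add with an undef operand folds to undef.

    // Minimal standalone sketch (not LLVM API) of per-lane known-undef
    // analysis for a vector add. "Lane", "foldAdd", and "knownUndefForAdd"
    // are hypothetical names for illustration only.
    #include <bitset>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // A lane is undef, a known constant, or an unknown runtime value.
    struct Lane {
      enum Kind { Undef, Const, Unknown } K = Unknown;
      int64_t Val = 0;
    };

    // Scalar fold for ADD, assuming the DAG rule that add with an undef
    // operand folds to undef (undef may take any value, so the sum may too).
    static Lane foldAdd(const Lane &A, const Lane &B) {
      if (A.K == Lane::Undef || B.K == Lane::Undef)
        return {Lane::Undef, 0};
      if (A.K == Lane::Const && B.K == Lane::Const)
        return {Lane::Const, A.Val + B.Val};
      return {Lane::Unknown, 0};
    }

    // Compute the known-undef mask for an 8-lane vector add: a result lane
    // is known undef only when the scalar fold of its input lanes is undef.
    static std::bitset<8> knownUndefForAdd(const std::vector<Lane> &Op0,
                                           const std::vector<Lane> &Op1) {
      std::bitset<8> KnownUndef;
      for (unsigned i = 0; i != 8; ++i)
        if (foldAdd(Op0[i], Op1[i]).K == Lane::Undef)
          KnownUndef.set(i);
      return KnownUndef;
    }

    int main() {
      // LHS has undef lanes 0 and 5; RHS is a constant splat of 42.
      std::vector<Lane> LHS(8, Lane{Lane::Unknown, 0});
      LHS[0].K = LHS[5].K = Lane::Undef;
      std::vector<Lane> RHS(8, Lane{Lane::Const, 42});

      // Lanes 0 and 5 of the result are known undef: undef + 42 -> undef.
      std::cout << knownUndefForAdd(LHS, RHS) << '\n'; // prints 00100001
    }

The patch itself instead defers the scalar fold to DAG.getNode(), so each opcode's existing constant/undef folding is reused rather than re-implemented per opcode.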