Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1433,6 +1433,53 @@
   return Simplified;
 }
 
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+                                         const APInt &UndefOp0,
+                                         const APInt &UndefOp1) {
+  EVT VT = BO.getValueType();
+  assert(ISD::isBinaryOp(BO.getNode()) && VT.isVector() && "Vector binop only");
+
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(UndefOp0.getBitWidth() == NumElts &&
+         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+                                   const APInt &UndefVals) {
+    if (UndefVals[Index])
+      return DAG.getUNDEF(EltVT);
+
+    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+      // Try hard to make sure that the getNode() call is not creating temporary
+      // nodes. Ignore opaque integers because they do not constant fold.
+      SDValue Elt = BV->getOperand(Index);
+      auto *C = dyn_cast<ConstantSDNode>(Elt);
+      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+        return Elt;
+    }
+
+    return SDValue();
+  };
+
+  APInt KnownUndef = APInt::getNullValue(NumElts);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    // If both inputs for this element are either constant or undef and match
+    // the element type, compute the constant/undef result for this element of
+    // the vector.
+    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+    // not handle FP constants. The code within getNode() should be refactored
+    // to avoid the danger of creating a bogus temporary node here.
+    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+        KnownUndef.setBit(i);
+  }
+  return KnownUndef;
+}
+
 bool TargetLowering::SimplifyDemandedVectorElts(
     SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
@@ -1805,6 +1852,9 @@
     }
     break;
   }
+
+  // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+  // MAX, saturated math, etc.
   case ISD::OR:
   case ISD::XOR:
   case ISD::ADD:
@@ -1814,15 +1864,17 @@
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM: {
-    APInt SrcUndef, SrcZero;
-    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
-                                   SrcZero, TLO, Depth + 1))
+    APInt UndefRHS, ZeroRHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+                                   ZeroRHS, TLO, Depth + 1))
       return true;
-    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
-                                   KnownZero, TLO, Depth + 1))
+    APInt UndefLHS, ZeroLHS;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+                                   ZeroLHS, TLO, Depth + 1))
       return true;
-    KnownZero &= SrcZero;
-    KnownUndef &= SrcUndef;
+
+    KnownZero = ZeroLHS & ZeroRHS;
+    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
     break;
   }
   case ISD::AND: {
@@ -1836,6 +1888,8 @@
     // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
+    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+    // and then handle 'and' nodes with the rest of the binop opcodes.
     KnownZero |= SrcZero;
     KnownUndef &= SrcUndef;
     KnownUndef &= ~KnownZero;
     break;
Index: llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll
+++ llvm/trunk/test/CodeGen/X86/vector-partial-undef.ll
@@ -51,10 +51,6 @@
 ;
 ; AVX-LABEL: add_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,0,5,4,3,2,1,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = add <8 x i32> %extend,
@@ -71,11 +67,6 @@
 ;
 ; AVX-LABEL: sub_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 =
-; AVX-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
-; AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,5,4,3,2,6,7]
-; AVX-NEXT:    vpermd %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = sub <8 x i32> , %extend
@@ -130,24 +121,10 @@
 define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
 ; SSE-LABEL: xor_undef_elts:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm2
-; SSE-NEXT:    pxor {{.*}}(%rip), %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
-; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: xor_undef_elts:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,3,0,2]
-; AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; AVX-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
-; AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
 ; AVX-NEXT:    retq
   %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32>
   %bogus_bo = xor <8 x i32> %extend,
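
To illustrate the per-element analysis that getKnownUndefForVectorBinop() performs, here is a minimal standalone C++ sketch for an integer add. It is not LLVM API: Lane, foldAdd, and knownUndefForAdd are hypothetical names used only for this illustration, and the scalar fold assumes the DAG rule that add with an undef operand folds to undef.

    // Minimal standalone sketch (not LLVM API) of per-lane known-undef
    // analysis for a vector add. "Lane", "foldAdd", and "knownUndefForAdd"
    // are hypothetical names for illustration only.
    #include <bitset>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // A lane is undef, a known constant, or an unknown runtime value.
    struct Lane {
      enum Kind { Undef, Const, Unknown } K = Unknown;
      int64_t Val = 0;
    };

    // Scalar fold for ADD, assuming the DAG rule that add with an undef
    // operand folds to undef (undef may take any value, so the sum may too).
    static Lane foldAdd(const Lane &A, const Lane &B) {
      if (A.K == Lane::Undef || B.K == Lane::Undef)
        return {Lane::Undef, 0};
      if (A.K == Lane::Const && B.K == Lane::Const)
        return {Lane::Const, A.Val + B.Val};
      return {Lane::Unknown, 0};
    }

    // Compute the known-undef mask for an 8-lane vector add: a result lane
    // is known undef only when the scalar fold of its input lanes is undef.
    static std::bitset<8> knownUndefForAdd(const std::vector<Lane> &Op0,
                                           const std::vector<Lane> &Op1) {
      std::bitset<8> KnownUndef;
      for (unsigned i = 0; i != 8; ++i)
        if (foldAdd(Op0[i], Op1[i]).K == Lane::Undef)
          KnownUndef.set(i);
      return KnownUndef;
    }

    int main() {
      // LHS has undef lanes 0 and 5; RHS is a constant splat of 42.
      std::vector<Lane> LHS(8, Lane{Lane::Unknown, 0});
      LHS[0].K = LHS[5].K = Lane::Undef;
      std::vector<Lane> RHS(8, Lane{Lane::Const, 42});

      // Lanes 0 and 5 of the result are known undef: undef + 42 -> undef.
      std::cout << knownUndefForAdd(LHS, RHS) << '\n'; // prints 00100001
    }

The patch itself instead defers the scalar fold to DAG.getNode(), so each opcode's existing constant/undef folding is reused rather than re-implemented per opcode.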