Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -2492,10 +2492,11 @@ /// Attempt to match a binary predicate against a pair of scalar/splat /// constants or every element of a pair of constant BUILD_VECTORs. + /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match. bool matchBinaryPredicate( SDValue LHS, SDValue RHS, - std::function Match); - + std::function Match, + bool AllowUndefs = false); } // end namespace ISD } // end namespace llvm Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5175,12 +5175,12 @@ return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) - // iff (c1 & c2) != 0. - auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { - return LHS->getAPIntValue().intersects(RHS->getAPIntValue()); + // iff (c1 & c2) != 0 or c1/c2 are undef. + auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) { + return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue()); }; if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { + ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) { if (SDValue COR = DAG.FoldConstantArithmetic( ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -287,26 +287,33 @@ bool ISD::matchBinaryPredicate( SDValue LHS, SDValue RHS, - std::function Match) { + std::function Match, + bool AllowUndefs) { if (LHS.getValueType() != RHS.getValueType()) return false; + // TODO: Add support for scalar UNDEF cases? if (auto *LHSCst = dyn_cast(LHS)) if (auto *RHSCst = dyn_cast(RHS)) return Match(LHSCst, RHSCst); + // TODO: Add support for vector UNDEF cases? if (ISD::BUILD_VECTOR != LHS.getOpcode() || ISD::BUILD_VECTOR != RHS.getOpcode()) return false; EVT SVT = LHS.getValueType().getScalarType(); for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { - auto *LHSCst = dyn_cast(LHS.getOperand(i)); - auto *RHSCst = dyn_cast(RHS.getOperand(i)); - if (!LHSCst || !RHSCst) + SDValue LHSOp = LHS.getOperand(i); + SDValue RHSOp = RHS.getOperand(i); + bool LHSUndef = AllowUndefs && LHSOp.isUndef(); + bool RHSUndef = AllowUndefs && RHSOp.isUndef(); + auto *LHSCst = dyn_cast(LHSOp); + auto *RHSCst = dyn_cast(RHSOp); + if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef)) return false; - if (LHSCst->getValueType(0) != SVT || - LHSCst->getValueType(0) != RHSCst->getValueType(0)) + if (LHSOp.getValueType() != SVT || + LHSOp.getValueType() != RHSOp.getValueType()) return false; if (!Match(LHSCst, RHSCst)) return false; Index: test/CodeGen/X86/known-bits-vector.ll =================================================================== --- test/CodeGen/X86/known-bits-vector.ll +++ test/CodeGen/X86/known-bits-vector.ll @@ -162,18 +162,12 @@ define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind { ; X32-LABEL: knownbits_mask_or_shuffle_uitofp: ; X32: # %bb.0: -; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: vorps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] -; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 +; X32-NEXT: vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4] ; X32-NEXT: retl ; ; X64-LABEL: knownbits_mask_or_shuffle_uitofp: ; X64: # %bb.0: -; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vorps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3] -; X64-NEXT: vcvtdq2ps %xmm0, %xmm0 +; X64-NEXT: vmovaps {{.*#+}} xmm0 = [6.5535E+4,6.5535E+4,6.5535E+4,6.5535E+4] ; X64-NEXT: retq %1 = and <4 x i32> %a0, %2 = or <4 x i32> %1,