Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -737,6 +737,10 @@ return false; } + /// Return true if all the users of N are contained in Nodes. + /// NOTE: N must have at least one user; Nodes may contain non-users of N. + static bool areOnlyUsersOf(ArrayRef Nodes, const SDNode *N); + /// Return the number of values used by this operation. unsigned getNumOperands() const { return NumOperands; } Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7125,6 +7125,21 @@ return Seen; } +/// Return true if N has at least one user and all its users are in Nodes. +bool SDNode::areOnlyUsersOf(ArrayRef Nodes, const SDNode *N) { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (llvm::any_of(Nodes, + [&User](const SDNode *Node) { return User == Node; })) + Seen = true; + else + return false; + } + + return Seen; +} + /// isOperand - Return true if this node is an operand of N. /// bool SDValue::isOperandOf(const SDNode *N) const { Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -27442,6 +27442,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, int SrcOpIndex, SDValue Root, ArrayRef RootMask, + ArrayRef SrcNodes, int Depth, bool HasVariableMask, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -27578,11 +27579,20 @@ HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode()); - // See if we can recurse into each shuffle source op (if it's a target shuffle). + // Update the list of shuffle nodes that have been combined so far. 
+ SmallVector CombinedNodes(SrcNodes.begin(), + SrcNodes.end()); + CombinedNodes.push_back(Op.getNode()); + + // See if we can recurse into each shuffle source op (if it's a target + // shuffle). The source op should only be combined if it either has a + // single use (i.e. current Op) or all its users have already been combined. for (int i = 0, e = Ops.size(); i < e; ++i) - if (Ops[i].getNode()->hasOneUse() || Op->isOnlyUserOf(Ops[i].getNode())) - if (combineX86ShufflesRecursively(Ops, i, Root, Mask, Depth + 1, - HasVariableMask, DAG, DCI, Subtarget)) + if (Ops[i].getNode()->hasOneUse() || + SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) + if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes, + Depth + 1, HasVariableMask, DAG, DCI, + Subtarget)) return true; // Attempt to constant fold all of the constant source ops. @@ -28380,7 +28390,7 @@ // a particular chain. SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); - if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. @@ -30568,7 +30578,7 @@ SDValue Op(N, 0); SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); - if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. @@ -30609,7 +30619,7 @@ SDValue Op(N, 0); SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); - combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget); return SDValue(); @@ -30922,7 +30932,7 @@ SDValue Op(N, 0); SmallVector NonceMask; // Just a placeholder. 
NonceMask.push_back(0); - if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. @@ -33080,7 +33090,7 @@ SDValue Op(N, 0); SmallVector NonceMask; // Just a placeholder. NonceMask.push_back(0); - if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, + if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, DAG, DCI, Subtarget)) return SDValue(); // This routine will use CombineTo to replace N. Index: test/CodeGen/X86/clear_upper_vector_element_bits.ll =================================================================== --- test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -260,13 +260,7 @@ define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind { ; SSE-LABEL: _clearupper2xi64b: ; SSE: # BB#0: -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: xorps %xmm1, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0] -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3] -; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[2,0] -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,0] -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: _clearupper2xi64b: