Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10832,6 +10832,7 @@ // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { + unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); SmallVector Mask; for (unsigned i = 0; i != NumInScalars; ++i) { unsigned Opcode = N->getOperand(i).getOpcode(); @@ -10858,8 +10859,8 @@ continue; } - // Otherwise, use InIdx + VecSize - Mask.push_back(NumInScalars+ExtIndex); + // Otherwise, use InIdx + InputVecSize + Mask.push_back(InNumElements + ExtIndex); } // Avoid introducing illegal shuffles with zero. @@ -10869,14 +10870,12 @@ // We can't generate a shuffle node with mismatched input and output types. // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { - // We don't support shuffeling between TWO values of different types. - if (VecIn2.getNode()) - return SDValue(); - // If the input vector type has a different base type to the output // vector type, bail out. - if (VecIn1.getValueType().getVectorElementType() != - VT.getVectorElementType()) + EVT VTElemType = VT.getVectorElementType(); + if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || + (VecIn2.getNode() && + (VecIn2.getValueType().getVectorElementType() != VTElemType))) return SDValue(); // If the input vector is too small, widen it. @@ -10884,11 +10883,22 @@ // output registers. For example XMM->YMM widening on X86 with AVX. EVT VecInT = VecIn1.getValueType(); if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If we only have one small input, widen it by adding undef values. + if (!VecIn2.getNode()) + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, + DAG.getUNDEF(VecIn1.getValueType())); + else if (VecIn1.getValueType() == VecIn2.getValueType()) { + // If we have two small inputs of the same type, try to concat them. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); + VecIn2 = SDValue(nullptr, 0); + } else + return SDValue(); } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { // If the input vector is too large, try to split it. + // We don't support having two input vectors that are too large. + if (VecIn2.getNode()) + return SDValue(); + if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) return SDValue(); @@ -10899,7 +10909,7 @@ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(0, TLI.getVectorIdxTy())); UsesZeroVector = false; - } else + } else return SDValue(); } Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1584,6 +1584,26 @@ ret <4 x i32> %2 } +define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) { +; SSE-LABEL: combine_test22: +; SSE: # BB#0: +; SSE-NEXT: movq (%rdi), %xmm0 +; SSE-NEXT: movhpd (%rsi), %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: combine_test22: +; AVX1: # BB#0: +; AVX1-NEXT: vmovq (%rdi), %xmm0 +; AVX1-NEXT: vmovhpd (%rsi), %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; Current AVX2 lowering of this is still awful, not adding a test case. + %1 = load <2 x float>* %a, align 8 + %2 = load <2 x float>* %b, align 8 + %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> + ret <8 x float> %3 +} + ; Check some negative cases. ; FIXME: Do any of these really make sense? Are they redundant with the above tests?