Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -5779,14 +5779,133 @@ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, V); } -/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. +/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real +/// underlying vector and index. +/// +/// Modifies \p ExtractedFromVec to the real vector and returns the real +/// index. +static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, + SDValue ExtIdx) { + int Idx = cast(ExtIdx)->getZExtValue(); + if (!isa(ExtractedFromVec)) + return Idx; + + // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already + // lowered this: + // (extract_vector_elt (v8f32 %vreg1), Constant<6>) + // to: + // (extract_vector_elt (vector_shuffle<2,u,u,u> + // (extract_subvector (v8f32 %vreg0), Constant<4>), + // undef) + // Constant<0>) + // In this case the vector is the extract_subvector expression and the index + // is 2, as specified by the shuffle. + ShuffleVectorSDNode *SVOp = cast(ExtractedFromVec); + SDValue ShuffleVec = SVOp->getOperand(0); + MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); + assert(ShuffleVecVT.getVectorElementType() == + ExtractedFromVec.getSimpleValueType().getVectorElementType()); + + int ShuffleIdx = SVOp->getMaskElt(Idx); + if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { + ExtractedFromVec = ShuffleVec; + return ShuffleIdx; + } + return Idx; +} + +/// \brief - If BUILD_VECTOR uses several extractelts and few insertions (currently +/// one), transform it into a SHUFFLE + INSERT_VECTOR_ELT. +static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + + // Skip if insert_vec_elt is not supported. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) + return SDValue(); + + SDLoc DL(Op); + unsigned NumElems = Op.getNumOperands(); + + SDValue VecIn1; + SDValue VecIn2; + SmallVector InsertIndices; + SmallVector Mask(NumElems, -1); + + for (unsigned i = 0; i != NumElems; ++i) { + unsigned Opc = Op.getOperand(i).getOpcode(); + + if (Opc == ISD::UNDEF) + continue; + + if (Opc != ISD::EXTRACT_VECTOR_ELT) { + // Quit if more than 1 elements need inserting. + if (InsertIndices.size() > 1) + return SDValue(); + + InsertIndices.push_back(i); + continue; + } + + SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); + SDValue ExtIdx = Op.getOperand(i).getOperand(1); + // Quit if non-constant index. + if (!isa(ExtIdx)) + return SDValue(); + int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx); + + // Quit if extracted from vector of different type. + if (ExtractedFromVec.getValueType() != VT) + return SDValue(); + + if (!VecIn1.getNode()) + VecIn1 = ExtractedFromVec; + else if (VecIn1 != ExtractedFromVec) { + if (!VecIn2.getNode()) + VecIn2 = ExtractedFromVec; + else if (VecIn2 != ExtractedFromVec) + // Quit if more than 2 vectors to shuffle + return SDValue(); + } + + if (ExtractedFromVec == VecIn1) + Mask[i] = Idx; + else if (ExtractedFromVec == VecIn2) + Mask[i] = Idx + NumElems; + } + + if (!VecIn1.getNode()) + return SDValue(); + + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]); + for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) { + unsigned Idx = InsertIndices[i]; + NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), + DAG.getIntPtrConstant(Idx)); + } + + return NV; +} + + +/// \brief Custom lower build_vector of v8i16. Couple of options available: +/// +/// (1) More than half non zero elts without SSE2 fallback to a likely series +/// of unpck*. +/// (2) With SSE2 support use several insert_vector_elts to get PINSRWs. +/// (3) Use a shuffle. /// static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, const X86Subtarget* Subtarget, const TargetLowering &TLI) { - if (NumNonZero > 4) + SDValue Sh = buildFromShuffleMostly(Op, DAG); + if (Sh.getNode()) + return Sh; + + if (NumNonZero > 4 && !Subtarget->hasSSE2()) return SDValue(); SDLoc dl(Op); @@ -6271,113 +6390,6 @@ return SDValue(); } -/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real -/// underlying vector and index. -/// -/// Modifies \p ExtractedFromVec to the real vector and returns the real -/// index. -static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, - SDValue ExtIdx) { - int Idx = cast(ExtIdx)->getZExtValue(); - if (!isa(ExtractedFromVec)) - return Idx; - - // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already - // lowered this: - // (extract_vector_elt (v8f32 %vreg1), Constant<6>) - // to: - // (extract_vector_elt (vector_shuffle<2,u,u,u> - // (extract_subvector (v8f32 %vreg0), Constant<4>), - // undef) - // Constant<0>) - // In this case the vector is the extract_subvector expression and the index - // is 2, as specified by the shuffle. - ShuffleVectorSDNode *SVOp = cast(ExtractedFromVec); - SDValue ShuffleVec = SVOp->getOperand(0); - MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); - assert(ShuffleVecVT.getVectorElementType() == - ExtractedFromVec.getSimpleValueType().getVectorElementType()); - - int ShuffleIdx = SVOp->getMaskElt(Idx); - if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { - ExtractedFromVec = ShuffleVec; - return ShuffleIdx; - } - return Idx; -} - -static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); - - // Skip if insert_vec_elt is not supported. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) - return SDValue(); - - SDLoc DL(Op); - unsigned NumElems = Op.getNumOperands(); - - SDValue VecIn1; - SDValue VecIn2; - SmallVector InsertIndices; - SmallVector Mask(NumElems, -1); - - for (unsigned i = 0; i != NumElems; ++i) { - unsigned Opc = Op.getOperand(i).getOpcode(); - - if (Opc == ISD::UNDEF) - continue; - - if (Opc != ISD::EXTRACT_VECTOR_ELT) { - // Quit if more than 1 elements need inserting. - if (InsertIndices.size() > 1) - return SDValue(); - - InsertIndices.push_back(i); - continue; - } - - SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); - SDValue ExtIdx = Op.getOperand(i).getOperand(1); - // Quit if non-constant index. - if (!isa(ExtIdx)) - return SDValue(); - int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx); - - // Quit if extracted from vector of different type. - if (ExtractedFromVec.getValueType() != VT) - return SDValue(); - - if (!VecIn1.getNode()) - VecIn1 = ExtractedFromVec; - else if (VecIn1 != ExtractedFromVec) { - if (!VecIn2.getNode()) - VecIn2 = ExtractedFromVec; - else if (VecIn2 != ExtractedFromVec) - // Quit if more than 2 vectors to shuffle - return SDValue(); - } - - if (ExtractedFromVec == VecIn1) - Mask[i] = Idx; - else if (ExtractedFromVec == VecIn2) - Mask[i] = Idx + NumElems; - } - - if (!VecIn1.getNode()) - return SDValue(); - - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); - SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]); - for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) { - unsigned Idx = InsertIndices[i]; - NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), - DAG.getIntPtrConstant(Idx)); - } - - return NV; -} - // Lower BUILD_VECTOR operation for v8i1 and v16i1 types. SDValue X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { Index: test/CodeGen/X86/vec_set.ll =================================================================== --- test/CodeGen/X86/vec_set.ll +++ test/CodeGen/X86/vec_set.ll @@ -1,15 +1,27 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | grep punpckl | count 7 +; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | FileCheck %s -define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { - %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 ; <<8 x i16>> [#uses=1] - %tmp2 = insertelement <8 x i16> %tmp, i16 %a1, i32 1 ; <<8 x i16>> [#uses=1] - %tmp4 = insertelement <8 x i16> %tmp2, i16 %a2, i32 2 ; <<8 x i16>> [#uses=1] - %tmp6 = insertelement <8 x i16> %tmp4, i16 %a3, i32 3 ; <<8 x i16>> [#uses=1] - %tmp8 = insertelement <8 x i16> %tmp6, i16 %a4, i32 4 ; <<8 x i16>> [#uses=1] - %tmp10 = insertelement <8 x i16> %tmp8, i16 %a5, i32 5 ; <<8 x i16>> [#uses=1] - %tmp12 = insertelement <8 x i16> %tmp10, i16 %a6, i32 6 ; <<8 x i16>> [#uses=1] - %tmp14 = insertelement <8 x i16> %tmp12, i16 %a7, i32 7 ; <<8 x i16>> [#uses=1] - store <8 x i16> %tmp14, <8 x i16>* %b - ret void +; CHECK-LABEL: test0 +define void @test0(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { +; CHECK-LABEL: test0: +; CHECK: ## BB#0: +; CHECK-NEXT: pinsrw $0, %esi, %xmm0 +; CHECK-NEXT: pinsrw $1, %edx, %xmm0 +; CHECK-NEXT: pinsrw $2, %ecx, %xmm0 +; CHECK-NEXT: pinsrw $3, %r8d, %xmm0 +; CHECK-NEXT: pinsrw $4, %r9d, %xmm0 +; CHECK-NEXT: pinsrw $5, {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: pinsrw $6, {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: pinsrw $7, {{[0-9]+}}(%rsp), %xmm0 +; CHECK-NEXT: movdqa %xmm0, (%rdi) +; CHECK-NEXT: retq + %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 + %tmp2 = insertelement <8 x i16> %tmp, i16 %a1, i32 1 + %tmp4 = insertelement <8 x i16> %tmp2, i16 %a2, i32 2 + %tmp6 = insertelement <8 x i16> %tmp4, i16 %a3, i32 3 + %tmp8 = insertelement <8 x i16> %tmp6, i16 %a4, i32 4 + %tmp10 = insertelement <8 x i16> %tmp8, i16 %a5, i32 5 + %tmp12 = insertelement <8 x i16> %tmp10, i16 %a6, i32 6 + %tmp14 = insertelement <8 x i16> %tmp12, i16 %a7, i32 7 + store <8 x i16> %tmp14, <8 x i16>* %b + ret void } -