Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4110,6 +4110,7 @@
 // shuffle in combination with VEXTs.
 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
   SDLoc dl(Op);
   EVT VT = Op.getValueType();
   unsigned NumElts = VT.getVectorNumElements();
@@ -4162,6 +4163,56 @@
   // This loop extracts the usage patterns of the source vectors
   // and prepares appropriate SDValues for a shuffle if possible.
   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    if (SourceVecs[i].getValueType().getVectorElementType() !=
+        VT.getVectorElementType()) {
+      if (SourceVecs[i].getOpcode() == ISD::AssertSext ||
+          SourceVecs[i].getOpcode() == ISD::AssertZext) {
+        // For AssertSext/AssertZext, we need to bitcast it to a vector which
+        // holds the asserted element type, and modify the extracted lane
+        // numbers to point at the correct lanes. For example, if a v2i32
+        // AssertSext node asserts it holds 2 i16 elements, it is first bitcast
+        // to v4i16, then all lane numbers in the EXTRACT_VECTOR_ELTs that
+        // extract from it are doubled. Finally a new BUILD_VECTOR is built on
+        // those newly created EXTRACT_VECTOR_ELTs to replace the old Op.
+        EVT AssertTy = cast<VTSDNode>(SourceVecs[i].getOperand(1))->getVT();
+        EVT AssertVT = EVT::getVectorVT(*DAG.getContext(), AssertTy,
+                                        SourceVecs[i].getValueSizeInBits() /
+                                            AssertTy.getSizeInBits());
+        EVT LegalTy = Op.getOperand(0).getValueType();
+        // Create a BITCAST on AssertSext/AssertZext to get a vector whose
+        // element type is AssertTy.
+        SDValue BitCst = DAG.getNode(ISD::BITCAST, dl, AssertVT, SourceVecs[i]);
+        unsigned OffsetMultipliers =
+            AssertVT.getVectorNumElements() /
+            SourceVecs[i].getValueType().getVectorNumElements();
+        // Collect operands to create the new BUILD_VECTOR node; lanes
+        // extracted from SourceVecs[i] must be multiplied by OffsetMultipliers.
+        SmallVector<SDValue, 8> BuildSrc;
+        for (unsigned j = 0; j < NumElts; ++j) {
+          if (Op.getOperand(j).getOperand(0) != SourceVecs[i]) {
+            BuildSrc.push_back(Op.getOperand(j));
+            continue;
+          }
+          unsigned OriginLane =
+              cast<ConstantSDNode>(Op.getOperand(j).getOperand(1))
+                  ->getSExtValue();
+          SDValue ExtElt = DAG.getNode(
+              ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, BitCst,
+              DAG.getIntPtrConstant(OriginLane * OffsetMultipliers));
+          BuildSrc.push_back(ExtElt);
+        }
+        // Create a new BUILD_VECTOR to replace the old one.
+        Op = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+                         makeArrayRef(BuildSrc.data(), NumElts));
+        SourceVecs[i] = BitCst;
+        MaxElts[i] *= OffsetMultipliers;
+        MinElts[i] *= OffsetMultipliers;
+      } else {
+        // Don't attempt to extract subvectors from BUILD_VECTOR sources
+        // that expand or trunc the original value.
+        return SDValue();
+      }
+    }
     if (SourceVecs[i].getValueType() == VT) {
       // No VEXT necessary
       ShuffleSrcs[i] = SourceVecs[i];
@@ -4175,15 +4226,6 @@
       continue;
     }
 
-    // Don't attempt to extract subvectors from BUILD_VECTOR sources
-    // that expand or trunc the original value.
-    // TODO: We can try to bitcast and ANY_EXTEND the result but
-    // we need to consider the cost of vector ANY_EXTEND, and the
-    // legality of all the types.
-    if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType())
-      return SDValue();
-
     // Since only 64-bit and 128-bit vectors are legal on ARM and
     // we've eliminated the other cases...
     assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
Index: test/CodeGen/AArch64/arm64-convert-v4f64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=arm64 | FileCheck %s
+
+
+define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i16
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
+; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
+  %tmp1 = load <4 x double>* %ptr
+  %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
+  ret <4 x i16> %tmp2
+}
+
+define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
+; CHECK: fptosi_v4f64_to_v4i8
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
+; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
+; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
+; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
+; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
+; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
+  %tmp1 = load <8 x double>* %ptr
+  %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
+  ret <8 x i8> %tmp2
+}
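
A minimal IR sketch (not part of the patch, with a hypothetical function name) of the unsigned companion case: fptoui of <4 x double> is assumed to reach ReconstructShuffle through AssertZext nodes in the same way the fptosi tests above reach it through AssertSext, so it would exercise the new ISD::AssertZext arm of the code; no FileCheck output is claimed for it.

; Assumed companion case (sketch only): unsigned conversion expected to hit
; the AssertZext path added above.
define <4 x i16> @fptoui_v4f64_to_v4i16(<4 x double>* %ptr) {
  %tmp1 = load <4 x double>* %ptr
  %tmp2 = fptoui <4 x double> %tmp1 to <4 x i16>
  ret <4 x i16> %tmp2
}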