Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -593,6 +593,7 @@ bool SplitVectorOperand(SDNode *N, unsigned OpNo); SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N, unsigned TruncateOp); SDValue SplitVecOp_BITCAST(SDNode *N); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); @@ -600,7 +601,6 @@ SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); - SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1293,7 +1293,9 @@ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE); + break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast(N), OpNo); @@ -1304,20 +1306,32 @@ case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N, ISD::TRUNCATE); + else + Res = SplitVecOp_UnaryOp(N); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FTRUNC: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = SplitVecOp_UnaryOp(N); break; + case ISD::FTRUNC: + Res = SplitVecOp_TruncateHelper(N, ISD::FTRUNC); + break; } } @@ -1581,7 +1595,8 @@ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } -SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N, + unsigned TruncateOp) { // The result type is legal, but the input type is illegal. If splitting // ends up with the result type of each half still being legal, just // do that. If, however, that would result in an illegal result type, @@ -1624,8 +1639,8 @@ EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -1634,7 +1649,7 @@ // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. - return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return DAG.getNode(TruncateOp, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { Index: test/CodeGen/AArch64/arm64-convert-v4f64.ll =================================================================== --- test/CodeGen/AArch64/arm64-convert-v4f64.ll +++ test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -3,11 +3,11 @@ define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) { ; CHECK: fptosi_v4f64_to_v4i16 -; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d -; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d -; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d -; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d -; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h +; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d +; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d +; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d +; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d +; CHECK: xtn v0.4h, v[[MID]].4s %tmp1 = load <4 x double>, <4 x double>* %ptr %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16> ret <4 x i16> %tmp2 @@ -19,13 +19,13 @@ ; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d ; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d ; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d -; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d ; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d -; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d +; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d ; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d -; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h -; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h -; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b +; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d +; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA0]].4s +; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA2]].4s +; CHECK: xtn v0.8b, v[[TMP1]].8h %tmp1 = load <8 x double>, <8 x double>* %ptr %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8> ret <8 x i8> %tmp2 Index: test/CodeGen/AArch64/vcvt-oversize.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/vcvt-oversize.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +define <8 x i8> @float_to_i8(<8 x float>* %in) { +; CHECK-LABEL: float_to_i8: +; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s +; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s +; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s +; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s +; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s +; CHECK-DAG: xtn2 v[[TMP]].8h, v[[MSB]].4s +; CHECK-DAG: xtn v0.8b, v[[TMP]].8h + %l = load <8 x float>, <8 x float>* %in + %scale = fmul <8 x float> %l, + %conv = fptoui <8 x float> %scale to <8 x i8> + ret <8 x i8> %conv +} Index: test/CodeGen/ARM/vcvt.ll =================================================================== --- test/CodeGen/ARM/vcvt.ll +++ test/CodeGen/ARM/vcvt.ll @@ -180,8 +180,8 @@ define <4 x i16> @fix_double_to_i16(<4 x double> %in) { ; CHECK-LABEL: fix_double_to_i16: -; CHECK: vcvt.s32.f64 -; CHECK: vcvt.s32.f64 +; CHECK: vcvt.u32.f64 +; CHECK: vcvt.u32.f64 %scale = fmul <4 x double> %in, %conv = fptoui <4 x double> %scale to <4 x i16>