Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -690,7 +690,7 @@
   case TargetLowering::TypePromoteInteger:
     Res = GetPromotedInteger(InOp);
     break;
-  case TargetLowering::TypeSplitVector:
+  case TargetLowering::TypeSplitVector: {
     EVT InVT = InOp.getValueType();
     assert(InVT.isVector() && "Cannot split scalar types");
     unsigned NumElts = InVT.getVectorNumElements();
@@ -709,6 +709,26 @@
 
     return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
   }
+  case TargetLowering::TypeWidenVector: {
+    SDValue WideInOp = GetWidenedVector(InOp);
+
+    // Truncate the widened InOp to the scalar type of N's result.
+    unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
+    EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
+                                   N->getValueType(0).getScalarType(), NumElem);
+    SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
+
+    // Zero extend so that the elements have the same type as those of NVT.
+    EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
+                                 NumElem);
+    SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
+
+    // Extract the low NVT-sized subvector, starting at element index 0.
+    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+    SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
+  }
+  }
 
   // Truncate to NVT instead of VT
   return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
Index: test/CodeGen/SystemZ/vec-trunc-to-i1.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/vec-trunc-to-i1.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+;
+; Check that a widening truncate to a vector of i1 elements can be handled.
+
+define void @autogen_SD29574(i8*, i32*, i64*, i32, i64, i8) {
+; CHECK: .text
+BB:
+  %A4 = alloca <8 x i32>
+  %A3 = alloca <16 x i64>
+  %A2 = alloca <2 x i16>
+  %A1 = alloca <16 x i32>
+  %A = alloca <4 x i32>
+  %L = load i8, i8* %0
+  store <16 x i32> zeroinitializer, <16 x i32>* %A1
+  %E = extractelement <4 x i16> zeroinitializer, i32 0
+  %Shuff = shufflevector <2 x i64> zeroinitializer, <2 x i64> zeroinitializer, <2 x i32> ; NOTE(review): mask value appears to be missing after the mask type - TODO restore
+  %I = insertelement <2 x i64> %Shuff, i64 111415, i32 1
+  %Tr = trunc <16 x i32> zeroinitializer to <16 x i16>
+  %Sl = select i1 true, float 0x469BEFFFC0000000, float 0x3952E376C0000000
+  %Cmp = fcmp une double 0xDB116CB0010F082E, 0x438536246B8353A2
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF35, %BB
+  %L5 = load i8, i8* %0
+  store i32 0, i32* %1
+  %E6 = extractelement <8 x i16> zeroinitializer, i32 4
+  %Shuff7 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> ; NOTE(review): mask value appears to be missing after the mask type - TODO restore
+  %I8 = insertelement <2 x i64> zeroinitializer, i64 %4, i32 1
+  %B = shl i16 %E6, -1
+  %FC = fptoui float 0x3952E376C0000000 to i1
+  br i1 %FC, label %CF, label %CF35
+
+CF35:                                             ; preds = %CF
+  %Sl9 = select i1 true, <16 x i64>* %A3, <16 x i64>* %A3
+  %Cmp10 = icmp ult <2 x i64> %Shuff, %Shuff
+  %L11 = load <16 x i64>, <16 x i64>* %Sl9
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  %E12 = extractelement <2 x i8> zeroinitializer, i32 1
+  %Shuff13 = shufflevector <4 x i8> zeroinitializer, <4 x i8> zeroinitializer, <4 x i32> ; NOTE(review): mask value appears to be missing after the mask type - TODO restore
+  %I14 = insertelement <4 x i8> %Shuff13, i8 -1, i32 2
+  %B15 = sub <4 x i8> %I14, zeroinitializer
+  %Tr16 = trunc <4 x i8> %I14 to <4 x i1> ; truncate to a vector of i1 elements
+  %Sl17 = select i1 true, i32 %3, i32 0
+  %Cmp18 = icmp sgt <2 x i64> %I, %Shuff
+  %L19 = load <16 x i64>, <16 x i64>* %Sl9
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  %E20 = extractelement <2 x i1> %Cmp18, i32 0
+  br i1 %E20, label %CF, label %CF34
+
+CF34:                                             ; preds = %CF34, %CF35
+  %Shuff21 = shufflevector <8 x i16> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> ; NOTE(review): mask value appears to be missing after the mask type - TODO restore
+  %I22 = insertelement <4 x i16> zeroinitializer, i16 %E6, i32 3
+  %B23 = srem i32 %Sl17, -1
+  %Tr24 = trunc <4 x i8> %B15 to <4 x i1> ; truncate to a vector of i1 elements
+  %Sl25 = select i1 true, i16 -20525, i16 -20525
+  %Cmp26 = icmp slt <4 x i1> %Tr24, %Tr16
+  %L27 = load <16 x i64>, <16 x i64>* %Sl9
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  %E28 = extractelement <4 x i1> %Cmp26, i32 3
+  br i1 %E28, label %CF34, label %CF36
+
+CF36:                                             ; preds = %CF34
+  %Shuff29 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %Shuff21, <8 x i32> ; NOTE(review): mask value appears to be missing after the mask type - TODO restore
+  %I30 = insertelement <4 x i1> %Tr24, i1 true, i32 0
+  %B31 = mul i32 0, 0
+  %Se = sext <4 x i1> %Tr16 to <4 x i8>
+  %Sl32 = select i1 true, i32 244675, i32 %Sl17
+  %Cmp33 = icmp ne <4 x i8> %Se, %Shuff13
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  store <16 x i64> %L11, <16 x i64>* %Sl9
+  ret void
+}