diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1620,8 +1620,9 @@ EVT OpTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::VSELECT) - if (SDValue Res = WidenVSELECTAndMask(N)) - return Res; + if (SDValue Res = WidenVSELECTMask(N)) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + Res, N->getOperand(1), N->getOperand(2)); // Promote all the way up to the canonical SetCC type. EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -864,7 +864,7 @@ SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_ScalarOp(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); - SDValue WidenVSELECTAndMask(SDNode *N); + SDValue WidenVSELECTMask(SDNode *N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); SDValue WidenVecRes_STRICT_FSETCC(SDNode* N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -514,8 +514,8 @@ SDValue Cond = N->getOperand(0); CL = CH = Cond; if (Cond.getValueType().isVector()) { - if (SDValue Res = WidenVSELECTAndMask(N)) - std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl); + if (SDValue Res = WidenVSELECTMask(N)) + std::tie(CL, CH) = DAG.SplitVector(Res, dl); // Check if there are already splitted versions of the vector available and // use those instead of splitting the mask operand again. 
else if (getTypeAction(Cond.getValueType()) == diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3968,11 +3968,11 @@ return Mask; } -// This method tries to handle VSELECT and its mask by legalizing operands -// (which may require widening) and if needed adjusting the mask vector type -// to match that of the VSELECT. Without it, many cases end up with -// scalarization of the SETCC, with many unnecessary instructions. -SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { +// This method tries to handle some special cases for the vselect mask +// and, if needed, adjusts the mask vector type to match that of the VSELECT. +// Without it, many cases end up with scalarization of the SETCC, with many +// unnecessary instructions. +SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) { LLVMContext &Ctx = *DAG.getContext(); SDValue Cond = N->getOperand(0); @@ -4019,14 +4019,9 @@ return SDValue(); } - // Get the VT and operands for VSELECT, and widen if needed. - SDValue VSelOp1 = N->getOperand(1); - SDValue VSelOp2 = N->getOperand(2); - if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) { + // Widen the vselect result type if needed. + if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT); - VSelOp1 = GetWidenedVector(VSelOp1); - VSelOp2 = GetWidenedVector(VSelOp2); - } // The mask of the VSELECT should have integer elements. 
EVT ToMaskVT = VSelVT; @@ -4075,7 +4070,7 @@ } else return SDValue(); - return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2); + return Mask; } SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { @@ -4085,8 +4080,13 @@ SDValue Cond1 = N->getOperand(0); EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { - if (SDValue Res = WidenVSELECTAndMask(N)) - return Res; + if (SDValue WideCond = WidenVSELECTMask(N)) { + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDValue InOp2 = GetWidenedVector(N->getOperand(2)); + assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); + return DAG.getNode(N->getOpcode(), SDLoc(N), + WidenVT, WideCond, InOp1, InOp2); + } EVT CondEltVT = CondVT.getVectorElementType(); EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), diff --git a/llvm/test/CodeGen/SystemZ/pr47019.ll b/llvm/test/CodeGen/SystemZ/pr47019.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/pr47019.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -O3 | FileCheck %s + +target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" + +@g_150 = external dso_local unnamed_addr global [9 x i32], align 4 +@g_317 = external dso_local unnamed_addr global [1 x [10 x [8 x i32]]], align 4 + +define dso_local void @main() local_unnamed_addr { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: strl %r0, g_317+296 +; CHECK-NEXT: lhi %r0, 6 +; CHECK-NEXT: strl %r0, g_150+12 +bb: + br label %bb1 + +bb1: ; preds = %bb1, %bb + br i1 undef, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + store i32 0, i32* getelementptr inbounds ([1 x [10 x [8 x i32]]], [1 x [10 x [8 x i32]]]* @g_317, i64 0, i64 0, i64 9, i64 2), align 4 + %i = load i32, i32* getelementptr inbounds ([1 x [10 x [8 x i32]]], [1 x [10 x [8 x i32]]]* @g_317, i64 0, i64 0, i64 9, i64 2), align 4 + 
%i3 = insertelement <8 x i32> undef, i32 %i, i32 0 + %i4 = shufflevector <8 x i32> %i3, <8 x i32> undef, <8 x i32> zeroinitializer + %i5 = add nsw <8 x i8> zeroinitializer, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6> + %i6 = zext <8 x i8> %i5 to <8 x i32> + %i7 = icmp slt <8 x i32> undef, %i6 + %i8 = or <8 x i1> zeroinitializer, %i7 + %i9 = select <8 x i1> %i8, <8 x i32> zeroinitializer, <8 x i32> %i4 + %i10 = shl <8 x i32> %i6, %i9 + %i11 = xor <8 x i32> %i10, zeroinitializer + %i12 = xor <8 x i32> %i11, zeroinitializer + %i13 = xor <8 x i32> %i12, zeroinitializer + %i14 = extractelement <8 x i32> %i13, i32 0 + %i15 = xor i32 %i14, 0 + %i16 = xor i32 %i15, 0 + %i17 = shl i32 %i16, 24 + %i18 = ashr exact i32 %i17, 24 + store i32 %i18, i32* getelementptr inbounds ([9 x i32], [9 x i32]* @g_150, i64 0, i64 3), align 4 + unreachable +} + +attributes #0 = { "use-soft-float"="false" }