Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -627,6 +627,7 @@ SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); + SDValue ScalarizeVecOp_VSETCC(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -484,6 +484,9 @@ case ISD::VSELECT: Res = ScalarizeVecOp_VSELECT(N); break; + case ISD::SETCC: + Res = ScalarizeVecOp_VSETCC(N); + break; case ISD::STORE: Res = ScalarizeVecOp_STORE(cast(N), OpNo); break; @@ -560,6 +563,36 @@ N->getOperand(2)); } +/// If the operand is a vector that needs to be scalarized then the +/// result must be v1i1, so just convert to a scalar SETCC and wrap +/// with a scalar_to_vector since the res type is legal if we got here +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type"); + + EVT VT = N->getValueType(0); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + + EVT OpVT = N->getOperand(0).getValueType(); + EVT NVT = VT.getVectorElementType(); + SDLoc DL(N); + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + + Res = DAG.getNode(ExtendCode, DL, NVT, Res); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res); +} + /// If the value to store is a vector that needs to be scalarized, it must be /// <1 x ty>. Just store the element. SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ Index: test/CodeGen/X86/pr34177.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/pr34177.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mattr=+avx512f | FileCheck %s +; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test() local_unnamed_addr { +; CHECK-LABEL: test: +; CHECK: # BB#0: +; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,3] +; CHECK-NEXT: vpextrq $1, %xmm0, %rax +; CHECK-NEXT: vmovq %xmm0, %rcx +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: fld1 +; CHECK-NEXT: fldz +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: fcmove %st(2), %st(0) +; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: fcmove %st(3), %st(0) +; CHECK-NEXT: cmpq %rax, %rax +; CHECK-NEXT: fld %st(2) +; CHECK-NEXT: fcmove %st(4), %st(0) +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmpq %rax, %rax +; CHECK-NEXT: fld %st(3) +; CHECK-NEXT: fcmove %st(5), %st(0) +; CHECK-NEXT: fstp %st(5) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fadd %st(3) +; CHECK-NEXT: fxch %st(4) +; CHECK-NEXT: fadd %st(3) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fadd %st(3) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: faddp %st(3) +; CHECK-NEXT: fxch %st(3) +; CHECK-NEXT: fstpt (%rax) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpt (%rax) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpt (%rax) +; CHECK-NEXT: fstpt (%rax) + %1 = icmp eq <4 x i64> , undef + %2 = select <4 x i1> %1, <4 x x86_fp80> , <4 x x86_fp80> zeroinitializer + %3 = fadd <4 x x86_fp80> undef, %2 + %4 = shufflevector <4 x x86_fp80> %3, <4 x x86_fp80> undef, <8 x i32> + store <8 x x86_fp80> %4, <8 x x86_fp80>* undef, align 16 + unreachable +} +