Scalarize VSELECT whose condition type does not itself get scalarized.

ScalarizeVecRes_VSELECT previously called GetScalarizedVector() on the
condition operand unconditionally. The patch only does so when the condition's
type action is TypeScalarizeVector; otherwise element 0 is extracted with
EXTRACT_VECTOR_ELT. On X86, EXTRACT_SUBVECTOR producing v1i1 is marked Custom
and lowered to EXTRACT_VECTOR_ELT + SCALAR_TO_VECTOR. Regression test:
test/CodeGen/X86/pr33349.ll (KNL and SKX check prefixes).

Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -302,7 +302,21 @@
 }
 
 SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
-  SDValue Cond = GetScalarizedVector(N->getOperand(0));
+  SDValue Cond = N->getOperand(0);
+  EVT OpVT = Cond.getValueType();
+  SDLoc DL(N);
+  // The vselect result and true/false value operands need scalarizing, but
+  // it's not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
+  // See the similar logic in ScalarizeVecRes_VSETCC.
+  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+    Cond = GetScalarizedVector(Cond);
+  } else {
+    EVT VT = OpVT.getVectorElementType();
+    Cond = DAG.getNode(
+        ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+        DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+  }
+
   SDValue LHS = GetScalarizedVector(N->getOperand(1));
   TargetLowering::BooleanContent ScalarBool =
       TLI.getBooleanContents(false, false);
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -1388,7 +1388,7 @@
     // (result) is 256-bit but the source is 512-bit wide.
     // 128-bit was made Custom under AVX1.
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
-                     MVT::v8f32, MVT::v4f64 })
+                     MVT::v8f32, MVT::v4f64, MVT::v1i1 })
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
     for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1,
                      MVT::v32i1, MVT::v64i1 })
@@ -14532,6 +14532,21 @@
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
   MVT ResVT = Op.getSimpleValueType();
 
+  // When v1i1 is legal a scalarization of a vselect with a vXi1 Cond
+  // would result in: v1i1 = extract_subvector(vXi1, idx).
+  // Lower these into extract_vector_elt which is already selectable.
+  if (ResVT == MVT::v1i1) {
+    assert(Subtarget.hasAVX512() &&
+           "Boolean EXTRACT_SUBVECTOR requires AVX512");
+
+    MVT EltVT = ResVT.getVectorElementType();
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    MVT LegalVT =
+        (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
+    SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
+  }
+
   assert((In.getSimpleValueType().is256BitVector() ||
           In.getSimpleValueType().is512BitVector()) &&
          "Can only extract from 256-bit or 512-bit vectors");
Index: llvm/trunk/test/CodeGen/X86/pr33349.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr33349.ll
+++ llvm/trunk/test/CodeGen/X86/pr33349.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=SKX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr {
+; KNL-LABEL: test:
+; KNL:       # BB#0: # %bb
+; KNL-NEXT:    vpextrb $0, %xmm0, %eax
+; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    fld1
+; KNL-NEXT:    fldz
+; KNL-NEXT:    fld %st(0)
+; KNL-NEXT:    fcmovne %st(2), %st(0)
+; KNL-NEXT:    vpextrb $4, %xmm0, %eax
+; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    fld %st(1)
+; KNL-NEXT:    fcmovne %st(3), %st(0)
+; KNL-NEXT:    vpextrb $8, %xmm0, %eax
+; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    fld %st(2)
+; KNL-NEXT:    fcmovne %st(4), %st(0)
+; KNL-NEXT:    vpextrb $12, %xmm0, %eax
+; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    fxch %st(3)
+; KNL-NEXT:    fcmovne %st(4), %st(0)
+; KNL-NEXT:    fstp %st(4)
+; KNL-NEXT:    fxch %st(3)
+; KNL-NEXT:    fstpt 30(%rdi)
+; KNL-NEXT:    fxch %st(1)
+; KNL-NEXT:    fstpt 20(%rdi)
+; KNL-NEXT:    fxch %st(1)
+; KNL-NEXT:    fstpt 10(%rdi)
+; KNL-NEXT:    fstpt (%rdi)
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test:
+; SKX:       # BB#0: # %bb
+; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
+; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
+; SKX-NEXT:    kshiftrw $2, %k0, %k1
+; SKX-NEXT:    kshiftlw $15, %k1, %k2
+; SKX-NEXT:    kshiftrw $15, %k2, %k2
+; SKX-NEXT:    kshiftlw $15, %k2, %k2
+; SKX-NEXT:    kshiftrw $15, %k2, %k2
+; SKX-NEXT:    kmovd %k2, %eax
+; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    fld1
+; SKX-NEXT:    fldz
+; SKX-NEXT:    fld %st(0)
+; SKX-NEXT:    fcmovne %st(2), %st(0)
+; SKX-NEXT:    kshiftlw $14, %k1, %k1
+; SKX-NEXT:    kshiftrw $15, %k1, %k1
+; SKX-NEXT:    kshiftlw $15, %k1, %k1
+; SKX-NEXT:    kshiftrw $15, %k1, %k1
+; SKX-NEXT:    kmovd %k1, %eax
+; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    fld %st(1)
+; SKX-NEXT:    fcmovne %st(3), %st(0)
+; SKX-NEXT:    kshiftlw $15, %k0, %k1
+; SKX-NEXT:    kshiftrw $15, %k1, %k1
+; SKX-NEXT:    kshiftlw $15, %k1, %k1
+; SKX-NEXT:    kshiftrw $15, %k1, %k1
+; SKX-NEXT:    kmovd %k1, %eax
+; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    fld %st(2)
+; SKX-NEXT:    fcmovne %st(4), %st(0)
+; SKX-NEXT:    kshiftlw $14, %k0, %k0
+; SKX-NEXT:    kshiftrw $15, %k0, %k0
+; SKX-NEXT:    kshiftlw $15, %k0, %k0
+; SKX-NEXT:    kshiftrw $15, %k0, %k0
+; SKX-NEXT:    kmovd %k0, %eax
+; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    fxch %st(3)
+; SKX-NEXT:    fcmovne %st(4), %st(0)
+; SKX-NEXT:    fstp %st(4)
+; SKX-NEXT:    fxch %st(3)
+; SKX-NEXT:    fstpt 10(%rdi)
+; SKX-NEXT:    fxch %st(1)
+; SKX-NEXT:    fstpt (%rdi)
+; SKX-NEXT:    fxch %st(1)
+; SKX-NEXT:    fstpt 30(%rdi)
+; SKX-NEXT:    fstpt 20(%rdi)
+; SKX-NEXT:    retq
+bb:
+  %tmp = select <4 x i1> %m, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
+  store <4 x x86_fp80> %tmp, <4 x x86_fp80>* %p, align 16
+  ret void
+}
+