diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39633,10 +39633,14 @@ // vselect Cond, 000..., X -> andn Cond, X if (TValIsAllZeros) { - MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64); - SDValue CastCond = DAG.getBitcast(AndNVT, Cond); - SDValue CastRHS = DAG.getBitcast(AndNVT, RHS); - SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS); + SDValue CastRHS = DAG.getBitcast(CondVT, RHS); + SDValue AndN; + // The canonical form differs for i1 vectors - x86andnp is not used + if (CondVT.getScalarType() == MVT::i1) + AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), + CastRHS); + else + AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS); return DAG.getBitcast(VT, AndN); } diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll --- a/llvm/test/CodeGen/X86/avx512-select.ll +++ b/llvm/test/CodeGen/X86/avx512-select.ll @@ -705,3 +705,64 @@ store <1 x i1> %c, <1 x i1>* %x ret void } + +; Regression test from https://github.com/JuliaLang/julia/issues/36955 +define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) { +; X86-AVX512F-LABEL: julia_issue36955: +; X86-AVX512F: # %bb.0: +; X86-AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 +; X86-AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; X86-AVX512F-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; X86-AVX512F-NEXT: vcmplepd %zmm2, %zmm1, %k1 +; X86-AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} +; X86-AVX512F-NEXT: korw %k0, %k1, %k0 +; X86-AVX512F-NEXT: kmovw %k0, %eax +; X86-AVX512F-NEXT: # kill: def $al killed $al killed $eax +; X86-AVX512F-NEXT: vzeroupper +; X86-AVX512F-NEXT: retl +; +; X64-AVX512F-LABEL: julia_issue36955: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 +; X64-AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 +; X64-AVX512F-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; X64-AVX512F-NEXT: vcmplepd %zmm2, %zmm1, %k1 +; X64-AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} +; X64-AVX512F-NEXT: korw %k0, %k1, %k0 +; X64-AVX512F-NEXT: kmovw %k0, %eax +; X64-AVX512F-NEXT: # kill: def $al killed $al killed $eax +; X64-AVX512F-NEXT: vzeroupper +; X64-AVX512F-NEXT: retq +; +; X86-AVX512BW-LABEL: julia_issue36955: +; X86-AVX512BW: # %bb.0: +; X86-AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 +; X86-AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X86-AVX512BW-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; X86-AVX512BW-NEXT: vcmplepd %zmm3, %zmm1, %k1 +; X86-AVX512BW-NEXT: vpcmpgtw %zmm0, %zmm2, %k0 {%k1} +; X86-AVX512BW-NEXT: korw %k0, %k1, %k0 +; X86-AVX512BW-NEXT: kmovd %k0, %eax +; X86-AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; X86-AVX512BW-NEXT: vzeroupper +; X86-AVX512BW-NEXT: retl +; +; X64-AVX512BW-LABEL: julia_issue36955: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vpsllw $15, %xmm0, %xmm0 +; X64-AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-AVX512BW-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; X64-AVX512BW-NEXT: vcmplepd %zmm3, %zmm1, %k1 +; X64-AVX512BW-NEXT: vpcmpgtw %zmm0, %zmm2, %k0 {%k1} +; X64-AVX512BW-NEXT: korw %k0, %k1, %k0 +; X64-AVX512BW-NEXT: kmovd %k0, %eax +; X64-AVX512BW-NEXT: # kill: def $al killed $al killed $eax +; X64-AVX512BW-NEXT: vzeroupper +; X64-AVX512BW-NEXT: retq + %fcmp = fcmp ugt <8 x double> %a, zeroinitializer + %xor = xor <8 x i1> %fcmp, + %select1 = select <8 x i1> %fcmp, <8 x i1> zeroinitializer, <8 x i1> %mask + %select2 = select <8 x i1> %xor, <8 x i1> , <8 x i1> %select1 + %ret = bitcast <8 x i1> %select2 to i8 + ret i8 %ret +}