diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24564,6 +24564,13 @@ SDValue LHS = Src.getOperand(0); SDValue RHS = Src.getOperand(1); EVT LHSVT = LHS.getValueType(); + if (LHSVT.getScalarSizeInBits() > 64) + return SDValue(); + if (LHSVT.isFloatingPoint()) { + LHSVT = LHSVT.changeVectorElementTypeToInteger(); + LHS = DAG.getBitcast(LHSVT, LHS); + RHS = DAG.getBitcast(LHSVT, RHS); + } ISD::CondCode SrcCC = cast<CondCodeSDNode>(Src.getOperand(2))->get(); if (SrcCC == (CmpNull ? ISD::SETNE : ISD::SETEQ) && llvm::has_single_bit(LHSVT.getSizeInBits())) { diff --git a/llvm/test/CodeGen/X86/pr53419.ll b/llvm/test/CodeGen/X86/pr53419.ll --- a/llvm/test/CodeGen/X86/pr53419.ll +++ b/llvm/test/CodeGen/X86/pr53419.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64 -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=X64,SSE42 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=X64,AVX ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86 
declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>) @@ -314,3 +314,263 @@ %all_eq = icmp eq i64 %lhs, %rhs ret i1 %all_eq } + +define void @vector_version_v8f32(<8 x float> %a) { +; SSE2-LABEL: vector_version_v8f32: +; SSE2: # %bb.0: # %bb +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 +; SSE2-NEXT: movmskps %xmm1, %eax +; SSE2-NEXT: xorl $15, %eax +; SSE2-NEXT: jne .LBB12_3 +; SSE2-NEXT: # %bb.1: # %bb +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: testb %al, %al +; SSE2-NEXT: jne .LBB12_3 +; SSE2-NEXT: # %bb.2: # %bb +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: testb %al, %al +; SSE2-NEXT: je .LBB12_3 +; SSE2-NEXT: # %bb.4: # %if.end +; SSE2-NEXT: .LBB12_3: # %if.then +; +; SSE42-LABEL: vector_version_v8f32: +; SSE42: # %bb.0: # %bb +; SSE42-NEXT: por %xmm1, %xmm0 +; SSE42-NEXT: ptest %xmm0, %xmm0 +; SSE42-NEXT: jne .LBB12_3 +; SSE42-NEXT: # %bb.1: # %bb +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: testb %al, %al +; SSE42-NEXT: jne .LBB12_3 +; SSE42-NEXT: # %bb.2: # %bb +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: testb %al, %al +; SSE42-NEXT: je .LBB12_3 +; SSE42-NEXT: # %bb.4: # %if.end +; SSE42-NEXT: .LBB12_3: # %if.then +; +; AVX-LABEL: vector_version_v8f32: +; AVX: # %bb.0: # %bb +; AVX-NEXT: vptest %ymm0, %ymm0 +; AVX-NEXT: jne .LBB12_3 +; AVX-NEXT: # %bb.1: # %bb +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: testb %al, %al +; AVX-NEXT: jne .LBB12_3 +; AVX-NEXT: # %bb.2: # %bb +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: testb %al, %al +; AVX-NEXT: je .LBB12_3 +; AVX-NEXT: # %bb.4: # %if.end +; AVX-NEXT: .LBB12_3: # %if.then +; +; X86-LABEL: vector_version_v8f32: +; X86: # %bb.0: # %bb +; X86-NEXT: vptest %ymm0, %ymm0 +; X86-NEXT: jne .LBB12_3 +; X86-NEXT: # %bb.1: # %bb +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %al, %al +; X86-NEXT: jne .LBB12_3 +; X86-NEXT: # %bb.2: # %bb +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testb %al, %al +; X86-NEXT: je .LBB12_3 +; X86-NEXT: # %bb.4: # %if.end +; X86-NEXT: .LBB12_3: # 
%if.then +bb: + %0 = fcmp nnan une <8 x float> %a, zeroinitializer + %1 = bitcast <8 x i1> %0 to i8 + %2 = icmp ne i8 %1, 0 + %op.rdx = or i1 %2, poison + %op.rdx93 = select i1 %op.rdx, i1 true, i1 poison + br i1 %op.rdx93, label %if.then, label %if.end + +if.then: ; preds = %entry + unreachable + +if.end: ; preds = %entry + unreachable +} + +define i1 @vector_version_v4i256(<4 x i256> %0) nounwind { +; SSE2-LABEL: vector_version_v4i256: +; SSE2: # %bb.0: # %bb +; SSE2-NEXT: pushq %rbp +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax +; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE2-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE2-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; SSE2-NEXT: xorl %ebp, %ebp +; SSE2-NEXT: orq %rbx, %r11 +; SSE2-NEXT: setne %bpl +; SSE2-NEXT: negl %ebp +; SSE2-NEXT: movd %ebp, %xmm0 +; SSE2-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE2-NEXT: orq {{[0-9]+}}(%rsp), %rax +; SSE2-NEXT: xorl %r11d, %r11d +; SSE2-NEXT: orq %r10, %rax +; SSE2-NEXT: setne %r11b +; SSE2-NEXT: negl %r11d +; SSE2-NEXT: movd %r11d, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: orq %rcx, %rsi +; SSE2-NEXT: orq %rdx, %rdi +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: orq %rsi, %rdi +; SSE2-NEXT: setne %al +; SSE2-NEXT: negl %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; SSE2-NEXT: orq {{[0-9]+}}(%rsp), %r8 +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: orq %r9, %r8 +; SSE2-NEXT: setne %al +; SSE2-NEXT: negl %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movmskps %xmm0, %eax +; SSE2-NEXT: testl %eax, %eax +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %rbp +; SSE2-NEXT: retq +; +; SSE42-LABEL: vector_version_v4i256: +; SSE42: # %bb.0: # %bb +; SSE42-NEXT: pushq %r14 
+; SSE42-NEXT: pushq %rbx +; SSE42-NEXT: movq {{[0-9]+}}(%rsp), %rax +; SSE42-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE42-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; SSE42-NEXT: movq {{[0-9]+}}(%rsp), %rbx +; SSE42-NEXT: orq {{[0-9]+}}(%rsp), %rbx +; SSE42-NEXT: orq {{[0-9]+}}(%rsp), %r11 +; SSE42-NEXT: xorl %r14d, %r14d +; SSE42-NEXT: orq %rbx, %r11 +; SSE42-NEXT: setne %r14b +; SSE42-NEXT: negq %r14 +; SSE42-NEXT: movq %r14, %xmm1 +; SSE42-NEXT: orq {{[0-9]+}}(%rsp), %r10 +; SSE42-NEXT: orq {{[0-9]+}}(%rsp), %rax +; SSE42-NEXT: xorl %r11d, %r11d +; SSE42-NEXT: orq %r10, %rax +; SSE42-NEXT: setne %r11b +; SSE42-NEXT: negq %r11 +; SSE42-NEXT: movq %r11, %xmm0 +; SSE42-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE42-NEXT: orq %rcx, %rsi +; SSE42-NEXT: orq %rdx, %rdi +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: orq %rsi, %rdi +; SSE42-NEXT: setne %al +; SSE42-NEXT: negq %rax +; SSE42-NEXT: movq %rax, %xmm1 +; SSE42-NEXT: orq {{[0-9]+}}(%rsp), %r9 +; SSE42-NEXT: orq {{[0-9]+}}(%rsp), %r8 +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: orq %r9, %r8 +; SSE42-NEXT: setne %al +; SSE42-NEXT: negq %rax +; SSE42-NEXT: movq %rax, %xmm2 +; SSE42-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE42-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE42-NEXT: movmskps %xmm1, %eax +; SSE42-NEXT: testl %eax, %eax +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: popq %rbx +; SSE42-NEXT: popq %r14 +; SSE42-NEXT: retq +; +; X86-LABEL: vector_version_v4i256: +; X86: # %bb.0: # %bb +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl %ecx, %ebx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: setne %cl +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: negl %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: orl %esi, %ebp +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: orl %edi, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: setne %bl +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl %edi, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: negl %ebx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl %edx, %edi +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: orl %esi, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: setne %dl +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: orl %edi, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: negl %edx +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: orl %esi, %edi +; X86-NEXT: vmovd %ebx, %xmm0 +; X86-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; X86-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: negl %eax +; X86-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; X86-NEXT: vmovmskps %xmm0, %eax +; X86-NEXT: testl %eax, %eax +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +bb: + %1 = icmp ne <4 x i256> %0, zeroinitializer + %2 = bitcast <4 x 
i1> %1 to i4 + %3 = icmp eq i4 %2, 0 + br i1 %3, label %l2, label %l1 + +l1: ; preds = %entry + ret i1 false + +l2: ; preds = %entry + ret i1 false +}