Patch summary (text ahead of the first "diff --git" header is not part of any hunk):
  * llvm/lib/Target/X86/X86ISelLowering.cpp: two return statements that produced
    the vXi1 compare result type via VT.changeVectorElementType(MVT::i1) now call
    EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()) instead.
  * Adds llvm/test/CodeGen/X86/vec3-setcc-crash.ll, which runs llc with
    -mattr=+avx512vl for i686 and x86_64 on an icmp eq + select over <3 x i32>.
  NOTE(review): presumably the old call could not build a non-simple type such as
  <3 x i1> from a simple VT -- confirm against EVT::changeVectorElementType.
  NOTE(review): line breaks, context-line indentation and blank context lines were
  restored from the hunk line counts; verify against the tree before applying.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2221,7 +2221,7 @@
 
     // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
     if (LegalVT.getSimpleVT().is512BitVector())
-      return VT.changeVectorElementType(MVT::i1);
+      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
 
     if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
       // If we legalized to less than a 512-bit vector, then we will use a vXi1
@@ -2229,7 +2229,7 @@
       // vXi16/vXi8.
       MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
       if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
-        return VT.changeVectorElementType(MVT::i1);
+        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
     }
   }
 
diff --git a/llvm/test/CodeGen/X86/vec3-setcc-crash.ll b/llvm/test/CodeGen/X86/vec3-setcc-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vec3-setcc-crash.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64
+
+define void @vec3_setcc_crash(<3 x i32>* %in, <3 x i32>* %out) {
+; X86-LABEL: vec3_setcc_crash:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: vmovdqa (%ecx), %xmm0
+; X86-NEXT: vptestnmd %xmm0, %xmm0, %k1
+; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vpextrd $2, %xmm0, 8(%eax)
+; X86-NEXT: vpextrd $1, %xmm0, 4(%eax)
+; X86-NEXT: vmovd %xmm0, (%eax)
+; X86-NEXT: retl
+;
+; X64-LABEL: vec3_setcc_crash:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa (%rdi), %xmm0
+; X64-NEXT: vptestnmd %xmm0, %xmm0, %k1
+; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vpextrd $2, %xmm0, 8(%rsi)
+; X64-NEXT: vmovq %xmm0, (%rsi)
+; X64-NEXT: retq
+  %a = load <3 x i32>, <3 x i32>* %in
+  %cmp = icmp eq <3 x i32> %a, zeroinitializer
+  %c = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> zeroinitializer
+  store <3 x i32> %c, <3 x i32>* %out
+  ret void
+}