Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -1454,6 +1454,8 @@ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom); setOperationAction(ISD::SELECT, MVT::v32i1, Custom); @@ -1539,29 +1541,24 @@ addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v2i1, &X86::VK2RegClass); - setOperationAction(ISD::ADD, MVT::v2i1, Expand); - setOperationAction(ISD::ADD, MVT::v4i1, Expand); - setOperationAction(ISD::SUB, MVT::v2i1, Expand); - setOperationAction(ISD::SUB, MVT::v4i1, Expand); - setOperationAction(ISD::MUL, MVT::v2i1, Expand); - setOperationAction(ISD::MUL, MVT::v4i1, Expand); - - setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); - setOperationAction(ISD::SETCC, MVT::v4i1, Custom); - setOperationAction(ISD::SETCC, MVT::v2i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); + for (auto VT : { MVT::v2i1, MVT::v4i1 }) { + setOperationAction(ISD::ADD, VT, Expand); + setOperationAction(ISD::SUB, VT, Expand); + setOperationAction(ISD::MUL, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); + + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + } + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); - setOperationAction(ISD::SELECT, MVT::v4i1, Custom); - setOperationAction(ISD::SELECT, MVT::v2i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom); - setOperationAction(ISD::VSELECT, MVT::v2i1, Expand); - setOperationAction(ISD::VSELECT, MVT::v4i1, Expand); for (auto VT : { MVT::v4i32, MVT::v8i32 }) { setOperationAction(ISD::AND, VT, Legal); @@ -12483,7 +12480,8 @@ } unsigned IdxVal = cast(Idx)->getZExtValue(); - if (!Subtarget.hasDQI() && (VecVT.getVectorNumElements() <= 8)) { + if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) || + (VecVT.getVectorNumElements() < 8)) { // Use kshiftlw/rw instruction. VecVT = MVT::v16i1; Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, @@ -12492,8 +12490,9 @@ DAG.getIntPtrConstant(0, dl)); } unsigned MaxSift = VecVT.getVectorNumElements() - 1; - Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, - DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); + if (MaxSift - IdxVal) + Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, + DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec, DAG.getConstant(MaxSift, dl, MVT::i8)); return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec, Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -2229,18 +2229,12 @@ def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; -let Predicates = [HasAVX512] in { - def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), - (COPY_TO_REGCLASS VK16:$src, VK1)>; - def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), - (COPY_TO_REGCLASS VK8:$src, VK1)>; -} -let Predicates = [HasBWI] in { - def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))), - (COPY_TO_REGCLASS VK32:$src, VK1)>; - def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))), - (COPY_TO_REGCLASS VK64:$src, VK1)>; -} +def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))), (COPY_TO_REGCLASS VK64:$src, VK1)>; +def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))), (COPY_TO_REGCLASS VK32:$src, VK1)>; +def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>; +def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), (COPY_TO_REGCLASS VK8:$src, VK1)>; +def : Pat<(i1 (X86Vextract VK4:$src, (iPTR 0))), (COPY_TO_REGCLASS VK4:$src, VK1)>; +def : Pat<(i1 (X86Vextract VK2:$src, (iPTR 0))), (COPY_TO_REGCLASS VK2:$src, VK1)>; // Mask unary operation // - KNOT Index: llvm/trunk/test/CodeGen/X86/avx512-ext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-ext.ll +++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll @@ -1567,205 +1567,201 @@ ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %r14d -; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2 -; KNL-NEXT: kshiftlw $0, %k0, %k0 +; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vmovd %r15d, %xmm4 ; KNL-NEXT: kmovw %k0, %r15d -; KNL-NEXT: kshiftlw $14, %k2, %k0 +; KNL-NEXT: kshiftlw $14, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $1, %ecx, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $15, %k2, %k0 +; KNL-NEXT: kshiftlw $15, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $2, %r12d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $13, %k2, %k0 +; KNL-NEXT: kshiftlw $13, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %r12d -; KNL-NEXT: kshiftlw $12, %k2, %k0 +; KNL-NEXT: kshiftlw $12, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $4, %r13d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: kshiftlw $11, %k2, %k0 +; KNL-NEXT: kshiftlw $11, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload ; KNL-NEXT: kmovw %k0, %r13d -; KNL-NEXT: kshiftlw $10, %k2, %k0 +; KNL-NEXT: kshiftlw $10, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $6, %esi, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %esi ; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill -; KNL-NEXT: kshiftlw $9, %k2, %k0 +; KNL-NEXT: kshiftlw $9, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $7, %edi, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %esi -; KNL-NEXT: kshiftlw $8, %k2, %k0 +; KNL-NEXT: kshiftlw $8, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $8, %r8d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %edi -; KNL-NEXT: kshiftlw $7, %k2, %k0 +; KNL-NEXT: kshiftlw $7, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $9, %r9d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %r8d -; KNL-NEXT: kshiftlw $6, %k2, %k0 +; KNL-NEXT: kshiftlw $6, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $10, %r10d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %r9d -; KNL-NEXT: kshiftlw $5, %k2, %k0 +; KNL-NEXT: kshiftlw $5, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $11, %r11d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: kshiftlw $4, %k2, %k0 +; KNL-NEXT: kshiftlw $4, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $12, %ebx, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %ebx -; KNL-NEXT: kshiftlw $3, %k2, %k0 +; KNL-NEXT: kshiftlw $3, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $13, %ebp, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %ebp -; KNL-NEXT: kshiftlw $2, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $14, %r14d, %xmm4, %xmm4 -; KNL-NEXT: kmovw %k0, %r11d -; KNL-NEXT: kshiftlw $1, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $15, %r15d, %xmm4, %xmm4 -; KNL-NEXT: kmovw %k0, %r14d -; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1 -; KNL-NEXT: kshiftlw $0, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vmovd %eax, %xmm5 -; KNL-NEXT: kmovw %k0, %r15d -; KNL-NEXT: kshiftlw $14, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $1, %ecx, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $15, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $2, %r12d, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $13, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $3, %edx, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %r12d -; KNL-NEXT: kshiftlw $12, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: kshiftlw $11, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload -; KNL-NEXT: kmovw %k0, %r13d -; KNL-NEXT: kshiftlw $10, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $6, %esi, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %esi -; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill -; KNL-NEXT: kshiftlw $9, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $7, %edi, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %esi -; KNL-NEXT: kshiftlw $8, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $8, %r8d, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %edi -; KNL-NEXT: kshiftlw $7, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $9, %r9d, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %r8d -; KNL-NEXT: kshiftlw $6, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $10, %r10d, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %r9d -; KNL-NEXT: kshiftlw $5, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $11, %ebx, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %ebx -; KNL-NEXT: kshiftlw $4, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $12, %ebp, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %ebp -; KNL-NEXT: kshiftlw $3, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $13, %r11d, %xmm5, %xmm5 -; KNL-NEXT: kmovw %k0, %r10d ; KNL-NEXT: kshiftlw $2, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $14, %r14d, %xmm5, %xmm5 +; KNL-NEXT: vpinsrb $14, %r14d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %r11d ; KNL-NEXT: kshiftlw $1, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $15, %r15d, %xmm5, %xmm5 +; KNL-NEXT: vpinsrb $15, %r15d, %xmm4, %xmm4 ; KNL-NEXT: kmovw %k0, %r14d -; KNL-NEXT: vptestmd %zmm7, %zmm7, %k0 -; KNL-NEXT: kshiftlw $0, %k1, %k1 +; KNL-NEXT: vptestmd %zmm6, %zmm6, %k0 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %eax, %xmm6 +; KNL-NEXT: vmovd %eax, %xmm5 ; KNL-NEXT: kmovw %k1, %r15d ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $1, %ecx, %xmm6, %xmm6 +; KNL-NEXT: vpinsrb $1, %ecx, %xmm5, %xmm5 ; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r12d +; KNL-NEXT: vpinsrb $2, %r12d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $3, %edx, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %edx +; KNL-NEXT: vpinsrb $3, %edx, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %r12d ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $4, %r13d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r13d +; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %edx ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload +; KNL-NEXT: kmovw %k1, %r13d ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $6, %esi, %xmm6, %xmm6 +; KNL-NEXT: vpinsrb $6, %esi, %xmm5, %xmm5 ; KNL-NEXT: kmovw %k1, %esi +; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $7, %edi, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %edi +; KNL-NEXT: vpinsrb $7, %edi, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %esi ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $8, %r8d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r8d +; KNL-NEXT: vpinsrb $8, %r8d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %edi ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $9, %r9d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r9d +; KNL-NEXT: vpinsrb $9, %r9d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %r8d ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $10, %ebx, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %ebx +; KNL-NEXT: vpinsrb $10, %r10d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %r9d ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $11, %ebp, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %ebp +; KNL-NEXT: vpinsrb $11, %ebx, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %ebx ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r10d +; KNL-NEXT: vpinsrb $12, %ebp, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %ebp ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $13, %r11d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r11d +; KNL-NEXT: vpinsrb $13, %r11d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %r10d ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $14, %r14d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r14d +; KNL-NEXT: vpinsrb $14, %r14d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %r11d ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6 -; KNL-NEXT: kmovw %k1, %r15d -; KNL-NEXT: kshiftlw $0, %k0, %k0 +; KNL-NEXT: vpinsrb $15, %r15d, %xmm5, %xmm5 +; KNL-NEXT: kmovw %k1, %r14d +; KNL-NEXT: vptestmd %zmm7, %zmm7, %k1 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vmovd %eax, %xmm6 +; KNL-NEXT: kmovw %k0, %r15d +; KNL-NEXT: kshiftlw $14, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $1, %ecx, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $15, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r12d +; KNL-NEXT: kshiftlw $13, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $3, %edx, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: kshiftlw $12, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $4, %r13d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r13d +; KNL-NEXT: kshiftlw $11, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $10, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $6, %esi, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: kshiftlw $9, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $7, %edi, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %edi +; KNL-NEXT: kshiftlw $8, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $8, %r8d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r8d +; KNL-NEXT: kshiftlw $7, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $9, %r9d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r9d +; KNL-NEXT: kshiftlw $6, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $10, %ebx, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %ebx +; KNL-NEXT: kshiftlw $5, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $11, %ebp, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %ebp +; KNL-NEXT: kshiftlw $4, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r10d +; KNL-NEXT: kshiftlw $3, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $13, %r11d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r11d +; KNL-NEXT: kshiftlw $2, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $14, %r14d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r14d +; KNL-NEXT: kshiftlw $1, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6 +; KNL-NEXT: kmovw %k0, %r15d +; KNL-NEXT: kshiftrw $15, %k1, %k0 ; KNL-NEXT: vmovd %r12d, %xmm7 ; KNL-NEXT: kmovw %k0, %r12d ; KNL-NEXT: vpinsrb $1, %ecx, %xmm7, %xmm7 Index: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll +++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll @@ -1011,3 +1011,113 @@ %r = insertelement <32 x i8> %x, i8 %y, i32 20 ret <32 x i8> %r } + +define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) { +; KNL-LABEL: test_extractelement_v2i1: +; KNL: ## BB#0: +; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] +; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vmovq %xmm0, %rax +; KNL-NEXT: testb $1, %al +; KNL-NEXT: sete %al +; KNL-NEXT: addb $3, %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retq +; +; SKX-LABEL: test_extractelement_v2i1: +; SKX: ## BB#0: +; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0 +; SKX-NEXT: kshiftlw $15, %k0, %k0 +; SKX-NEXT: kshiftrw $15, %k0, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: testb %al, %al +; SKX-NEXT: sete %al +; SKX-NEXT: addb $3, %al +; SKX-NEXT: movzbl %al, %eax +; SKX-NEXT: retq + %t1 = icmp ugt <2 x i64> %a, %b + %t2 = extractelement <2 x i1> %t1, i32 0 + %res = select i1 %t2, i8 3, i8 4 + ret i8 %res +} + +define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) { +; KNL-LABEL: test_extractelement_v4i1: +; KNL: ## BB#0: +; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 +; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vpextrd $3, %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: retq +; +; SKX-LABEL: test_extractelement_v4i1: +; SKX: ## BB#0: +; SKX-NEXT: vpcmpnleud %xmm1, %xmm0, %k0 +; SKX-NEXT: kshiftlw $12, %k0, %k0 +; SKX-NEXT: kshiftrw $15, %k0, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: retq + %t1 = icmp ugt <4 x i32> %a, %b + %t2 = extractelement <4 x i1> %t1, i32 3 + %res = zext i1 %t2 to i8 + ret i8 %res +} + +define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) { +; KNL-LABEL: test_extractelement_v32i1: +; KNL: ## BB#0: +; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vpextrb $2, %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: retq +; +; SKX-LABEL: test_extractelement_v32i1: +; SKX: ## BB#0: +; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 +; SKX-NEXT: kshiftld $29, %k0, %k0 +; SKX-NEXT: kshiftrd $31, %k0, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: retq + %t1 = icmp ugt <32 x i8> %a, %b + %t2 = extractelement <32 x i1> %t1, i32 2 + %res = zext i1 %t2 to i8 + ret i8 %res +} + +define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) { +; KNL-LABEL: test_extractelement_v64i1: +; KNL: ## BB#0: +; KNL-NEXT: vmovdqa {{.*#+}} ymm0 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; KNL-NEXT: vpxor %ymm0, %ymm3, %ymm2 +; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: testb $1, %al +; KNL-NEXT: sete %al +; KNL-NEXT: addb $3, %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retq +; +; SKX-LABEL: test_extractelement_v64i1: +; SKX: ## BB#0: +; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 +; SKX-NEXT: kshiftrq $63, %k0, %k0 +; SKX-NEXT: kmovw %k0, %eax +; SKX-NEXT: testb %al, %al +; SKX-NEXT: sete %al +; SKX-NEXT: addb $3, %al +; SKX-NEXT: movzbl %al, %eax +; SKX-NEXT: retq + %t1 = icmp ugt <64 x i8> %a, %b + %t2 = extractelement <64 x i1> %t1, i32 63 + %res = select i1 %t2, i8 3, i8 4 + ret i8 %res +} Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll @@ -1080,8 +1080,7 @@ ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $0, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $15, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 @@ -1145,8 +1144,7 @@ ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $0, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $15, %k2, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 @@ -1217,7 +1215,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 @@ -1282,7 +1279,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 @@ -1694,7 +1690,6 @@ ; KNL-NEXT: vmovd %r9d, %xmm3 ; KNL-NEXT: kmovw %k1, %r9d ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 ; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 @@ -1763,8 +1758,7 @@ ; KNL-NEXT: vmovd %r10d, %xmm2 ; KNL-NEXT: kmovw %k0, %r10d ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL-NEXT: kshiftlw $0, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $15, %k2, %k0 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 ; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 @@ -1832,7 +1826,6 @@ ; KNL-NEXT: vmovd %r10d, %xmm1 ; KNL-NEXT: kmovw %k0, %r10d ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftlw $0, %k1, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 ; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 @@ -1912,7 +1905,6 @@ ; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 Index: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll @@ -240,7 +240,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 @@ -309,7 +308,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 @@ -405,7 +403,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 @@ -477,7 +474,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 @@ -549,7 +545,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 @@ -621,7 +616,6 @@ ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftlw $0, %k0, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 Index: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll +++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll @@ -1456,11 +1456,12 @@ ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm2, %xmm2 ; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1 -; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp) +; SKX-NEXT: kshiftlw $15, %k1, %k0 +; SKX-NEXT: kshiftrw $15, %k0, %k0 ; SKX-NEXT: vpmovsxdq %xmm1, %ymm1 ; SKX-NEXT: vpsllq $2, %ymm1, %ymm1 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SKX-NEXT: kmovw %k0, %eax ; SKX-NEXT: # implicit-def: %XMM0 ; SKX-NEXT: testb %al, %al ; SKX-NEXT: je .LBB29_2 @@ -1468,16 +1469,18 @@ ; SKX-NEXT: vmovq %xmm1, %rax ; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SKX-NEXT: .LBB29_2: # %else -; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp) -; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SKX-NEXT: kshiftlw $14, %k1, %k0 +; SKX-NEXT: kshiftrw $15, %k0, %k0 +; SKX-NEXT: kmovw %k0, %eax ; SKX-NEXT: testb %al, %al ; SKX-NEXT: je .LBB29_4 ; SKX-NEXT: # BB#3: # %cond.load1 ; SKX-NEXT: vpextrq $1, %xmm1, %rax ; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0 ; SKX-NEXT: .LBB29_4: # %else2 -; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp) -; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SKX-NEXT: kshiftlw $13, %k1, %k0 +; SKX-NEXT: kshiftrw $15, %k0, %k0 +; SKX-NEXT: kmovw %k0, %eax ; SKX-NEXT: testb %al, %al ; SKX-NEXT: je .LBB29_6 ; SKX-NEXT: # BB#5: # %cond.load4 @@ -1495,10 +1498,11 @@ ; SKX_32-NEXT: .cfi_def_cfa_offset 16 ; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2 ; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1 -; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp) +; SKX_32-NEXT: kshiftlw $15, %k1, %k0 +; SKX_32-NEXT: kshiftrw $15, %k0, %k0 ; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1 ; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al +; SKX_32-NEXT: kmovw %k0, %eax ; SKX_32-NEXT: # implicit-def: %XMM0 ; SKX_32-NEXT: testb %al, %al ; SKX_32-NEXT: je .LBB29_2 @@ -1506,8 +1510,9 @@ ; SKX_32-NEXT: vmovd %xmm1, %eax ; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SKX_32-NEXT: .LBB29_2: # %else -; SKX_32-NEXT: kmovb %k1, {{[0-9]+}}(%esp) -; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al +; SKX_32-NEXT: kshiftlw $14, %k1, %k0 +; SKX_32-NEXT: kshiftrw $15, %k0, %k0 +; SKX_32-NEXT: kmovw %k0, %eax ; SKX_32-NEXT: testb %al, %al ; SKX_32-NEXT: je .LBB29_4 ; SKX_32-NEXT: # BB#3: # %cond.load1 @@ -1515,8 +1520,9 @@ ; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0 ; SKX_32-NEXT: .LBB29_4: # %else2 ; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm2 -; SKX_32-NEXT: kmovb %k1, (%esp) -; SKX_32-NEXT: movb (%esp), %al +; SKX_32-NEXT: kshiftlw $13, %k1, %k0 +; SKX_32-NEXT: kshiftrw $15, %k0, %k0 +; SKX_32-NEXT: kmovw %k0, %eax ; SKX_32-NEXT: testb %al, %al ; SKX_32-NEXT: je .LBB29_6 ; SKX_32-NEXT: # BB#5: # %cond.load4 Index: llvm/trunk/test/CodeGen/X86/masked_memop.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/masked_memop.ll +++ llvm/trunk/test/CodeGen/X86/masked_memop.ll @@ -2465,8 +2465,7 @@ ; AVX512F-NEXT: ## BB#29: ## %cond.load40 ; AVX512F-NEXT: vpinsrb $14, 14(%rdi), %xmm0, %xmm0 ; AVX512F-NEXT: LBB50_30: ## %else41 -; AVX512F-NEXT: kshiftlw $0, %k1, %k0 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kshiftrw $15, %k1, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: je LBB50_32 @@ -4763,7 +4762,6 @@ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7] ; AVX512F-NEXT: LBB52_30: ## %else41 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 -; AVX512F-NEXT: kshiftlw $0, %k0, %k0 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill ; AVX512F-NEXT: kmovw %k0, %eax @@ -4941,7 +4939,6 @@ ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: LBB52_62: ## %else89 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kshiftlw $0, %k1, %k1 ; AVX512F-NEXT: kshiftrw $15, %k1, %k1 ; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill ; AVX512F-NEXT: kmovw %k1, %eax @@ -5105,7 +5102,6 @@ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7] ; AVX512F-NEXT: LBB52_94: ## %else137 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1 -; AVX512F-NEXT: kshiftlw $0, %k0, %k0 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill ; AVX512F-NEXT: kmovw %k0, %eax @@ -5274,7 +5270,6 @@ ; AVX512F-NEXT: vpinsrb $14, 62(%rdi), %xmm2, %xmm2 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512F-NEXT: LBB52_126: ## %else185 -; AVX512F-NEXT: kshiftlw $0, %k1, %k1 ; AVX512F-NEXT: kshiftrw $15, %k1, %k1 ; AVX512F-NEXT: kmovw %k1, %eax ; AVX512F-NEXT: testb %al, %al @@ -6108,8 +6103,7 @@ ; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: LBB54_30: ## %else41 -; AVX512F-NEXT: kshiftlw $0, %k1, %k0 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kshiftrw $15, %k1, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: je LBB54_32 @@ -7145,7 +7139,6 @@ ; AVX512F-NEXT: ## BB#29: ## %cond.store27 ; AVX512F-NEXT: vpextrb $14, %xmm1, 14(%rdi) ; AVX512F-NEXT: LBB56_30: ## %else28 -; AVX512F-NEXT: kshiftlw $0, %k0, %k0 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb %al, %al @@ -8780,7 +8773,6 @@ ; AVX512F-NEXT: vpextrb $14, %xmm4, 14(%rdi) ; AVX512F-NEXT: LBB58_30: ## %else28 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512F-NEXT: kshiftlw $0, %k0, %k0 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb %al, %al @@ -8926,7 +8918,6 @@ ; AVX512F-NEXT: vpextrb $14, %xmm1, 30(%rdi) ; AVX512F-NEXT: LBB58_62: ## %else60 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $0, %k1, %k1 ; AVX512F-NEXT: kshiftrw $15, %k1, %k1 ; AVX512F-NEXT: kmovw %k1, %eax ; AVX512F-NEXT: testb %al, %al @@ -9058,7 +9049,6 @@ ; AVX512F-NEXT: vpextrb $14, %xmm5, 46(%rdi) ; AVX512F-NEXT: LBB58_94: ## %else92 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 -; AVX512F-NEXT: kshiftlw $0, %k0, %k0 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb %al, %al @@ -9201,8 +9191,7 @@ ; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0 ; AVX512F-NEXT: vpextrb $14, %xmm0, 62(%rdi) ; AVX512F-NEXT: LBB58_126: ## %else124 -; AVX512F-NEXT: kshiftlw $0, %k1, %k0 -; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kshiftrw $15, %k1, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb %al, %al ; AVX512F-NEXT: je LBB58_128 @@ -9709,7 +9698,6 @@ ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 ; AVX512F-NEXT: vpextrw $6, %xmm0, 28(%rdi) ; AVX512F-NEXT: LBB60_30: ## %else28 -; AVX512F-NEXT: kshiftlw $0, %k0, %k0 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb %al, %al Index: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll +++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll @@ -4420,7 +4420,6 @@ ; AVX512-NEXT: kshiftrw $15, %k1, %k1 ; AVX512-NEXT: kmovw %k1, %eax ; AVX512-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; AVX512-NEXT: kshiftlw $0, %k0, %k0 ; AVX512-NEXT: kshiftrw $15, %k0, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 @@ -4488,7 +4487,6 @@ ; AVX512-NEXT: kshiftrw $15, %k1, %k1 ; AVX512-NEXT: kmovw %k1, %eax ; AVX512-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; AVX512-NEXT: kshiftlw $0, %k0, %k0 ; AVX512-NEXT: kshiftrw $15, %k0, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 @@ -4562,7 +4560,6 @@ ; AVX512-NEXT: kshiftrw $15, %k1, %k1 ; AVX512-NEXT: kmovw %k1, %eax ; AVX512-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 -; AVX512-NEXT: kshiftlw $0, %k0, %k0 ; AVX512-NEXT: kshiftrw $15, %k0, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 @@ -4630,7 +4627,6 @@ ; AVX512-NEXT: kshiftrw $15, %k1, %k1 ; AVX512-NEXT: kmovw %k1, %eax ; AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kshiftlw $0, %k0, %k0 ; AVX512-NEXT: kshiftrw $15, %k0, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0