Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -452,6 +452,8 @@
   if (N->getOpcode() == X86ISD::PCMPEQM ||
       N->getOpcode() == X86ISD::PCMPGTM ||
       N->getOpcode() == X86ISD::CMPM ||
+      N->getOpcode() == X86ISD::TESTM ||
+      N->getOpcode() == X86ISD::TESTNM ||
       N->getOpcode() == X86ISD::CMPMU) {
     // We can get 256-bit 8 element types here without VLX being enabled. When
     // this happens we will use 512-bit operations and the mask will not be
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -5006,6 +5006,8 @@
   switch (Opcode) {
   default: return false;
+  case X86ISD::TESTM:
+  case X86ISD::TESTNM:
   case X86ISD::PCMPEQM:
   case X86ISD::PCMPGTM:
   case X86ISD::CMPM:
@@ -17179,6 +17181,20 @@
   if (Swap)
     std::swap(Op0, Op1);
+
+  // Lower a comparison of AND(A,B) against zero to TESTM (NE) or TESTNM (EQ).
+  if ((!Opc && SSECC == 4) || Opc == X86ISD::PCMPEQM) {
+    SDValue A = peekThroughBitcasts(Op0);
+    SDValue B = peekThroughBitcasts(Op1);
+    if (A.getOpcode() == ISD::AND && ISD::isBuildVectorAllZeros(B.getNode())) {
+      MVT VT0 = Op0.getSimpleValueType();
+      SDValue RHS = DAG.getNode(ISD::BITCAST, dl, VT0, A.getOperand(0));
+      SDValue LHS = DAG.getNode(ISD::BITCAST, dl, VT0, A.getOperand(1));
+      return DAG.getNode(Opc == X86ISD::PCMPEQM ? X86ISD::TESTNM : X86ISD::TESTM,
+                         dl, VT, RHS, LHS);
+    }
+  }
+
   if (Opc)
     return DAG.getNode(Opc, dl, VT, Op0, Op1);
   Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
Index: test/CodeGen/X86/avx512-skx-insert-subvec.ll
===================================================================
--- test/CodeGen/X86/avx512-skx-insert-subvec.ll
+++ test/CodeGen/X86/avx512-skx-insert-subvec.ll
@@ -46,8 +46,6 @@
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
 ; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
-; CHECK-NEXT:    kshiftlb $4, %k0, %k0
-; CHECK-NEXT:    kshiftrb $4, %k0, %k0
 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0
 ; CHECK-NEXT:    retq
Index: test/CodeGen/X86/avx512bw-vec-test-testn.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/avx512bw-vec-test-testn.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i32 @TEST_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
+; CHECK-LABEL: TEST_mm512_test_epi16_mask:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vptestmw %zmm0, %zmm1, %k0
+; CHECK-NEXT:    kmovd %k0, %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %and.i.i = and <8 x i64> %__B, %__A
+  %0 = bitcast <8 x i64> %and.i.i to <32 x i16>
+  %1 = icmp ne <32 x i16> %0, zeroinitializer
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+
+; Function Attrs: norecurse nounwind readnone
+define zeroext i64 @TEST_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
+; CHECK-LABEL: TEST_mm512_test_epi8_mask:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    vptestmb %zmm0, %zmm1, %k0
+; CHECK-NEXT:    kmovq %k0, %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %and.i.i = and <8 x i64> %__B, %__A
+  %0 = bitcast <8 x i64> %and.i.i to <64 x i8>
+  %1 = icmp ne <64 x i8>
%0, zeroinitializer + %2 = bitcast <64 x i1> %1 to i64 + ret i64 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i32 @TEST_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <32 x i16> + %1 = icmp ne <32 x i16> %0, zeroinitializer + %2 = bitcast i32 %__U to <32 x i1> + %3 = and <32 x i1> %1, %2 + %4 = bitcast <32 x i1> %3 to i32 + ret i32 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i64 @TEST_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovq %rdi, %k1 +; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovq %k0, %rax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <64 x i8> + %1 = icmp ne <64 x i8> %0, zeroinitializer + %2 = bitcast i64 %__U to <64 x i1> + %3 = and <64 x i1> %1, %2 + %4 = bitcast <64 x i1> %3 to i64 + ret i64 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i32 @TEST_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <32 x i16> + %1 = icmp eq <32 x i16> %0, zeroinitializer + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + + +; Function Attrs: norecurse nounwind readnone +define zeroext i64 @TEST_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovq %k0, %rax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <64 x i8> + %1 = icmp eq <64 x i8> %0, zeroinitializer + %2 = bitcast <64 x i1> %1 to i64 + ret i64 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i32 @TEST_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <32 x i16> + %1 = icmp eq <32 x i16> %0, zeroinitializer + %2 = bitcast i32 %__U to <32 x i1> + %3 = and <32 x i1> %1, %2 + %4 = bitcast <32 x i1> %3 to i32 + ret i32 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i64 @TEST_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovq %rdi, %k1 +; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovq %k0, %rax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, 
%__A + %0 = bitcast <8 x i64> %and.i.i to <64 x i8> + %1 = icmp eq <64 x i8> %0, zeroinitializer + %2 = bitcast i64 %__U to <64 x i1> + %3 = and <64 x i1> %1, %2 + %4 = bitcast <64 x i1> %3 to i64 + ret i64 %4 +} + Index: test/CodeGen/X86/avx512bwvl-vec-test-testn.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/avx512bwvl-vec-test-testn.ll @@ -0,0 +1,288 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp ne <16 x i8> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp ne <16 x i8> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp ne <8 x i16> %0, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp ne <8 x i16> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = and <8 x i1> %1, %2 + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp eq <16 x i8> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind 
readnone +define zeroext i16 @TEST_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <16 x i8> + %1 = icmp eq <16 x i8> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp eq <8 x i16> %0, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm_mask_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: retq +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <8 x i16> + %1 = icmp eq <8 x i16> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = and <8 x i1> %1, %2 + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp ne <32 x i8> %0, zeroinitializer + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_test_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp ne <32 x i8> %0, zeroinitializer + %2 = bitcast i32 %__U to <32 x i1> + %3 = and <32 x i1> %1, %2 + %4 = bitcast <32 x i1> %3 to i32 + ret i32 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp ne <16 x i16> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 
%2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_test_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp ne <16 x i16> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp eq <32 x i8> %0, zeroinitializer + %2 = bitcast <32 x i1> %1 to i32 + ret i32 %2 +} + +; Function Attrs: norecurse nounwind readnone +define i32 @TEST_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_testn_epi8_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <32 x i8> + %1 = icmp eq <32 x i8> %0, zeroinitializer + %2 = bitcast i32 %__U to <32 x i1> + %3 = and <32 x i1> %1, %2 + %4 = bitcast <32 x i1> %3 to i32 + ret i32 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp eq <16 x i16> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm256_mask_testn_epi16_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0 {%k1} +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <16 x i16> + %1 = icmp eq <16 x i16> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + + Index: test/CodeGen/X86/avx512f-vec-test-testn.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/avx512f-vec-test-testn.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s + +; Function Attrs: norecurse nounwind 
readnone +define zeroext i8 @TEST_mm512_test_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_test_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp ne <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast <8 x i1> %0 to i8 + ret i8 %1 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_test_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_test_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp ne <16 x i32> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm512_mask_test_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_test_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp ne <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = and <8 x i1> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_mask_test_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_test_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp ne <16 x i32> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm512_testn_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmq %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp eq <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast <8 x i1> %0 to i8 + ret i8 %1 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_testn_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_testn_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp eq <16 x i32> %0, zeroinitializer + %2 = bitcast <16 x i1> %1 to i16 + ret i16 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 
@TEST_mm512_mask_testn_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi64_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AL %AL %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = icmp eq <8 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = and <8 x i1> %0, %1 + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i16 @TEST_mm512_mask_testn_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { +; CHECK-LABEL: TEST_mm512_mask_testn_epi32_mask: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: # kill: %AX %AX %EAX +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq +entry: + %and.i.i = and <8 x i64> %__B, %__A + %0 = bitcast <8 x i64> %and.i.i to <16 x i32> + %1 = icmp eq <16 x i32> %0, zeroinitializer + %2 = bitcast i16 %__U to <16 x i1> + %3 = and <16 x i1> %1, %2 + %4 = bitcast <16 x i1> %3 to i16 + ret i16 %4 +} + Index: test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -1217,8 +1217,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -1246,8 +1244,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -1278,8 +1274,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -1311,8 +1305,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13586,8 +13578,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13615,8 +13605,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13647,8 +13635,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -13680,8 +13666,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -25987,8 +25971,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -26019,8 +26001,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -26053,8 +26033,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -26089,8 +26067,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38587,8 +38563,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38619,8 +38593,6 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38654,8 +38626,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -38690,8 +38660,6 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k1 -; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper Index: test/CodeGen/X86/avx512vl-vec-test-testn.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/avx512vl-vec-test-testn.ll @@ -0,0 +1,440 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86_64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=I386 + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_test_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_test_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestmq %xmm0, %xmm1, %k0 +; X86_64-NEXT: kmovw 
%k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_test_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestmq %xmm0, %xmm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_test_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_test_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestmd %xmm0, %xmm1, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_test_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestmd %xmm0, %xmm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + %1 = icmp ne <4 x i32> %0, zeroinitializer + %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_test_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_test_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestmq %ymm0, %ymm1, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_test_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestmq %ymm0, %ymm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_test_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_test_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_test_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestmd %ymm0, %ymm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + %1 = icmp ne <8 x i32> %0, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_test_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_mask_test_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_mask_test_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL 
%AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %3 = and <2 x i1> %0, %2 + %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_test_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_mask_test_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_mask_test_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + %1 = icmp ne <4 x i32> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %1, %3 + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_mask_test_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_mask_test_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_mask_test_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %3 = and <4 x i1> %0, %2 + %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_mask_test_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_mask_test_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_mask_test_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + %1 = icmp ne <8 x i32> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = and <8 x i1> %1, %2 + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + +; Function Attrs: norecurse 
nounwind readnone +define zeroext i8 @TEST_mm_testn_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_testn_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestnmq %xmm0, %xmm1, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_testn_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestnmq %xmm0, %xmm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer + %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_testn_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_testn_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestnmd %xmm0, %xmm1, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_testn_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestnmd %xmm0, %xmm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + %1 = icmp eq <4 x i32> %0, zeroinitializer + %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> + %3 = bitcast <8 x i1> %2 to i8 + ret i8 %3 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_testn_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_testn_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestnmq %ymm0, %ymm1, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_testn_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestnmq %ymm0, %ymm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer + %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_testn_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_testn_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0 +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_testn_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0 +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + %1 = icmp eq <8 x i32> %0, zeroinitializer + %2 = bitcast <8 x i1> %1 to i8 + ret i8 %2 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_testn_epi64_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_mask_testn_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL 
%EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_mask_testn_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> + %3 = and <2 x i1> %0, %2 + %4 = shufflevector <2 x i1> %3, <2 x i1> zeroinitializer, <8 x i32> + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm_mask_testn_epi32_mask(i8 %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm_mask_testn_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm_mask_testn_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: retl +entry: + %and.i.i = and <2 x i64> %__B, %__A + %0 = bitcast <2 x i64> %and.i.i to <4 x i32> + %1 = icmp eq <4 x i32> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> + %4 = and <4 x i1> %1, %3 + %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> + %6 = bitcast <8 x i1> %5 to i8 + ret i8 %6 +} + + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_mask_testn_epi64_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_mask_testn_epi64_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_mask_testn_epi64_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer + %1 = bitcast i8 %__U to <8 x i1> + %2 = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> + %3 = and <4 x i1> %0, %2 + %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> + %5 = bitcast <8 x i1> %4 to i8 + ret i8 %5 +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i8 @TEST_mm256_mask_testn_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 { +; X86_64-LABEL: TEST_mm256_mask_testn_epi32_mask: +; X86_64: # BB#0: # %entry +; X86_64-NEXT: kmovw %edi, %k1 +; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1} +; X86_64-NEXT: kmovw %k0, %eax +; X86_64-NEXT: # kill: %AL %AL %EAX +; X86_64-NEXT: vzeroupper +; X86_64-NEXT: retq +; +; I386-LABEL: TEST_mm256_mask_testn_epi32_mask: +; I386: # BB#0: # %entry +; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NEXT: kmovw %eax, %k1 +; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1} +; I386-NEXT: kmovw %k0, %eax +; I386-NEXT: # kill: %AL %AL %EAX +; I386-NEXT: vzeroupper +; 
I386-NEXT: retl +entry: + %and.i.i = and <4 x i64> %__B, %__A + %0 = bitcast <4 x i64> %and.i.i to <8 x i32> + %1 = icmp eq <8 x i32> %0, zeroinitializer + %2 = bitcast i8 %__U to <8 x i1> + %3 = and <8 x i1> %1, %2 + %4 = bitcast <8 x i1> %3 to i8 + ret i8 %4 +} + Index: test/CodeGen/X86/compress_expand.ll =================================================================== --- test/CodeGen/X86/compress_expand.ll +++ test/CodeGen/X86/compress_expand.ll @@ -140,9 +140,7 @@ ; KNL-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 ; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k0 -; KNL-NEXT: kshiftlw $8, %k0, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k1 +; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v8f32(<8 x float> %V, float* %base, <8 x i1> %mask) Index: test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- test/CodeGen/X86/masked_gather_scatter.ll +++ test/CodeGen/X86/masked_gather_scatter.ll @@ -1057,9 +1057,7 @@ ; SKX: # BB#0: ; SKX-NEXT: # kill: %XMM1 %XMM1 %YMM1 ; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 +; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1 ; SKX-NEXT: vscatterqps %xmm0, (,%ymm1) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -1068,9 +1066,7 @@ ; SKX_32: # BB#0: ; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 +; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1 ; SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1} ; SKX_32-NEXT: retl call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask) @@ -1105,9 +1101,7 @@ ; SKX: # BB#0: ; SKX-NEXT: # kill: %XMM1 %XMM1 %YMM1 ; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 +; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX-NEXT: vzeroupper @@ -1117,9 +1111,7 @@ ; SKX_32: # BB#0: ; SKX_32-NEXT: # kill: %XMM1 %XMM1 %YMM1 ; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 +; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1 ; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX_32-NEXT: vzeroupper @@ -1165,9 +1157,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k0 -; SKX-NEXT: kshiftlb $6, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 +; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1} ; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq @@ -1176,9 +1166,7 @@ ; SKX_32: # BB#0: ; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k0 -; SKX_32-NEXT: kshiftlb $6, %k0, %k0 -; SKX_32-NEXT: kshiftrb $6, %k0, %k1 +; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1} ; SKX_32-NEXT: vmovaps %xmm2, %xmm0 Index: test/CodeGen/X86/masked_memop.ll 
=================================================================== --- test/CodeGen/X86/masked_memop.ll +++ test/CodeGen/X86/masked_memop.ll @@ -285,9 +285,7 @@ ; AVX512F-NEXT: ## kill: %YMM1 %YMM1 %ZMM1 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k0 -; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1} ; AVX512F-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; AVX512F-NEXT: retq @@ -327,9 +325,7 @@ ; AVX512F: ## BB#0: ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k0 -; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} ; AVX512F-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; AVX512F-NEXT: retq @@ -369,9 +365,7 @@ ; AVX512F: ## BB#0: ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kshiftlw $8, %k0, %k0 -; AVX512F-NEXT: kshiftrw $8, %k0, %k1 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 ; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} ; AVX512F-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; AVX512F-NEXT: retq Index: test/CodeGen/X86/setcc-lowering.ll =================================================================== --- test/CodeGen/X86/setcc-lowering.ll +++ test/CodeGen/X86/setcc-lowering.ll @@ -23,10 +23,9 @@ ; ; KNL-32-LABEL: pr25080: ; KNL-32: # BB#0: # %entry -; KNL-32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607] -; KNL-32-NEXT: vpand %ymm1, %ymm0, %ymm0 -; KNL-32-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-32-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; KNL-32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; KNL-32-NEXT: vbroadcastss {{.*#+}} ymm1 = [8388607,8388607,8388607,8388607,8388607,8388607,8388607,8388607] +; KNL-32-NEXT: vptestnmd %zmm1, %zmm0, %k0 ; KNL-32-NEXT: movb $15, %al ; KNL-32-NEXT: kmovw %eax, %k1 ; KNL-32-NEXT: korw %k1, %k0, %k1