Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -18144,18 +18144,22 @@ Op0 = Op0.getOperand(0); MVT VT = Op0.getSimpleValueType(); - if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) && + if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) && + !(Subtarget.hasDQI() && VT == MVT::v8i1) && !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))) return SDValue(); X86::CondCode X86CC; if (isNullConstant(Op1)) { X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE; + } else if (isAllOnesConstant(Op1)) { + // C flag is set for all ones. + X86CC = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE; } else return SDValue(); - SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0); - return getSETCC(X86CC, KTEST, dl, DAG); + SDValue KORTEST = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, Op0, Op0); + return getSETCC(X86CC, KORTEST, dl, DAG); } SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -584,7 +584,7 @@ ; SKX-NEXT: movb $85, %al ; SKX-NEXT: kmovd %eax, %k1 ; SKX-NEXT: korb %k1, %k0, %k0 -; SKX-NEXT: ktestb %k0, %k0 +; SKX-NEXT: kortestb %k0, %k0 ; SKX-NEXT: retq ; ; AVX512BW-LABEL: test7: @@ -607,7 +607,7 @@ ; AVX512DQ-NEXT: movb $85, %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 -; AVX512DQ-NEXT: ktestb %k0, %k0 +; AVX512DQ-NEXT: kortestb %k0, %k0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq allocas: @@ -1673,7 +1673,7 @@ ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} -; SKX-NEXT: ktestb %k0, %k0 +; SKX-NEXT: kortestb %k0, %k0 ; SKX-NEXT: je LBB42_2 ; SKX-NEXT: ## %bb.1: ## %L1 ; SKX-NEXT: vmovapd %zmm0, (%rdi) @@ -1708,7 +1708,7 @@ ; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 ; AVX512DQ-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} ; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} -; AVX512DQ-NEXT: ktestb %k0, %k0 +; AVX512DQ-NEXT: kortestb %k0, %k0 ; AVX512DQ-NEXT: je LBB42_2 ; AVX512DQ-NEXT: ## %bb.1: ## %L1 ; AVX512DQ-NEXT: vmovapd %zmm0, (%rdi) @@ -1788,7 +1788,7 @@ ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 ; SKX-NEXT: kunpckwd %k1, %k2, %k1 ; SKX-NEXT: kord %k1, %k0, %k0 -; SKX-NEXT: ktestd %k0, %k0 +; SKX-NEXT: kortestd %k0, %k0 ; SKX-NEXT: je LBB43_2 ; SKX-NEXT: ## %bb.1: ## %L1 ; SKX-NEXT: vmovaps %zmm0, (%rdi) @@ -1814,7 +1814,7 @@ ; AVX512BW-NEXT: vcmpltps %zmm2, %zmm1, %k2 ; AVX512BW-NEXT: kunpckwd %k1, %k2, %k1 ; AVX512BW-NEXT: kord %k1, %k0, %k0 -; AVX512BW-NEXT: ktestd %k0, %k0 +; AVX512BW-NEXT: kortestd %k0, %k0 ; AVX512BW-NEXT: je LBB43_2 ; AVX512BW-NEXT: ## %bb.1: ## %L1 ; AVX512BW-NEXT: vmovaps %zmm0, (%rdi) @@ -2786,3 +2786,32 @@ } declare void @foo() +; Make sure we can use the C flag from kortest to check for all ones. +define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) { +; CHECK-LABEL: ktest_allones: +; CHECK: ## %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; CHECK-NEXT: kortestw %k0, %k0 +; CHECK-NEXT: jb LBB65_2 +; CHECK-NEXT: ## %bb.1: ## %bb.1 +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: callq _foo +; CHECK-NEXT: LBB65_2: ## %bb.2 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %a = icmp eq <16 x i32> %x, zeroinitializer + %b = icmp eq <16 x i32> %y, zeroinitializer + %c = and <16 x i1> %a, %b + %d = bitcast <16 x i1> %c to i16 + %e = icmp eq i16 %d, -1 + br i1 %e, label %bb.2, label %bb.1 +bb.1: + call void @foo() + br label %bb.2 +bb.2: + ret void +} Index: test/CodeGen/X86/avx512-schedule.ll =================================================================== --- test/CodeGen/X86/avx512-schedule.ll +++ test/CodeGen/X86/avx512-schedule.ll @@ -7031,7 +7031,7 @@ ; GENERIC-NEXT: movb $85, %al # sched: [1:0.33] ; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33] ; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vcmp_test7: @@ -7041,7 +7041,7 @@ ; SKX-NEXT: movb $85, %al # sched: [1:0.25] ; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: ktestb %k0, %k0 # sched: [3:1.00] +; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] allocas: %a= or <8 x i1> %mask, @@ -7615,7 +7615,7 @@ ; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50] ; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] -; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.1: # %L1 ; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] @@ -7632,7 +7632,7 @@ ; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] ; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50] ; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] -; SKX-NEXT: ktestb %k0, %k0 # sched: [3:1.00] +; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00] ; SKX-NEXT: je .LBB410_2 # sched: [1:0.50] ; SKX-NEXT: # %bb.1: # %L1 ; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] @@ -7684,7 +7684,7 @@ ; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00] ; GENERIC-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00] -; GENERIC-NEXT: ktestd %k0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: kortestd %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00] ; GENERIC-NEXT: # %bb.1: # %L1 ; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] @@ -7710,7 +7710,7 @@ ; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] ; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00] ; SKX-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: ktestd %k0, %k0 # sched: [3:1.00] +; SKX-NEXT: kortestd %k0, %k0 # sched: [3:1.00] ; SKX-NEXT: je .LBB411_2 # sched: [1:0.50] ; SKX-NEXT: # %bb.1: # %L1 ; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] Index: test/CodeGen/X86/setcc-lowering.ll =================================================================== --- test/CodeGen/X86/setcc-lowering.ll +++ test/CodeGen/X86/setcc-lowering.ll @@ -84,8 +84,7 @@ ; KNL-32-NEXT: cmovlw %dx, %si ; KNL-32-NEXT: kmovw %esi, %k1 ; KNL-32-NEXT: kandw %k0, %k1, %k1 -; KNL-32-NEXT: kmovw %k1, %esi -; KNL-32-NEXT: testw %si, %si +; KNL-32-NEXT: kortestw %k1, %k1 ; KNL-32-NEXT: jne .LBB1_1 ; KNL-32-NEXT: # %bb.2: # %for_exit600 ; KNL-32-NEXT: popl %esi