Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -948,6 +948,41 @@ } } + // Look for a KAND+KORTEST and turn it into KTEST if only the zero flag is + // used. We're doing this late so we can prefer to fold the AND into masked + // comparisons. Doing that can be better for the live range of the mask + // register. + if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr || + Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) && + N->getOperand(0) == N->getOperand(1) && + N->isOnlyUserOf(N->getOperand(0).getNode()) && + N->getOperand(0).isMachineOpcode() && + onlyUsesZeroFlag(SDValue(N, 0))) { + SDValue And = N->getOperand(0); + unsigned N0Opc = And.getMachineOpcode(); + // KANDW is legal with AVX512F, but KTESTW requires AVX512DQ. The other + // KAND instructions and KTEST use the same ISA feature. + if (N0Opc == X86::KANDBrr || + (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) || + N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) { + unsigned NewOpc; + switch (Opc) { + default: llvm_unreachable("Unexpected opcode!"); + case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break; + case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break; + case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break; + case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break; + } + MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N), + MVT::i32, + And.getOperand(0), + And.getOperand(1)); + ReplaceUses(N, KTest); + MadeChange = true; + continue; + } + } + // Attempt to remove vectors moves that were inserted to zero upper bits. if (Opc != TargetOpcode::SUBREG_TO_REG) continue; Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll @@ -3519,8 +3519,7 @@ ; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k2 ; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k1, %k0, %k0 -; SKX-NEXT: kortestb %k0, %k0 +; SKX-NEXT: ktestb %k1, %k0 ; SKX-NEXT: je LBB71_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: popq %rax @@ -3574,8 +3573,7 @@ ; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k3 ; AVX512DQ-NEXT: korb %k1, %k0, %k0 ; AVX512DQ-NEXT: korb %k3, %k2, %k1 -; AVX512DQ-NEXT: kandb %k1, %k0, %k0 -; AVX512DQ-NEXT: kortestb %k0, %k0 +; AVX512DQ-NEXT: ktestb %k1, %k0 ; AVX512DQ-NEXT: je LBB71_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit ; AVX512DQ-NEXT: popq %rax @@ -3597,8 +3595,7 @@ ; X86-NEXT: vptestnmd %ymm2, %ymm2, %k1 ; X86-NEXT: vptestnmd %ymm3, %ymm3, %k2 ; X86-NEXT: korb %k2, %k1, %k1 -; X86-NEXT: kandb %k1, %k0, %k0 -; X86-NEXT: kortestb %k0, %k0 +; X86-NEXT: ktestb %k1, %k0 ; X86-NEXT: je LBB71_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: addl $12, %esp @@ -3663,8 +3660,7 @@ ; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k2 ; SKX-NEXT: korb %k2, %k1, %k1 -; SKX-NEXT: kandb %k1, %k0, %k0 -; SKX-NEXT: kortestb %k0, %k0 +; SKX-NEXT: ktestb %k1, %k0 ; SKX-NEXT: je LBB72_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: popq %rax @@ -3710,8 +3706,7 @@ ; AVX512DQ-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; AVX512DQ-NEXT: vptestnmq %zmm3, %zmm3, %k2 ; AVX512DQ-NEXT: korb %k2, %k1, %k1 -; AVX512DQ-NEXT: kandb %k1, %k0, %k0 -; AVX512DQ-NEXT: kortestb %k0, %k0 +; AVX512DQ-NEXT: ktestb %k1, %k0 ; AVX512DQ-NEXT: je LBB72_1 ; AVX512DQ-NEXT: ## %bb.2: ## %exit ; AVX512DQ-NEXT: popq %rax @@ -3733,8 +3728,7 @@ ; X86-NEXT: vptestnmq %zmm2, %zmm2, %k1 ; X86-NEXT: vptestnmq %zmm3, %zmm3, %k2 ; X86-NEXT: korb %k2, %k1, %k1 -; X86-NEXT: kandb %k1, %k0, %k0 -; X86-NEXT: kortestb %k0, %k0 +; X86-NEXT: ktestb %k1, %k0 ; X86-NEXT: je LBB72_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: addl $12, %esp @@ -3765,28 +3759,95 @@ } define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) { -; CHECK-LABEL: ktest_5: -; CHECK: ## %bb.0: -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1 -; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k2 -; CHECK-NEXT: korw %k2, %k1, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: kortestw %k0, %k0 -; CHECK-NEXT: je LBB73_1 -; CHECK-NEXT: ## %bb.2: ## %exit -; CHECK-NEXT: popq %rax -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: retq -; CHECK-NEXT: LBB73_1: ## %bar -; CHECK-NEXT: vzeroupper -; CHECK-NEXT: callq _foo -; CHECK-NEXT: popq %rax -; CHECK-NEXT: retq +; KNL-LABEL: ktest_5: +; KNL: ## %bb.0: +; KNL-NEXT: pushq %rax +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; KNL-NEXT: korw %k1, %k0, %k0 +; KNL-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; KNL-NEXT: vptestnmd %zmm3, %zmm3, %k2 +; KNL-NEXT: korw %k2, %k1, %k1 +; KNL-NEXT: kandw %k1, %k0, %k0 +; KNL-NEXT: kortestw %k0, %k0 +; KNL-NEXT: je LBB73_1 +; KNL-NEXT: ## %bb.2: ## %exit +; KNL-NEXT: popq %rax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; KNL-NEXT: LBB73_1: ## %bar +; KNL-NEXT: vzeroupper +; KNL-NEXT: callq _foo +; KNL-NEXT: popq %rax +; KNL-NEXT: retq +; +; SKX-LABEL: ktest_5: +; SKX: ## %bb.0: +; SKX-NEXT: pushq %rax +; SKX-NEXT: .cfi_def_cfa_offset 16 +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k2 +; SKX-NEXT: korw %k2, %k1, %k1 +; SKX-NEXT: ktestw %k1, %k0 +; SKX-NEXT: je LBB73_1 +; SKX-NEXT: ## %bb.2: ## %exit +; SKX-NEXT: popq %rax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: LBB73_1: ## %bar +; SKX-NEXT: vzeroupper +; SKX-NEXT: callq _foo +; SKX-NEXT: popq %rax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: ktest_5: +; AVX512BW: ## %bb.0: +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; AVX512BW-NEXT: korw %k1, %k0, %k0 +; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; AVX512BW-NEXT: vptestnmd %zmm3, %zmm3, %k2 +; AVX512BW-NEXT: korw %k2, %k1, %k1 +; AVX512BW-NEXT: kandw %k1, %k0, %k0 +; AVX512BW-NEXT: kortestw %k0, %k0 +; AVX512BW-NEXT: je LBB73_1 +; AVX512BW-NEXT: ## %bb.2: ## %exit +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; AVX512BW-NEXT: LBB73_1: ## %bar +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: callq _foo +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: ktest_5: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vptestnmd %zmm1, %zmm1, %k1 +; AVX512DQ-NEXT: korw %k1, %k0, %k0 +; AVX512DQ-NEXT: vptestnmd %zmm2, %zmm2, %k1 +; AVX512DQ-NEXT: vptestnmd %zmm3, %zmm3, %k2 +; AVX512DQ-NEXT: korw %k2, %k1, %k1 +; AVX512DQ-NEXT: ktestw %k1, %k0 +; AVX512DQ-NEXT: je LBB73_1 +; AVX512DQ-NEXT: ## %bb.2: ## %exit +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; AVX512DQ-NEXT: LBB73_1: ## %bar +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq _foo +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: retq ; ; X86-LABEL: ktest_5: ; X86: ## %bb.0: @@ -3798,8 +3859,7 @@ ; X86-NEXT: vptestnmd %zmm2, %zmm2, %k1 ; X86-NEXT: vptestnmd %zmm3, %zmm3, %k2 ; X86-NEXT: korw %k2, %k1, %k1 -; X86-NEXT: kandw %k1, %k0, %k0 -; X86-NEXT: kortestw %k0, %k0 +; X86-NEXT: ktestw %k1, %k0 ; X86-NEXT: je LBB73_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: addl $12, %esp @@ -3878,8 +3938,7 @@ ; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; SKX-NEXT: vptestnmw %zmm3, %zmm3, %k2 ; SKX-NEXT: kord %k2, %k1, %k1 -; SKX-NEXT: kandd %k1, %k0, %k0 -; SKX-NEXT: kortestd %k0, %k0 +; SKX-NEXT: ktestd %k1, %k0 ; SKX-NEXT: je LBB74_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: popq %rax @@ -3901,8 +3960,7 @@ ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vptestnmw %zmm3, %zmm3, %k2 ; AVX512BW-NEXT: kord %k2, %k1, %k1 -; AVX512BW-NEXT: kandd %k1, %k0, %k0 -; AVX512BW-NEXT: kortestd %k0, %k0 +; AVX512BW-NEXT: ktestd %k1, %k0 ; AVX512BW-NEXT: je LBB74_1 ; AVX512BW-NEXT: ## %bb.2: ## %exit ; AVX512BW-NEXT: popq %rax @@ -3962,8 +4020,7 @@ ; X86-NEXT: vptestnmw %zmm2, %zmm2, %k1 ; X86-NEXT: vptestnmw %zmm3, %zmm3, %k2 ; X86-NEXT: kord %k2, %k1, %k1 -; X86-NEXT: kandd %k1, %k0, %k0 -; X86-NEXT: kortestd %k0, %k0 +; X86-NEXT: ktestd %k1, %k0 ; X86-NEXT: je LBB74_1 ; X86-NEXT: ## %bb.2: ## %exit ; X86-NEXT: addl $12, %esp @@ -4066,8 +4123,7 @@ ; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; SKX-NEXT: vptestnmb %zmm3, %zmm3, %k2 ; SKX-NEXT: korq %k2, %k1, %k1 -; SKX-NEXT: kandq %k1, %k0, %k0 -; SKX-NEXT: kortestq %k0, %k0 +; SKX-NEXT: ktestq %k1, %k0 ; SKX-NEXT: je LBB75_1 ; SKX-NEXT: ## %bb.2: ## %exit ; SKX-NEXT: popq %rax @@ -4089,8 +4145,7 @@ ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vptestnmb %zmm3, %zmm3, %k2 ; AVX512BW-NEXT: korq %k2, %k1, %k1 -; AVX512BW-NEXT: kandq %k1, %k0, %k0 -; AVX512BW-NEXT: kortestq %k0, %k0 +; AVX512BW-NEXT: ktestq %k1, %k0 ; AVX512BW-NEXT: je LBB75_1 ; AVX512BW-NEXT: ## %bb.2: ## %exit ; AVX512BW-NEXT: popq %rax