Index: lib/Target/X86/X86DomainReassignment.cpp =================================================================== --- lib/Target/X86/X86DomainReassignment.cpp +++ lib/Target/X86/X86DomainReassignment.cpp @@ -663,8 +663,10 @@ createReplacer(X86::XOR32rr, X86::KXORDrr); createReplacer(X86::XOR64rr, X86::KXORQrr); - createReplacer(X86::TEST32rr, X86::KTESTDrr); - createReplacer(X86::TEST64rr, X86::KTESTQrr); + // TODO: KTEST is not a replacement for TEST due to flag differences. Need + // to prove only the Z flag is read by EFLAGS consumers before re-enabling. + //createReplacer(X86::TEST32rr, X86::KTESTDrr); + //createReplacer(X86::TEST64rr, X86::KTESTQrr); } if (STI->hasDQI()) { @@ -684,8 +686,10 @@ createReplacer(X86::SHR8ri, X86::KSHIFTRBri); createReplacer(X86::SHL8ri, X86::KSHIFTLBri); - createReplacer(X86::TEST8rr, X86::KTESTBrr); - createReplacer(X86::TEST16rr, X86::KTESTWrr); + // TODO: KTEST is not a replacement for TEST due to flag differences. Need + // to prove only the Z flag is read by EFLAGS consumers before re-enabling. + //createReplacer(X86::TEST8rr, X86::KTESTBrr); + //createReplacer(X86::TEST16rr, X86::KTESTWrr); createReplacer(X86::XOR8rr, X86::KXORBrr); } Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -17078,24 +17078,6 @@ return false; } -// Emit KTEST instruction for bit vectors on AVX-512 -static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - if (Op.getOpcode() == ISD::BITCAST) { - auto hasKTEST = [&](MVT VT) { - unsigned SizeInBits = VT.getSizeInBits(); - return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) || - (Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64)); - }; - SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType().getSimpleVT(); - if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 && - hasKTEST(Op0VT)) - return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0); - } - return 
SDValue(); -} - /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent. SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, @@ -17140,9 +17122,6 @@ // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. if (Op.getResNo() != 0 || NeedOF || NeedCF) { - // Emit KTEST for bit vectors - if (auto Node = EmitKTEST(Op, DAG, Subtarget)) - return Node; // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, Op.getValueType())); @@ -17371,10 +17350,6 @@ } if (Opcode == 0) { - // Emit KTEST for bit vectors - if (auto Node = EmitKTEST(Op, DAG, Subtarget)) - return Node; - // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, dl, Op.getValueType())); @@ -18128,6 +18103,38 @@ return Result; } +// Try to select (setcc (bitcast vXi1), 0, eq/ne) as KTEST+SETCC if possible. +static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC, + const SDLoc &dl, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + // Only support equality comparisons, which need only the Z flag from KTEST. + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return SDValue(); + + // Must be a bitcast from vXi1. + if (Op0.getOpcode() != ISD::BITCAST) + return SDValue(); + + Op0 = Op0.getOperand(0); + MVT VT = Op0.getSimpleValueType(); + if (!VT.isVector() || VT.getVectorElementType() != MVT::i1) + return SDValue(); + + unsigned SizeInBits = VT.getSizeInBits(); + if (!(Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) && + !(Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64))) + return SDValue(); + + X86::CondCode X86CC; + if (isNullConstant(Op1)) { + X86CC = CC == ISD::SETEQ ? 
X86::COND_E : X86::COND_NE; + } else + return SDValue(); + + SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0); + return getSETCC(X86CC, KTEST, dl, DAG); +} + SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); @@ -18150,6 +18157,10 @@ return NewSetCC; } + // Try to lower using KTEST. + if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget)) + return NewSetCC; + // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. if ((isOneConstant(Op1) || isNullConstant(Op1)) && Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -2744,3 +2744,95 @@ %ret = bitcast <8 x i1> %m2 to i8 ret i8 %ret } + +; Make sure we don't emit a ktest for signed comparisons. +define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) { +; KNL-LABEL: ktest_signed: +; KNL: ## %bb.0: +; KNL-NEXT: pushq %rax +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: vporq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: testw %ax, %ax +; KNL-NEXT: jle LBB64_1 +; KNL-NEXT: ## %bb.2: ## %bb.2 +; KNL-NEXT: popq %rax +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq +; KNL-NEXT: LBB64_1: ## %bb.1 +; KNL-NEXT: vzeroupper +; KNL-NEXT: callq _foo +; KNL-NEXT: popq %rax +; KNL-NEXT: retq +; +; SKX-LABEL: ktest_signed: +; SKX: ## %bb.0: +; SKX-NEXT: pushq %rax +; SKX-NEXT: .cfi_def_cfa_offset 16 +; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 +; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; SKX-NEXT: kmovd %k0, %eax +; SKX-NEXT: testw %ax, %ax +; SKX-NEXT: jle LBB64_1 +; SKX-NEXT: ## %bb.2: ## %bb.2 +; SKX-NEXT: popq %rax +; SKX-NEXT: vzeroupper +; SKX-NEXT: retq +; SKX-NEXT: LBB64_1: ## %bb.1 +; SKX-NEXT: vzeroupper +; SKX-NEXT: callq _foo +; SKX-NEXT: popq %rax +; SKX-NEXT: retq +; +; AVX512BW-LABEL: ktest_signed: +; AVX512BW: ## 
%bb.0: +; AVX512BW-NEXT: pushq %rax +; AVX512BW-NEXT: .cfi_def_cfa_offset 16 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: testw %ax, %ax +; AVX512BW-NEXT: jle LBB64_1 +; AVX512BW-NEXT: ## %bb.2: ## %bb.2 +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; AVX512BW-NEXT: LBB64_1: ## %bb.1 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: callq _foo +; AVX512BW-NEXT: popq %rax +; AVX512BW-NEXT: retq +; +; AVX512DQ-LABEL: ktest_signed: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: pushq %rax +; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: kmovw %k0, %eax +; AVX512DQ-NEXT: testw %ax, %ax +; AVX512DQ-NEXT: jle LBB64_1 +; AVX512DQ-NEXT: ## %bb.2: ## %bb.2 +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq +; AVX512DQ-NEXT: LBB64_1: ## %bb.1 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: callq _foo +; AVX512DQ-NEXT: popq %rax +; AVX512DQ-NEXT: retq + %a = icmp eq <16 x i32> %x, zeroinitializer + %b = icmp eq <16 x i32> %y, zeroinitializer + %c = and <16 x i1> %a, %b + %d = bitcast <16 x i1> %c to i16 + %e = icmp sgt i16 %d, 0 + br i1 %e, label %bb.2, label %bb.1 +bb.1: + call void @foo() + br label %bb.2 +bb.2: + ret void +} +declare void @foo() + Index: test/CodeGen/X86/domain-reassignment.mir =================================================================== --- test/CodeGen/X86/domain-reassignment.mir +++ test/CodeGen/X86/domain-reassignment.mir @@ -263,14 +263,6 @@ %11 = VMOVAPDZrrk %2, killed %10, %1 VMOVAPDZmr %0, 1, $noreg, 0, $noreg, killed %11 - ; CHECK: KTESTBrr %18, %18, implicit-def $eflags - TEST8rr %18, %18, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 - - bb.1: - - bb.2: RET 0 ... 
@@ -365,14 +357,6 @@ %11 = VMOVAPSZrrk %2, killed %10, %1 VMOVAPSZmr %0, 1, $noreg, 0, $noreg, killed %11 - ; CHECK: KTESTWrr %17, %17, implicit-def $eflags - TEST16rr %17, %17, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 - - bb.1: - - bb.2: RET 0 ... @@ -456,14 +440,6 @@ %4 = VMOVDQU16Zrrk %2, killed %3, %1 VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4 - ; CHECK: KTESTDrr %13, %13, implicit-def $eflags - TEST32rr %13, %13, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 - - bb.1: - - bb.2: RET 0 ... @@ -547,14 +523,6 @@ %4 = VMOVDQU8Zrrk %2, killed %3, %1 VMOVDQA32Zmr %0, 1, $noreg, 0, $noreg, killed %4 - ; CHECK: KTESTQrr %13, %13, implicit-def $eflags - TEST64rr %13, %13, implicit-def $eflags - JE_1 %bb.1, implicit $eflags - JMP_1 %bb.2 - - bb.1: - - bb.2: RET 0 ...