Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -29013,6 +29013,21 @@ {Chain, LHS, RHS}, VT, MMO); } +// TESTM (AND a, b), (AND a, b) -> TESTM a, b. Returns SDValue() if no match. +static SDValue PerformTESTM(SDNode *N, SelectionDAG &DAG) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + if (Op0 != Op1 || Op1->getOpcode() != ISD::AND) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + return DAG.getNode(X86ISD::TESTM, DL, VT, + Op0->getOperand(0), Op0->getOperand(1)); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -29086,6 +29101,7 @@ case ISD::MGATHER: case ISD::MSCATTER: return combineGatherScatter(N, DAG); case X86ISD::LSUB: return combineLockSub(N, DAG, Subtarget); + case X86ISD::TESTM: return PerformTESTM(N, DAG); } return SDValue(); Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -3864,6 +3864,7 @@ multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + let isCommutable = 1 in defm rr : AVX512_maskable_cmp, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; + +def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>, + SDTCisSameNumEltsAs<0, 1>]>; + def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>; def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>; @@ -264,14 +269,9 @@ def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; -def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, 
- SDTCisVec<1>, SDTCisSameAs<2, 1>, - SDTCVecEltisVT<0, i1>, - SDTCisSameNumEltsAs<0, 1>]>>; -def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisVec<1>, SDTCisSameAs<2, 1>, - SDTCVecEltisVT<0, i1>, - SDTCisSameNumEltsAs<0, 1>]>>; +def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>; +def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>; + def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", Index: llvm/trunk/test/CodeGen/X86/combine-testm-and.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-testm-and.ll +++ llvm/trunk/test/CodeGen/X86/combine-testm-and.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +;RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s + +define i32 @combineTESTM_AND_1(<8 x i64> %a, <8 x i64> %b) { +; CHECK-LABEL: combineTESTM_AND_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %and.i = and <8 x i64> %b, %a + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 -1) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +define i32 @combineTESTM_AND_2(<8 x i64> %a, <8 x i64> %b , i8 %mask) { +; CHECK-LABEL: combineTESTM_AND_2: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %and.i = and <8 x i64> %b, %a + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +define i32 @combineTESTM_AND_mask_3(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) { +; CHECK-LABEL: combineTESTM_AND_mask_3: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: 
vptestmq (%rdi), %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %b = load <8 x i64>, <8 x i64>* %bptr + %and.i = and <8 x i64> %a, %b + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +define i32 @combineTESTM_AND_mask_4(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) { +; CHECK-LABEL: combineTESTM_AND_mask_4: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %b = load <8 x i64>, <8 x i64>* %bptr + %and.i = and <8 x i64> %b, %a + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)