Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -28998,6 +28998,21 @@
                            {Chain, LHS, RHS}, VT, MMO);
 }
 
+// TEST (AND a, b), (AND a, b) -> TEST a, b
+static SDValue PerformTESTM(SDNode *N, SelectionDAG &DAG) {
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+
+  if (Op0 != Op1 || Op1->getOpcode() != ISD::AND)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  return DAG.getNode(X86ISD::TESTM, DL, VT,
+                     Op0->getOperand(0), Op0->getOperand(1));
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -29071,6 +29086,7 @@
   case ISD::MGATHER:
   case ISD::MSCATTER:       return combineGatherScatter(N, DAG);
   case X86ISD::LSUB:        return combineLockSub(N, DAG, Subtarget);
+  case X86ISD::TESTM:       return PerformTESTM(N, DAG);
   }
 
   return SDValue();
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -3864,6 +3864,7 @@
 
 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          X86VectorVTInfo _> {
+  let isCommutable = 1 in
   defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
Index: lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- lib/Target/X86/X86InstrFragmentsSIMD.td
+++ lib/Target/X86/X86InstrFragmentsSIMD.td
 def X86testm  : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisVec<1>, SDTCisSameAs<2, 1>,
                                          SDTCVecEltisVT<0, i1>,
-                                         SDTCisSameNumEltsAs<0, 1>]>>;
+                                         SDTCisSameNumEltsAs<0, 1>]>, [SDNPCommutative]>;
 def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisVec<1>, SDTCisSameAs<2, 1>,
                                          SDTCVecEltisVT<0, i1>,
                                          SDTCisSameNumEltsAs<0, 1>]>>;
Index: test/CodeGen/X86/combine-testm-and.ll
===================================================================
--- test/CodeGen/X86/combine-testm-and.ll
+++ test/CodeGen/X86/combine-testm-and.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s
+
+define i32 @combineTESTM_AND_1(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: combineTESTM_AND_1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vptestmq %zmm0, %zmm1, %k0
+; CHECK-NEXT:    kmovb %k0, %eax
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    retq
+  %and.i = and <8 x i64> %b, %a
+  %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 -1)
+  %conv = zext i8 %test.i to i32
+  ret i32 %conv
+}
+
+define i32 @combineTESTM_AND_2(<8 x i64> %a, <8 x i64> %b , i8 %mask) {
+; CHECK-LABEL: combineTESTM_AND_2:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %edi, %k1
+; CHECK-NEXT:    vptestmq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT:    kmovb %k0, %eax
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    retq
+  %and.i = and <8 x i64> %b, %a
+  %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask)
+  %conv = zext i8 %test.i to i32
+  ret i32 %conv
+}
+
+define i32 @combineTESTM_AND_mask_3(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
+; CHECK-LABEL: combineTESTM_AND_mask_3:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vptestmq (%rdi), %zmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovb %k0, %eax
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    retq
+  %b = load <8 x i64>, <8 x i64>* %bptr
+  %and.i = and <8 x i64> %a, %b
+  %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask)
+  %conv = zext i8 %test.i to i32
+  ret i32 %conv
+}
+
+define i32 @combineTESTM_AND_mask_4(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
+; CHECK-LABEL: combineTESTM_AND_mask_4:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovb %esi, %k1
+; CHECK-NEXT:    vptestmq (%rdi), %zmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovb %k0, %eax
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    retq
+  %b = load <8 x i64>, <8 x i64>* %bptr
+  %and.i = and <8 x i64> %b, %a
+  %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask)
+  %conv = zext i8 %test.i to i32
+  ret i32 %conv
+}
+
+declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)