diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39817,6 +39817,70 @@
   return SDValue();
 }
 
+/// If we are inverting a PTEST/TESTP operand, attempt to adjust the CC
+/// to avoid the inversion.
+static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
+                              SelectionDAG &DAG) {
+  // TODO: Handle X86ISD::KTEST/X86ISD::KORTEST.
+  if (EFLAGS.getOpcode() != X86ISD::PTEST &&
+      EFLAGS.getOpcode() != X86ISD::TESTP)
+    return SDValue();
+
+  // PTEST/TESTP sets EFLAGS as:
+  // TESTZ: ZF = (Op0 & Op1) == 0
+  // TESTC: CF = (~Op0 & Op1) == 0
+  // TESTNZC: ZF == 0 && CF == 0
+  EVT VT = EFLAGS.getValueType();
+  SDValue Op0 = EFLAGS.getOperand(0);
+  SDValue Op1 = EFLAGS.getOperand(1);
+  EVT OpVT = Op0.getValueType();
+
+  // TEST*(~X,Y) == TEST*(X,Y)
+  if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
+    X86::CondCode InvCC;
+    switch (CC) {
+    case X86::COND_B:
+      // testc -> testz.
+      InvCC = X86::COND_E;
+      break;
+    case X86::COND_AE:
+      // !testc -> !testz.
+      InvCC = X86::COND_NE;
+      break;
+    case X86::COND_E:
+      // testz -> testc.
+      InvCC = X86::COND_B;
+      break;
+    case X86::COND_NE:
+      // !testz -> !testc.
+      InvCC = X86::COND_AE;
+      break;
+    case X86::COND_A:
+      // testnzc -> testnzc (no change).
+      InvCC = CC;
+      break;
+    default:
+      InvCC = X86::COND_INVALID;
+      break;
+    }
+
+    if (InvCC != X86::COND_INVALID) {
+      CC = InvCC;
+      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+                         DAG.getBitcast(OpVT, NotOp0), Op1);
+    }
+  }
+
+  // TODO: TEST*(X,~Y) == TEST*(Y,X)
+
+  // TESTZ(X,-1) == TESTZ(X,X)
+  if ((CC == X86::COND_E || CC == X86::COND_NE) &&
+      ISD::isBuildVectorAllOnes(Op1.getNode()))
+    return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+
+  return SDValue();
+}
+
 /// Optimize an EFLAGS definition used according to the condition code \p CC
 /// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
 /// uses of chain values.
@@ -39829,6 +39893,10 @@
   if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
     return R;
+
+  if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG))
+    return R;
+
   return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
 }
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -9,10 +9,8 @@
 ; CHECK-LABEL: ptestz_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
 ; CHECK-NEXT: retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %d)
@@ -25,11 +23,8 @@
 ; CHECK-LABEL: ptestz_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vptest %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
   %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
@@ -47,10 +42,8 @@
 ; CHECK-LABEL: ptestc_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
 ; CHECK-NEXT: retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
@@ -63,11 +56,8 @@
 ; CHECK-LABEL: ptestc_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vptest %ymm1, %ymm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
   %t1 = xor <4 x i64> %c, <i64 -1, i64 -1, i64 -1, i64 -1>
@@ -85,10 +75,8 @@
 ; CHECK-LABEL: ptestnzc_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
 ; CHECK-NEXT: retq
   %t1 = xor <2 x i64> %c, <i64 -1, i64 -1>
   %t2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %d)
@@ -101,9 +89,6 @@
 ; CHECK-LABEL: ptestnzc_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vptest %ymm1, %ymm0
 ; CHECK-NEXT: cmovbel %esi, %eax
 ; CHECK-NEXT: vzeroupper
@@ -120,10 +105,8 @@
 ; CHECK: # %bb.0: # %start
 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
 ; CHECK-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vptest %xmm1, %xmm0
-; CHECK-NEXT: setb %al
+; CHECK-NEXT: vptest %xmm0, %xmm0
+; CHECK-NEXT: sete %al
 ; CHECK-NEXT: retq
 start:
   %0 = load <16 x i8>, <16 x i8>* %x, align 16
diff --git a/llvm/test/CodeGen/X86/combine-testpd.ll b/llvm/test/CodeGen/X86/combine-testpd.ll
--- a/llvm/test/CodeGen/X86/combine-testpd.ll
+++ b/llvm/test/CodeGen/X86/combine-testpd.ll
@@ -9,10 +9,8 @@
 ; CHECK-LABEL: testpdz_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
 ; CHECK-NEXT: retq
   %t0 = bitcast <2 x double> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -27,11 +25,8 @@
 ; CHECK-LABEL: testpdz_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
   %t0 = bitcast <4 x double> %c to <4 x i64>
@@ -51,10 +46,8 @@
 ; CHECK-LABEL: testpdc_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
 ; CHECK-NEXT: retq
   %t0 = bitcast <2 x double> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -69,11 +62,8 @@
 ; CHECK-LABEL: testpdc_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
   %t0 = bitcast <4 x double> %c to <4 x i64>
@@ -93,8 +83,6 @@
 ; CHECK-LABEL: testpdnzc_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vtestpd %xmm1, %xmm0
 ; CHECK-NEXT: cmovbel %esi, %eax
 ; CHECK-NEXT: retq
@@ -111,9 +99,6 @@
 ; CHECK-LABEL: testpdnzc_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vtestpd %ymm1, %ymm0
 ; CHECK-NEXT: cmovbel %esi, %eax
 ; CHECK-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/combine-testps.ll b/llvm/test/CodeGen/X86/combine-testps.ll
--- a/llvm/test/CodeGen/X86/combine-testps.ll
+++ b/llvm/test/CodeGen/X86/combine-testps.ll
@@ -9,10 +9,8 @@
 ; CHECK-LABEL: testpsz_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vtestps %xmm1, %xmm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
 ; CHECK-NEXT: retq
   %t0 = bitcast <4 x float> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -27,11 +25,8 @@
 ; CHECK-LABEL: testpsz_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vtestps %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: cmovael %esi, %eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
   %t0 = bitcast <8 x float> %c to <4 x i64>
@@ -51,10 +46,8 @@
 ; CHECK-LABEL: testpsc_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vtestps %xmm1, %xmm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
 ; CHECK-NEXT: retq
   %t0 = bitcast <4 x float> %c to <2 x i64>
   %t1 = xor <2 x i64> %t0, <i64 -1, i64 -1>
@@ -69,11 +62,8 @@
 ; CHECK-LABEL: testpsc_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vtestps %ymm1, %ymm0
-; CHECK-NEXT: cmovael %esi, %eax
+; CHECK-NEXT: cmovnel %esi, %eax
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
   %t0 = bitcast <8 x float> %c to <4 x i64>
@@ -93,8 +83,6 @@
 ; CHECK-LABEL: testpsnzc_128_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm0
 ; CHECK-NEXT: vtestps %xmm1, %xmm0
 ; CHECK-NEXT: cmovbel %esi, %eax
 ; CHECK-NEXT: retq
@@ -111,9 +99,6 @@
 ; CHECK-LABEL: testpsnzc_256_invert:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
-; CHECK-NEXT: vxorps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vtestps %ymm1, %ymm0
 ; CHECK-NEXT: cmovbel %esi, %eax
 ; CHECK-NEXT: vzeroupper
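
Illustrative note, not part of the patch: a minimal source-level sketch of the pattern the new combinePTESTCC fold targets, written with SSE4.1 intrinsics; the function names below are hypothetical. Because PTEST sets ZF = ((Op0 & Op1) == 0) and CF = ((~Op0 & Op1) == 0), a testz of an inverted operand can be answered by testc on the original operand, which is why the test checks above flip the cmov/set condition instead of materializing an all-ones vector and an extra vpxor.

#include <smmintrin.h>

// Inverted-operand form: test (~c & d) == 0 by explicitly inverting c and
// reading the ZF (testz) result.
int testz_of_not(__m128i c, __m128i d) {
  __m128i all_ones = _mm_set1_epi32(-1);       // all-ones vector (vpcmpeqd style)
  __m128i not_c = _mm_xor_si128(c, all_ones);  // ~c
  return _mm_testz_si128(not_c, d);            // ZF = ((~c & d) == 0)
}

// Equivalent direct form: testc already computes CF = ((~c & d) == 0), so no
// explicit inversion is needed.
int testc_direct(__m128i c, __m128i d) {
  return _mm_testc_si128(c, d);
}

Both functions return the same value for all inputs; with this patch applied, the inverted form should lower to a single ptest plus a carry-flag check rather than the vpcmpeqd/vpxor sequence removed from the tests above.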