Index: llvm/trunk/include/llvm/Target/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h +++ llvm/trunk/include/llvm/Target/TargetLowering.h @@ -437,6 +437,15 @@ return false; } + /// Use bitwise logic to make pairs of compares more efficient. For example: + /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 + /// This should be true when it takes more than one instruction to lower + /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on + /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. + virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { + return false; + } + /// Return the preferred operand type if the target has a quick way to compare /// integer values of the given size. Assume that any legal integer type can /// be compared efficiently. Targets may override this to allow illegal wide Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3255,6 +3255,21 @@ return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE); } + // Try more general transforms if the predicates match and the only user of + // the compares is the 'and' or 'or'. + if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 && + N0.hasOneUse() && N1.hasOneUse()) { + // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 + // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0 + if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) { + SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR); + SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR); + SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR); + SDValue Zero = DAG.getConstant(0, DL, OpVT); + return DAG.getSetCC(DL, VT, Or, Zero, CC1); + } + } + // Canonicalize equivalent operands to LL == RL. if (LL == RR && LR == RL) { CC1 = ISD::getSetCCSwappedOperands(CC1); Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h @@ -515,6 +515,10 @@ bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } + bool supportSwiftError() const override { return true; } Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -531,6 +531,10 @@ return true; } + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } + bool supportSplitCSR(MachineFunction *MF) const override { return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -814,6 +814,10 @@ bool hasAndNotCompare(SDValue Y) const override; + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } + /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. MVT hasFastEqualityCompare(unsigned NumBits) const override; Index: llvm/trunk/test/CodeGen/ARM/setcc-logic.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/setcc-logic.ll +++ llvm/trunk/test/CodeGen/ARM/setcc-logic.ll @@ -20,13 +20,11 @@ define zeroext i1 @and_eq(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: and_eq: ; CHECK: @ BB#0: -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movweq r2, #1 -; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: movweq r12, #1 -; CHECK-NEXT: and r0, r12, r2 +; CHECK-NEXT: eor r2, r2, r3 +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: orrs r0, r0, r2 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movweq r0, #1 ; CHECK-NEXT: bx lr %cmp1 = icmp eq i32 %a, %b %cmp2 = icmp eq i32 %c, %d @@ -37,13 +35,10 @@ define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK-LABEL: or_ne: ; CHECK: @ BB#0: -; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwne r2, #1 -; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: cmp r0, r1 -; CHECK-NEXT: movwne r12, #1 -; CHECK-NEXT: orr r0, r12, r2 +; CHECK-NEXT: eor r2, r2, r3 +; CHECK-NEXT: eor r0, r0, r1 +; CHECK-NEXT: orrs r0, r0, r2 +; CHECK-NEXT: movwne r0, #1 ; CHECK-NEXT: bx lr %cmp1 = icmp ne i32 %a, %b %cmp2 = icmp ne i32 %c, %d Index: llvm/trunk/test/CodeGen/PowerPC/setcc-logic.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/setcc-logic.ll +++ llvm/trunk/test/CodeGen/PowerPC/setcc-logic.ll @@ -433,11 +433,11 @@ define zeroext i1 @and_eq(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) { ; CHECK-LABEL: and_eq: ; CHECK: # BB#0: -; CHECK-NEXT: cmpw 0, 3, 4 -; CHECK-NEXT: cmpw 1, 5, 6 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: crnand 20, 2, 6 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 5, 5, 6 +; CHECK-NEXT: xor 3, 3, 4 +; CHECK-NEXT: or 3, 3, 5 +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 ; CHECK-NEXT: blr %cmp1 = icmp eq i16 %a, %b %cmp2 = icmp eq i16 %c, %d @@ -448,11 +448,12 @@ define zeroext i1 @or_ne(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: or_ne: ; CHECK: # BB#0: -; CHECK-NEXT: cmpw 0, 3, 4 -; CHECK-NEXT: cmpw 1, 5, 6 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: crand 20, 6, 2 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: xor 5, 5, 6 +; CHECK-NEXT: xor 3, 3, 4 +; CHECK-NEXT: or 3, 3, 5 +; CHECK-NEXT: cntlzw 3, 3 +; CHECK-NEXT: nor 3, 3, 3 +; CHECK-NEXT: rlwinm 3, 3, 27, 31, 31 ; CHECK-NEXT: blr %cmp1 = icmp ne i32 %a, %b %cmp2 = icmp ne i32 %c, %d Index: llvm/trunk/test/CodeGen/X86/avx512-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512-cmp.ll @@ -120,12 +120,12 @@ define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { ; ALL-LABEL: test8: ; ALL: ## BB#0: +; ALL-NEXT: notl %edi +; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000 ; ALL-NEXT: testl %edx, %edx ; ALL-NEXT: movl $1, %eax ; ALL-NEXT: cmovel %eax, %edx -; ALL-NEXT: cmpl $-2147483648, %esi ## imm = 0x80000000 -; ALL-NEXT: cmovnel %edx, %eax -; ALL-NEXT: cmpl $-1, %edi +; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: cmovnel %edx, %eax ; ALL-NEXT: retq %tmp1 = icmp eq i32 %a1, -1 Index: llvm/trunk/test/CodeGen/X86/setcc-logic.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/setcc-logic.ll +++ llvm/trunk/test/CodeGen/X86/setcc-logic.ll @@ -440,11 +440,10 @@ define zeroext i1 @and_eq(i8 %a, i8 %b, i8 %c, i8 %d) nounwind { ; CHECK-LABEL: and_eq: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: sete %sil -; CHECK-NEXT: cmpb %cl, %dl +; CHECK-NEXT: xorl %esi, %edi +; CHECK-NEXT: xorl %ecx, %edx +; CHECK-NEXT: orb %dl, %dil ; CHECK-NEXT: sete %al -; CHECK-NEXT: andb %sil, %al ; CHECK-NEXT: retq %cmp1 = icmp eq i8 %a, %b %cmp2 = icmp eq i8 %c, %d @@ -455,11 +454,10 @@ define zeroext i1 @or_ne(i8 %a, i8 %b, i8 %c, i8 %d) nounwind { ; CHECK-LABEL: or_ne: ; CHECK: # BB#0: -; CHECK-NEXT: cmpb %sil, %dil -; CHECK-NEXT: setne %sil -; CHECK-NEXT: cmpb %cl, %dl +; CHECK-NEXT: xorl %esi, %edi +; CHECK-NEXT: xorl %ecx, %edx +; CHECK-NEXT: orb %dl, %dil ; CHECK-NEXT: setne %al -; CHECK-NEXT: orb %sil, %al ; CHECK-NEXT: retq %cmp1 = icmp ne i8 %a, %b %cmp2 = icmp ne i8 %c, %d