Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -305,6 +305,21 @@
     return MaskAndBranchFoldingIsLegal;
   }
 
+  /// Return true if the target should transform:
+  ///   (X & Y) == Y
+  /// into:
+  ///   (~X & Y) == 0
+  ///
+  /// If all bits of X that are masked by Y are set, then all bits in the
+  /// bitwise-not of X that are masked by Y are not set.
+  ///
+  /// This may be profitable if the target has a bitwise and-not operation
+  /// that sets comparison flags. A target may want to limit the
+  /// transformation based on the type of Y or on whether Y is a constant.
+  virtual bool useAndNotCompare(const DataLayout &DL, Value *Y) const {
+    return false;
+  }
+
   /// \brief Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).
Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -189,6 +189,7 @@
     bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
     bool optimizeSwitchInst(SwitchInst *CI);
     bool optimizeExtractElementInst(Instruction *Inst);
+    bool optimizeICmpInst(ICmpInst *ICmp);
    bool dupRetToEnableTailCallOpts(BasicBlock *BB);
    bool placeDbgValues(Function &F);
    bool sinkAndCmp(Function &F);
@@ -5161,6 +5162,45 @@
   return false;
 }
 
+bool CodeGenPrepare::optimizeICmpInst(ICmpInst *ICmp) {
+  if (!TLI || !DL || !ICmp->isEquality())
+    return false;
+
+  // Match this pattern in any of its permutations:
+  //   (X & Y) == Y
+  Value *Op0 = ICmp->getOperand(0), *Op1 = ICmp->getOperand(1);
+  Value *X = nullptr, *Y = nullptr;
+  ICmpInst::Predicate P;
+  if (!match(ICmp, m_ICmp(P, m_And(m_Value(X), m_Specific(Op1)), m_Value(Y))) &&
+      !match(ICmp, m_ICmp(P, m_And(m_Specific(Op1), m_Value(X)), m_Value(Y))) &&
+      !match(ICmp, m_ICmp(P, m_Value(Y), m_And(m_Value(X), m_Specific(Op0)))) &&
+      !match(ICmp, m_ICmp(P, m_Value(Y), m_And(m_Specific(Op0), m_Value(X)))))
+    return false;
+
+  // Bail out if the 'and' has a use besides the compare or if the compare
+  // operand that we want to turn into a zero is already a zero (otherwise,
+  // infinite loop). Finally, make sure that the target wants to do this.
+  int AndOpIndex = ICmp->getOperand(0) == Y ? 1 : 0;
+  auto *And = cast<BinaryOperator>(ICmp->getOperand(AndOpIndex));
+  auto *YConst = dyn_cast<Constant>(Y);
+  if (!And->hasOneUse() || (YConst && YConst->isNullValue()) ||
+      !TLI->useAndNotCompare(*DL, Y))
+    return false;
+
+  // Turn the original code into: (~X & Y) == 0.
+  //   %And = and i32 %X, %Y
+  //   %ICmp = icmp eq i32 %And, %Y
+  //   -->
+  //   %NotX = xor i32 %X, -1
+  //   %And = and i32 %NotX, %Y
+  //   %ICmp = icmp eq i32 %And, 0
+
+  BinaryOperator *NotX = BinaryOperator::CreateNot(X, "not", And);
+  And->replaceUsesOfWith(X, NotX);
+  ICmp->replaceUsesOfWith(Y, Constant::getNullValue(And->getType()));
+  return true;
+}
+
 bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
@@ -5209,9 +5249,16 @@
     return false;
   }
 
-  if (CmpInst *CI = dyn_cast<CmpInst>(I))
+  if (CmpInst *CI = dyn_cast<CmpInst>(I)) {
+    bool Modified = false;
+    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(I))
+      Modified = optimizeICmpInst(ICmp);
+
     if (!TLI || !TLI->hasMultipleConditionRegisters())
-      return OptimizeCmpExpression(CI, TLI);
+      Modified |= OptimizeCmpExpression(CI, TLI);
+
+    return Modified;
+  }
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     stripInvariantGroupMetadata(*LI);
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -442,6 +442,8 @@
       return true;
     }
 
+    bool useAndNotCompare(const DataLayout &DL, Value *V) const override;
+
     bool supportSplitCSR(MachineFunction *MF) const override {
       return
       MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -9382,6 +9383,18 @@
 //  Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
+bool PPCTargetLowering::useAndNotCompare(const DataLayout &DL, Value *V) const {
+  // Only scalar integer operations will benefit.
+  if (getValueType(DL, V->getType()).isVector())
+    return false;
+
+  // A power-of-2 operand can be lowered using 'rlwinm', so don't use 'andc'.
+  if (isKnownToBeAPowerOfTwo(V, DL, true))
+    return false;
+
+  return true;
+}
+
 static std::string getRecipOp(const char *Base, EVT VT) {
   std::string RecipOp(Base);
   if (VT.getScalarType() == MVT::f64)
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -755,6 +755,8 @@
     bool isCheapToSpeculateCtlz() const override;
 
+    bool useAndNotCompare(const DataLayout &DL, Value *V) const override;
+
     /// Return the value type to use for ISD::SETCC.
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -4275,6 +4276,22 @@
   return Subtarget.hasLZCNT();
 }
 
+bool X86TargetLowering::useAndNotCompare(const DataLayout &DL, Value *V) const {
+  if (!Subtarget.hasBMI())
+    return false;
+
+  // There are only 32-bit and 64-bit forms for 'andn'.
+  EVT VT = getValueType(DL, V->getType());
+  if (!(VT == MVT::i32 || VT == MVT::i64))
+    return false;
+
+  // A power-of-2 operand can be lowered using 'bt', so don't use 'andn'.
+  if (isKnownToBeAPowerOfTwo(V, DL, true))
+    return false;
+
+  return true;
+}
+
 /// Return true if every element in Mask, beginning
 /// from position Pos and ending in Pos+Size is undef.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
Index: test/CodeGen/PowerPC/andc.ll
===================================================================
--- test/CodeGen/PowerPC/andc.ll
+++ test/CodeGen/PowerPC/andc.ll
@@ -1,16 +1,11 @@
 ; RUN: llc < %s -mtriple=powerpc64-apple-darwin | FileCheck %s
 
-; TODO: These could use 'andc'.
-
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK:       ; BB#0:
-; CHECK-NEXT:    and r2, r3, r4
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    cmpw cr0, r2, r4
-; CHECK-NEXT:    bclr 12, 2, 0
-; CHECK-NEXT:  ; BB#1:
-; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    andc r2, r4, r3
+; CHECK-NEXT:    cntlzw r2, r2
+; CHECK-NEXT:    rlwinm r3, r2, 27, 31, 31
 ; CHECK-NEXT:    blr
 ;
   %and = and i32 %x, %y
@@ -21,12 +16,10 @@
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       ; BB#0:
-; CHECK-NEXT:    andi. r2, r3, 43
-; CHECK-NEXT:    li r3, 1
-; CHECK-NEXT:    cmpwi r2, 43
-; CHECK-NEXT:    bclr 12, 2, 0
-; CHECK-NEXT:  ; BB#1:
-; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    li r2, 43
+; CHECK-NEXT:    andc r2, r2, r3
+; CHECK-NEXT:    cntlzw r2, r2
+; CHECK-NEXT:    rlwinm r3, r2, 27, 31, 31
 ; CHECK-NEXT:    blr
 ;
   %and = and i32 %x, 43
@@ -34,3 +27,16 @@
   ret i1 %cmp
 }
 
+; Don't use an 'andc' if we're masking off a single bit.
+
+define i1 @and_cmp_const_power_of_2(i32 %x) {
+; CHECK-LABEL: and_cmp_const_power_of_2:
+; CHECK:       ; BB#0:
+; CHECK-NEXT:    rlwinm r3, r3, 27, 31, 31
+; CHECK-NEXT:    blr
+;
+  %and = and i32 %x, 32
+  %cmp = icmp eq i32 %and, 32
+  ret i1 %cmp
+}
+
Index: test/CodeGen/X86/bmi.ll
===================================================================
--- test/CodeGen/X86/bmi.ll
+++ test/CodeGen/X86/bmi.ll
@@ -150,12 +150,11 @@
   ret i1 %cmp
 }
 
-; TODO: Recognize a disguised andn in the following 4 tests.
+; Recognize a disguised andn in the following 4 tests.
 define i1 @and_cmp1(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp1:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 ;
@@ -167,8 +166,7 @@
 define i1 @and_cmp2(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp2:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
 ;
@@ -180,8 +178,7 @@
 define i1 @and_cmp3(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp3:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 ;
@@ -193,8 +190,7 @@
 define i1 @and_cmp4(i32 %x, i32 %y) {
 ; CHECK-LABEL: and_cmp4:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl %esi, %edi
-; CHECK-NEXT:    cmpl %edi, %esi
+; CHECK-NEXT:    andnl %esi, %edi, %eax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
 ;
@@ -208,8 +204,8 @@
 define i1 @and_cmp_const(i32 %x) {
 ; CHECK-LABEL: and_cmp_const:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andl $43, %edi
-; CHECK-NEXT:    cmpl $43, %edi
+; CHECK-NEXT:    movl $43, %eax
+; CHECK-NEXT:    andnl %eax, %edi, %eax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 ;
@@ -218,6 +214,38 @@
   ret i1 %cmp
 }
 
+; But don't use 'andn' if the mask is a power-of-two.
+define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
+; CHECK-LABEL: and_cmp_const_power_of_two:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    btl %esi, %edi
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    retq
+;
+  %shl = shl i32 1, %y
+  %and = and i32 %x, %shl
+  %cmp = icmp ne i32 %and, %shl
+  ret i1 %cmp
+}
+
+; Don't transform to 'andn' if there's another use of the 'and'.
+define i32 @and_cmp_not_one_use(i32 %x) {
+; CHECK-LABEL: and_cmp_not_one_use:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl $37, %edi
+; CHECK-NEXT:    cmpl $37, %edi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    retq
+;
+  %and = and i32 %x, 37
+  %cmp = icmp eq i32 %and, 37
+  %ext = zext i1 %cmp to i32
+  %add = add i32 %and, %ext
+  ret i32 %add
+}
+
 ; Don't choose a 'test' if an 'andn' can be used.
 define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
 ; CHECK-LABEL: andn_cmp_swap_ops:
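
A note on the transform itself (not part of the patch): here is the IR-level
rewrite that optimizeICmpInst performs, written out as a standalone sketch.
The value names follow the comment in the patch; the 'not' name comes from
the BinaryOperator::CreateNot call.

  ; Before: (x & y) == y
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y

  ; After: (~x & y) == 0
  %not = xor i32 %x, -1
  %and = and i32 %not, %y
  %cmp = icmp eq i32 %and, 0

The two forms are equivalent because (x & y) == y holds exactly when every
set bit of y is also set in x, which is the same as ~x and y sharing no set
bits. For example, with x = 0b1101 and y = 0b0101: x & y = 0b0101 == y, and
~x & y = 0b0010 & 0b0101 = 0.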