Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -262,6 +262,12 @@ return PredictableSelectIsExpensive; } + /// Return true if conditional compares are only cheaper than branches if the + /// branch is unlikely to be predicted right. + bool isPredictableConditionalCompareExpensive() const { + return PredictableConditionalCompareIsExpensive; + } + /// If a branch or a select condition is skewed in one direction by more than /// this factor, it is very likely to be predicted correctly. virtual BranchProbability getPredictableBranchThreshold() const; @@ -2127,6 +2133,10 @@ /// the branch is usually predicted right. bool PredictableSelectIsExpensive; + /// Tells the code generator that conditional compare is more expensive than a + /// branch if the branch is usually predicted right. + bool PredictableConditionalCompareIsExpensive; + /// MaskAndBranchFoldingIsLegal - Indicates if the target supports folding /// a mask of a single bit, a compare, and a branch into a single instruction. bool MaskAndBranchFoldingIsLegal; Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -197,6 +197,7 @@ Instruction *&Inst, const SmallVectorImpl &Exts, unsigned CreatedInstCost); + bool swapConstantCmp(Function &F); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); void stripInvariantGroupMetadata(Instruction &I); @@ -260,6 +261,7 @@ // OptimizeCmpExpression, which perturbs the pattern being searched for. if (!DisableBranchOpts) { EverMadeChange |= sinkAndCmp(F); + EverMadeChange |= swapConstantCmp(F); EverMadeChange |= splitBranchCondition(F); } @@ -5453,6 +5455,77 @@ NewFalse = NewFalse / Scale; } +/// \brief If there is a sequence that branches based on two constant +/// comparisons like: +/// \code +/// %0 = icmp ne i32 %a, 0 +/// %1 = icmp eq i32 %b, 2 +/// %or.cond = and i1 %0, %1 +/// br i1 %or.cond, label %TrueBB, label %FalseBB +/// \endcode +/// Swap the order of comparisons based on their constant ranges like: +/// \code +/// %0 = icmp ne i32 %a, 0 +/// %1 = icmp eq i32 %b, 2 +/// %and.cond = and i1 %1, %0 +/// br i1 %and.cond, label %TrueBB, label %FalseBB +/// \endcode +/// This will shorten the branch decision by moving the most likely +/// condition to the right if the binary operation is Instruction::And +/// If the binary operation is Instruction::Or we move the less likely +/// condition to the right. +bool CodeGenPrepare::swapConstantCmp(Function &F) { + if (!TLI || !TLI->isPredictableConditionalCompareExpensive()) + return false; + + bool MadeChange = false; + for (auto &BB : F) { + // Does this BB end with the following? + // %cond1 = icmp pred, X, C1 + // %cond2 = icmp pred, Y, C2 + // %cond.or = or|and i1 %cond1, cond2 + // br i1 %cond.or label %dest1, label %dest2" + BinaryOperator *LogicOp; + BasicBlock *TBB, *FBB; + if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) + continue; + + unsigned Opc; + Value *Cond1, *Cond2; + if (match(LogicOp, + m_And(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2))))) + Opc = Instruction::And; + else if (match(LogicOp, + m_Or(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2))))) + Opc = Instruction::Or; + else + continue; + + ConstantInt *RHS1, *RHS2; + ICmpInst::Predicate Pred1, Pred2; + if (!match(Cond1, m_ICmp(Pred1, m_Value(), m_ConstantInt(RHS1))) || + !match(Cond2, m_ICmp(Pred2, m_Value(), m_ConstantInt(RHS2)))) + continue; + + ConstantRange CR1 = + ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); + ConstantRange CR2 = + ConstantRange::makeExactICmpRegion(Pred2, RHS2->getValue()); + uint32_t TrueWeight = CR1.getSetSize().ceilLogBase2() + CR2.getBitWidth(); + uint32_t FalseWeight = CR2.getSetSize().ceilLogBase2() + CR1.getBitWidth(); + // Keep the source order if weights are the same or profitable. + if (TrueWeight == FalseWeight) + continue; + if ((Opc == Instruction::And) != (TrueWeight > FalseWeight)) + continue; + + // Swap the operands. + LogicOp->swapOperands(); + MadeChange = true; + } + return MadeChange; +} + /// \brief Some targets prefer to split a conditional branch like: /// \code /// %0 = icmp ne i32 %a, 0 Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -810,6 +810,7 @@ FsqrtIsCheap = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; + PredictableConditionalCompareIsExpensive = false; MaskAndBranchFoldingIsLegal = false; EnableExtLdPromotion = false; HasFloatingPointExceptions = true; Index: lib/Target/AArch64/AArch64.td =================================================================== --- lib/Target/AArch64/AArch64.td +++ lib/Target/AArch64/AArch64.td @@ -76,6 +76,10 @@ "predictable-select-expensive", "PredictableSelectIsExpensive", "true", "Prefer likely predicted branches over selects">; +def FeaturePredictableConditionalCompareIsExpensive : SubtargetFeature< + "predictable-ccmp-expensive", "PredictableConditionalCompareIsExpensive", "true", + "Prefer likely predicted branches over conditional compares">; + def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", "CustomAsCheapAsMove", "true", "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">; @@ -212,7 +216,8 @@ FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, - FeaturePredictableSelectIsExpensive + FeaturePredictableSelectIsExpensive, + FeaturePredictableConditionalCompareIsExpensive ]>; def : ProcessorModel<"generic", NoSchedModel, [ Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -639,6 +639,8 @@ } PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive(); + PredictableConditionalCompareIsExpensive = + Subtarget->predictableConditionalCompareIsExpensive(); } void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) { Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -71,6 +71,7 @@ bool MergeNarrowLoads = false; bool UseAA = false; bool PredictableSelectIsExpensive = false; + bool PredictableConditionalCompareIsExpensive = false; bool BalanceFPOps = false; bool CustomAsCheapAsMove = false; bool UsePostRAScheduler = false; @@ -179,6 +180,9 @@ bool predictableSelectIsExpensive() const { return PredictableSelectIsExpensive; } + bool predictableConditionalCompareIsExpensive() const { + return PredictableConditionalCompareIsExpensive; + } bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; } Index: test/CodeGen/AArch64/aarch64-codegen-prepare-constant-cmp.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/aarch64-codegen-prepare-constant-cmp.ll @@ -0,0 +1,31 @@ +; RUN: opt -codegenprepare -mtriple=aarch64 -mcpu=kryo -S -o - %s | FileCheck %s +; CHECK-LABEL: @constant_cmp_and +define i32 @constant_cmp_and(i32 %a, i32 %b) { +entry: +; CHECK [[Cond1:%.*]] = icmp ne i32 %a, 0 +; CHECK-NEXT [[Cond2:%.*]] = icmp eq i32 %b, 2 +; CHECK-NEXT and i1 [[Cond2]], [[Cond1]] + %cmp2.i = icmp ne i32 %a, 0 + %cmp4.i = icmp eq i32 %b, 2 + %or.cond47 = and i1 %cmp2.i, %cmp4.i + br i1 %or.cond47, label %true, label %false +true: + ret i32 42 +false: + ret i32 0 +} +; CHECK-LABEL: @constant_cmp_or +define i32 @constant_cmp_or(i32 %a, i32 %b) { +entry: +; CHECK [[Cond1:%.*]] = icmp eq i32 %a, 2 +; CHECK-NEXT [[Cond2:%.*]] = icmp ne i32 %b, 0 +; CHECK-NEXT or i1 [[Cond2]], [[Cond1]] + %cmp2.i = icmp eq i32 %a, 2 + %cmp4.i = icmp ne i32 %b, 0 + %or.cond47 = or i1 %cmp2.i, %cmp4.i + br i1 %or.cond47, label %true, label %false +true: + ret i32 42 +false: + ret i32 0 +}