Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -197,6 +197,7 @@
                           Instruction *&Inst,
                           const SmallVectorImpl<Instruction *> &Exts,
                           unsigned CreatedInstCost);
+    bool swapConstantCmp(Function &F);
     bool splitBranchCondition(Function &F);
     bool simplifyOffsetableRelocate(Instruction &I);
     void stripInvariantGroupMetadata(Instruction &I);
@@ -260,6 +261,7 @@
   // OptimizeCmpExpression, which perturbs the pattern being searched for.
   if (!DisableBranchOpts) {
     EverMadeChange |= sinkAndCmp(F);
+    EverMadeChange |= swapConstantCmp(F);
     EverMadeChange |= splitBranchCondition(F);
   }
 
@@ -5453,6 +5455,83 @@
   NewFalse = NewFalse / Scale;
 }
 
+/// \brief If there is a sequence that branches based on two constant
+/// comparisons like:
+/// \code
+/// %0 = icmp ne i32 %a, 0
+/// %1 = icmp eq i32 %b, 2
+/// %or.cond = and i1 %0, %1
+/// br i1 %or.cond, label %TrueBB, label %FalseBB
+/// \endcode
+/// Swap the order of comparisons based on their constant ranges like:
+/// \code
+/// %0 = icmp ne i32 %a, 0
+/// %1 = icmp eq i32 %b, 2
+/// %or.cond.swap = and i1 %1, %0
+/// br i1 %or.cond.swap, label %TrueBB, label %FalseBB
+/// \endcode
+/// This will shorten the branch decision by moving the most likely
+/// condition to the right if the binary operation is Instruction::And.
+/// If the binary operation is Instruction::Or we move the less likely
+/// condition to the right.
+bool CodeGenPrepare::swapConstantCmp(Function &F) {
+  bool MadeChange = false;
+  for (auto &BB : F) {
+    // Does this BB end with the following?
+    //   %cond1 = icmp pred, X, C1
+    //   %cond2 = icmp pred, Y, C2
+    //   %cond.or = or|and i1 %cond1, %cond2
+    //   br i1 %cond.or, label %dest1, label %dest2
+    BinaryOperator *LogicOp;
+    BasicBlock *TBB, *FBB;
+    if (!match(BB.getTerminator(),
+               m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
+      continue;
+
+    unsigned Opc;
+    Value *Cond1, *Cond2;
+    if (match(LogicOp,
+              m_And(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
+      Opc = Instruction::And;
+    else if (match(LogicOp,
+                   m_Or(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
+      Opc = Instruction::Or;
+    else
+      continue;
+
+    ConstantInt *RHS1, *RHS2;
+    ICmpInst::Predicate Pred1, Pred2;
+    if (!match(Cond1, m_ICmp(Pred1, m_Value(), m_ConstantInt(RHS1))) ||
+        !match(Cond2, m_ICmp(Pred2, m_Value(), m_ConstantInt(RHS2))))
+      continue;
+
+    // Approximate the likelihood of each condition: an icmp against a constant
+    // is true for getSetSize() of the 2^BitWidth possible input values.
+    // Cross-adding the other range's bit width compares
+    // log2(size1) - bw1 with log2(size2) - bw2 without negative numbers.
+    ConstantRange CR1 =
+        ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());
+    ConstantRange CR2 =
+        ConstantRange::makeExactICmpRegion(Pred2, RHS2->getValue());
+    uint32_t TrueWeight = CR1.getSetSize().ceilLogBase2() + CR2.getBitWidth();
+    uint32_t FalseWeight = CR2.getSetSize().ceilLogBase2() + CR1.getBitWidth();
+    // Keep the source order if weights are the same or already profitable.
+    if (TrueWeight == FalseWeight)
+      continue;
+    if ((Opc == Instruction::And) != (TrueWeight > FalseWeight))
+      continue;
+
+    IRBuilder<> Builder(LogicOp->getNextNode());
+    Builder.SetCurrentDebugLocation(LogicOp->getDebugLoc());
+    auto *NewLogicOp = Builder.CreateBinOp((Instruction::BinaryOps)Opc, Cond2,
+                                           Cond1, LogicOp->getName() + ".swap");
+    LogicOp->replaceAllUsesWith(NewLogicOp);
+    LogicOp->eraseFromParent();
+    MadeChange = true;
+  }
+  return MadeChange;
+}
+
 /// \brief Some targets prefer to split a conditional branch like:
 /// \code
 /// %0 = icmp ne i32 %a, 0
Index: test/CodeGen/AArch64/aarch64-codegen-prepare-constant-cmp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-codegen-prepare-constant-cmp.ll
@@ -0,0 +1,31 @@
+; RUN: opt -codegenprepare -mtriple=aarch64 -S -o - %s | FileCheck %s
+; CHECK-LABEL: @constant_cmp_and
+define i32 @constant_cmp_and(i32 %a, i32 %b) {
+entry:
+; CHECK: [[Cond1:%.*]] = icmp ne i32 %a, 0
+; CHECK-NEXT: [[Cond2:%.*]] = icmp eq i32 %b, 2
+; CHECK-NEXT: and i1 [[Cond2]], [[Cond1]]
+  %cmp2.i = icmp ne i32 %a, 0
+  %cmp4.i = icmp eq i32 %b, 2
+  %or.cond47 = and i1 %cmp2.i, %cmp4.i
+  br i1 %or.cond47, label %true, label %false
+true:
+  ret i32 42
+false:
+  ret i32 0
+}
+; CHECK-LABEL: @constant_cmp_or
+define i32 @constant_cmp_or(i32 %a, i32 %b) {
+entry:
+; CHECK: [[Cond1:%.*]] = icmp eq i32 %a, 2
+; CHECK-NEXT: [[Cond2:%.*]] = icmp ne i32 %b, 0
+; CHECK-NEXT: or i1 [[Cond2]], [[Cond1]]
+  %cmp2.i = icmp eq i32 %a, 2
+  %cmp4.i = icmp ne i32 %b, 0
+  %or.cond47 = or i1 %cmp2.i, %cmp4.i
+  br i1 %or.cond47, label %true, label %false
+true:
+  ret i32 42
+false:
+  ret i32 0
+}
Index: test/CodeGen/AArch64/arm64-ccmp.ll
===================================================================
--- test/CodeGen/AArch64/arm64-ccmp.ll
+++ test/CodeGen/AArch64/arm64-ccmp.ll
@@ -183,7 +183,7 @@
 
 ; Convert a cbz in the head block.
 ; CHECK: cbz_head
-; CHECK: cmp w0, #0
+; CHECK: cmp w1, #17
 ; CHECK: ccmp
 define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
 entry:
@@ -222,9 +222,9 @@
 
 ; Convert a cbz in the second block.
 ; CHECK: cbz_second
-; CHECK: cmp w0, #0
-; CHECK: ccmp w1, #0, #0, ne
-; CHECK: b.eq
+; CHECK: cmp w1, #0
+; CHECK: ccmp w0, #0, #4, eq
+; CHECK: b.ne
 define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
 entry:
   %cmp = icmp eq i32 %a, 0
Index: test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
===================================================================
--- test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
+++ test/CodeGen/ARM/arm-shrink-wrapping-linux.ll
@@ -52,8 +52,8 @@
 ; Use the back edge to check we get the label of the loop right.
 ; This is to make sure we check the right loop pattern.
 ; CHECK: @ %while.body24.land.rhs14_crit_edge
-; CHECK: cmp r{{[0-9]+}}, #192
-; CHECK-NEXT bhs [[LOOP_HEADER]]
+; CHECK: cmp lr, #191
+; CHECK-NEXT: bhi [[LOOP_HEADER]]
 define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) {
 entry:
   %cmp = icmp sgt i32 %off, -1
Index: test/CodeGen/ARM/ifcvt2.ll
===================================================================
--- test/CodeGen/ARM/ifcvt2.ll
+++ test/CodeGen/ARM/ifcvt2.ll
@@ -20,11 +20,11 @@
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-LABEL: t2:
-; CHECK: bxgt lr
+; CHECK: bxlt lr
 ; CHECK: cmp
-; CHECK: addge
-; CHECK: subge
-; CHECK-NOT: bxge lr
+; CHECK: addle
+; CHECK: suble
+; CHECK-NOT: bxle lr
 ; CHECK: bx lr
 	%tmp2 = icmp sgt i32 %c, 10
 	%tmp5 = icmp slt i32 %d, 4
Index: test/CodeGen/ARM/ifcvt6.ll
===================================================================
--- test/CodeGen/ARM/ifcvt6.ll
+++ test/CodeGen/ARM/ifcvt6.ll
@@ -2,8 +2,8 @@
 
 define void @foo(i32 %X, i32 %Y) {
 entry:
-; CHECK: cmpne
-; CHECK: bxhi lr
+; CHECK: cmp
+; CHECK: bxne lr
 	%tmp1 = icmp ult i32 %X, 4		; <i1> [#uses=1]
 	%tmp4 = icmp eq i32 %Y, 0		; <i1> [#uses=1]
 	%tmp7 = or i1 %tmp4, %tmp1		; <i1> [#uses=1]
Index: test/CodeGen/Thumb2/thumb2-ifcvt2.ll
===================================================================
--- test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -6,9 +6,7 @@
 entry:
 ; CHECK-LABEL: foo:
 ; CHECK: it ne
-; CHECK: cmpne
-; CHECK: it hi
-; CHECK: bxhi lr
+; CHECK: bxne lr
 	%tmp1 = icmp ult i32 %X, 4		; <i1> [#uses=1]
 	%tmp4 = icmp eq i32 %Y, 0		; <i1> [#uses=1]
 	%tmp7 = or i1 %tmp4, %tmp1		; <i1> [#uses=1]