Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -166,6 +166,59 @@ BBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T); } +// Returns the single outgoing edge of the dominating predecessor block +// that leads to the PhiNode's incoming block: +static std::pair +GetPredOutEdge(BasicBlock *IncomingBB, BasicBlock *PhiBB) { + // If the incoming edge is conditional branch, we just select it. + BranchInst *PredBr = dyn_cast(IncomingBB->getTerminator()); + if (PredBr && PredBr->isConditional()) + return {IncomingBB, PhiBB}; + + // We identify the BBs post-dominated by IncomingBB. + // We do not use the existing DominatorTree analyses since the CFG may be + // already changed if we have multiple branches with branch hint. + SmallVector PostDominatedBBs; + PostDominatedBBs.push_back(IncomingBB); + unsigned Idx = 0; + while (Idx < PostDominatedBBs.size()) { + BasicBlock *CBB = PostDominatedBBs[Idx++]; + for (BasicBlock *PrevBB : predecessors(CBB)) { + if (is_contained(PostDominatedBBs, PrevBB)) + continue; + + // If all the successors are known to be post-dominated by IncomingBB, + // this block is also post-dominated. + if (all_of(successors(PrevBB), [&](BasicBlock *SBB) { + return is_contained(PostDominatedBBs, SBB); + })) + PostDominatedBBs.push_back(PrevBB); + } + } + + BasicBlock *PredBB = nullptr, *SuccBB = nullptr; + for (BasicBlock *CBB : PostDominatedBBs) { + for (BasicBlock *PrevBB : predecessors(CBB)) { + // If a previous block is not post-dominated by IncomingBB, + // this block is at the dominance frontier. + if (is_contained(PostDominatedBBs, PrevBB)) + continue; + + BranchInst *BI = dyn_cast(PrevBB->getTerminator()); + if (!BI || !BI->isConditional()) + continue; + + // We found a CFG edge that leads to the incoming block. + // If we already found another, we return nullptr. + if (PredBB) + return {nullptr, nullptr}; + PredBB = PrevBB; + SuccBB = CBB; + } + } + return {PredBB, SuccBB}; +} + // Update branch probability information according to conditional // branch probablity. This is usually made possible for cloned branches // in inline instances by the context specific profile in the caller. @@ -201,7 +254,20 @@ // In other words, if we know P(cond == true) is unlikely, we know // that P(t == true) is also unlikely. // -static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) { +static void +updatePredecessorProfileMetadata(Instruction *CondInst, BasicBlock *BB) { + PHINode *PN = dyn_cast(CondInst); + ICmpInst *CmpNode = nullptr; + if (!PN) { + // We allow phi + icmp with an immediate + br as well as phi + br. + CmpNode = dyn_cast(CondInst); + if (CmpNode && isa(CmpNode->getOperand(1))) + PN = dyn_cast(CmpNode->getOperand(0)); + + if (!PN) + return; + } + BranchInst *CondBr = dyn_cast(BB->getTerminator()); if (!CondBr) return; @@ -211,35 +277,47 @@ if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight)) return; - // Returns the outgoing edge of the dominating predecessor block - // that leads to the PhiNode's incoming block: - auto GetPredOutEdge = - [](BasicBlock *IncomingBB, - BasicBlock *PhiBB) -> std::pair { - auto *PredBB = IncomingBB; - auto *SuccBB = PhiBB; - while (true) { - BranchInst *PredBr = dyn_cast(PredBB->getTerminator()); - if (PredBr && PredBr->isConditional()) - return {PredBB, SuccBB}; - auto *SinglePredBB = PredBB->getSinglePredecessor(); - if (!SinglePredBB) - return {nullptr, nullptr}; - SuccBB = PredBB; - PredBB = SinglePredBB; - } - }; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *PhiOpnd = PN->getIncomingValue(i); ConstantInt *CI = dyn_cast(PhiOpnd); - if (!CI || !CI->getType()->isIntegerTy(1)) + if (!CI || !CI->getType()->isIntegerTy()) continue; - BP = (CI->isOne() ? BranchProbability::getBranchProbability( + bool IsTrueEdge = false; + if (!CmpNode) { + if (!CI->getType()->isIntegerTy(1)) + continue; + + IsTrueEdge = CI->isOne(); + } + else { + APInt Imm = (dyn_cast(CmpNode->getOperand(1)))->getValue(); + switch(CmpNode->getPredicate()) { + case ICmpInst::ICMP_EQ: + IsTrueEdge = (CI->getValue() == Imm); + break; + case ICmpInst::ICMP_NE: + IsTrueEdge = (CI->getValue() != Imm); + break; + case ICmpInst::ICMP_ULT: + IsTrueEdge = CI->getValue().ult(Imm); + break; + case ICmpInst::ICMP_SLT: + IsTrueEdge = CI->getValue().slt(Imm); + break; + case ICmpInst::ICMP_UGT: + IsTrueEdge = CI->getValue().ugt(Imm); + break; + case ICmpInst::ICMP_SGT: + IsTrueEdge = CI->getValue().sgt(Imm); + break; + default: llvm_unreachable("Unhandled icmp opcode!"); + } + } + BP = (IsTrueEdge ? BranchProbability::getBranchProbability( TrueWeight, TrueWeight + FalseWeight) - : BranchProbability::getBranchProbability( + : BranchProbability::getBranchProbability( FalseWeight, TrueWeight + FalseWeight)); auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB); @@ -1170,9 +1248,8 @@ return true; // Before threading, try to propagate profile data backwards: - if (PHINode *PN = dyn_cast(CondInst)) - if (PN->getParent() == BB && isa(BB->getTerminator())) - updatePredecessorProfileMetadata(PN, BB); + if (CondInst->getParent() == BB && isa(BB->getTerminator())) + updatePredecessorProfileMetadata(CondInst, BB); // Handle a variety of cases where we are branching on something derived from // a PHI node in the current block. If we can prove that any predecessors Index: test/Transforms/JumpThreading/branch-metadata.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/branch-metadata.ll @@ -0,0 +1,162 @@ +; RUN: opt < %s -jump-threading -S | FileCheck %s + +; This test examines how jump-threading handles branch weights metadata +; if the branch to be eliminated has the metadata. + +define i32 @func(i32 %flag) { +; CHECK-LABEL: @func +; CHECK: br i1 %tobool.i, label %if.hot, label %if.cold, !prof !0 +; CHECK-DAG: if.hot: +; CHECK-DAG: if.cold: +entry: + %tobool.i = icmp eq i32 %flag, 0 + br i1 %tobool.i, label %to.be.eliminated, label %if.then + +if.then: + br label %to.be.eliminated + +to.be.eliminated: + %retval.0.i = phi i32 [ 0, %if.then ], [ 1, %entry ] + %cmp = icmp eq i32 %retval.0.i, 0 + br i1 %cmp, label %if.cold, label %if.hot, !prof !1 + +if.cold: + call void @cold_func() + br label %return + +if.hot: + call void @hot_func() + br label %return + +return: + %retval.0 = phi i32 [ 0, %if.cold ], [ 1, %if.hot ] + ret i32 %retval.0 +} + +define i32 @func2(i32 %flag) { +; CHECK-LABEL: @func2 +; CHECK: br i1 %tobool.i, label %if.cold, label %if.hot, !prof !1 +; CHECK-DAG: if.hot: +; CHECK-DAG: if.cold: +entry: + %tobool.i = icmp eq i32 %flag, 0 + br i1 %tobool.i, label %to.be.eliminated, label %if.then + +if.then: + br label %to.be.eliminated + +to.be.eliminated: + %retval.0.i = phi i32 [ 0, %if.then ], [ 1, %entry ] + %cmp = icmp eq i32 %retval.0.i, 0 + br i1 %cmp, label %if.hot, label %if.cold, !prof !0 + +if.cold: + call void @cold_func() + br label %return + +if.hot: + call void @hot_func() + br label %return + +return: + %retval.0 = phi i32 [ 0, %if.cold ], [ 1, %if.hot ] + ret i32 %retval.0 +} + + +define signext i32 @func3(i32 signext %flag) { +; This is a test with more complicated CFG. +; The following is the equivalent C code. +; We want to set branch weights for "if (flag & 1)" and "if (flag & 2)". +; +; inline int bar(int flag) { +; if (flag & 1) { +; cold_func(); +; return 0; +; } +; if (flag & 2) { +; if (__builtin_expect(flag & 4, 0)) cold_func(); +; cold_func(); +; return 0; +; } +; if (__builtin_expect(flag & 8, 0)) cold_func(); +; return 1; +; } +; +; int func3(int flag) { +; if (__builtin_expect(0 == bar(flag), 0)) { +; cold_func(); +; return 0; +; } +; hot_func(); +; return 1; +; } + +; CHECK-LABEL: @func3 +; CHECK: br i1 %tobool.i, label %if.end.i, label %if.then.i, !prof !0 +; CHECK: if.then.i: +; CHECK: br i1 %tobool2.i, label %if.end8.i, label %if.then3.i, !prof !0 +; CHECK: if.then3.i: + +entry: + %and.i = and i32 %flag, 1 + %tobool.i = icmp eq i32 %and.i, 0 + br i1 %tobool.i, label %if.end.i, label %if.then.i + +if.then.i: + call void @cold_func() + br label %bar.exit + +if.end.i: + %and1.i = and i32 %flag, 2 + %tobool2.i = icmp eq i32 %and1.i, 0 + br i1 %tobool2.i, label %if.end8.i, label %if.then3.i + +if.then3.i: + %and4.i = and i32 %flag, 4 + %tobool5.i = icmp eq i32 %and4.i, 0 + br i1 %tobool5.i, label %if.end7.i, label %if.then6.i, !prof !0 + +if.then6.i: + call void @cold_func() + br label %if.end7.i + +if.end7.i: + call void @cold_func() + br label %bar.exit + +if.end8.i: + %and9.i = and i32 %flag, 8 + %tobool12.i = icmp eq i32 %and9.i, 0 + br i1 %tobool12.i, label %bar.exit, label %if.then13.i, !prof !0 + +if.then13.i: + call void @cold_func() + br label %bar.exit + +bar.exit: + %retval.0.i = phi i32 [ 0, %if.then.i ], [ 0, %if.end7.i ], [ 1, %if.end8.i ], [ 1, %if.then13.i ] + %cmp = icmp eq i32 %retval.0.i, 0 + br i1 %cmp, label %if.then, label %if.end, !prof !1 + +if.then: + call void @cold_func() + br label %return + +if.end: + call void @hot_func() + br label %return + +return: + %retval.0 = phi i32 [ 0, %if.then ], [ 1, %if.end ] + ret i32 %retval.0 +} + +; CHECK-DAG: !0 = !{!"branch_weights", i32 2146410443, i32 1073205} +; CHECK-DAG: !1 = !{!"branch_weights", i32 1073205, i32 2146410443} + + +declare void @hot_func() +declare void @cold_func() +!0 = !{!"branch_weights", i32 2000, i32 1} +!1 = !{!"branch_weights", i32 1, i32 2000}