Index: include/llvm/Transforms/Scalar/JumpThreading.h =================================================================== --- include/llvm/Transforms/Scalar/JumpThreading.h +++ include/llvm/Transforms/Scalar/JumpThreading.h @@ -108,6 +108,8 @@ } void FindLoopHeaders(Function &F); + bool processBBWithCommonDomCondition(Function &F, + SmallPtrSetImpl &Unreachable); bool ProcessBlock(BasicBlock *BB); bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl &PredBBs, BasicBlock *SuccBB); Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -370,7 +371,12 @@ FindLoopHeaders(F); - bool EverChanged = false; + bool EverChanged = processBBWithCommonDomCondition(F, Unreachable); + DTU->getDomTree(); + for (auto &BB : F) + if (!Unreachable.count(&BB) && !DT.isReachableFromEntry(&BB)) + Unreachable.insert(&BB); + bool Changed; do { Changed = false; @@ -548,6 +554,202 @@ LoopHeaders.insert(Edge.second); } +/// collectReachableBBSet - Create a set of reachable basic blocks from BB in +/// the reverse direction of the control flow as TouchBBSet. +/// When reaching BBa, search is stopped there. +static void collectReachableBBSet(BasicBlock *BB, BasicBlock *BBa, + DenseSet& TouchBBSet) { + if (TouchBBSet.count(BB)) + return; + TouchBBSet.insert(BB); + for (BasicBlock *Pred : predecessors(BB)) { + if (Pred != BBa) { + collectReachableBBSet(Pred, BBa, TouchBBSet); + } + } +} + +/// findPredSourceEdge - Classify the predecessor Pred of BB into the following +/// categories: +enum PredOrigin : unsigned { + PredFromSuccBB0, // from SuccBB0 + PredFromSuccBB1, // from SuccBB1 + PredFromBothSuccBB0AndSuccBB1, // from both SuccBB0 and SuccBB1 + PredUnreachable, // unreachable +}; +/// BBa is the basic block which dominates BB. +/// Both SuccBB0 and SuccBB1 are the successors of BBa. +static enum PredOrigin findPredSourceEdge(BasicBlock *BBa, BasicBlock *BB, + BasicBlock *Pred, BasicBlock *SuccBB0, + BasicBlock *SuccBB1) { + if (Pred == BBa) { + if (BB == SuccBB0) + return PredFromSuccBB0; + else if (BB == SuccBB1) + return PredFromSuccBB1; + else + llvm_unreachable("This cannot happen!"); + } + DenseSet TouchBBSet; + collectReachableBBSet(Pred, BBa, TouchBBSet); + bool ReachSuccBB0 = TouchBBSet.count(SuccBB0); + bool ReachSuccBB1 = TouchBBSet.count(SuccBB1); + if (ReachSuccBB0 && ReachSuccBB1) + return PredFromBothSuccBB0AndSuccBB1; + else if (ReachSuccBB0) + return PredFromSuccBB0; + else if (ReachSuccBB1) + return PredFromSuccBB1; + else + return PredUnreachable; +} + +/// findBBWithCommonDomCondition - Detect BBa that dominates BB and has the same +/// condition code C as BB. BBList is sorted in descending order of levels on +/// the dominating tree with the condition code C. The position of BB in BBList +/// is StartIdx - 1. +/// If there was BBa, then return it and also the set of predecessor of BB is +/// divided into Preds0 and Preds1. +static BasicBlock* +findBBWithCommonDomCondition(DominatorTree &DT, + SmallVectorImpl &BBList, + BasicBlock *BB, unsigned StartIdx, + SmallVectorImpl& Preds0, + SmallVectorImpl& Preds1) { + size_t Size = BBList.size(); + for (unsigned Idx = StartIdx; Idx < Size; Idx++) { + BasicBlock *BBa = BBList[Idx]; + if (DT.dominates(BBa, BB)) { + BranchInst *CondBr = dyn_cast(BBa->getTerminator()); + BasicBlock *SuccBB0 = CondBr->getSuccessor(0); + BasicBlock *SuccBB1 = CondBr->getSuccessor(1); + if (SuccBB0 != SuccBB1 && BBa != SuccBB0 && BBa != SuccBB1) { + SmallVector Preds(predecessors(BB)); + SmallVector PredsBoth; + for (auto Pred : Preds) { + unsigned Src = PredFromBothSuccBB0AndSuccBB1; + if (Pred == BBa) { + if (BB == SuccBB0) + Src = PredFromSuccBB0; + else if (BB == SuccBB1) + Src = PredFromSuccBB1; + else + llvm_unreachable("This cannot happen!"); + } else if (Pred == BB) + Src = PredFromBothSuccBB0AndSuccBB1; + else + Src = findPredSourceEdge(BBa, BB, Pred, SuccBB0, SuccBB1); + switch (Src) { + case PredFromSuccBB0: + Preds0.push_back(Pred); + break; + case PredFromSuccBB1: + Preds1.push_back(Pred); + break; + case PredFromBothSuccBB0AndSuccBB1: + PredsBoth.push_back(Pred); + break; + case PredUnreachable: + break; + default: + llvm_unreachable("This cannot happen!"); + } + } + if (PredsBoth.empty()) { + return BBa; + } else { + return nullptr; + } + } + } + } + return nullptr; +} + +/// conditionalToUnconditional - Change BBd to the basic block of unconditional +/// branching to ToKeep. +static void conditionalToUnconditional(DomTreeUpdater *DTU, BasicBlock *BBd, + unsigned ToKeep, unsigned ToRemove) { + BranchInst *CondBr = dyn_cast(BBd->getTerminator()); + BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove); + BasicBlock *ToKeepSucc = CondBr->getSuccessor(ToKeep); + ToRemoveSucc->removePredecessor(BBd); + BranchInst *NewBI = BranchInst::Create(ToKeepSucc, CondBr); + NewBI->setDebugLoc(ToKeepSucc->getTerminator()->getDebugLoc()); + CondBr->eraseFromParent(); + DTU->deleteEdgeRelaxed(BBd, ToRemoveSucc); +} + +/// processBBWithCommonDomCondition - Convert conditional branches into +/// unconditional branches using common dominator conditions. +bool JumpThreadingPass::processBBWithCommonDomCondition( + Function &F, SmallPtrSetImpl &Unreachable) { + DominatorTree &DT = DTU->getDomTree(); + MapVector> CtoBBList; + for (auto &BB : F) { + if (Unreachable.count(&BB)) + continue; + Instruction *Terminator = BB.getTerminator(); + if (BranchInst *BI = dyn_cast(Terminator)) { + if (BI->isConditional()) { + Value *Condition = BI->getCondition(); + CtoBBList[Condition].push_back(&BB); + } + } + } + + unsigned UB = 0; + unsigned DUP = 0; + bool Changed = false; + for (auto &Pair : CtoBBList) { + SmallVectorImpl &BBList = Pair.second; + if (BBList.size() > 1) { + std::stable_sort(BBList.begin(), BBList.end(), + [&DT](BasicBlock* X, BasicBlock* Y) { + return DT.getNode(X)->getLevel() > DT.getNode(Y)->getLevel(); + }); + unsigned Idx = 0; + for (auto *BB : BBList) { + SmallVector Preds0; + SmallVector Preds1; + if (findBBWithCommonDomCondition(DT, BBList, BB, Idx + 1, + Preds0, Preds1)) { + if (Preds0.size() == 0 && Preds1.size() == 0) + ; + else if (Preds0.size() == 0) { + conditionalToUnconditional(DTU, BB, 1, 0); + UB++; + Changed = true; + } else if (Preds1.size() == 0) { + conditionalToUnconditional(DTU, BB, 0, 1); + UB++; + Changed = true; + } else if (!LoopHeaders.count(BB)) { + BranchInst *CondBr = dyn_cast(BB->getTerminator()); + BasicBlock *SuccBB0 = CondBr->getSuccessor(0); + BasicBlock *SuccBB1 = CondBr->getSuccessor(1); + if (SuccBB0 != SuccBB1 && SuccBB0 != BB && SuccBB1 != BB) { + if (ThreadEdge(BB, Preds1, SuccBB1)) { + SuccBB1->removePredecessor(BB); + BranchInst *NewBI = BranchInst::Create(SuccBB0, CondBr); + NewBI->setDebugLoc(SuccBB0->getTerminator()->getDebugLoc()); + CondBr->eraseFromParent(); + DTU->deleteEdgeRelaxed(BB, SuccBB1); + DUP++; + Changed = true; + } + } + } + } + Idx++; + } + } + } + LLVM_DEBUG(dbgs() << "JT: Processing BB with common dominator condition: #UB=" + << UB << " #DUP=" << DUP << "\n"); + return Changed; +} + /// getKnownConstant - Helper method to determine if we can thread over a /// terminator with the given value as its condition, and if so what value to /// use for that. What kind of value this is depends on whether we want an Index: test/Transforms/JumpThreading/gvn-dom-cb.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/gvn-dom-cb.ll @@ -0,0 +1,222 @@ +; RUN: opt -jump-threading -S < %s | FileCheck %s + +declare void @show1() +declare void @show2() +declare void @show3() +declare void @show4() +declare void @show5() +declare void @show6() +declare void @show7() + +@E = external global i32, align 4 + +define void @check1(i32 %x, i32 %y) { +; CHECK-LABEL: @check1( +; CHECK: br i1 %cmp, +; CHECK-NOT: br i1 %cmp, +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %do.body + +do.body: ; preds = %do.body, %if.then + %i.0 = phi i32 [ 0, %if.then ], [ %inc, %do.body ] + call void () @show1() + %inc = add nsw i32 %i.0, 1 + %0 = load i32, i32* @E, align 4 + %cmp1 = icmp slt i32 %inc, %0 + br i1 %cmp1, label %do.body, label %do.end + +do.end: ; preds = %do.body + br label %if.end + +if.else: ; preds = %entry + call void () @show2() + br label %if.end + +if.end: ; preds = %if.else, %do.end + call void () @show3() + br i1 %cmp, label %if.then3, label %if.else4 + +if.then3: ; preds = %if.end + call void () @show4() + br label %if.end5 + +if.else4: ; preds = %if.end + call void () @show5() + br label %if.end5 + +if.end5: ; preds = %if.else4, %if.then3 + ret void +} + +define void @check2(i32 %x, i32 %y) { +; CHECK-LABEL: @check2( +; CHECK: br i1 %cmp, +; CHECK: br i1 %cmp, +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %do.body + +do.body: ; preds = %do.body, %if.then + %i.0 = phi i32 [ 0, %if.then ], [ %inc, %do.body ] + call void () @show1() + %inc = add nsw i32 %i.0, 1 + %0 = load i32, i32* @E, align 4 + %cmp1 = icmp slt i32 %inc, %0 + br i1 %cmp1, label %do.body, label %do.end + +do.end: ; preds = %do.body + br label %if.end + +if.else: ; preds = %entry + call void () @show2() + br label %if.end + +if.end: ; preds = %if.else, %do.end + call void () @show3() noduplicate + br i1 %cmp, label %if.then3, label %if.else4 + +if.then3: ; preds = %if.end + call void () @show4() + br label %if.end5 + +if.else4: ; preds = %if.end + call void () @show5() + br label %if.end5 + +if.end5: ; preds = %if.else4, %if.then3 + ret void +} + +define void @check3(i32 %x, i32 %y) { +; CHECK-LABEL: @check3( +; CHECK: br i1 %cmp, +; CHECK-NOT: br i1 %cmp, +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %do.body + +do.body: ; preds = %do.body, %if.then + %i.0 = phi i32 [ 0, %if.then ], [ %inc, %do.body ] + call void () @show1() + %inc = add nsw i32 %i.0, 1 + %0 = load i32, i32* @E, align 4 + %cmp1 = icmp slt i32 %inc, %0 + br i1 %cmp1, label %do.body, label %do.end + +do.end: ; preds = %do.body + br label %if.end + +if.else: ; preds = %entry + call void () @show2() + br label %if.end + +if.end: ; preds = %if.else, %do.end + call void () @show3() + br i1 %cmp, label %if.then3, label %if.else10 + +if.then3: ; preds = %if.end + br label %do.body5 + +do.body5: ; preds = %do.body5, %if.then3 + %i4.0 = phi i32 [ 0, %if.then3 ], [ %inc6, %do.body5 ] + call void () @show4() + %inc6 = add nsw i32 %i4.0, 1 + %1 = load i32, i32* @E, align 4 + %cmp8 = icmp slt i32 %inc6, %1 + br i1 %cmp8, label %do.body5, label %do.end9 + +do.end9: ; preds = %do.body5 + br label %if.end11 + +if.else10: ; preds = %if.end + call void () @show5() + br label %if.end11 + +if.end11: ; preds = %if.else10, %do.end9 + br i1 %cmp, label %if.then13, label %if.else14 + +if.then13: ; preds = %if.end11 + call void () @show6() + br label %if.end15 + +if.else14: ; preds = %if.end11 + call void () @show7() + br label %if.end15 + +if.end15: ; preds = %if.else14, %if.then13 + ret void +} + +define void @check4(i32 %x, i32 %y) { +; CHECK-LABEL: @check4( +; CHECK: br i1 %cmp, +; CHECK: br i1 %cmp, +; CHECK-NOT: br i1 %cmp, +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %do.body + +do.body: ; preds = %do.body, %if.then + %i.0 = phi i32 [ 0, %if.then ], [ %inc, %do.body ] + call void () @show1() + %inc = add nsw i32 %i.0, 1 + %0 = load i32, i32* @E, align 4 + %cmp1 = icmp slt i32 %inc, %0 + br i1 %cmp1, label %do.body, label %do.end + +do.end: ; preds = %do.body + br label %if.end + +if.else: ; preds = %entry + call void () @show2() + br label %if.end + +if.end: ; preds = %if.else, %do.end + call void () @show3() noduplicate + br i1 %cmp, label %if.then3, label %if.else10 + +if.then3: ; preds = %if.end + br label %do.body5 + +do.body5: ; preds = %do.body5, %if.then3 + %i4.0 = phi i32 [ 0, %if.then3 ], [ %inc6, %do.body5 ] + call void () @show4() + %inc6 = add nsw i32 %i4.0, 1 + %1 = load i32, i32* @E, align 4 + %cmp8 = icmp slt i32 %inc6, %1 + br i1 %cmp8, label %do.body5, label %do.end9 + +do.end9: ; preds = %do.body5 + br label %if.end11 + +if.else10: ; preds = %if.end + call void () @show5() + br label %if.end11 + +if.end11: ; preds = %if.else10, %do.end9 + br i1 %cmp, label %if.then13, label %if.else14 + +if.then13: ; preds = %if.end11 + call void () @show6() + br label %if.end15 + +if.else14: ; preds = %if.end11 + call void () @show7() + br label %if.end15 + +if.end15: ; preds = %if.else14, %if.then13 + ret void +} Index: test/Transforms/JumpThreading/gvn-dom-ub.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/gvn-dom-ub.ll @@ -0,0 +1,118 @@ +; RUN: opt -jump-threading -S < %s | FileCheck %s + +declare void @show1() +declare void @show2() +declare void @show3() +declare void @show4() + +@c = external global i32, align 4 + +define void @check1(i32 %x, i32 %y) { +; CHECK-LABEL: @check1( +; CHECK: br i1 %cmp, +; CHECK-NOT: br i1 %cmp, +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end3 + +if.then: ; preds = %entry + call void () @show1() + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %if.then + call void () @show2() + br label %if.end + +if.else: ; preds = %if.then + call void () @show3() + br label %if.end + +if.end: ; preds = %if.else, %if.then2 + br label %if.end3 + +if.end3: ; preds = %if.end, %entry + ret void +} + +define void @check2(i32 %x, i32 %y) { +; CHECK-LABEL: @check2( +; CHECK: br i1 %cmp, +; CHECK-NOT: br i1 %cmp, +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end3 + +if.then: ; preds = %entry + call void () @show1() + br label %do.body + +do.body: ; preds = %do.cond, %if.then + call void () @show2() + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %do.body + call void () @show3() + br label %if.end + +if.else: ; preds = %do.body + call void () @show4() + br label %if.end + +if.end: ; preds = %if.else, %if.then2 + %0 = load i32, i32* @c, align 4 + %dec = add nsw i32 %0, -1 + store i32 %dec, i32* @c, align 4 + br label %do.cond + +do.cond: ; preds = %if.end + %1 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %if.end3 + +if.end3: ; preds = %do.end, %entry + ret void +} + +define void @check3(i32 %x, i32 %y) { +; CHECK-LABEL: @check3( +; CHECK: br i1 %cmp, +; CHECK-NOT: br i1 %cmp, +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end3 + +if.then: ; preds = %entry + call void () @show1() + br label %do.body + +do.body: ; preds = %do.cond, %if.then + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %do.body + call void () @show2() + br label %if.end + +if.else: ; preds = %do.body + call void () @show3() + br label %if.end + +if.end: ; preds = %if.else, %if.then2 + %0 = load i32, i32* @c, align 4 + %dec = add nsw i32 %0, -1 + store i32 %dec, i32* @c, align 4 + br label %do.cond + +do.cond: ; preds = %if.end + %1 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %if.end3 + +if.end3: ; preds = %do.end, %entry + ret void +}