Index: include/llvm/Transforms/Scalar/JumpThreading.h =================================================================== --- include/llvm/Transforms/Scalar/JumpThreading.h +++ include/llvm/Transforms/Scalar/JumpThreading.h @@ -108,6 +108,7 @@ } void FindLoopHeaders(Function &F); + bool processBBWithCommonDomCondition(Function &F); bool ProcessBlock(BasicBlock *BB); bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl &PredBBs, BasicBlock *SuccBB); Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -370,7 +371,12 @@ FindLoopHeaders(F); - bool EverChanged = false; + bool EverChanged = processBBWithCommonDomCondition(F); + DTU->getDomTree(); + for (auto &BB : F) + if (!Unreachable.count(&BB) && !DT.isReachableFromEntry(&BB)) + Unreachable.insert(&BB); + bool Changed; do { Changed = false; @@ -548,6 +554,221 @@ LoopHeaders.insert(Edge.second); } +/// visitBB - Set up BBtoDepth +static void visitBB(DominatorTree &DT, + DenseMap &BBtoDepth, BasicBlock *BB, + int Level) { + for (auto *DTN : children(DT.getNode(BB))) { + BasicBlock *ChildBB = DTN->getBlock(); + BBtoDepth[ChildBB] = Level; + visitBB(DT, BBtoDepth, ChildBB, Level + 1); + } +} + +/// collectReachableBBSet - Create a set of reachable basic blocks from BB in +/// the reverse direction of the control flow as TouchBBSet. +/// When reaching SuccBB0 or SuccBB1, search is stopped there. +static void collectReachableBBSet(BasicBlock *BB, + BasicBlock *SuccBB0, BasicBlock *SuccBB1, + DenseSet& TouchBBSet) { + if (TouchBBSet.count(BB)) + return; + TouchBBSet.insert(BB); + for (BasicBlock *Pred : predecessors(BB)) { + if (Pred == SuccBB0) { + TouchBBSet.insert(SuccBB0); + } else if (Pred == SuccBB1) { + TouchBBSet.insert(SuccBB1); + } else { + collectReachableBBSet(Pred, SuccBB0, SuccBB1, TouchBBSet); + } + } +} + +/// findPredSourceEdge - Classify the predecessor Pred of BB into the following +/// categories: +/// 0 : from SuccBB0 +/// 1 : from SuccBB1 +/// 2 : from both SuccBB0 and SuccBB1 +/// BBa is the basic block which dominates BB. +/// Both SuccBB0 and SuccBB1 are the successors of BBa. +static unsigned findPredSourceEdge(BasicBlock *BBa, BasicBlock *BB, + BasicBlock *Pred, BasicBlock *SuccBB0, + BasicBlock *SuccBB1) { + if (Pred == BBa) { + if (BB == SuccBB0) + return 0; + else if (BB == SuccBB1) + return 1; + else + llvm_unreachable("This cannot happen!"); + } + DenseSet TouchBBSet; + collectReachableBBSet(Pred, SuccBB0, SuccBB1, TouchBBSet); + bool ReachSuccBB0 = TouchBBSet.count(SuccBB0); + bool ReachSuccBB1 = TouchBBSet.count(SuccBB1); + if (ReachSuccBB0 && ReachSuccBB1) + return 2; + else if (ReachSuccBB0) + return 0; + else if (ReachSuccBB1) + return 1; + else + llvm_unreachable("This cannot happen!"); +} + +/// findBBWithCommonDomCondition - Detect BBa that dominates BB and has the same +/// condition code C as BB. BBList is sorted in descending order of levels on +/// the dominating tree with the condition code C. The position of BB in BBList +/// is StartIdx - 1. +/// If there was BBa, then return it and also the set of predecessor of BB is +/// divided into Preds0 and Preds1. +static BasicBlock* +findBBWithCommonDomCondition(DominatorTree &DT, + SmallVectorImpl &BBList, + BasicBlock *BB, unsigned StartIdx, + SmallVector& Preds0, + SmallVector& Preds1) { + size_t Size = BBList.size(); + for (unsigned Idx = StartIdx; Idx < Size; Idx++) { + BasicBlock *BBa = BBList[Idx]; + if (DT.dominates(BBa, BB)) { + BranchInst *CondBr = dyn_cast(BBa->getTerminator()); + BasicBlock *SuccBB0 = CondBr->getSuccessor(0); + BasicBlock *SuccBB1 = CondBr->getSuccessor(1); + if (SuccBB0 != SuccBB1 && BBa != SuccBB0 && BBa != SuccBB1) { + SmallVector Preds(predecessors(BB)); + SmallVector Preds2; + for (auto Pred : Preds) { + unsigned Src = 2; + if (Pred == BBa) { + if (BB == SuccBB0) + Src = 0; + else if (BB == SuccBB1) + Src = 1; + else + llvm_unreachable("This cannot happen!"); + } else if (Pred == BB) + Src = 2; + else + Src = findPredSourceEdge(BBa, BB, Pred, SuccBB0, SuccBB1); + switch (Src) { + case 0: + Preds0.push_back(Pred); + break; + case 1: + Preds1.push_back(Pred); + break; + case 2: + Preds2.push_back(Pred); + break; + default: + llvm_unreachable("This cannot happen!"); + } + } + if (Preds2.empty()) { + return BBa; + } else { + return nullptr; + } + } + } + } + return nullptr; +} + +/// conditionalToUnconditional - Change BBd to the basic block of unconditional +/// branching to ToKeep. +static void conditionalToUnconditional(DomTreeUpdater *DTU, BasicBlock *BBd, + unsigned ToKeep, unsigned ToRemove) { + BranchInst *CondBr = dyn_cast(BBd->getTerminator()); + BasicBlock *ToRemoveSucc = CondBr->getSuccessor(ToRemove); + BasicBlock *ToKeepSucc = CondBr->getSuccessor(ToKeep); + ToRemoveSucc->removePredecessor(BBd); + BranchInst *NewBI = BranchInst::Create(ToKeepSucc, CondBr); + NewBI->setDebugLoc(ToKeepSucc->getTerminator()->getDebugLoc()); + CondBr->eraseFromParent(); + DTU->deleteEdgeRelaxed(BBd, ToRemoveSucc); +} + +/// processBBWithCommonDomCondition - Convert conditional branches into +/// unconditional branches using common dominator conditions. +bool JumpThreadingPass::processBBWithCommonDomCondition(Function &F) { + DominatorTree DT(F); + DenseMap BBtoDepth; + BasicBlock &Entry = F.getEntryBlock(); + BBtoDepth[&Entry] = 0; + visitBB(DT, BBtoDepth, &Entry, 1); + MapVector> CtoBBList; + for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB) { + Instruction *Terminator = BB->getTerminator(); + if (BranchInst *BI = dyn_cast(Terminator)) { + if (BI->isConditional()) { + Value *Condition = BI->getCondition(); + CtoBBList[Condition].push_back(&*BB); + } + } + } + + unsigned UB = 0; + unsigned DUP = 0; + bool Changed = false; + for (auto &Pair : CtoBBList) { + SmallVectorImpl &BBList = Pair.second; + if (BBList.size() > 1) { + std::stable_sort(BBList.begin(), BBList.end(), + [&BBtoDepth](BasicBlock* X, BasicBlock* Y) { + return BBtoDepth[X] > BBtoDepth[Y]; + }); + unsigned Idx = 0; + for (auto *BB : BBList) { + SmallVector Preds0; + SmallVector Preds1; + if (findBBWithCommonDomCondition(DT, BBList, BB, Idx + 1, + Preds0, Preds1)) { + if (Preds0.size() == 0) { + conditionalToUnconditional(DTU, BB, 1, 0); + UB++; + Changed = true; + } else if (Preds1.size() == 0) { + conditionalToUnconditional(DTU, BB, 0, 1); + UB++; + Changed = true; + } else if (!LoopHeaders.count(BB)) { + BranchInst *CondBr = dyn_cast(BB->getTerminator()); + BasicBlock *SuccBB0 = CondBr->getSuccessor(0); + BasicBlock *SuccBB1 = CondBr->getSuccessor(1); + if (SuccBB0 != SuccBB1 && SuccBB0 != BB && SuccBB1 != BB) { + bool Done = false; + { + // TODO: It is necessary to explain the validity of + // temporarily increasing BBDupThreshold. + unsigned BBDupThresholdBackup = BBDupThreshold; + BBDupThreshold = 20; + Done = ThreadEdge(BB, Preds1, SuccBB1); + BBDupThreshold = BBDupThresholdBackup; + } + if (Done) { + SuccBB1->removePredecessor(BB); + BranchInst *NewBI = BranchInst::Create(SuccBB0, CondBr); + NewBI->setDebugLoc(SuccBB0->getTerminator()->getDebugLoc()); + CondBr->eraseFromParent(); + DTU->deleteEdgeRelaxed(BB, SuccBB1); + DUP++; + Changed = true; + } + } + } + } + Idx++; + } + } + } + LLVM_DEBUG(dbgs() << "JT: Processing BB with common dominator condition: #UB=" + << UB << " #DUP=" << DUP << "\n"); + return Changed; +} + /// getKnownConstant - Helper method to determine if we can thread over a /// terminator with the given value as its condition, and if so what value to /// use for that. What kind of value this is depends on whether we want an Index: test/Transforms/JumpThreading/gvn-dom-cb.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/gvn-dom-cb.ll @@ -0,0 +1,105 @@ +; RUN: opt -jump-threading -S < %s | FileCheck %s + +declare void @show1() +declare void @show2() +declare void @show3() +declare void @show4() +declare void @show5() +declare void @show6() + +declare void @noduplicate_show2() + +define void @check1(i32 %x, i32 %y) { +; CHECK-LABEL: @check1( +; CHECK: br i1 %cmp +; CHECK-NOT: br i1 %cmp +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void () @show1() + br label %if.end + +if.end: ; preds = %if.then, %entry + call void () @show2() + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %if.end + call void () @show3() + br label %if.end3 + +if.else: ; preds = %if.end + call void () @show4() + br label %if.end3 + +if.end3: ; preds = %if.else, %if.then2 + ret void +} + +define void @check2(i32 %x, i32 %y) { +; CHECK-LABEL: @check2( +; CHECK: br i1 %cmp +; CHECK: br i1 %cmp +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void () @show1() + br label %if.end + +if.end: ; preds = %if.then, %entry + call void () @noduplicate_show2() noduplicate + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %if.end + call void () @show3() + br label %if.end3 + +if.else: ; preds = %if.end + call void () @show4() + br label %if.end3 + +if.end3: ; preds = %if.else, %if.then2 + ret void +} + +define void @check3(i32 %x, i32 %y) { +; CHECK-LABEL: @check3( +; CHECK: br i1 %cmp +; CHECK-NOT: br i1 %cmp +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + call void () @show1() + br label %if.end + +if.end: ; preds = %if.then, %entry + call void () @show2() + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %if.end + call void () @show3() + br label %if.end3 + +if.else: ; preds = %if.end + call void () @show4() + br label %if.end3 + +if.end3: ; preds = %if.else, %if.then2 + br i1 %cmp, label %if.then5, label %if.else6 + +if.then5: ; preds = %if.end3 + call void () @show5() + br label %if.end7 + +if.else6: ; preds = %if.end3 + call void () @show6() + br label %if.end7 + +if.end7: ; preds = %if.else6, %if.then5 + ret void +} Index: test/Transforms/JumpThreading/gvn-dom-ub.ll =================================================================== --- /dev/null +++ test/Transforms/JumpThreading/gvn-dom-ub.ll @@ -0,0 +1,118 @@ +; RUN: opt -jump-threading -S < %s | FileCheck %s + +declare void @show1() +declare void @show2() +declare void @show3() +declare void @show4() + +@c = external global i32, align 4 + +define void @check1(i32 %x, i32 %y) { +; CHECK-LABEL: @check1( +; CHECK: br i1 %cmp +; CHECK-NOT: br i1 %cmp +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end3 + +if.then: ; preds = %entry + call void () @show1() + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %if.then + call void () @show2() + br label %if.end + +if.else: ; preds = %if.then + call void () @show3() + br label %if.end + +if.end: ; preds = %if.else, %if.then2 + br label %if.end3 + +if.end3: ; preds = %if.end, %entry + ret void +} + +define void @check2(i32 %x, i32 %y) { +; CHECK-LABEL: @check2( +; CHECK: br i1 %cmp +; CHECK-NOT: br i1 %cmp +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end3 + +if.then: ; preds = %entry + call void () @show1() + br label %do.body + +do.body: ; preds = %do.cond, %if.then + call void () @show2() + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %do.body + call void () @show3() + br label %if.end + +if.else: ; preds = %do.body + call void () @show4() + br label %if.end + +if.end: ; preds = %if.else, %if.then2 + %0 = load i32, i32* @c, align 4 + %dec = add nsw i32 %0, -1 + store i32 %dec, i32* @c, align 4 + br label %do.cond + +do.cond: ; preds = %if.end + %1 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %if.end3 + +if.end3: ; preds = %do.end, %entry + ret void +} + +define void @check3(i32 %x, i32 %y) { +; CHECK-LABEL: @check3( +; CHECK: br i1 %cmp +; CHECK-NOT: br i1 %cmp +entry: + %cmp = icmp slt i32 %x, %y + br i1 %cmp, label %if.then, label %if.end3 + +if.then: ; preds = %entry + call void () @show1() + br label %do.body + +do.body: ; preds = %do.cond, %if.then + br i1 %cmp, label %if.then2, label %if.else + +if.then2: ; preds = %do.body + call void () @show2() + br label %if.end + +if.else: ; preds = %do.body + call void () @show3() + br label %if.end + +if.end: ; preds = %if.else, %if.then2 + %0 = load i32, i32* @c, align 4 + %dec = add nsw i32 %0, -1 + store i32 %dec, i32* @c, align 4 + br label %do.cond + +do.cond: ; preds = %if.end + %1 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %if.end3 + +if.end3: ; preds = %do.end, %entry + ret void +}