Index: include/llvm/Transforms/Scalar/JumpThreading.h =================================================================== --- include/llvm/Transforms/Scalar/JumpThreading.h +++ include/llvm/Transforms/Scalar/JumpThreading.h @@ -23,6 +23,7 @@ #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/ValueHandle.h" namespace llvm { @@ -122,6 +123,12 @@ bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); bool TryToUnfoldSelectInCurrBB(BasicBlock *BB); + bool ProcessGuards(BasicBlock *BB); + bool ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI); + BasicBlock *DuplicateInstructionsInSplitBetween( + BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, + DenseMap &ValueMapping); + private: BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef Preds, const char *Suffix); Index: lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- lib/Transforms/Scalar/JumpThreading.cpp +++ lib/Transforms/Scalar/JumpThreading.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -41,6 +42,7 @@ #include using namespace llvm; using namespace jumpthreading; +using namespace PatternMatch; #define DEBUG_TYPE "jump-threading" @@ -712,6 +714,10 @@ if (TryToUnfoldSelectInCurrBB(BB)) return true; + // Look if we can propagate guards to predecessors. + if (ProcessGuards(BB)) + return true; + // What kind of constant we're looking for. ConstantPreference Preference = WantInteger; @@ -2019,3 +2025,147 @@ return false; } + +/// Try to propagate a guard from the current BB into one of its predecessors +/// in case if another branch of execution implies that the condition of this +/// guard is always true. Currently we only process the simplest case that +/// looks like: +/// +/// Start: +/// %cond = ... +/// br i1 %cond, label %T1, label %F1 +/// T1: +/// br label %Merge +/// F1: +/// br label %Merge +/// Merge: +/// %condGuard = ... +/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ] +/// +/// And cond either implies condGuard or !condGuard. In this case all the +/// instructions before the guard can be duplicated in both branches, and the +/// guard is then threaded to one of them. +bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) { + // We only want to deal with two predecessors. + SmallVector Predecessors; + for (auto Pred : predecessors(BB)) { + Predecessors.push_back(Pred); + if (Predecessors.size() > 2) + return false; + } + + // TODO: Generalize it for larger number of predecessors. + if (Predecessors.size() != 2) + return false; + + BasicBlock *Pred1 = Predecessors[0]; + BasicBlock *Pred2 = Predecessors[1]; + + // Try to thread one of the guards of the block. + // TODO: Look up deeper than to immediate predecessor? + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (match(&*I, m_Intrinsic())) + if (auto Parent = Pred1->getSinglePredecessor()) + if (Parent == Pred2->getSinglePredecessor()) + if (BranchInst *BI = dyn_cast(Parent->getTerminator())) + if (ThreadGuard(BB, cast(&*I), BI)) + return true; + + return false; +} + +/// Try to propagate the guard from BB which is the lower block of a diamond +/// to one of its branches, in case if diamond's condition implies guard's +/// condition. +bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, + BranchInst *BI) { + Value *GuardCond = Guard->getArgOperand(0); + Value *BranchCond = BI->getCondition(); + BasicBlock *TrueBranch = BI->getSuccessor(0); + BasicBlock *FalseBranch = BI->getSuccessor(1); + + auto &DL = BB->getModule()->getDataLayout(); + Optional Implication = isImpliedCondition(BranchCond, GuardCond, DL); + + if (!Implication) + return false; + + // Decide which branch can be executed without the guard. + bool ImpliedTrueBranch = *Implication; + BasicBlock *UnguardedBlock = ImpliedTrueBranch ? TrueBranch : FalseBranch; + BasicBlock *GuardedBlock = ImpliedTrueBranch ? FalseBranch : TrueBranch; + + DEBUG(dbgs() << "Moving guard " << *Guard << " to block " + << GuardedBlock->getName() << "\n"); + + DenseMap UnguardedMapping, GuardedMapping; + Instruction *AfterGuard = Guard->getNextNode(); + // Duplicate all instructions before the guard in the unguarded branch. + UnguardedBlock = + DuplicateInstructionsInSplitBetween(BB, UnguardedBlock, Guard, + UnguardedMapping); + // Duplicate all instructions before the guard and the guard itself to the + // branch where implication is not proved. + GuardedBlock = + DuplicateInstructionsInSplitBetween(BB, GuardedBlock, AfterGuard, + GuardedMapping); + + // Some instructions before the guard may still have uses. For them, we need + // to create Phi nodes merging their copies in both guarded and unguarded + // branches. Those instructions that have no uses can be just removed. + SmallVector ToRemove; + for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI) + ToRemove.push_back(&*BI); + + // Substitute with Phis & remove. + for (auto I = ToRemove.rbegin(), E = ToRemove.rend(); I != E; ++I) { + Instruction *Inst = *I; + if (!Inst->use_empty()) { + PHINode *NewPN = PHINode::Create(Inst->getType(), 2); + NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock); + NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock); + NewPN->insertBefore(Inst); + Inst->replaceAllUsesWith(NewPN); + } + Inst->eraseFromParent(); + } + return true; +} + +/// Split edge between BB and PredBB and duplicate all non-Phi instructions +/// from BB between its beginning and the StopAt instruction into the split +/// block. ValueMapping is used to map the original instructions from BB to +/// their newly-created copies. The split block is returned. +BasicBlock *JumpThreadingPass::DuplicateInstructionsInSplitBetween( + BasicBlock *BB, BasicBlock *PredBB, Instruction *StopAt, + DenseMap &ValueMapping) { + // We are going to have to map operands from the original BB block to the new + // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to + // account for entry from PredBB. + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + BasicBlock *NewBB = SplitEdge(PredBB, BB); + NewBB->setName(PredBB->getName() + ".threadguard"); + Instruction *NewTerm = NewBB->getTerminator(); + + // Clone the non-phi instructions of BB into NewBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; StopAt != &*BI; ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getName()); + New->insertBefore(NewTerm); + ValueMapping[&*BI] = New; + + // Remap operands to patch up intra-block references. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast(New->getOperand(i))) { + auto I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + } + + return NewBB; +} Index: test/Transforms/JumpThreading/guards.ll =================================================================== --- test/Transforms/JumpThreading/guards.ll +++ test/Transforms/JumpThreading/guards.ll @@ -0,0 +1,111 @@ +; RUN: opt < %s -jump-threading -dce -S | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +declare i32 @f1() +declare i32 @f2() +declare void @f3() + +define i32 @simple_cond_true(i32 %a, i32 %b) { +; CHECK-LABEL: @simple_cond_true( + %cond = icmp slt i32 %a, 10 + br i1 %cond, label %T1, label %F1 + +T1: +; CHECK: T1.threadguard +; CHECK: %v1 = call i32 @f1() +; CHECK-NEXT: %retVal1 = add i32 %v1, 10 +; CHECK-NEXT: call void @f3() +; CHECK-NEXT: br label %Merge + %v1 = call i32 @f1() + br label %Merge + +F1: +; CHECK: F1.threadguard +; CHECK: %v2 = call i32 @f2() +; CHECK-NEXT: %retVal3 = add i32 %v2, 10 +; CHECK-NEXT: call void @f3() +; CHECK-NEXT: %condGuard4 = icmp slt i32 %a, 20 +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard4) [ "deopt"() ] +; CHECK-NEXT: br label %Merge + %v2 = call i32 @f2() + br label %Merge + +Merge: +; CHECK: Merge +; CHECK-NOT: call void(i1, ...) @llvm.experimental.guard( + %retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ] + %retVal = add i32 %retPhi, 10 + call void @f3() + %condGuard = icmp slt i32 %a, 20 + call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ] + ret i32 %retVal +} + +define i32 @simple_cond_false(i32 %a, i32 %b) { +; CHECK-LABEL: @simple_cond_false( + %cond = icmp slt i32 %a, 10 + br i1 %cond, label %T1, label %F1 + +T1: +; CHECK: T1.threadguard: +; CHECK-NEXT: %v1 = call i32 @f1() +; CHECK-NEXT: %retVal3 = add i32 %v1, 10 +; CHECK-NEXT: call void @f3() +; CHECK-NEXT: %condGuard4 = icmp sgt i32 %a, 10 +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard4) [ "deopt"() ] +; CHECK-NEXT: br label %Merge + %v1 = call i32 @f1() + br label %Merge + +F1: +; CHECK: F1.threadguard: +; CHECK-NEXT: %v2 = call i32 @f2() +; CHECK-NEXT: %retVal1 = add i32 %v2, 10 +; CHECK-NEXT: call void @f3() +; CHECK-NEXT: br label %Merge + %v2 = call i32 @f2() + br label %Merge + +Merge: +; CHECK: Merge +; CHECK-NOT: call void(i1, ...) @llvm.experimental.guard( + %retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ] + %retVal = add i32 %retPhi, 10 + call void @f3() + %condGuard = icmp sgt i32 %a, 10 + call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ] + ret i32 %retVal +} + +define i32 @simple_cond_negative(i32 %a, i32 %b) { +; CHECK-LABEL: @simple_cond_negative( + %cond = icmp slt i32 %a, 10 + br i1 %cond, label %T1, label %F1 + +T1: +; CHECK: T1: +; CHECK-NEXT: %v1 = call i32 @f1() +; CHECK-NEXT: br label %Merge + %v1 = call i32 @f1() + br label %Merge + +F1: +; CHECK: F1: +; CHECK-NEXT: %v2 = call i32 @f2() +; CHECK-NEXT: br label %Merge + %v2 = call i32 @f2() + br label %Merge + +Merge: +; CHECK: Merge +; CHECK: call void @f3() +; CHECK-NEXT: %condGuard = icmp slt i32 %a, 5 +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %condGuard) [ "deopt"() ] + %retPhi = phi i32 [ %v1, %T1 ], [ %v2, %F1 ] + %retVal = add i32 %retPhi, 10 + call void @f3() + %condGuard = icmp slt i32 %a, 5 + call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ] + ret i32 %retVal +}