Index: llvm/include/llvm/Transforms/Utils/Local.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Local.h +++ llvm/include/llvm/Transforms/Utils/Local.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/IR/Constant.h" @@ -157,8 +158,9 @@ /// other than PHI nodes, potential debug intrinsics and the branch. If /// possible, eliminate BB by rewriting all the predecessors to branch to the /// successor block and return true. If we can't transform, return false. -bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, - DomTreeUpdater *DTU = nullptr); +bool TryToSimplifyUncondBranchFromEmptyBlock( + BasicBlock *BB, DomTreeUpdater *DTU = nullptr, + SetVector *AssumesAndDeps = nullptr); /// Check for and eliminate duplicate PHI nodes in this block. This doesn't try /// to be clever about PHI nodes which differ only in the order of the incoming Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -1025,8 +1025,9 @@ replaceUndefValuesInPhi(PN, IncomingValues); } -bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, - DomTreeUpdater *DTU) { +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock( + BasicBlock *BB, DomTreeUpdater *DTU, + SetVector *AssumesAndDeps) { assert(BB != &BB->getParent()->getEntryBlock() && "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); @@ -1079,6 +1080,50 @@ LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + if (AssumesAndDeps) { + // Hoist @llvm.assume into predecessors. + for (BasicBlock *PredBB : predecessors(BB)) { + auto *PredBranch = dyn_cast(PredBB->getTerminator()); + if (!PredBranch || PredBranch->isUnconditional()) + continue; + + // Now clone AssumesAndDeps into Pred. When we get to a @llvm.assume + // update the assume condition to depend on PredBranch->getCondition(). + SmallDenseMap VMap; + auto newIfAvailable = [&VMap](Value *OldV) { + if (VMap.count(OldV)) + return VMap[OldV]; + else + return OldV; + }; + Module *M = BB->getParent()->getParent(); + IRBuilder<> IRB(M->getContext()); + IRB.SetInsertPoint(PredBranch); + + Value *Cond = PredBranch->getCondition(); + auto *PredTrueSucc = PredBranch->getSuccessor(0); + if (BB == PredTrueSucc) + Cond = IRB.CreateNot(Cond); + + for (auto It = AssumesAndDeps->rbegin(), E = AssumesAndDeps->rend(); + It != E; ++It) { + if (auto *Assume = dyn_cast(*It)) { + auto *OrCond = + IRB.CreateOr(Cond, newIfAvailable(Assume->getOperand(0))); + // Note that any Operand Bundle is dropped at this point. + IRB.CreateAssumption(OrCond); + } else { + auto *OldI = *It; + auto *NewI = OldI->clone(); + for (unsigned I = 0, E = OldI->getNumOperands(); I < E; ++I) + NewI->setOperand(I, newIfAvailable(OldI->getOperand(I))); + VMap[OldI] = NewI; + NewI->insertBefore(PredBranch); + } + } + } + } + SmallVector Updates; if (DTU) { // All predecessors of BB will be moved to Succ. Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -290,6 +290,41 @@ } // end anonymous namespace +static bool +isBlockEmptyExceptAssumes(BasicBlock *BB, + SetVector &AssumesAndDeps) { + auto *Pred = BB->getSinglePredecessor(); + if (!Pred) + return false; + + for (auto It = BB->rbegin(), E = BB->rend(); It != E; ++It) { + auto I = &(*It); + // Skip the terminator. + if (I->isTerminator()) + continue; + + // Bail if side-effects. + if (I->mayHaveSideEffects() && !isa(I)) + return false; + + // Insert into AssumesAndDeps either if instruction is a @llvm.assume or if + // all uses of the instruction are covered by the AssumesAndDeps set. + // Otherwise bail. + if (isa(I)) + AssumesAndDeps.insert(I); + else if (!empty(I->users()) && all_of(I->users(), [&](User *U) { + return isa(U) + ? AssumesAndDeps.count(cast(U)) + : false; + })) + AssumesAndDeps.insert(I); + else + return false; + } + + return true; +} + /// Return true if it is safe to merge these two /// terminator instructions together. static bool @@ -6445,8 +6480,10 @@ (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator(); - if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) + SetVector AssumesAndDeps; + if (isBlockEmptyExceptAssumes(BB, AssumesAndDeps) && + BB != &BB->getParent()->getEntryBlock() && !NeedCanonicalLoop && + TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU, &AssumesAndDeps)) return true; // If the only instruction in the block is a seteq/setne comparison against a Index: llvm/test/Transforms/SimplifyCFG/hoist-assume.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/hoist-assume.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=simplify-cfg < %s 2>&1 | FileCheck %s + +declare void @llvm.assume(i1 noundef) + +define dso_local void @f(i32 %a, i32 %b) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[B:%.*]], 5 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) +; CHECK-NEXT: ret void +; +entry: + %cond = icmp eq i32 %a, 0 + br i1 %cond, label %if.then, label %end + +if.then: + %cond2 = icmp sgt i32 %b, 5 + call void @llvm.assume(i1 %cond2) + br label %end + +end: + ret void +} + +define dso_local void @g(i32 %a, i32 %b) { +; CHECK-LABEL: @g( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[B:%.*]], 5 +; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp sle i32 [[B]], 5 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[COND]], [[TMP3]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP4]]) +; CHECK-NEXT: ret void +; +entry: + %cond = icmp eq i32 %a, 0 + br i1 %cond, label %if.then, label %if.else + +if.then: + %cond2 = icmp sgt i32 %b, 5 + call void @llvm.assume(i1 %cond2) + br label %end + +if.else: + %cond3 = icmp sle i32 %b, 5 + call void @llvm.assume(i1 %cond3) + br label %end + +end: + ret void +}