Index: llvm/include/llvm/Transforms/Utils/Local.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Local.h +++ llvm/include/llvm/Transforms/Utils/Local.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/IR/Constant.h" @@ -157,8 +158,9 @@ /// other than PHI nodes, potential debug intrinsics and the branch. If /// possible, eliminate BB by rewriting all the predecessors to branch to the /// successor block and return true. If we can't transform, return false. -bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, - DomTreeUpdater *DTU = nullptr); +bool TryToSimplifyUncondBranchFromEmptyBlock( + BasicBlock *BB, DomTreeUpdater *DTU = nullptr, + SetVector *AssumesAndDeps = nullptr); /// Check for and eliminate duplicate PHI nodes in this block. This doesn't try /// to be clever about PHI nodes which differ only in the order of the incoming Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -1025,8 +1025,9 @@ replaceUndefValuesInPhi(PN, IncomingValues); } -bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, - DomTreeUpdater *DTU) { +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock( + BasicBlock *BB, DomTreeUpdater *DTU, + SetVector *AssumesAndDeps) { assert(BB != &BB->getParent()->getEntryBlock() && "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); @@ -1117,6 +1118,46 @@ Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), BB->getInstList()); } else { + if (AssumesAndDeps && AssumesAndDeps->size() > 0) { + // Hoist @llvm.assume into predecessors. + for (BasicBlock *PredBB : predecessors(BB)) { + auto *PredBranch = dyn_cast(PredBB->getTerminator()); + if (!PredBranch || PredBranch->isUnconditional()) + continue; + + // Now clone AssumesAndDeps into Pred. When we get to a @llvm.assume + // update the assume condition to depend on PredBranch->getCondition(). + SmallDenseMap VMap; + auto NewIfAvailable = [&VMap](Value *OldV) { + return VMap.count(OldV) ? VMap[OldV] : OldV; + }; + IRBuilder<> IRB(BB->getModule()->getContext()); + IRB.SetInsertPoint(PredBranch); + + Value *Cond = PredBranch->getCondition(); + auto *PredTrueSucc = PredBranch->getSuccessor(0); + if (BB == PredTrueSucc) + Cond = IRB.CreateNot(Cond); + + for (auto It = AssumesAndDeps->rbegin(), E = AssumesAndDeps->rend(); + It != E; ++It) { + if (auto *Assume = dyn_cast(*It)) { + auto *OrCond = IRB.CreateLogicalOr( + Cond, NewIfAvailable(Assume->getOperand(0))); + // Note that any Operand Bundle is dropped at this point. + IRB.CreateAssumption(OrCond); + } else { + auto *OldI = *It; + auto *NewI = OldI->clone(); + for (unsigned I = 0, E = OldI->getNumOperands(); I < E; ++I) + NewI->setOperand(I, NewIfAvailable(OldI->getOperand(I))); + VMap[OldI] = NewI; + NewI->insertBefore(PredBranch); + } + } + } + } + while (PHINode *PN = dyn_cast(&BB->front())) { // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. assert(PN->use_empty() && "There shouldn't be any uses here!"); Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -290,6 +290,38 @@ } // end anonymous namespace +static bool +isBlockEmptyExceptAssumes(BasicBlock *BB, + SetVector &AssumesAndDeps) { + for (auto It = BB->rbegin(), E = BB->rend(); It != E; ++It) { + auto *I = &(*It); + // Skip the terminator. + if (I->isTerminator()) + continue; + + // Bail if side-effects. + if (!isSafeToSpeculativelyExecute(I) && !isa(I) && + !isa(I) && !isa(I)) + return false; + + // Insert into AssumesAndDeps either if instruction is a @llvm.assume or if + // all uses of the instruction are covered by the AssumesAndDeps set. + // Otherwise bail. Phi-nodes and debug instructions are ignored. + if (isa(I)) + AssumesAndDeps.insert(I); + else if (!empty(I->users()) && all_of(I->users(), [&](User *U) { + return isa(U) + ? AssumesAndDeps.count(cast(U)) + : false; + })) + AssumesAndDeps.insert(I); + else if (!isa(I) && !isa(I)) + return false; + } + + return true; +} + /// Return true if it is safe to merge these two /// terminator instructions together. static bool @@ -6445,8 +6477,10 @@ (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator(); - if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && - !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) + SetVector AssumesAndDeps; + if (isBlockEmptyExceptAssumes(BB, AssumesAndDeps) && + BB != &BB->getParent()->getEntryBlock() && !NeedCanonicalLoop && + TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU, &AssumesAndDeps)) return true; // If the only instruction in the block is a seteq/setne comparison against a Index: llvm/test/Transforms/InstCombine/assume-align.ll =================================================================== --- llvm/test/Transforms/InstCombine/assume-align.ll +++ llvm/test/Transforms/InstCombine/assume-align.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[PTR]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] -; CHECK: if.then: +; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]] +; CHECK: if.then1: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[PTR]], i64 4) ] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[PTR]] to i32* ; CHECK-NEXT: store i32 4, i32* [[TMP3]], align 4 Index: llvm/test/Transforms/SimplifyCFG/hoist-assume.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SimplifyCFG/hoist-assume.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=simplify-cfg < %s 2>&1 | FileCheck %s + +declare void @llvm.assume(i1 noundef) + +define dso_local void @f(i32 %a, i32 %b) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[B:%.*]], 5 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i1 true, i1 [[TMP1]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) +; CHECK-NEXT: ret void +; +entry: + %cond = icmp eq i32 %a, 0 + br i1 %cond, label %if.then, label %end + +if.then: + %cond2 = icmp sgt i32 %b, 5 + call void @llvm.assume(i1 %cond2) + br label %end + +end: + ret void +} + +define dso_local void @g(i32 %a, i32 %b) { +; CHECK-LABEL: @g( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[B:%.*]], 5 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP0]], i1 true, i1 [[TMP1]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = icmp sle i32 [[B]], 5 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[COND]], i1 true, i1 [[TMP3]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP4]]) +; CHECK-NEXT: ret void +; +entry: + %cond = icmp eq i32 %a, 0 + br i1 %cond, label %if.then, label %if.else + +if.then: + %cond2 = icmp sgt i32 %b, 5 + call void @llvm.assume(i1 %cond2) + br label %end + +if.else: + %cond3 = icmp sle i32 %b, 5 + call void @llvm.assume(i1 %cond3) + br label %end + +end: + ret void +}