Index: llvm/trunk/include/llvm/IR/BasicBlock.h =================================================================== --- llvm/trunk/include/llvm/IR/BasicBlock.h +++ llvm/trunk/include/llvm/IR/BasicBlock.h @@ -152,6 +152,15 @@ return const_cast(this)->getFirstNonPHIOrDbgOrLifetime(); } + /// \brief Returns a pointer to the first instruction in this block that is + /// not a PHINode, a debug intrinsic, a lifetime intrinsic, or a bitcast + /// instruction coupled with the following lifetime intrinsic. + Instruction *getFirstNonPHIOrDbgOrLifetimeOrBitCast(); + const Instruction *getFirstNonPHIOrDbgOrLifetimeOrBitCast() const { + return const_cast(this) + ->getFirstNonPHIOrDbgOrLifetimeOrBitCast(); + } + /// \brief Returns an iterator to the first instruction in this block that is /// suitable for inserting a non-PHI instruction. /// Index: llvm/trunk/lib/IR/BasicBlock.cpp =================================================================== --- llvm/trunk/lib/IR/BasicBlock.cpp +++ llvm/trunk/lib/IR/BasicBlock.cpp @@ -206,6 +206,30 @@ return nullptr; } +Instruction *BasicBlock::getFirstNonPHIOrDbgOrLifetimeOrBitCast() { + for (Instruction &I : *this) { + if (isa(I) || isa(I)) + continue; + + if (auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + + if (auto *BCI = dyn_cast(&I)) { + if (auto *II = dyn_cast(++I.getIterator())) { + if ((II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) && + II->getOperand(1) == BCI) { + continue; + } + } + } + return &I; + } + return nullptr; +} + BasicBlock::iterator BasicBlock::getFirstInsertionPt() { Instruction *FirstNonPHI = getFirstNonPHI(); if (!FirstNonPHI) Index: llvm/trunk/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/Local.cpp +++ llvm/trunk/lib/Transforms/Utils/Local.cpp @@ -800,6 +800,55 @@ replaceUndefValuesInPhi(PN, IncomingValues); } +/// Return true if BB has lifetime.end intrinsic. +/// +static bool hasLifetime(BasicBlock *BB) { + for (auto &I : *BB) { + if (const IntrinsicInst *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::lifetime_start) { + return true; + } + } + } + return false; +} + +/// hoistLifetimeFromEmptyBlockToPred - Hoist lifetime.end intrinsics and +/// related bitcast instructions from BB to predecessors of BB. +/// +static bool hoistLifetimeFromEmptyBlockToPred(BasicBlock *BB) { + // Check to see if all Preds have single successor and if not, we cannot + // hoist lifetime intrinsics because it would change semantics. + for (auto Pred : predecessors(BB)) + if (!Pred->getSingleSuccessor()) + return false; + + // Hoist all lifetime.end intrinsics and related bitcast instrunctions + // in BB to Preds. + for (auto &I : *BB) { + if (auto *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_end || + II->getIntrinsicID() == Intrinsic::lifetime_start) { + for (auto Pred : predecessors(BB)) { + Instruction *NewII = I.clone(); + NewII->insertBefore(Pred->getTerminator()); + + if (I.getIterator() != BB->begin()) { + if (auto BC = dyn_cast(--I.getIterator())) { + assert(BC == I.getOperand(1)); + auto NewBC = BC->clone(); + NewBC->insertBefore(NewII); + NewII->setOperand(1, NewBC); + } + } + } + } + } + } + return true; +} + /// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an /// unconditional branch, and contains no instructions other than PHI nodes, /// potential side-effect free intrinsics and the branch. If possible, @@ -813,74 +862,118 @@ BasicBlock *Succ = cast(BB->getTerminator())->getSuccessor(0); if (BB == Succ) return false; - // Check to see if merging these blocks would cause conflicts for any of the - // phi nodes in BB or Succ. If not, we can safely merge. - if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; - - // Check for cases where Succ has multiple predecessors and a PHI node in BB - // has uses which will not disappear when the PHI nodes are merged. It is - // possible to handle such cases, but difficult: it requires checking whether - // BB dominates Succ, which is non-trivial to calculate in the case where - // Succ has multiple predecessors. Also, it requires checking whether - // constructing the necessary self-referential PHI node doesn't introduce any - // conflicts; this isn't too difficult, but the previous code for doing this - // was incorrect. - // - // Note that if this check finds a live use, BB dominates Succ, so BB is - // something like a loop pre-header (or rarely, a part of an irreducible CFG); - // folding the branch isn't profitable in that case anyway. - if (!Succ->getSinglePredecessor()) { - BasicBlock::iterator BBI = BB->begin(); - while (isa(*BBI)) { - for (Use &U : BBI->uses()) { - if (PHINode* PN = dyn_cast(U.getUser())) { - if (PN->getIncomingBlock(U) != BB) - return false; - } else { - return false; - } + // If BB has lifetime.end intrinsics, simplify BB under more constraints. + if (hasLifetime(BB)) { + // Check to see if BB and its predecessors and successors have PHI. + if (isa(BB->begin())) + return false; + + for (auto Pred : predecessors(BB)) + if (isa(Pred->begin())) + return false; + + for (auto Succ : successors(BB)) + if (isa(Succ->begin())) + return false; + + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); + + } else { + // Unless BB is the only predecessor of Succ, hoist lifetime intrinsics + // to predecessors of BB and simplify BB. + if (!hoistLifetimeFromEmptyBlockToPred(BB)) { + return false; } - ++BBI; } - } - DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) + Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; + } else { + // Check to see if merging these blocks would cause conflicts for any of the + // phi nodes in BB or Succ. If not, we can safely merge. + if (!CanPropagatePredecessorsForPHIs(BB, Succ)) + return false; - if (isa(Succ->begin())) { - // If there is more than one pred of succ, and there are PHI nodes in - // the successor, then we need to add incoming edges for the PHI nodes + // Check for cases where Succ has multiple predecessors and a PHI node in BB + // has uses which will not disappear when the PHI nodes are merged. It is + // possible to handle such cases, but difficult: it requires checking + // whether BB dominates Succ, which is non-trivial to calculate in the + // case where Succ has multiple predecessors. Also, it requires checking + // whether constructing the necessary self-referential PHI node doesn't + // introduce any conflicts; this isn't too difficult, but the previous code + // for doing this was incorrect. // - const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + // Note that if this check finds a live use, BB dominates Succ, so BB is + // something like a loop pre-header (or rarely, a part of an irreducible + // CFG); + // folding the branch isn't profitable in that case anyway. + if (!Succ->getSinglePredecessor()) { + BasicBlock::iterator BBI = BB->begin(); + while (isa(*BBI)) { + for (Use &U : BBI->uses()) { + if (PHINode *PN = dyn_cast(U.getUser())) { + if (PN->getIncomingBlock(U) != BB) + return false; + } else { + return false; + } + } + ++BBI; + } + } - // Loop over all of the PHI nodes in the successor of BB. - for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { - PHINode *PN = cast(I); + DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); - redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); - } - } + if (isa(Succ->begin())) { + // If there is more than one pred of succ, and there are PHI nodes in + // the successor, then we need to add incoming edges for the PHI nodes + // + const PredBlockVector BBPreds(pred_begin(BB), pred_end(BB)); + + // Loop over all of the PHI nodes in the successor of BB. + for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { + PHINode *PN = cast(I); - if (Succ->getSinglePredecessor()) { - // BB is the only predecessor of Succ, so Succ will end up with exactly - // the same predecessors BB had. + redirectValuesFromPredecessorsToPhi(BB, BBPreds, PN); + } + } - // Copy over any phi, debug or lifetime instruction. - BB->getTerminator()->eraseFromParent(); - Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), - BB->getInstList()); - } else { - while (PHINode *PN = dyn_cast(&BB->front())) { - // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. - assert(PN->use_empty() && "There shouldn't be any uses here!"); - PN->eraseFromParent(); + if (Succ->getSinglePredecessor()) { + // BB is the only predecessor of Succ, so Succ will end up with exactly + // the same predecessors BB had. + + // Copy over any phi, debug or lifetime instruction. + BB->getTerminator()->eraseFromParent(); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); + } else { + while (PHINode *PN = dyn_cast(&BB->front())) { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); + } } - } - // Everything that jumped to BB now goes to Succ. - BB->replaceAllUsesWith(Succ); - if (!Succ->hasName()) Succ->takeName(BB); - BB->eraseFromParent(); // Delete the old basic block. - return true; + // Everything that jumped to BB now goes to Succ. + BB->replaceAllUsesWith(Succ); + if (!Succ->hasName()) + Succ->takeName(BB); + BB->eraseFromParent(); // Delete the old basic block. + return true; + } } /// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI Index: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5056,14 +5056,15 @@ if (SinkCommon && SinkThenElseCodeToEnd(BI)) return true; - - // If the Terminator is the only non-phi instruction, simplify the block. - // if LoopHeader is provided, check if the block is a loop header + // If the Terminator is the only non-phi instruction except for bitcast + // instruction coupled with the following lifetime intrinsic, simplify the + // block. If LoopHeader is provided, check if the block is a loop header // (This is for early invocations before loop simplify and vectorization // to keep canonical loop forms for nested loops. // These blocks can be eliminated when the pass is invoked later // in the back-end.) - BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); + BasicBlock::iterator I = + BB->getFirstNonPHIOrDbgOrLifetimeOrBitCast()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && (!LoopHeaders || !LoopHeaders->count(BB)) && TryToSimplifyUncondBranchFromEmptyBlock(BB)) Index: llvm/trunk/test/Transforms/SimplifyCFG/lifetime.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/lifetime.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/lifetime.ll @@ -1,6 +1,9 @@ ; RUN: opt < %s -simplifycfg -S | FileCheck %s ; Test that a lifetime intrinsic isn't removed because that would change semantics +; This case is that predecessor(s) of the target empty block (bb0) has multiple +; successors (bb0 and bb1) and its successor has multiple predecessors (entry and +; bb0). ; CHECK: foo ; CHECK: entry: @@ -27,3 +30,232 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind + +; Test that empty block including lifetime intrinsic and not related bitcast +; instruction cannot be removed. It is because the block is not empty. + +; CHECK-LABEL: coo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-LABEL: if.else: +; CHECK-LABEL: if.end: +; CHECK-LABEL: bb: +; CHECK: ret + +define void @coo(i1 %x, i1 %y) { +entry: + %a = alloca i8, align 4 + %b = alloca i32, align 4 + br label %while.cond + +while.cond: ; preds = %if.end, %entry + br i1 %y, label %while.body, label %bb + +while.body: ; preds = %while.cond + call void @llvm.lifetime.start(i64 4, i8* %a) + %c = load i8, i8* %a, align 4 + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %while.body + %d = add i8 %c, 1 + br label %if.end + +if.else: ; preds = %while.body + %e = sub i8 %c, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %f = bitcast i32* %b to i8* + call void @llvm.lifetime.end(i64 4, i8* %a) + br label %while.cond + +bb: ; preds = %while.cond + ret void +} + +; Test that empty block including lifetime intrinsic can be removed. +; Lifetime.end intrinsic is moved to predecessors because successor has +; multiple predecessors. + +; CHECK-LABEL: soo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: %e +; CHECK-NEXT: call void @llvm.lifetime.end +; CHECK-LABEL: if.else: +; CHECK-NEXT: %g +; CHECK-NEXT: call void @llvm.lifetime.end +; CHECK-NEXT: br label %while.cond +; CHECK-NOT: if.end: +; CHECK: ret + +define void @soo(i1 %x, i1 %y) { +entry: + %a = alloca i8, align 4 + br label %while.cond + +while.cond: ; preds = %if.end, %entry + br i1 %y, label %while.body, label %bb + +while.body: ; preds = %while.cond + call void @llvm.lifetime.start(i64 4, i8* %a) + %d = load i8, i8* %a, align 4 + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %while.body + %e = add i8 %d, 1 + br label %if.end + +if.else: ; preds = %while.body + %g = sub i8 %d, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void @llvm.lifetime.end(i64 4, i8* %a) + br label %while.cond + +bb: ; preds = %while.cond + ret void +} + +; Test that empty block including lifetime intrinsic and related bitcast +; instruction can be removed. Lifetime.end intrinsic and related bitcast +; instruction are moved to predecessors because successor has multiple +; predecessors. + +; CHECK-LABEL: boo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: %e +; CHECK-NEXT: %[[T:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.end(i64 4, i8* %[[T]]) +; CHECK-LABEL: if.else: +; CHECK-NEXT: %g +; CHECK-NEXT: %[[B:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.end(i64 4, i8* %[[B]]) +; CHECK-NEXT: br label %while.cond +; CHECK-NOT: if.end: +; CHECK: ret + +define void @boo(i1 %x, i1 %y) { +entry: + %a = alloca i32, align 4 + br label %while.cond + +while.cond: ; preds = %if.end, %entry + br i1 %y, label %while.body, label %bb + +while.body: ; preds = %while.cond + %b = bitcast i32* %a to i8* + call void @llvm.lifetime.start(i64 4, i8* %b) + %d = load i32, i32* %a, align 4 + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %while.body + %e = add i32 %d, 1 + br label %if.end + +if.else: ; preds = %while.body + %g = sub i32 %d, 1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %c = bitcast i32* %a to i8* + call void @llvm.lifetime.end(i64 4, i8* %c) + br label %while.cond + +bb: ; preds = %while.cond + ret void +} + +; Test that empty block including lifetime intrinsic can be removed. +; Lifetime.start intrinsic is moved to predecessors because successor has +; multiple predecessors. + +; CHECK-LABEL: koo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: call void @f +; CHECK-NEXT: call void @llvm.lifetime.start +; CHECK-LABEL: if.else: +; CHECK-NEXT: call void @g +; CHECK-NEXT: call void @llvm.lifetime.start +; CHECK-NEXT: br label %bb +; CHECK-NOT: if.end: +; CHECK: ret + +define void @koo(i1 %x, i1 %y, i1 %z) { +entry: + %a = alloca i8, align 4 + br i1 %z, label %bb, label %bb0 + +bb0: ; preds = %entry + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %bb0 + call void @f() + br label %if.end + +if.else: ; preds = %bb0 + call void @g() + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void @llvm.lifetime.start(i64 4, i8* %a) + br label %bb + +bb: ; preds = %if.end, %entry + %d = load i8, i8* %a, align 4 + call void @llvm.lifetime.end(i64 4, i8* %a) + ret void +} + +declare void @g() + +; Test that empty block including lifetime intrinsic and related bitcast +; instruction can be removed. Lifetime.start intrinsic and related bitcast +; instruction are moved to predecessors because successor has multiple +; predecessors. + + +; CHECK-LABEL: goo +; CHECK-LABEL: entry: +; CHECK-LABEL: if.then: +; CHECK-NEXT: call void @f +; CHECK-NEXT: %[[T:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* %[[T]]) +; CHECK-LABEL: if.else: +; CHECK-NEXT: call void @g +; CHECK-NEXT: %[[B:[^ ]+]] = bitcast +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* %[[B]]) +; CHECK-NEXT: br label %bb +; CHECK-NOT: if.end: +; CHECK: ret + +define void @goo(i1 %x, i1 %y, i1 %z) { +entry: + %a = alloca i32, align 4 + br i1 %z, label %bb, label %bb0 + +bb0: ; preds = %entry + br i1 %x, label %if.then, label %if.else + +if.then: ; preds = %bb0 + call void @f() + br label %if.end + +if.else: ; preds = %bb0 + call void @g() + br label %if.end + +if.end: ; preds = %if.else, %if.then + %b = bitcast i32* %a to i8* + call void @llvm.lifetime.start(i64 4, i8* %b) + br label %bb + +bb: ; preds = %if.end, %entry + %d = load i32, i32* %a, align 4 + %c = bitcast i32* %a to i8* + call void @llvm.lifetime.end(i64 4, i8* %c) + ret void +}