Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -37,7 +37,18 @@ bool run(Module &M); Function *unswitchFunction(Function *F); + // Finds a chain of blocks (starting from function entry) + // that guards a return block. Returns the ReturnBlock and NonReturnBlock. + std::tuple<BasicBlock *, BasicBlock *> findEntryBlocks(Function *F); + private: + // A chain of basic blocks starting from the function entry point. + // Except for the function entry block, each block in the chain + // has the previous block in the vector as its single predecessor and + // all the blocks in the chain have a common non return block + // as their successor. The last block in the chain also has the return + // block as its successor. + std::vector<BasicBlock *> Entries; InlineFunctionInfo IFI; }; struct PartialInlinerLegacyPass : public ModulePass { @@ -64,34 +75,157 @@ }; } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { - // First, verify that this function is an unswitching candidate... 
+std::tuple<BasicBlock *, BasicBlock *> +PartialInlinerImpl::findEntryBlocks(Function *F) { BasicBlock *EntryBlock = &F->front(); BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator()); if (!BR || BR->isUnconditional()) - return nullptr; + return std::make_tuple(nullptr, nullptr); BasicBlock *ReturnBlock = nullptr; BasicBlock *NonReturnBlock = nullptr; unsigned ReturnCount = 0; + for (BasicBlock *BB : successors(EntryBlock)) { if (isa<ReturnInst>(BB->getTerminator())) { ReturnBlock = BB; ReturnCount++; - } else + } else { NonReturnBlock = BB; + } + } + + if (ReturnCount == 1) { + Entries.push_back(EntryBlock); + return std::make_tuple(ReturnBlock, NonReturnBlock); + } else if (ReturnCount != 0) { + return std::make_tuple(nullptr, nullptr); + } + + // Handle more complicated case where the ReturnBlock is guarded by + // a chain of predicates: + // First find the return block: + NonReturnBlock = nullptr; + +#if 0 + for (auto &BB : *F) { + TerminatorInst *TI = BB.getTerminator(); + if (isa<ReturnInst>(TI)) { + ReturnBlock = &BB; + ReturnCount++; + } } if (ReturnCount != 1) + return std::make_tuple(nullptr, nullptr); + +#endif + // Returns true if Succ is BB's successor + auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { + return is_contained(successors(BB), Succ); + }; + + auto SuccSize = [](BasicBlock *BB) { return succ_end(BB) - succ_begin(BB); }; + + auto IsReturnBlock = [](BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + return isa<ReturnInst>(TI); + }; + + BasicBlock *CurrEntryInChain = EntryBlock; + auto IsCurrEntryBlockLastInChain = [&](BasicBlock *RetBlockCand, + BasicBlock *NonReturnCand) { + if (!IsReturnBlock(RetBlockCand)) + return false; + + if (NonReturnCand == NonReturnBlock) { + ReturnBlock = RetBlockCand; + Entries.push_back(CurrEntryInChain); + } else { + NonReturnBlock = nullptr; + } + return true; + }; + + do { + if (SuccSize(CurrEntryInChain) != 2) { + NonReturnBlock = nullptr; + break; + } + BasicBlock *Succ1 = *succ_begin(CurrEntryInChain); + BasicBlock *Succ2 = 
*(succ_begin(CurrEntryInChain) + 1); + BasicBlock *CommSucc = nullptr; + BasicBlock *OtherSucc = nullptr; + if (IsSuccessor(Succ1, Succ2)) { + CommSucc = Succ1; + OtherSucc = Succ2; + } else if (IsSuccessor(Succ2, Succ1)) { + CommSucc = Succ2; + OtherSucc = Succ1; + } + + if (CommSucc) { + if (IsCurrEntryBlockLastInChain(CommSucc, OtherSucc)) { + break; + } else { + if ((!NonReturnBlock || NonReturnBlock == CommSucc) && + OtherSucc->getSinglePredecessor() == CurrEntryInChain) { + NonReturnBlock = CommSucc; + Entries.push_back(CurrEntryInChain); + CurrEntryInChain = OtherSucc; + } else { + // No common NonReturnBlock can be found: + NonReturnBlock = nullptr; + break; + } + } + } else { + if (IsCurrEntryBlockLastInChain(Succ1, Succ2)) + break; + if (IsCurrEntryBlockLastInChain(Succ2, Succ1)) + break; + + // Fail to find the entry chain as we expect to see + // a return block as successor here: + NonReturnBlock = nullptr; + break; + } + } while (true); + + if (!NonReturnBlock) + return std::make_tuple(nullptr, nullptr); + + return std::make_tuple(ReturnBlock, NonReturnBlock); +} + +Function *PartialInlinerImpl::unswitchFunction(Function *F) { + + BasicBlock *ReturnBlock = nullptr; + BasicBlock *NonReturnBlock = nullptr; + + std::tie(ReturnBlock, NonReturnBlock) = findEntryBlocks(F); + + if (!ReturnBlock) return nullptr; +#ifndef NDEBUG + BasicBlock *EntryBlock = Entries[0]; + assert(EntryBlock && EntryBlock == &F->front()); + assert(NonReturnBlock); +#endif + // Clone the function, so that we can hack away on it. 
ValueToValueMapTy VMap; Function *DuplicateFunction = CloneFunction(F, VMap); DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); - BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]); + // Last block in the entry chain: + BasicBlock *NewLastEntryBlock = cast<BasicBlock>(VMap[&*Entries.back()]); BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]); BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]); + DenseSet<BasicBlock *> NewEntryChains; + for (BasicBlock *BB : Entries) { + NewEntryChains.insert(cast<BasicBlock>(VMap[BB])); + } // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. @@ -116,20 +250,28 @@ Ins = NewReturnBlock->getFirstNonPHI(); RetPhi->addIncoming(&*I, PreReturn); - RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock), - NewEntryBlock); - OldPhi->removeIncomingValue(NewEntryBlock); + RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewLastEntryBlock), + NewLastEntryBlock); + OldPhi->removeIncomingValue(NewLastEntryBlock); ++I; } - NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); + NewLastEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, + NewReturnBlock); + // Returns true if the block is to be partial inlined into the caller + // (i.e. not to be extracted to the out of line function) + auto ToBeInlined = [=](BasicBlock *BB) { + if (BB == NewReturnBlock || NewEntryChains.count(BB)) + return true; + return false; + + }; // Gather up the blocks that we're going to extract. std::vector<BasicBlock *> ToExtract; ToExtract.push_back(NewNonReturnBlock); for (BasicBlock &BB : *DuplicateFunction) - if (&BB != NewEntryBlock && &BB != NewReturnBlock && - &BB != NewNonReturnBlock) + if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock) ToExtract.push_back(&BB); // The CodeExtractor needs a dominator tree. 
Index: test/Transforms/CodeExtractor/MultiConditions.ll =================================================================== --- test/Transforms/CodeExtractor/MultiConditions.ll +++ test/Transforms/CodeExtractor/MultiConditions.ll @@ -0,0 +1,92 @@ +; RUN: opt < %s -partial-inliner -S | FileCheck %s +; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s + +; Function Attrs: noinline nounwind uwtable +define i32 @bar(i32 %arg) local_unnamed_addr #0 { +bb: + %tmp = icmp slt i32 %arg, 0 + br i1 %tmp, label %bb4, label %bb1 + +bb1: ; preds = %bb + %tmp2 = tail call i32 (...) @channels() #1 + %tmp3 = icmp slt i32 %tmp2, %arg + br i1 %tmp3, label %bb4, label %bb5 + +bb4: ; preds = %bb1, %bb + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + br label %bb5 + +bb5: ; preds = %bb4, %bb1 + %.0 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ] + ret i32 %.0 +} + +declare i32 @channels(...) local_unnamed_addr + +declare void @foo(...) local_unnamed_addr + +; Function Attrs: noinline nounwind uwtable +define i32 @dummy_caller(i32 %arg) local_unnamed_addr #0 { +bb: +; CHECK-LABEL: @dummy_caller +; CHECK: br i1 +; CHECK: br i1 +; CHECK: call void @bar.2_ + %tmp = tail call i32 @bar(i32 %arg) + ret i32 %tmp +} + +define i32 @bar_multi_ret(i32 %arg) local_unnamed_addr #0 { +bb: + %tmp = icmp slt i32 %arg, 0 + br i1 %tmp, label %bb4, label %bb1 + +bb1: ; preds = %bb + %tmp2 = tail call i32 (...) @channels() #1 + %tmp3 = icmp slt i32 %tmp2, %arg + br i1 %tmp3, label %bb4, label %bb5 + +bb4: ; preds = %bb1, %bb + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + tail call void (...) @foo() #1 + %tmp4 = icmp slt i32 %arg, 10 + br i1 %tmp4, label %bb6, label %bb5 +bb6: + tail call void (...) 
@foo() #1 + %tmp5 = icmp slt i32 %arg, 3 + br i1 %tmp5, label %bb7, label %bb5 +bb7: + tail call void (...) @foo() #1 + br label %bb8 +bb8: + ret i32 0 + +bb5: ; preds = %bb4, %bb1 + %.0 = phi i32 [ 0, %bb4 ], [ 1, %bb1 ], [0, %bb6] + ret i32 %.0 +} + +define i32 @dummy_caller2(i32 %arg) local_unnamed_addr #0 { +; CHECK: br i1 +; CHECK: br i1 +; CHECK: call i1 @bar_multi_ret.1_ + %tmp = tail call i32 @bar_multi_ret(i32 %arg) + ret i32 %tmp +} + +attributes #0 = { noinline nounwind uwtable } +attributes #1 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 (trunk 300576)"} Index: test/Transforms/CodeExtractor/SingleCondition.ll =================================================================== --- test/Transforms/CodeExtractor/SingleCondition.ll +++ test/Transforms/CodeExtractor/SingleCondition.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -partial-inliner -S | FileCheck %s +; RUN: opt < %s -passes=partial-inliner -S | FileCheck %s + +define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) { +entry: + br i1 %cond, label %if.then, label %return +if.then: + ; Dummy store to have more than 0 uses + store i32 10, i32* %align.val, align 4 + br label %return +return: ; preds = %entry + ret i32 0 +} + +define internal i32 @dummyCaller(i1 %cond, i32* align 2 %align.val) { +entry: +; CHECK-LABEL: @dummyCaller +; CHECK: br +; CHECK: call void @inlinedFunc.1_ + %val = call i32 @inlinedFunc(i1 %cond, i32* %align.val) + ret i32 %val +} +