diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -391,6 +391,7 @@ void initializeShrinkWrapPass(PassRegistry&); void initializeSimpleInlinerPass(PassRegistry&); void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry&); +void initializeSimpleOutlinerLegacyPassPass(PassRegistry &); void initializeSingleLoopExtractorPass(PassRegistry&); void initializeSinkingLegacyPassPass(PassRegistry&); void initializeSjLjEHPreparePass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -205,6 +205,7 @@ (void) llvm::createPrintFunctionPass(os); (void) llvm::createModuleDebugInfoPrinterPass(); (void) llvm::createPartialInliningPass(); + (void) llvm::createSimpleOutlinerPass(); (void) llvm::createLintPass(); (void) llvm::createSinkingPass(); (void) llvm::createLowerAtomicPass(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -220,6 +220,10 @@ ModulePass *createPartialInliningPass(); //===----------------------------------------------------------------------===// +/// createSimpleOutlinerPass - This pass outlines parts of functions. +ModulePass *createSimpleOutlinerPass(); + +//===----------------------------------------------------------------------===// /// createBarrierNoopPass - This pass is purely a module pass barrier in a pass /// manager. 
ModulePass *createBarrierNoopPass(); diff --git a/llvm/include/llvm/Transforms/IPO/SimpleOutliner.h b/llvm/include/llvm/Transforms/IPO/SimpleOutliner.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/SimpleOutliner.h @@ -0,0 +1,33 @@ +//===- SimpleOutliner.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a simple lightweight pass that searches the module for functions +// that have a conditional branch from their entry to an exit block that does +// nothing but return, the blocks along the code path of the other condition +// are outlined to a tail call. The entry and exit must be 'Simple'. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_SIMPLEOUTLINER_H +#define LLVM_TRANSFORMS_IPO_SIMPLEOUTLINER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +/// Pass to outline the body of simple early exit functions +class SimpleOutlinerPass : public PassInfoMixin<SimpleOutlinerPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_SIMPLEOUTLINER_H diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -132,6 +132,7 @@ initializeDAHPass(R); initializeInstructionCombiningPassPass(R); initializeSimpleInlinerPass(R); + initializeSimpleOutlinerLegacyPassPass(R); initializePruneEHPass(R); initializeGlobalDCELegacyPassPass(R); initializeOpenMPOptLegacyPassPass(R); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp ---
a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -99,6 +99,7 @@ #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/Transforms/IPO/SampleProfile.h" +#include "llvm/Transforms/IPO/SimpleOutliner.h" #include "llvm/Transforms/IPO/StripDeadPrototypes.h" #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -275,7 +276,7 @@ extern cl::opt EnableHotColdSplit; extern cl::opt EnableOrderFileInstrumentation; - +extern cl::opt EnableSimpleOutliner; extern cl::opt FlattenedProfileUsed; extern cl::opt AttributorRun; @@ -1036,6 +1037,12 @@ MPM.addPass(GlobalOptPass()); MPM.addPass(GlobalDCEPass()); + // Run simple outliner pass to outline the body of functions to a tail call + // when they have sufficiently simple early exit conditions from entry to + // exiting block. + if (EnableSimpleOutliner) + MPM.addPass(SimpleOutlinerPass()); + // Run partial inlining pass to partially inline functions that have // large bodies. if (RunPartialInlining) @@ -1360,6 +1367,12 @@ return MPM; } + // Run simple outliner pass to outline the body of functions to a tail call + // when they have sufficiently simple early exit conditions from entry to + // exiting block. + if (EnableSimpleOutliner) + MPM.addPass(SimpleOutlinerPass()); + if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { // Load sample profile before running the LTO optimization pipeline. 
MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -86,6 +86,7 @@ MODULE_PASS("sample-profile", SampleProfileLoaderPass()) MODULE_PASS("scc-oz-module-inliner", buildInlinerPipeline(OptimizationLevel::Oz, ThinLTOPhase::None, DebugLogging)) +MODULE_PASS("simple-outliner",SimpleOutlinerPass()) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -33,6 +33,7 @@ PruneEH.cpp SampleProfile.cpp SCCP.cpp + SimpleOutliner.cpp StripDeadPrototypes.cpp StripSymbols.cpp SyntheticCountsPropagation.cpp diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -38,6 +38,7 @@ initializeIPCPPass(Registry); initializeAlwaysInlinerLegacyPassPass(Registry); initializeSimpleInlinerPass(Registry); + initializeSimpleOutlinerLegacyPassPass(Registry); initializeInferFunctionAttrsLegacyPassPass(Registry); initializeInternalizeLegacyPassPass(Registry); initializeLoopExtractorPass(Registry); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -103,6 +103,10 @@ cl::opt EnableHotColdSplit("hot-cold-split", cl::init(false), cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass")); +cl::opt EnableSimpleOutliner("enable-simple-outliner", cl::init(false), + cl::Hidden, + cl::desc("Enable simple outliner pass")); + static cl::opt 
UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); @@ -632,6 +636,12 @@ // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + // Run simple outliner pass to outline the body of functions to a tail call + // when they have sufficiently simple early exit conditions from entry to + // exiting block. + if (EnableSimpleOutliner) + MPM.add(createSimpleOutlinerPass()); + if (RunPartialInlining) MPM.add(createPartialInliningPass()); @@ -867,6 +877,12 @@ PM.add(createSampleProfileLoaderPass(PGOSampleUse)); } + // Run simple outliner pass to outline the body of functions to a tail call + // when they have sufficiently simple early exit conditions from entry to + // exiting block. + if (EnableSimpleOutliner) + PM.add(createSimpleOutlinerPass()); + // Remove unused virtual tables to improve the quality of code generated by // whole-program devirtualization and bitset lowering. PM.add(createGlobalDCEPass()); diff --git a/llvm/lib/Transforms/IPO/SimpleOutliner.cpp b/llvm/lib/Transforms/IPO/SimpleOutliner.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/IPO/SimpleOutliner.cpp @@ -0,0 +1,346 @@ +//===- SimpleOutliner.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a simple lightweight pass that searches the module for functions +// that have a conditional branch from their entry to an exit block that does +// nothing but return, the blocks along the code path of the other condition +// are outlined to a tail call. The entry and exit must be 'Simple'. 
+// +// Example: BB1 == Entry TC == TailCall +// +// BB1 BB1 BB1 TC_entry +// / \ / \ / \ | +// | BB2 RET2 BB2 RET2 TC BB2 +// | | \ -> | \ -> + | \ +// | ANY BLOCKS ANY BLOCKS ANY BLOCKS +// | / | | +// RET RET RET +// +// A simple entry: no calls or stores and all loads are either used in the +// entry or returned in the exit block. Must have a conditional branch with +// two successors where the simple exit is one of them. +// +// A simple exit: must have only one PHI and a ret instruction. +// +// In the above situation, we can see that if a prologue is not truly needed +// in BB1, it can be sunk into BB2. With the original layout, this +// may not happen if the return value needs to be computed in BB1 and is +// different from the return value from the BB2 subgraph. +// For example, we may return zero from BB1 and compute the return value from +// the graph rooted at BB2. RET will then have a phi node that gets zero from +// BB1 and a different value from other blocks. If those other blocks have +// calls, the computed return value will likely be in a CSR, thereby requiring +// a prologue in BB1.
+//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/SimpleOutliner.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "simple-outliner" + +// TODO: Instrument useful statistics +STATISTIC(NumTailCallsEnabled, "Number of tail calls enabled by the outliner"); + +namespace { +class SimpleOutlinerLegacyPass : public ModulePass { +public: + static char ID; + SimpleOutlinerLegacyPass() : ModulePass(ID) { + initializeSimpleOutlinerLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; +}; +class SimpleOutliner { + static SmallPtrSet ClonedFunctions; + void convertToUncondBranchFromEntry(Function &F); + bool isSimpleReturnBB(BasicBlock *BB, ReturnInst *&RetInst, + PHINode *&OnlyPHI); + bool hasCallOrStore(BasicBlock &BB); + bool hasNonRetLoadUsesNotInEntry(BasicBlock &BB, + ReturnInst *&PossibleRetInst); + void removePHIEntriesFrom(BasicBlock *Pred, BasicBlock *Succ); + +public: + /// Outlines each function if meets the policy requirements. 
+ bool tryOutlining(Function &F); +}; +} // Anonymous namespace. + +INITIALIZE_PASS(SimpleOutlinerLegacyPass, DEBUG_TYPE, "Simple Outliner", false, + false) + +char SimpleOutlinerLegacyPass::ID = 0; +// NOTE(review): static storage; nothing visible here ever removes entries. +SmallPtrSet SimpleOutliner::ClonedFunctions; + +ModulePass *llvm::createSimpleOutlinerPass() { + return new SimpleOutlinerLegacyPass(); +} + +// Returns true if BB is at most one PHI followed by a return instruction. +bool SimpleOutliner::isSimpleReturnBB(BasicBlock *BB, ReturnInst *&RetInst, + PHINode *&OnlyPHI) { + LLVM_DEBUG(dbgs() << "Checking for simple return: " << BB->getName() << "\n"); + Instruction *Term = BB->getTerminator(); + ReturnInst *Ret = dyn_cast(Term); + if (!Ret || Term != BB->getFirstNonPHIOrDbgOrLifetime()) + return false; + + // Accept at most one PHI node; OnlyPHI stays null when there is none. + PHINode *SinglePHI = nullptr; + for (auto &PHI : BB->phis()) { + // More than one PHI. + if (SinglePHI) + return false; + SinglePHI = &PHI; + } + + // We have now confirmed that the first actual instruction is the return + // instruction and that there is at most one phi node in the block. This + // is a simple return block, set the RetInst and OnlyPHI and return.
+ RetInst = Ret; + OnlyPHI = SinglePHI; + LLVM_DEBUG(dbgs() << "Found a simple return block: \n"); + LLVM_DEBUG(BB->dump()); + LLVM_DEBUG(dbgs() << "Return instruction: "); + LLVM_DEBUG(RetInst->dump()); + LLVM_DEBUG(dbgs() << "Only PHI node: "); + LLVM_DEBUG(if (OnlyPHI) OnlyPHI->dump(); else dbgs() << "No PHI node\n"); + return true; +} + +bool SimpleOutliner::hasCallOrStore(BasicBlock &BB) { + for (Instruction &I : BB) { + if ((isa(I) || isa(I)) && !I.isLifetimeStartOrEnd()) { + LLVM_DEBUG(dbgs() << "has call or store: \n"); + LLVM_DEBUG(I.dump()); + return true; + } + } + return false; +} + +// Returns true if a load in BB has a user that is neither used inside BB nor +// a single-use PHI whose only user is a return (stored in PossibleRetInst). +bool SimpleOutliner::hasNonRetLoadUsesNotInEntry(BasicBlock &BB, + ReturnInst *&PossibleRetInst) { + for (Instruction &I : BB) { + if (!isa<LoadInst>(I)) + continue; + for (User *U : I.users()) { + auto *UserInst = cast<Instruction>(U); + if (UserInst->isUsedInBasicBlock(&BB)) + continue; + if (isa<PHINode>(UserInst) && UserInst->hasOneUse()) + if (auto *Ret = dyn_cast<ReturnInst>(*UserInst->user_begin())) { + PossibleRetInst = Ret; + continue; + } + LLVM_DEBUG(dbgs() << "load used outside of entry and not by a ret:\n"); + LLVM_DEBUG(I.dump()); + LLVM_DEBUG(dbgs() << "Use:"); + LLVM_DEBUG(UserInst->dump()); + return true; + } + } + return false; +} + +void SimpleOutliner::convertToUncondBranchFromEntry(Function &F) { + BasicBlock &Entry = F.front(); + BranchInst *BranchFromEntry = dyn_cast(Entry.getTerminator()); + assert(BranchFromEntry && BranchFromEntry->isConditional() && + "Expecting entry to terminate with a branch"); + assert(succ_size(&Entry) == 2 && "Expected entry to have 2 successors"); + + BasicBlock *SimpleRetSucc = nullptr; + PHINode *OnlyPHI = nullptr; + ReturnInst *RetInst = nullptr; + for (BasicBlock *Succ : successors(&Entry)) { + if (!isSimpleReturnBB(Succ, RetInst, OnlyPHI)) + continue; +
SimpleRetSucc = Succ; + break; + } + assert(SimpleRetSucc && + "Expected entry to have a successor that is simply a return block"); + if (BranchFromEntry->getSuccessor(0) == SimpleRetSucc) + BranchFromEntry->setCondition( + ConstantInt::getFalse(BranchFromEntry->getContext())); + else + BranchFromEntry->setCondition( + ConstantInt::getTrue(BranchFromEntry->getContext())); +} + +// Drops every incoming entry for Pred from the PHI nodes of Succ. A PHI left +// with no entries is erased by removeIncomingValue. +void SimpleOutliner::removePHIEntriesFrom(BasicBlock *Pred, BasicBlock *Succ) { + // Removing a PHI's last incoming value erases the PHI, so advance the + // iterator before editing and never touch a PHI after it has been erased. + for (BasicBlock::iterator It = Succ->begin(); + It != Succ->end() && isa<PHINode>(*It);) { + PHINode &PHI = cast<PHINode>(*It++); + while (PHI.getBasicBlockIndex(Pred) != -1) { + const bool ErasesPHI = PHI.getNumIncomingValues() == 1; + PHI.removeIncomingValue(Pred, /*DeletePHIIfEmpty=*/true); + if (ErasesPHI) + break; + } + } +} + +bool SimpleOutliner::tryOutlining(Function &F) { + if (ClonedFunctions.count(&F)) + return false; + BasicBlock &Entry = F.front(); + BranchInst *BranchFromEntry = dyn_cast(Entry.getTerminator()); + if (succ_size(&Entry) != 2 || !BranchFromEntry || + BranchFromEntry->isUnconditional() || + BranchFromEntry->getSuccessor(0) == BranchFromEntry->getSuccessor(1)) { + LLVM_DEBUG(dbgs() << "Invalid branch from entry"); + return false; + } + + ReturnInst *PossibleRetInst = nullptr; + if (hasCallOrStore(Entry) || + hasNonRetLoadUsesNotInEntry(Entry, PossibleRetInst)) + return false; + + FunctionType *FTy = F.getFunctionType(); + if (FTy->isVarArg()) { + LLVM_DEBUG(dbgs() << "Function has variable arguments"); + return false; + } + + SmallVector Args; + for (Argument &Arg : F.args()) { + if (Arg.hasPassPointeeByValueAttr()) { + LLVM_DEBUG(dbgs() << "Function argument is passed by value"); + return false; + } + Args.push_back(&Arg); + } + + BasicBlock *SimpleRetSucc = nullptr; + PHINode *OnlyPHI = nullptr; + ReturnInst *RetInst = nullptr; + for (BasicBlock *Succ : successors(&Entry)) { + if (!isSimpleReturnBB(Succ, RetInst, OnlyPHI)) + continue; + SimpleRetSucc = Succ; + break; + } + if (!SimpleRetSucc || !RetInst || !OnlyPHI) + return false; + if (RetInst->getReturnValue() != OnlyPHI) + return false; + if (PossibleRetInst && PossibleRetInst != RetInst) +
return false; + LLVM_DEBUG(dbgs() << "Entry has a simple return successor\n"); + LLVM_DEBUG(dbgs() << "SimpleRetSucc:\n"); + LLVM_DEBUG(SimpleRetSucc->dump()); + + LLVM_DEBUG(dbgs() << "Function is outline-able\n"); + ValueToValueMapTy Dummy; + Function *NewF = CloneFunction(&F, Dummy); + NewF->setCallingConv(CallingConv::Fast); + NewF->addFnAttr(Attribute::NoInline); + NewF->setLinkage(GlobalValue::InternalLinkage); + ClonedFunctions.insert(NewF); + convertToUncondBranchFromEntry(*NewF); + // The clone has been made noinline, internal and fastcc above. The original + // function keeps its blocks; a new block for the tail call is created here + // and wired in below in place of Entry's non-return successor. + BasicBlock *OutlineCallBB = + BasicBlock::Create(Entry.getContext(), "outline.tc", &F, SimpleRetSucc); + + // Remove any entries from PHI nodes in the old successor of Entry that + // refer to Entry and replace the successor. + if (BranchFromEntry->getSuccessor(0) == SimpleRetSucc) { + removePHIEntriesFrom(&Entry, BranchFromEntry->getSuccessor(1)); + BranchFromEntry->setSuccessor(1, OutlineCallBB); + } else { + removePHIEntriesFrom(&Entry, BranchFromEntry->getSuccessor(0)); + BranchFromEntry->setSuccessor(0, OutlineCallBB); + } + + // Create the call to the outlined function.
+ const AttributeList &Attrs = F.getAttributes(); + CallInst *OutlineCall = + CallInst::Create(FTy, NewF, Args, "outline.call", OutlineCallBB); + OutlineCall->setAttributes(Attrs); + OutlineCall->setCallingConv(CallingConv::Fast); + ReturnInst::Create(OutlineCallBB->getContext(), OutlineCall, OutlineCallBB); + OutlineCall->setTailCall(); + OutlineCall->setDebugLoc(BranchFromEntry->getDebugLoc()); + NumTailCallsEnabled++; + LLVM_DEBUG(dbgs() << "Old function:\n"); + LLVM_DEBUG(F.dump()); + LLVM_DEBUG(dbgs() << "Cloned function:\n"); + LLVM_DEBUG(NewF->dump()); + return true; +} + +bool SimpleOutlinerLegacyPass::runOnModule(Module &M) { + if (M.empty() || skipModule(M)) + return false; + bool Changed = false; + for (Function &F : M) { + if (F.empty() || F.hasOptNone()) + continue; + /// TODO: Determine if checking the Function contents is sufficient or + /// if checking the MachineFunction contents is also necessary. + /// TODO: replace the FunctionPass::skipfunction check with something + /// equivalent. + LLVM_DEBUG(dbgs() << "Simple Function Outliner\n"); + LLVM_DEBUG(F.dump()); + + Changed |= SimpleOutliner().tryOutlining(F); + } + return Changed; +} + +PreservedAnalyses SimpleOutlinerPass::run(Module &M, ModuleAnalysisManager &) { + if (M.empty()) + return PreservedAnalyses::all(); + bool Changed = false; + for (Function &F : M) { + if (F.empty() || F.hasOptNone()) + continue; + LLVM_DEBUG(dbgs() << "Simple Function Outliner\n"); + LLVM_DEBUG(F.dump()); + Changed |= SimpleOutliner().tryOutlining(F); + } + return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/test/Transforms/Outline/simple-outliner.ll b/llvm/test/Transforms/Outline/simple-outliner.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Outline/simple-outliner.ll @@ -0,0 +1,263 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -O1 -enable-simple-outliner < %s | FileCheck %s + +@a = global i32 0, align 4 +@b = global i32 0, align 4 +@c = global i32 0, align 4 +@d = global i8* null, align 8 + +; simple return +define i32* @_Z3fn1v() { +; CHECK-LABEL: @_Z3fn1v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[OUTLINE_TC:%.*]], label [[RETURN:%.*]] +; CHECK: outline.tc: +; CHECK-NEXT: [[OUTLINE_CALL:%.*]] = tail call fastcc i32* @_Z3fn1v.1() #0 +; CHECK-NEXT: ret i32* [[OUTLINE_CALL]] +; CHECK: return: +; CHECK-NEXT: ret i32* null +; +entry: + %0 = load i32, i32* @b, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.end, label %return + +if.end: + store i32 0, i32* @a, align 4 + br label %return + +return: + %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ] + ret i32* %retval.0 +} + +; multi-use load in entry and multiple loads +define i32* @_Z3fn2v() { +; CHECK-LABEL: @_Z3fn2v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @c, align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[RETURN:%.*]], label [[OUTLINE_TC:%.*]] +; CHECK: outline.tc: +; CHECK-NEXT: [[OUTLINE_CALL:%.*]] = tail call fastcc i32* @_Z3fn2v.2() #0 +; CHECK-NEXT: ret i32* [[OUTLINE_CALL]] +; CHECK: return: +; 
CHECK-NEXT: ret i32* null +; +entry: + %0 = load i32, i32* @b, align 4 + %1 = load i32, i32* @a, align 4 + %cmp = icmp slt i32 %0, %1 + %2 = load i32, i32* @c, align 4 + %cmp1 = icmp sgt i32 %0, %2 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %return, label %if.end + +if.end: + store i32 0, i32* @a, align 4 + br label %return + +return: + %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ] + ret i32* %retval.0 +} + +; multi-use load outside entry in return +define i8* @_Z3fn3v() { +; CHECK-LABEL: @_Z3fn3v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** @d, align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[TMP0]], null +; CHECK-NEXT: br i1 [[CMP]], label [[OUTLINE_TC:%.*]], label [[IF_END:%.*]] +; CHECK: outline.tc: +; CHECK-NEXT: [[OUTLINE_CALL:%.*]] = tail call fastcc i8* @_Z3fn3v.3() #1 +; CHECK-NEXT: ret i8* [[OUTLINE_CALL]] +; CHECK: if.end: +; CHECK-NEXT: ret i8* [[TMP0]] +; +entry: + %0 = load i8*, i8** @d, align 8 + %cmp = icmp eq i8* %0, null + br i1 %cmp, label %if.then, label %if.end + +if.then: + %call = tail call noalias nonnull dereferenceable(1) i8* @_Znwm(i64 1) #7 + store i8* %call, i8** @d, align 8 + br label %if.end + +if.end: + %1 = phi i8* [ %call, %if.then ], [ %0, %entry ] + ret i8* %1 +} + +; multi-use load outside of entry +define i32* @_Z3fn4v() { +; CHECK-LABEL: @_Z3fn4v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[DOTOFF:%.*]] = add i32 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[DOTOFF]], 5391 +; CHECK-NEXT: br i1 [[TMP1]], label [[RETURN:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i32 [[TMP0]], i32* @a, align 4 +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32* [ @a, [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32* [[RETVAL_0]] +; +entry: + %0 = load i32, i32* @b, align 4 + %.off = add i32 %0, 4 + %1 = icmp ugt i32 %.off, 5391 + br i1 %1, label %return, label 
%if.end + +if.end: + store i32 %0, i32* @a, align 4 + br label %return + +return: + %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ] + ret i32* %retval.0 +} + +; function call in entry +define i32* @_Z3fn5v() { +; CHECK-LABEL: @_Z3fn5v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @d, align 8 +; CHECK-NEXT: [[CALL_I:%.*]] = tail call i64 @strtol(i8* nocapture nonnull [[TMP1]], i8** null, i32 signext 10) +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[CALL_I]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], [[CONV_I]] +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i32 [[TMP0]], i32* @a, align 4 +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32* [ @a, [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32* [[RETVAL_0]] +; +entry: + %0 = load i32, i32* @b, align 4 + %1 = load i8*, i8** @d, align 8 + %call.i = tail call i64 @strtol(i8* nocapture nonnull %1, i8** null, i32 signext 10) #6 + %conv.i = trunc i64 %call.i to i32 + %cmp = icmp eq i32 %0, %conv.i + br i1 %cmp, label %return, label %if.end + +if.end: + store i32 %0, i32* @a, align 4 + br label %return + +return: + %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ] + ret i32* %retval.0 +} + +; inline assembly in entry +define i32* @_Z3fn6v() { +; CHECK-LABEL: @_Z3fn6v( +; CHECK-NEXT: entry: +; CHECK-NEXT: tail call void asm sideeffect "addi 3, 3, 1", "~{r3}"() #3 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i32 0, i32* @a, align 4 +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32* [ @a, [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32* [[RETVAL_0]] +; +entry: + tail 
call void asm sideeffect "addi 3, 3, 1", "~{r3}"() #6 + %0 = load i32, i32* @b, align 4 + %tobool = icmp eq i32 %0, 0 + br i1 %tobool, label %if.end, label %return + +if.end: + store i32 0, i32* @a, align 4 + br label %return + +return: + %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ] + ret i32* %retval.0 +} + +; store in entry +define i32* @_Z3fn7v() { +; CHECK-LABEL: @_Z3fn7v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @b, align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: store i32 [[INC]], i32* @b, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[INC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i32 0, i32* @a, align 4 +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32* [ @a, [[IF_END]] ], [ null, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32* [[RETVAL_0]] +; +entry: + %0 = load i32, i32* @b, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @b, align 4 + %tobool = icmp eq i32 %inc, 0 + br i1 %tobool, label %if.end, label %return + +if.end: + store i32 0, i32* @a, align 4 + br label %return + +return: + %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ] + ret i32* %retval.0 +} + +; multi-use load in exit block and not by return +define noalias i8* @_Z3fn8v() { +; CHECK-LABEL: @_Z3fn8v( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** @d, align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[TMP0]], null +; CHECK-NEXT: [[VAL:%.*]] = load i8, i8* [[TMP0]], align 1 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias nonnull dereferenceable(1) i8* @_Znwm(i64 1) +; CHECK-NEXT: store i8* [[CALL]], i8** @d, align 8 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP1:%.*]] = phi i8* [ [[CALL]], [[IF_THEN]] ], [ [[TMP0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: 
[[VAL1:%.*]] = add i8 [[VAL]], 1 +; CHECK-NEXT: store i8 [[VAL1]], i8* [[TMP1]], align 1 +; CHECK-NEXT: ret i8* null +; +entry: + %0 = load i8*, i8** @d, align 8 + %cmp = icmp eq i8* %0, null + %val = load i8, i8* %0, align 1 + %val1 = add i8 %val, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %call = tail call noalias nonnull dereferenceable(1) i8* @_Znwm(i64 1) #7 + store i8* %call, i8** @d, align 8 + br label %if.end + +if.end: + %1 = phi i8 [ undef, %if.then ], [ %val1, %entry] + %2 = phi i8* [ %call, %if.then ], [ %0, %entry] + store i8 %1, i8* %2, align 1 + ret i8* null +} + +declare i64 @strtol(i8* readonly, i8** nocapture, i32 signext) +declare noalias nonnull i8* @_Znwm(i64)