diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -50,6 +50,7 @@
   PPCExpandISEL.cpp
   PPCPreEmitPeephole.cpp
   PPCLowerMASSVEntries.cpp
+  PPCSimpleOutliner.cpp
   )
 
 add_subdirectory(AsmParser)
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -78,7 +78,9 @@
   extern char &PPCVSXFMAMutateID;
 
   ModulePass *createPPCLowerMASSVEntriesPass();
+  ModulePass *createPPCSimpleOutlinerPass();
   void initializePPCLowerMASSVEntriesPass(PassRegistry &);
+  void initializePPCSimpleOutlinerPass(PassRegistry &);
   extern char &PPCLowerMASSVEntriesID;
 
   namespace PPCII {
diff --git a/llvm/lib/Target/PowerPC/PPCSimpleOutliner.cpp b/llvm/lib/Target/PowerPC/PPCSimpleOutliner.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCSimpleOutliner.cpp
@@ -0,0 +1,305 @@
+//===- PPCSimpleOutliner.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a simple lightweight pass that searches the module for functions
+// that have a conditional branch from their entry to an exit block that does
+// nothing but return, the blocks along the code path of the other condition
+// are outlined to a tail call. The entry and exit must be 'Simple'.
+//
+// Example: BB1 == Entry  TC == TailCall
+//
+//   BB1            BB1             BB1   TC_entry
+//   / \            / \             / \      |
+//  | BB2       RET2 BB2        RET2 TC     BB2
+//  |  | \   ->      | \    ->       +      | \
+//  | ANY BLOCKS    ANY BLOCKS             ANY BLOCKS
+//  |  /             |                      |
+//  RET             RET                    RET
+//
+// A simple entry: no calls or stores and all loads are either used in the
+// entry or returned in the exit block. Must have a conditional branch with
+// two successors where the simple exit is one of them.
+//
+// A simple exit: must have only one PHI and a ret instruction.
+//
+// In the above situation, we can see that if a prologue is not truly needed
+// in BB1, it can be sunk into BB2. With the original layout, this
+// may not happen if the return value needs to be computed in BB1 and is
+// different from the return value from the BB2 subgraph.
+// For example, we may return zero from BB1 and compute the return value from
+// the graph rooted at BB2. RET will then have a phi node that gets zero from
+// BB1 and a different value from other blocks. If those other blocks have
+// calls, the computed return value will likely be in a CSR, thereby requiring
+// a prologue in BB1.
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-simple-outliner"
+
+// TODO: Instrument more useful statistics.
+STATISTIC(NumTailCallsEnabled,
+          "Number of tail calls enabled by the PPC outliner");
+
+namespace llvm {
+void initializePPCSimpleOutlinerPass(PassRegistry &);
+}
+
+namespace {
+class PPCSimpleOutliner : public ModulePass {
+  /// Outlines each function if it meets the policy requirements.
+  bool tryOutlining(Function &F);
+  /// Rewrites the entry of the clone so it unconditionally branches away
+  /// from the simple return block (the clone only keeps the "work" path).
+  void convertToUncondBranchFromEntry(Function &F);
+  /// Clones produced by this pass; they must not be outlined again.
+  SmallPtrSet<Function *, 8> ClonedFunctions;
+
+public:
+  static char ID;
+  PPCSimpleOutliner() : ModulePass(ID) {
+    initializePPCSimpleOutlinerPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnModule(Module &M) override;
+};
+
+// We are looking for a block that simply has a PHI and a return.
+static bool isSimpleReturnBB(BasicBlock *BB, ReturnInst *&RetInst,
+                             PHINode *&OnlyPHI) {
+  LLVM_DEBUG(dbgs() << "Checking for simple return: " << BB->getName() << "\n");
+  Instruction *Term = BB->getTerminator();
+  ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+  // The terminator must be a return and must be the first real (non-PHI,
+  // non-debug, non-lifetime) instruction in the block.
+  if (!Ret || Term != BB->getFirstNonPHIOrDbgOrLifetime())
+    return false;
+
+  // We are looking for a single PHI node.
+  PHINode *SinglePHI = nullptr;
+  for (auto &PHI : BB->phis()) {
+    // More than one PHI.
+    if (SinglePHI)
+      return false;
+    SinglePHI = &PHI;
+  }
+
+  // We have now confirmed that the first actual instruction is the return
+  // instruction and that there is at most a single phi node in the block.
+  // This is a simple return block; set RetInst and OnlyPHI and return.
+  RetInst = Ret;
+  OnlyPHI = SinglePHI;
+  LLVM_DEBUG(dbgs() << "Found a simple return block: \n");
+  LLVM_DEBUG(BB->dump());
+  LLVM_DEBUG(dbgs() << "Return instruction: ");
+  LLVM_DEBUG(RetInst->dump());
+  LLVM_DEBUG(dbgs() << "Only PHI node: ");
+  LLVM_DEBUG(if (OnlyPHI) OnlyPHI->dump(); else dbgs() << "No PHI node\n");
+  return true;
+}
+
+// Returns true if BB contains any call or store. Lifetime intrinsics are
+// exempt (they are calls, but carry no side effects we care about here).
+static bool hasCallOrStore(BasicBlock &BB) {
+  for (Instruction &I : BB) {
+    if ((isa<CallBase>(I) || isa<StoreInst>(I)) && !I.isLifetimeStartOrEnd()) {
+      LLVM_DEBUG(dbgs() << "has call or store: \n");
+      LLVM_DEBUG(I.dump());
+      return true;
+    }
+  }
+  return false;
+}
+
+// Check for loads and ensure any use outside of the block is only by a
+// return (through a single-use PHI feeding a ret). If such a ret is found
+// it is reported through PossibleRetInst.
+static bool hasNonRetLoadUsesNotInEntry(BasicBlock &BB,
+                                        ReturnInst *&PossibleRetInst) {
+  for (Instruction &I : BB) {
+    if (isa<LoadInst>(I)) {
+      for (User *U : I.users()) {
+        // Users of an instruction's value are always instructions.
+        Instruction *UserInst = cast<Instruction>(U);
+        // NOTE(review): isUsedInBasicBlock tests whether UserInst has uses
+        // in BB; the intent here looks like "UserInst lives in the entry" —
+        // confirm whether UserInst->getParent() == &BB was meant.
+        if (UserInst->isUsedInBasicBlock(&BB))
+          continue; // Used in the entry block itself.
+        else if (isa<PHINode>(UserInst) && UserInst->hasOneUse())
+          if (auto *Ret = dyn_cast<ReturnInst>(*UserInst->user_begin())) {
+            // PHI whose only use is a ret: remember it for the caller.
+            PossibleRetInst = Ret;
+            continue;
+          }
+        LLVM_DEBUG(dbgs() << "load used outside of entry and not by a ret:\n");
+        LLVM_DEBUG(I.dump());
+        LLVM_DEBUG(dbgs() << "Use:");
+        LLVM_DEBUG(UserInst->dump());
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+void PPCSimpleOutliner::convertToUncondBranchFromEntry(Function &F) {
+  BasicBlock &Entry = F.front();
+  BranchInst *BranchFromEntry = dyn_cast<BranchInst>(Entry.getTerminator());
+  assert(BranchFromEntry && BranchFromEntry->isConditional() &&
+         "Expecting entry to terminate with a branch");
+  assert(succ_size(&Entry) == 2 && "Expected entry to have 2 successors");
+
+  BasicBlock *SimpleRetSucc = nullptr;
+  PHINode *OnlyPHI = nullptr;
+  ReturnInst *RetInst = nullptr;
+  for (BasicBlock *Succ : successors(&Entry)) {
+    if (!isSimpleReturnBB(Succ, RetInst, OnlyPHI))
+      continue;
+    SimpleRetSucc = Succ;
+    break;
+  }
+  assert(SimpleRetSucc &&
+         "Expected entry to have a successor that is simply a return block");
+  // Force the branch to always take the non-return path; later cleanup
+  // (SimplifyCFG/branch folding) removes the dead return successor.
+  if (BranchFromEntry->getSuccessor(0) == SimpleRetSucc)
+    BranchFromEntry->setCondition(
+        ConstantInt::getFalse(BranchFromEntry->getContext()));
+  else
+    BranchFromEntry->setCondition(
+        ConstantInt::getTrue(BranchFromEntry->getContext()));
+}
+
+// Drop every incoming value from Pred in the PHI nodes of Succ.
+static void removePHIEntriesFrom(BasicBlock *Pred, BasicBlock *Succ) {
+  // removeIncomingValue(..., /*DeletePHIIfEmpty=*/true) may erase the PHI
+  // itself, so iterate with an early-increment range to stay valid.
+  for (PHINode &PHI : llvm::make_early_inc_range(Succ->phis()))
+    while (PHI.getBasicBlockIndex(Pred) != -1)
+      PHI.removeIncomingValue(Pred, true);
+}
+
+bool PPCSimpleOutliner::tryOutlining(Function &F) {
+  BasicBlock &Entry = F.front();
+  BranchInst *BranchFromEntry = dyn_cast<BranchInst>(Entry.getTerminator());
+  if (succ_size(&Entry) != 2 || !BranchFromEntry ||
+      BranchFromEntry->isUnconditional() ||
+      BranchFromEntry->getSuccessor(0) == BranchFromEntry->getSuccessor(1)) {
+    LLVM_DEBUG(dbgs() << "Invalid branch from entry");
+    return false;
+  }
+
+  ReturnInst *PossibleRetInst = nullptr;
+  if (hasCallOrStore(Entry) ||
+      hasNonRetLoadUsesNotInEntry(Entry, PossibleRetInst))
+    return false;
+
+  FunctionType *FTy = F.getFunctionType();
+  if (FTy->isVarArg()) {
+    LLVM_DEBUG(dbgs() << "Function has variable arguments");
+    return false;
+  }
+
+  // Forward every argument to the clone verbatim; by-value pointees would
+  // be copied again by the call, so reject them.
+  std::vector<Value *> Args;
+  for (Argument &Arg : F.args()) {
+    if (Arg.hasPassPointeeByValueAttr()) {
+      LLVM_DEBUG(dbgs() << "Function argument is passed by value");
+      return false;
+    }
+    Args.push_back(&Arg);
+  }
+
+  BasicBlock *SimpleRetSucc = nullptr;
+  PHINode *OnlyPHI = nullptr;
+  ReturnInst *RetInst = nullptr;
+  for (BasicBlock *Succ : successors(&Entry)) {
+    if (!isSimpleReturnBB(Succ, RetInst, OnlyPHI))
+      continue;
+    SimpleRetSucc = Succ;
+    break;
+  }
+  if (!SimpleRetSucc || !RetInst || !OnlyPHI)
+    return false;
+  if (RetInst->getReturnValue() != OnlyPHI)
+    return false;
+  // Any ret reached by a load from the entry must be the simple return.
+  if (PossibleRetInst && PossibleRetInst != RetInst)
+    return false;
+  LLVM_DEBUG(dbgs() << "Entry has a simple return successor\n");
+  LLVM_DEBUG(dbgs() << "SimpleRetSucc:\n");
+  LLVM_DEBUG(SimpleRetSucc->dump());
+
+  LLVM_DEBUG(dbgs() << "Function is outline-able\n");
+  // Clone the function and mark the clone noinline, internal linkage,
+  // fastcc; the clone keeps only the "work" path out of the entry.
+  ValueToValueMapTy VMap;
+  Function *NewF = CloneFunction(&F, VMap);
+  NewF->setCallingConv(CallingConv::Fast);
+  NewF->addFnAttr(Attribute::NoInline);
+  NewF->setLinkage(GlobalValue::InternalLinkage);
+  ClonedFunctions.insert(NewF);
+  convertToUncondBranchFromEntry(*NewF);
+  // In the original function, replace the non-return path with a block
+  // that simply tail-calls the clone.
+  BasicBlock *OutlineCallBB =
+      BasicBlock::Create(Entry.getContext(), "outline.tc", &F, SimpleRetSucc);
+
+  // Remove any entries from PHI nodes in the old successor of Entry that
+  // refer to Entry and replace the successor.
+  if (BranchFromEntry->getSuccessor(0) == SimpleRetSucc) {
+    removePHIEntriesFrom(&Entry, BranchFromEntry->getSuccessor(1));
+    BranchFromEntry->setSuccessor(1, OutlineCallBB);
+  } else {
+    removePHIEntriesFrom(&Entry, BranchFromEntry->getSuccessor(0));
+    BranchFromEntry->setSuccessor(0, OutlineCallBB);
+  }
+
+  // Create the call to the outlined function and return its result.
+  const AttributeList &Attrs = F.getAttributes();
+  CallInst *OutlineCall =
+      CallInst::Create(FTy, NewF, Args, "outline.call", OutlineCallBB);
+  OutlineCall->setAttributes(Attrs);
+  OutlineCall->setCallingConv(CallingConv::Fast);
+  ReturnInst::Create(OutlineCallBB->getContext(), OutlineCall, OutlineCallBB);
+  OutlineCall->setTailCall();
+  OutlineCall->setDebugLoc(BranchFromEntry->getDebugLoc());
+  ++NumTailCallsEnabled;
+  LLVM_DEBUG(dbgs() << "Old function:\n");
+  LLVM_DEBUG(F.dump());
+  LLVM_DEBUG(dbgs() << "Cloned function:\n");
+  LLVM_DEBUG(NewF->dump());
+  return true;
+}
+
+bool PPCSimpleOutliner::runOnModule(Module &M) {
+  if (M.empty())
+    return false;
+  bool Changed = false;
+  for (Function &F : M) {
+    if (F.empty())
+      continue;
+    /// TODO: Determine if checking the Function contents is sufficient or
+    /// if checking the MachineFunction contents is also necessary.
+    /// TODO: replace the FunctionPass::skipFunction check with something
+    /// equivalent.
+    LLVM_DEBUG(dbgs() << "PPC Simple Function Outliner\n");
+    LLVM_DEBUG(F.dump());
+
+    // Never re-outline a clone this pass created.
+    if (ClonedFunctions.count(&F) == 0)
+      Changed |= tryOutlining(F);
+  }
+  return Changed;
+}
+
+} // namespace
+
+INITIALIZE_PASS(PPCSimpleOutliner, DEBUG_TYPE,
+                "Recognize idioms that are useful to transform on PPC", false,
+                false)
+
+char PPCSimpleOutliner::ID = 0;
+ModulePass *llvm::createPPCSimpleOutlinerPass() {
+  return new PPCSimpleOutliner();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -95,6 +95,12 @@
 ReduceCRLogical("ppc-reduce-cr-logicals",
                 cl::desc("Expand eligible cr-logical binary ops to branches"),
                 cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+    EnableSimpleOutlinerPass("enable-ppc-simple-outliner",
+                             cl::desc("Enable the simple outliner pass"),
+                             cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
   // Register the targets
   RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
@@ -121,6 +127,7 @@
   initializePPCTLSDynamicCallPass(PR);
   initializePPCMIPeepholePass(PR);
   initializePPCLowerMASSVEntriesPass(PR);
+  initializePPCSimpleOutlinerPass(PR);
 }
 
 /// Return the datalayout string of a subtarget.
@@ -405,8 +412,11 @@
 }
 
 void PPCPassConfig::addIRPasses() {
-  if (TM->getOptLevel() != CodeGenOpt::None)
+  if (TM->getOptLevel() != CodeGenOpt::None) {
+    if (EnableSimpleOutlinerPass)
+      addPass(createPPCSimpleOutlinerPass());
     addPass(createPPCBoolRetToIntPass());
+  }
   addPass(createAtomicExpandPass());
 
   // Lower generic MASSV routines to PowerPC subtarget-specific entries.
diff --git a/llvm/test/CodeGen/PowerPC/simple-outliner.ll b/llvm/test/CodeGen/PowerPC/simple-outliner.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/simple-outliner.ll
@@ -0,0 +1,309 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -enable-ppc-simple-outliner -ppc-asm-full-reg-names < %s | \
+; RUN:   FileCheck %s
+
+@a = global i32 0, align 4
+@b = global i32 0, align 4
+@c = global i32 0, align 4
+@d = global i8* null, align 8
+
+; simple return
+define i32* @_Z3fn1v() {
+; CHECK-LABEL: _Z3fn1v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, b@toc@ha
+; CHECK-NEXT:    lwz r3, b@toc@l(r3)
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    beq cr0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %return
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    blr
+; CHECK-NEXT:  .LBB0_2: # %outline.tc
+; CHECK-NEXT:    b _Z3fn1v.1
+; CHECK-NEXT:    #TC_RETURNd8 _Z3fn1v.1 0
+entry:
+  %0 = load i32, i32* @b, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %return
+
+if.end:
+  store i32 0, i32* @a, align 4
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ]
+  ret i32* %retval.0
+}
+
+; multi-use load outside of entry
+define i32* @_Z3fn2v() {
+; CHECK-LABEL: _Z3fn2v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, b@toc@ha
+; CHECK-NEXT:    lwz r4, b@toc@l(r3)
+; CHECK-NEXT:    addi r3, r4, 4
+; CHECK-NEXT:    cmplwi r3, 5391
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    bgtlr cr0
+; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    addis r3, r2, a@toc@ha
+; CHECK-NEXT:    stw r4, a@toc@l(r3)
+; CHECK-NEXT:    addi r3, r3, a@toc@l
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @b, align 4
+  %.off = add i32 %0, 4
+  %1 = icmp ugt i32 %.off, 5391
+  br i1 %1, label %return, label %if.end
+
+if.end:
+  store i32 %0, i32* @a, align 4
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ]
+  ret i32* %retval.0
+}
+
+; multi-use load in entry and multiple loads
+define i32* @_Z3fn3v() {
+; CHECK-LABEL: _Z3fn3v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, b@toc@ha
+; CHECK-NEXT:    addis r4, r2, a@toc@ha
+; CHECK-NEXT:    lwz r3, b@toc@l(r3)
+; CHECK-NEXT:    lwz r4, a@toc@l(r4)
+; CHECK-NEXT:    cmpw r3, r4
+; CHECK-NEXT:    addis r4, r2, c@toc@ha
+; CHECK-NEXT:    lwz r4, c@toc@l(r4)
+; CHECK-NEXT:    bc 12, lt, .LBB2_3
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    cmpw r3, r4
+; CHECK-NEXT:    bc 12, gt, .LBB2_3
+; CHECK-NEXT:  # %bb.2: # %outline.tc
+; CHECK-NEXT:    b _Z3fn3v.2
+; CHECK-NEXT:    #TC_RETURNd8 _Z3fn3v.2 0
+; CHECK-NEXT:  .LBB2_3: # %return
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @b, align 4
+  %1 = load i32, i32* @a, align 4
+  %cmp = icmp slt i32 %0, %1
+  %2 = load i32, i32* @c, align 4
+  %cmp1 = icmp sgt i32 %0, %2
+  %or.cond = or i1 %cmp, %cmp1
+  br i1 %or.cond, label %return, label %if.end
+
+if.end:
+  store i32 0, i32* @a, align 4
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ]
+  ret i32* %retval.0
+}
+
+; function call in entry
+define i32* @_Z3fn4v() {
+; CHECK-LABEL: _Z3fn4v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r29, -24
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    addis r3, r2, b@toc@ha
+; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    li r5, 10
+; CHECK-NEXT:    li r30, 0
+; CHECK-NEXT:    lwz r29, b@toc@l(r3)
+; CHECK-NEXT:    addis r3, r2, d@toc@ha
+; CHECK-NEXT:    ld r3, d@toc@l(r3)
+; CHECK-NEXT:    bl strtol
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    cmplw r29, r3
+; CHECK-NEXT:    beq cr0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    addis r3, r2, a@toc@ha
+; CHECK-NEXT:    addi r30, r3, a@toc@l
+; CHECK-NEXT:    stw r29, a@toc@l(r3)
+; CHECK-NEXT:  .LBB3_2: # %return
+; CHECK-NEXT:    mr r3, r30
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @b, align 4
+  %1 = load i8*, i8** @d, align 8
+  %call.i = tail call i64 @strtol(i8* nocapture nonnull %1, i8** null, i32 signext 10) #6
+  %conv.i = trunc i64 %call.i to i32
+  %cmp = icmp eq i32 %0, %conv.i
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  store i32 %0, i32* @a, align 4
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ]
+  ret i32* %retval.0
+}
+
+; inline assembly in entry
+define i32* @_Z3fn5v() {
+; CHECK-LABEL: _Z3fn5v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    addi r3, r3, 1
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    addis r3, r2, b@toc@ha
+; CHECK-NEXT:    lwz r3, b@toc@l(r3)
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    bnelr cr0
+; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    addis r3, r2, a@toc@ha
+; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    stw r4, a@toc@l(r3)
+; CHECK-NEXT:    addi r3, r3, a@toc@l
+; CHECK-NEXT:    blr
+entry:
+  tail call void asm sideeffect "addi 3, 3, 1", "~{r3}"() #6
+  %0 = load i32, i32* @b, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %return
+
+if.end:
+  store i32 0, i32* @a, align 4
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ]
+  ret i32* %retval.0
+}
+
+; store in entry
+define i32* @_Z3fn6v() {
+; CHECK-LABEL: _Z3fn6v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, b@toc@ha
+; CHECK-NEXT:    lwz r4, b@toc@l(r3)
+; CHECK-NEXT:    addi r5, r4, 1
+; CHECK-NEXT:    cmplw r5, r4
+; CHECK-NEXT:    stw r5, b@toc@l(r3)
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    bgelr cr0
+; CHECK-NEXT:  # %bb.1: # %if.end
+; CHECK-NEXT:    addis r3, r2, a@toc@ha
+; CHECK-NEXT:    li r4, 0
+; CHECK-NEXT:    stw r4, a@toc@l(r3)
+; CHECK-NEXT:    addi r3, r3, a@toc@l
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i32, i32* @b, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @b, align 4
+  %tobool = icmp eq i32 %inc, 0
+  br i1 %tobool, label %if.end, label %return
+
+if.end:
+  store i32 0, i32* @a, align 4
+  br label %return
+
+return:
+  %retval.0 = phi i32* [ @a, %if.end ], [ null, %entry ]
+  ret i32* %retval.0
+}
+
+; multi-use load outside entry in return
+define i8* @_Z3fn7v() {
+; CHECK-LABEL: _Z3fn7v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addis r3, r2, d@toc@ha
+; CHECK-NEXT:    ld r3, d@toc@l(r3)
+; CHECK-NEXT:    cmpldi r3, 0
+; CHECK-NEXT:    bnelr cr0
+; CHECK-NEXT:  # %bb.1: # %outline.tc
+; CHECK-NEXT:    b _Z3fn7v.3
+; CHECK-NEXT:    #TC_RETURNd8 _Z3fn7v.3 0
+entry:
+  %0 = load i8*, i8** @d, align 8
+  %cmp = icmp eq i8* %0, null
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %call = tail call noalias nonnull dereferenceable(1) i8* @_Znwm(i64 1) #7
+  store i8* %call, i8** @d, align 8
+  br label %if.end
+
+if.end:
+  %1 = phi i8* [ %call, %if.then ], [ %0, %entry ]
+  ret i8* %1
+}
+
+declare noalias nonnull i8* @_Znwm(i64)
+
+; multi-use load in exit block and not by return
+define noalias i8* @_Z3fn9v() {
+; CHECK-LABEL: _Z3fn9v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r29, -24
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r0, 16(r1)
+; CHECK-NEXT:    stdu r1, -64(r1)
+; CHECK-NEXT:    addis r29, r2, d@toc@ha
+; CHECK-NEXT:    ld r4, d@toc@l(r29)
+; CHECK-NEXT:    lbz r3, 0(r4)
+; CHECK-NEXT:    cmpldi r4, 0
+; CHECK-NEXT:    addi r30, r3, 1
+; CHECK-NEXT:    bne cr0, .LBB7_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    bl _Znwm
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mr r4, r3
+; CHECK-NEXT:    std r3, d@toc@l(r29)
+; CHECK-NEXT:  .LBB7_2: # %if.end
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    stb r30, 0(r4)
+; CHECK-NEXT:    addi r1, r1, 64
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+entry:
+  %0 = load i8*, i8** @d, align 8
+  %cmp = icmp eq i8* %0, null
+  %val = load i8, i8* %0, align 1
+  %val1 = add i8 %val, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %call = tail call noalias nonnull dereferenceable(1) i8* @_Znwm(i64 1) #7
+  store i8* %call, i8** @d, align 8
+  br label %if.end
+
+if.end:
+  %1 = phi i8 [ undef, %if.then ], [ %val1, %entry]
+  %2 = phi i8* [ %call, %if.then ], [ %0, %entry]
+  store i8 %1, i8* %2, align 1
+  ret i8* null
+}
+
+declare i64 @strtol(i8* readonly, i8** nocapture, i32 signext)