Index: include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- include/llvm/Transforms/Utils/CodeExtractor.h +++ include/llvm/Transforms/Utils/CodeExtractor.h @@ -42,11 +42,12 @@ /// 3) Add allocas for any scalar outputs, adding all of the outputs' allocas /// as arguments, and inserting stores to the arguments for any scalars. class CodeExtractor { - typedef SetVector ValueSet; + typedef SmallVector ValueSet; // Various bits of state computed on construction. DominatorTree *const DT; const bool AggregateArgs; + const bool ExtractContainedStaticAllocas; // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; @@ -64,7 +65,8 @@ /// /// In this formation, we don't require a dominator tree. The given basic /// block is set up for extraction. - CodeExtractor(BasicBlock *BB, bool AggregateArgs = false); + CodeExtractor(BasicBlock *BB, bool AggregateArgs = false, + bool ExtractContainedStaticAllocas = false); /// \brief Create a code extractor for a sequence of blocks. /// @@ -73,20 +75,23 @@ /// sequence out into its new function. When a DominatorTree is also given, /// extra checking and transformations are enabled. CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, - bool AggregateArgs = false); + bool AggregateArgs = false, + bool ExtractContainedStaticAllocas = false); /// \brief Create a code extractor for a loop body. /// /// Behaves just like the generic code sequence constructor, but uses the /// block sequence of the loop. - CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false); + CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false, + bool ExtractContainedStaticAllocas = false); /// \brief Create a code extractor for a region node. /// /// Behaves just like the generic code sequence constructor, but uses the /// block sequence of the region node passed in. 
CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs = false); + bool AggregateArgs = false, + bool ExtractContainedStaticAllocas = false); /// \brief Perform the extraction, returning the new function. /// Index: lib/Transforms/IPO/PartialInlining.cpp =================================================================== --- lib/Transforms/IPO/PartialInlining.cpp +++ lib/Transforms/IPO/PartialInlining.cpp @@ -135,7 +135,8 @@ // Extract the body of the if. Function *ExtractedFunction = - CodeExtractor(ToExtract, &DT).extractCodeRegion(); + CodeExtractor(ToExtract, &DT, /*AggregateArgs*/false, + /*ExtractContainedStaticAllocas*/true).extractCodeRegion(); // Inline the top-level if test into all callers. std::vector Users(DuplicateFunction->user_begin(), Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -119,22 +119,30 @@ return buildExtractionBlockSet(R.block_begin(), R.block_end()); } -CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs) +CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, + bool ExtractContainedStaticAllocas) : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt), + ExtractContainedStaticAllocas(ExtractContainedStaticAllocas), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, - bool AggregateArgs) + bool AggregateArgs, + bool ExtractContainedStaticAllocas) : DT(DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + ExtractContainedStaticAllocas(ExtractContainedStaticAllocas), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs) +CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, + bool ExtractContainedStaticAllocas) : DT(&DT), 
AggregateArgs(AggregateArgs||AggregateArgsOpt), + ExtractContainedStaticAllocas(ExtractContainedStaticAllocas), Blocks(buildExtractionBlockSet(L.getBlocks())), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs) + bool AggregateArgs, + bool ExtractContainedStaticAllocas) : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt), + ExtractContainedStaticAllocas(ExtractContainedStaticAllocas), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} /// definedInRegion - Return true if the specified value is defined in the @@ -157,8 +165,38 @@ return false; } +/// hasOutsideUses - Return true if the specified instruction is used outside of +/// the region that is being extracted. +static bool hasOutsideUses(const SetVector &Blocks, + Instruction *I) { + for (Use &U : I->uses()) { + Instruction *User = dyn_cast(U.getUser()); + if (!User) + continue; + if (!Blocks.count(User->getParent())) + return true; + if (!I->getType()->isPointerTy()) + continue; + // If the user is a GEP whose pointer operand is this instruction, + // recursively check the GEP for uses outside of the region. + if (GetElementPtrInst *GEP = dyn_cast(User)) { + if (GEP->getPointerOperand() != I) + continue; + if (hasOutsideUses(Blocks, GEP)) + return true; + } + // Do the same for bit cast instructions. + else if (BitCastInst *BC = dyn_cast(User)) { + if (hasOutsideUses(Blocks, BC)) + return true; + } + } + return false; +} + void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs) const { + SmallPtrSet InputSet, OutputSet; for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. 
@@ -166,11 +204,13 @@ for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; ++OI) if (definedInCaller(Blocks, *OI)) - Inputs.insert(*OI); + if (InputSet.insert(*OI).second) + Inputs.push_back(*OI); for (User *U : II.users()) if (!definedInRegion(Blocks, U)) { - Outputs.insert(&II); + if (OutputSet.insert(&II).second) + Outputs.push_back(&II); break; } } @@ -412,8 +452,8 @@ ValueSet &inputs, ValueSet &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector params, StructValues, ReloadOutputs, Reloads; - + SmallVector params, StructValues, ReloadOutputs, Reloads; + LLVMContext &Context = newFunction->getContext(); // Add inputs as params, or to be filled into the struct @@ -705,6 +745,25 @@ // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs); + // Loop through all of the inputs looking for static allocas that do not have + // any uses outside of the extracted region. + if (ExtractContainedStaticAllocas) + for (unsigned i = 0, e = inputs.size(); i < e;) { + AllocaInst *AI = dyn_cast(inputs[i]); + if (AI && AI->isStaticAlloca() && !hasOutsideUses(Blocks, AI)) { + AI->moveBefore(&newFuncRoot->front()); + Value *Back = inputs.pop_back_val(); + + // If this is the last input value then just break from the loop. + if (AI == Back) + break; + inputs[i] = Back; + --e; + continue; + } + ++i; + } + SmallPtrSet ExitBlocks; for (BasicBlock *Block : Blocks) for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; Index: test/Transforms/CodeExtractor/ExtractContainedStaticAllocas.ll =================================================================== --- /dev/null +++ test/Transforms/CodeExtractor/ExtractContainedStaticAllocas.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -partial-inliner -S | FileCheck %s +; This testcase hoists a static alloca that has all +; of its uses within the extracted region. 
+ +define internal i32 @inlinedFunc(i1 %cond) { +entry: + %dominated.alloca = alloca i32, align 4 + br i1 %cond, label %if.then, label %return +if.then: +; Dummy store to represent the only use + store i32 10, i32* %dominated.alloca, align 4 + br label %return +return: ; preds = %entry + ret i32 0 +} + +define internal i32 @dummyCaller(i1 %cond) { +entry: + %val = call i32 @inlinedFunc(i1 %cond) + ret i32 %val +} + +; CHECK-LABEL: @inlinedFunc.1_if.then +; CHECK: alloca i32