diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -36,6 +36,7 @@ class Module; class Type; class Value; +class StructType; /// A cache for the CodeExtractor analysis. The operation \ref /// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this @@ -100,9 +101,12 @@ unsigned NumExitBlocks = std::numeric_limits::max(); Type *RetTy; - // Mapping from the original exit blocks, to the new blocks inside - // the function. - SmallVector OldTargets; + /// Lists of blocks that are branched from the code region to be extracted. + /// Each block is contained at most once. Its order defines the return value + /// of the extracted function, when leaving the extracted function via the + /// first block it returns 0. When leaving via the second entry it returns + /// 1, etc. + SmallVector SwitchCases; // Suffix to use when creating extracted function (appended to the original // function name + "."). If empty, the default is to use the entry block @@ -226,26 +230,53 @@ getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC, Instruction *Addr, BasicBlock *ExitBlock) const; + /// Updates the list of exit blocks (OldTargets and ExitBlocks) after + /// changes of the control flow or the Blocks list. + void recomputeExitBlocks(); + void severSplitPHINodesOfEntry(BasicBlock *&Header); - void severSplitPHINodesOfExits(const SmallPtrSetImpl &Exits); + void severSplitPHINodesOfExits(); void splitReturnBlocks(); - Function *constructFunction(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header, - BasicBlock *newRootNode, BasicBlock *newHeader, - Function *oldFunction, Module *M); - void moveCodeToFunction(Function *newFunction); void calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, - DenseMap &ExitWeights, + const DenseMap &ExitWeights, BranchProbabilityInfo *BPI); - CallInst *emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *newHeader, - ValueSet &inputs, ValueSet &outputs); + /// Normalizes the control flow of the extracted regions, such as ensuring + /// that the extracted region does not contain a return instruction. + void normalizeCFGForExtraction(BasicBlock *&Header); + + /// Generates the function declaration for the function containing the + /// extracted code. + Function *constructFunctionDeclaration(const ValueSet &inputs, + const ValueSet &outputs, + BlockFrequency EntryFreq, + const Twine &Name); + + /// Generates the code for the extracted function. That is: a prolog, the + /// moved or copied code from the original function, and epilogs for each + /// exit. + void emitFunctionBody(const ValueSet &inputs, const ValueSet &outputs, + Function *newFunction, StructType *StructArgTy, + BasicBlock *header, const ValueSet &SinkingCands); + + /// Generates a Basic Block that calls the extracted function. + CallInst *emitReplacerCall(const ValueSet &inputs, const ValueSet &outputs, + Function *newFunction, StructType *StructArgTy, + Function *oldFunction, BasicBlock *ReplIP, + BlockFrequency EntryFreq, + ArrayRef LifetimesStart, + std::vector &Reloads); + + /// Connects the basic block containing the call to the extracted function + /// into the original function's control flow. + void insertReplacerCall( + Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, + const ValueSet &outputs, ArrayRef Reloads, + const DenseMap &ExitWeights); }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -434,7 +434,6 @@ } // Now add the old exit block to the outline region. Blocks.insert(CommonExitBlock); - OldTargets.push_back(NewExitBlock); return CommonExitBlock; } @@ -744,9 +743,8 @@ /// outlined region, we split these PHIs on two: one with inputs from region /// and other with remaining incoming blocks; then first PHIs are placed in /// outlined region. -void CodeExtractor::severSplitPHINodesOfExits( - const SmallPtrSetImpl &Exits) { - for (BasicBlock *ExitBB : Exits) { +void CodeExtractor::severSplitPHINodesOfExits() { + for (BasicBlock *ExitBB : SwitchCases) { BasicBlock *NewBB = nullptr; for (PHINode &PN : ExitBB->phis()) { @@ -811,22 +809,29 @@ /// constructFunction - make a function based on inputs and outputs, as follows: /// f(in0, ..., inN, out0, ..., outN) -Function *CodeExtractor::constructFunction(const ValueSet &inputs, - const ValueSet &outputs, - BasicBlock *header, - BasicBlock *newRootNode, - BasicBlock *newHeader, - Function *oldFunction, - Module *M) { +Function *CodeExtractor::constructFunctionDeclaration(const ValueSet &inputs, + const ValueSet &outputs, + BlockFrequency EntryFreq, + const Twine &Name) { LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); + Function *oldFunction = Blocks.front()->getParent(); + LLVMContext &Context = oldFunction->getContext(); + Module *M = Blocks.front()->getModule(); + // This function returns unsigned, outputs will go back by reference. switch (NumExitBlocks) { case 0: - case 1: RetTy = Type::getVoidTy(header->getContext()); break; - case 2: RetTy = Type::getInt1Ty(header->getContext()); break; - default: RetTy = Type::getInt16Ty(header->getContext()); break; + case 1: + RetTy = Type::getVoidTy(Context); + break; + case 2: + RetTy = Type::getInt1Ty(Context); + break; + default: + RetTy = Type::getInt16Ty(Context); + break; } std::vector paramTy; @@ -863,14 +868,10 @@ FunctionType::get(RetTy, paramTy, AllowVarArgs && oldFunction->isVarArg()); - std::string SuffixToUse = - Suffix.empty() - ? (header->getName().empty() ? "extracted" : header->getName().str()) - : Suffix; // Create the new function - Function *newFunction = Function::Create( - funcType, GlobalValue::InternalLinkage, oldFunction->getAddressSpace(), - oldFunction->getName() + "." + SuffixToUse, M); + Function *newFunction = + Function::Create(funcType, GlobalValue::InternalLinkage, + oldFunction->getAddressSpace(), Name, M); // If the old function is no-throw, so is the new one. if (oldFunction->doesNotThrow()) newFunction->setDoesNotThrow(); @@ -879,6 +880,10 @@ if (oldFunction->hasUWTable()) newFunction->setHasUWTable(); + // Propagate personality info to the new function if there is one. + if (oldFunction->hasPersonalityFn()) + newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + // Inherit all of the target dependent attributes and white-listed // target independent attributes. // (e.g. If the extracted region contains a call to an x86.sse @@ -984,58 +989,48 @@ newFunction->addFnAttr(Attr); } - newFunction->getBasicBlockList().push_back(newRootNode); - - // Create an iterator to name all of the arguments we inserted. - Function::arg_iterator AI = newFunction->arg_begin(); - - // Rewrite all users of the inputs in the extracted region to use the - // arguments (or appropriate addressing into struct) instead. - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *RewriteVal; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); - Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); - Instruction *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); - RewriteVal = new LoadInst(StructTy->getElementType(i), GEP, - "loadgep_" + inputs[i]->getName(), TI); - } else - RewriteVal = &*AI++; - - std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); - for (User *use : Users) - if (Instruction *inst = dyn_cast(use)) - if (Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(inputs[i], RewriteVal); - } - // Set names for input and output arguments. + // Set parameter attributes. if (!AggregateArgs) { - AI = newFunction->arg_begin(); + // Set swifterror parameter attributes. + for (auto P : enumerate(inputs)) + if (P.value()->isSwiftError()) + newFunction->addParamAttr(P.index(), Attribute::SwiftError); + + // Set names for input and output arguments. + Function::arg_iterator AI = newFunction->arg_begin(); for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) AI->setName(inputs[i]->getName()); for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) AI->setName(outputs[i]->getName()+".out"); } - // Rewrite branches to basic blocks outside of the loop to new dummy blocks - // within the new function. This must be done before we lose track of which - // blocks were originally in the code region. - std::vector Users(header->user_begin(), header->user_end()); - for (auto &U : Users) - // The BasicBlock which contains the branch is not in the region - // modify the branch target to a new block - if (Instruction *I = dyn_cast(U)) - if (I->isTerminator() && I->getFunction() == oldFunction && - !Blocks.count(I->getParent())) - I->replaceUsesOfWith(header, newHeader); + // Update the entry count of the function. + if (BFI) { + auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); + if (Count.hasValue()) + newFunction->setEntryCount( + ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME + } return newFunction; } +static void applyFirstDebugLoc(Function *oldFunction, + ArrayRef Blocks, + Instruction *BranchI) { + if (oldFunction->getSubprogram()) { + any_of(Blocks, [&BranchI](const BasicBlock *BB) { + return any_of(*BB, [&BranchI](const Instruction &I) { + if (!I.getDebugLoc()) + return false; + BranchI->setDebugLoc(I.getDebugLoc()); + return true; + }); + }); + } +} + /// Erase lifetime.start markers which reference inputs to the extraction /// region, and insert the referenced memory into \p LifetimesStart. /// @@ -1117,397 +1112,118 @@ } } -/// emitCallAndSwitchStatement - This method sets up the caller side by adding -/// the call instruction, splitting any PHI nodes in the header block as -/// necessary. -CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction, - BasicBlock *codeReplacer, - ValueSet &inputs, - ValueSet &outputs) { - // Emit a call to the new function, passing in: *pointer to struct (if - // aggregating parameters), or plan inputs and allocated memory for outputs - std::vector params, StructValues, ReloadOutputs, Reloads; - - Module *M = newFunction->getParent(); - LLVMContext &Context = M->getContext(); - const DataLayout &DL = M->getDataLayout(); - CallInst *call = nullptr; +void CodeExtractor::moveCodeToFunction(Function *newFunction) { + Function *oldFunc = (*Blocks.begin())->getParent(); + Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); + Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - // Add inputs as params, or to be filled into the struct - unsigned ArgNo = 0; - SmallVector SwiftErrorArgs; - for (Value *input : inputs) { - if (AggregateArgs) - StructValues.push_back(input); - else { - params.push_back(input); - if (input->isSwiftError()) - SwiftErrorArgs.push_back(ArgNo); - } - ++ArgNo; - } + auto newFuncIt = newFunction->front().getIterator(); + for (BasicBlock *Block : Blocks) { + // Delete the basic block from the old function, and the list of blocks + oldBlocks.remove(Block); - // Create allocas for the outputs - for (Value *output : outputs) { - if (AggregateArgs) { - StructValues.push_back(output); - } else { - AllocaInst *alloca = - new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), - nullptr, output->getName() + ".loc", - &codeReplacer->getParent()->front().front()); - ReloadOutputs.push_back(alloca); - params.push_back(alloca); - } + // Insert this basic block into the new function + // Insert the original blocks after the entry block created + // for the new function. The entry block may be followed + // by a set of exit blocks at this point, but these exit + // blocks better be placed at the end of the new function. + newFuncIt = newBlocks.insertAfter(newFuncIt, Block); } +} - StructType *StructArgTy = nullptr; - AllocaInst *Struct = nullptr; - if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector ArgTypes; - for (Value *V : StructValues) - ArgTypes.push_back(V->getType()); +void CodeExtractor::calculateNewCallTerminatorWeights( + BasicBlock *CodeReplacer, + const DenseMap &ExitWeights, + BranchProbabilityInfo *BPI) { + using Distribution = BlockFrequencyInfoImplBase::Distribution; + using BlockNode = BlockFrequencyInfoImplBase::BlockNode; - // Allocate a struct at the beginning of this function - StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &codeReplacer->getParent()->front().front()); - params.push_back(Struct); + // Update the branch weights for the exit block. + Instruction *TI = CodeReplacer->getTerminator(); + SmallVector BranchWeights(TI->getNumSuccessors(), 0); - for (unsigned i = 0, e = inputs.size(); i != e; ++i) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - new StoreInst(StructValues[i], GEP, codeReplacer); - } - } + // Block Frequency distribution with dummy node. + Distribution BranchDist; - // Emit the call to the function - call = CallInst::Create(newFunction, params, - NumExitBlocks > 1 ? "targetBlock" : ""); - // Add debug location to the new call, if the original function has debug - // info. In that case, the terminator of the entry block of the extracted - // function contains the first debug location of the extracted function, - // set in extractCodeRegion. - if (codeReplacer->getParent()->getSubprogram()) { - if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) - call->setDebugLoc(DL); - } - codeReplacer->getInstList().push_back(call); + SmallVector EdgeProbabilities( + TI->getNumSuccessors(), BranchProbability::getUnknown()); - // Set swifterror parameter attributes. - for (unsigned SwiftErrArgNo : SwiftErrorArgs) { - call->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); - newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError); + // Add each of the frequencies of the successors. + for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { + BlockNode ExitNode(i); + uint64_t ExitFreq = ExitWeights.lookup(TI->getSuccessor(i)).getFrequency(); + if (ExitFreq != 0) + BranchDist.addExit(ExitNode, ExitFreq); + else + EdgeProbabilities[i] = BranchProbability::getZero(); } - Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); - unsigned FirstOut = inputs.size(); - if (!AggregateArgs) - std::advance(OutputArgBegin, inputs.size()); - - // Reload the outputs passed in by reference. - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = nullptr; - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); - codeReplacer->getInstList().push_back(GEP); - Output = GEP; - } else { - Output = ReloadOutputs[i]; - } - LoadInst *load = new LoadInst(outputs[i]->getType(), Output, - outputs[i]->getName() + ".reload", - codeReplacer); - Reloads.push_back(load); - std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); - for (unsigned u = 0, e = Users.size(); u != e; ++u) { - Instruction *inst = cast(Users[u]); - if (!Blocks.count(inst->getParent())) - inst->replaceUsesOfWith(outputs[i], load); - } + // Check for no total weight. + if (BranchDist.Total == 0) { + BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities); + return; } - // Now we can emit a switch statement using the call as a value. - SwitchInst *TheSwitch = - SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), - codeReplacer, 0, codeReplacer); - - // Since there may be multiple exits from the original region, make the new - // function return an unsigned, switch on that number. This loop iterates - // over all of the blocks in the extracted region, updating any terminator - // instructions in the to-be-extracted region that branch to blocks that are - // not in the region to be extracted. - std::map ExitBlockMap; - - // Iterate over the previously collected targets, and create new blocks inside - // the function to branch to. - unsigned switchVal = 0; - for (BasicBlock *OldTarget : OldTargets) { - if (Blocks.count(OldTarget)) - continue; - BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; - if (NewTarget) - continue; - - // If we don't already have an exit stub for this non-extracted - // destination, create one now! - NewTarget = BasicBlock::Create(Context, - OldTarget->getName() + ".exitStub", - newFunction); - unsigned SuccNum = switchVal++; + // Normalize the distribution so that they can fit in unsigned. + BranchDist.normalize(); - Value *brVal = nullptr; - assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); - switch (NumExitBlocks) { - case 0: - case 1: break; // No value needed. - case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); - break; - default: - brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); - break; - } + // Create normalized branch weights and set the metadata. + for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) { + const auto &Weight = BranchDist.Weights[I]; - ReturnInst::Create(Context, brVal, NewTarget); + // Get the weight and update the current BFI. + BranchWeights[Weight.TargetNode.Index] = Weight.Amount; + BranchProbability BP(Weight.Amount, BranchDist.Total); + EdgeProbabilities[Weight.TargetNode.Index] = BP; + } + BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities); + TI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); +} - // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), - SuccNum), - OldTarget); +/// Erase debug info intrinsics which refer to values in \p F but aren't in +/// \p F. +static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) { + for (Instruction &I : instructions(F)) { + SmallVector DbgUsers; + findDbgUsers(DbgUsers, &I); + for (DbgVariableIntrinsic *DVI : DbgUsers) + if (DVI->getFunction() != &F) + DVI->eraseFromParent(); } +} - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - // add a new basic block which returns the appropriate value - BasicBlock *NewTarget = ExitBlockMap[OldTarget]; - assert(NewTarget && "Unknown target block!"); +/// Fix up the debug info in the old and new functions by pointing line +/// locations and debug intrinsics to the new subprogram scope, and by deleting +/// intrinsics which point to values outside of the new function. +static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, + CallInst &TheCall) { + DISubprogram *OldSP = OldFunc.getSubprogram(); + LLVMContext &Ctx = OldFunc.getContext(); - // rewrite the original branch instruction with this new target - TI->setSuccessor(i, NewTarget); - } + if (!OldSP) { + // Erase any debug info the new function contains. + stripDebugInfo(NewFunc); + // Make sure the old function doesn't contain any non-local metadata refs. + eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); + return; } - // Store the arguments right after the definition of output value. - // This should be proceeded after creating exit stubs to be ensure that invoke - // result restore will be placed in the outlined function. - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - auto *OutI = dyn_cast(outputs[i]); - if (!OutI) - continue; - - // Find proper insertion point. - BasicBlock::iterator InsertPt; - // In case OutI is an invoke, we insert the store at the beginning in the - // 'normal destination' BB. Otherwise we insert the store right after OutI. - if (auto *InvokeI = dyn_cast(OutI)) - InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); - else if (auto *Phi = dyn_cast(OutI)) - InsertPt = Phi->getParent()->getFirstInsertionPt(); - else - InsertPt = std::next(OutI->getIterator()); - - Instruction *InsertBefore = &*InsertPt; - assert((InsertBefore->getFunction() == newFunction || - Blocks.count(InsertBefore->getParent())) && - "InsertPt should be in new function"); - assert(OAI != newFunction->arg_end() && - "Number of output arguments should match " - "the amount of defined values"); - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), - InsertBefore); - new StoreInst(outputs[i], GEP, InsertBefore); - // Since there should be only one struct argument aggregating - // all the output values, we shouldn't increment OAI, which always - // points to the struct argument, in this case. - } else { - new StoreInst(outputs[i], &*OAI, InsertBefore); - ++OAI; - } - } - - // Now that we've done the deed, simplify the switch instruction. - Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); - switch (NumExitBlocks) { - case 0: - // There are no successors (the block containing the switch itself), which - // means that previously this was the last part of the function, and hence - // this should be rewritten as a `ret' - - // Check if the function should return a value - if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, nullptr, TheSwitch); // Return void - } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { - // return what we have - ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); - } else { - // Otherwise we must have code extracted an unwind or something, just - // return whatever we want. - ReturnInst::Create(Context, - Constant::getNullValue(OldFnRetTy), TheSwitch); - } - - TheSwitch->eraseFromParent(); - break; - case 1: - // Only a single destination, change the switch into an unconditional - // branch. - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); - TheSwitch->eraseFromParent(); - break; - case 2: - BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), - call, TheSwitch); - TheSwitch->eraseFromParent(); - break; - default: - // Otherwise, make the default destination of the switch instruction be one - // of the other successors. - TheSwitch->setCondition(call); - TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - // Remove redundant case - TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); - break; - } - - // Insert lifetime markers around the reloads of any output values. The - // allocas output values are stored in are only in-use in the codeRepl block. - insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); - - return call; -} - -void CodeExtractor::moveCodeToFunction(Function *newFunction) { - Function *oldFunc = (*Blocks.begin())->getParent(); - Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); - Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); - - auto newFuncIt = newFunction->front().getIterator(); - for (BasicBlock *Block : Blocks) { - // Delete the basic block from the old function, and the list of blocks - oldBlocks.remove(Block); - - // Insert this basic block into the new function - // Insert the original blocks after the entry block created - // for the new function. The entry block may be followed - // by a set of exit blocks at this point, but these exit - // blocks better be placed at the end of the new function. - newFuncIt = newBlocks.insertAfter(newFuncIt, Block); - } -} - -void CodeExtractor::calculateNewCallTerminatorWeights( - BasicBlock *CodeReplacer, - DenseMap &ExitWeights, - BranchProbabilityInfo *BPI) { - using Distribution = BlockFrequencyInfoImplBase::Distribution; - using BlockNode = BlockFrequencyInfoImplBase::BlockNode; - - // Update the branch weights for the exit block. - Instruction *TI = CodeReplacer->getTerminator(); - SmallVector BranchWeights(TI->getNumSuccessors(), 0); - - // Block Frequency distribution with dummy node. - Distribution BranchDist; - - SmallVector EdgeProbabilities( - TI->getNumSuccessors(), BranchProbability::getUnknown()); - - // Add each of the frequencies of the successors. - for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { - BlockNode ExitNode(i); - uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency(); - if (ExitFreq != 0) - BranchDist.addExit(ExitNode, ExitFreq); - else - EdgeProbabilities[i] = BranchProbability::getZero(); - } - - // Check for no total weight. - if (BranchDist.Total == 0) { - BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities); - return; - } - - // Normalize the distribution so that they can fit in unsigned. - BranchDist.normalize(); - - // Create normalized branch weights and set the metadata. - for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) { - const auto &Weight = BranchDist.Weights[I]; - - // Get the weight and update the current BFI. - BranchWeights[Weight.TargetNode.Index] = Weight.Amount; - BranchProbability BP(Weight.Amount, BranchDist.Total); - EdgeProbabilities[Weight.TargetNode.Index] = BP; - } - BPI->setEdgeProbability(CodeReplacer, EdgeProbabilities); - TI->setMetadata( - LLVMContext::MD_prof, - MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); -} - -/// Erase debug info intrinsics which refer to values in \p F but aren't in -/// \p F. -static void eraseDebugIntrinsicsWithNonLocalRefs(Function &F) { - for (Instruction &I : instructions(F)) { - SmallVector DbgUsers; - findDbgUsers(DbgUsers, &I); - for (DbgVariableIntrinsic *DVI : DbgUsers) - if (DVI->getFunction() != &F) - DVI->eraseFromParent(); - } -} - -/// Fix up the debug info in the old and new functions by pointing line -/// locations and debug intrinsics to the new subprogram scope, and by deleting -/// intrinsics which point to values outside of the new function. -static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc, - CallInst &TheCall) { - DISubprogram *OldSP = OldFunc.getSubprogram(); - LLVMContext &Ctx = OldFunc.getContext(); - - if (!OldSP) { - // Erase any debug info the new function contains. - stripDebugInfo(NewFunc); - // Make sure the old function doesn't contain any non-local metadata refs. - eraseDebugIntrinsicsWithNonLocalRefs(NewFunc); - return; - } - - // Create a subprogram for the new function. Leave out a description of the - // function arguments, as the parameters don't correspond to anything at the - // source level. - assert(OldSP->getUnit() && "Missing compile unit for subprogram"); - DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, - OldSP->getUnit()); - auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); - DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | - DISubprogram::SPFlagOptimized | - DISubprogram::SPFlagLocalToUnit; - auto NewSP = DIB.createFunction( - OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(), - /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags); - NewFunc.setSubprogram(NewSP); + // Create a subprogram for the new function. Leave out a description of the + // function arguments, as the parameters don't correspond to anything at the + // source level. + assert(OldSP->getUnit() && "Missing compile unit for subprogram"); + DIBuilder DIB(*OldFunc.getParent(), /*AllowUnresolved=*/false, + OldSP->getUnit()); + auto SPType = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); + DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagDefinition | + DISubprogram::SPFlagOptimized | + DISubprogram::SPFlagLocalToUnit; + auto NewSP = DIB.createFunction( + OldSP->getUnit(), NewFunc.getName(), NewFunc.getName(), OldSP->getFile(), + /*LineNo=*/0, SPType, /*ScopeLine=*/0, DINode::FlagZero, SPFlags); + NewFunc.setSubprogram(NewSP); // Debug intrinsics in the new function need to be updated in one of two // ways: @@ -1602,19 +1318,9 @@ BasicBlock *header = *Blocks.begin(); Function *oldFunction = header->getParent(); - // Calculate the entry frequency of the new function before we change the root - // block. - BlockFrequency EntryFreq; - if (BFI) { - assert(BPI && "Both BPI and BFI are required to preserve profile info"); - for (BasicBlock *Pred : predecessors(header)) { - if (Blocks.count(Pred)) - continue; - EntryFreq += - BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); - } - } + normalizeCFGForExtraction(header); + // Transforms/HotColdSplit/stale-assume-in-original-func.ll // Remove @llvm.assume calls that will be moved to the new function from the // old function's assumption cache. for (BasicBlock *Block : Blocks) { @@ -1627,102 +1333,15 @@ } } - // If we have any return instructions in the region, split those blocks so - // that the return is not in the region. - splitReturnBlocks(); - - // Calculate the exit blocks for the extracted region and the total exit - // weights for each of those blocks. - DenseMap ExitWeights; - SmallPtrSet ExitBlocks; - for (BasicBlock *Block : Blocks) { - for (BasicBlock *Succ : successors(Block)) { - if (!Blocks.count(Succ)) { - // Update the branch weight for this successor. - if (BFI) { - BlockFrequency &BF = ExitWeights[Succ]; - BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); - } - ExitBlocks.insert(Succ); - } - } - } - NumExitBlocks = ExitBlocks.size(); - - for (BasicBlock *Block : Blocks) { - Instruction *TI = Block->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - if (Blocks.count(TI->getSuccessor(i))) - continue; - BasicBlock *OldTarget = TI->getSuccessor(i); - OldTargets.push_back(OldTarget); - } - } - - // If we have to split PHI nodes of the entry or exit blocks, do so now. - severSplitPHINodesOfEntry(header); - severSplitPHINodesOfExits(ExitBlocks); - - // This takes place of the original loop - BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), - "codeRepl", oldFunction, - header); - - // The new function needs a root node because other nodes can branch to the - // head of the region, but the entry node of a function cannot have preds. - BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), - "newFuncRoot"); - auto *BranchI = BranchInst::Create(header); - // If the original function has debug info, we have to add a debug location - // to the new branch instruction from the artificial entry block. - // We use the debug location of the first instruction in the extracted - // blocks, as there is no other equivalent line in the source code. - if (oldFunction->getSubprogram()) { - any_of(Blocks, [&BranchI](const BasicBlock *BB) { - return any_of(*BB, [&BranchI](const Instruction &I) { - if (!I.getDebugLoc()) - return false; - BranchI->setDebugLoc(I.getDebugLoc()); - return true; - }); - }); - } - newFuncRoot->getInstList().push_back(BranchI); - ValueSet SinkingCands, HoistingCands; BasicBlock *CommonExit = nullptr; findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit); assert(HoistingCands.empty() || CommonExit); + // analysis, after ret splitting (for values returned) // Find inputs to, outputs from the code region. findInputsOutputs(inputs, outputs, SinkingCands); - // Now sink all instructions which only have non-phi uses inside the region. - // Group the allocas at the start of the block, so that any bitcast uses of - // the allocas are well-defined. - AllocaInst *FirstSunkAlloca = nullptr; - for (auto *II : SinkingCands) { - if (auto *AI = dyn_cast(II)) { - AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); - if (!FirstSunkAlloca) - FirstSunkAlloca = AI; - } - } - assert((SinkingCands.empty() || FirstSunkAlloca) && - "Did not expect a sink candidate without any allocas"); - for (auto *II : SinkingCands) { - if (!isa(II)) { - cast(II)->moveAfter(FirstSunkAlloca); - } - } - - if (!HoistingCands.empty()) { - auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); - Instruction *TI = HoistToBlock->getTerminator(); - for (auto *II : HoistingCands) - cast(II)->moveBefore(TI); - } - // Collect objects which are inputs to the extraction region and also // referenced by lifetime start markers within it. The effects of these // markers must be replicated in the calling function to prevent the stack @@ -1730,37 +1349,232 @@ ValueSet LifetimesStart; eraseLifetimeMarkersOnInputs(Blocks, SinkingCands, LifetimesStart); - // Construct new function based on inputs/outputs & add allocas for all defs. - Function *newFunction = - constructFunction(inputs, outputs, header, newFuncRoot, codeReplacer, - oldFunction, oldFunction->getParent()); + if (!HoistingCands.empty()) { + BasicBlock *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); + Instruction *TI = HoistToBlock->getTerminator(); + for (auto *II : HoistingCands) + cast(II)->moveBefore(TI); + recomputeExitBlocks(); + } - // Update the entry count of the function. + // CFG/ExitBlocks fixed after here + + // Calculate the entry frequency of the new function before we change the root + // block. + BlockFrequency EntryFreq; + DenseMap ExitWeights; if (BFI) { - auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); - if (Count.hasValue()) - newFunction->setEntryCount( - ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME - BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + assert(BPI && "Both BPI and BFI are required to preserve profile info"); + for (BasicBlock *Pred : predecessors(header)) { + if (Blocks.count(Pred)) + continue; + EntryFreq += + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); + } + + for (BasicBlock *Succ : SwitchCases) { + for (BasicBlock *Block : predecessors(Succ)) { + if (!Blocks.count(Block)) + continue; + + BlockFrequency &BF = ExitWeights[Succ]; + BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, Succ); + } + } } - CallInst *TheCall = - emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); + // Determine position for the replacement code. Do so before header is moved + // to the new function. + BasicBlock *ReplIP = header; + while (ReplIP && Blocks.count(ReplIP)) + ReplIP = ReplIP->getNextNode(); + + // Construct new function based on inputs/outputs & add allocas for all defs. + std::string SuffixToUse = + Suffix.empty() + ? (header->getName().empty() ? "extracted" : header->getName().str()) + : Suffix; + Function *newFunction = constructFunctionDeclaration( + inputs, outputs, EntryFreq, oldFunction->getName() + "." + SuffixToUse); + + StructType *StructArgTy = nullptr; + if (AggregateArgs && (inputs.size() + outputs.size() > 0)) + StructArgTy = cast(newFunction->getArg(0)->getType()); + + emitFunctionBody(inputs, outputs, newFunction, StructArgTy, header, + SinkingCands); + + std::vector Reloads; + CallInst *call = emitReplacerCall(inputs, outputs, newFunction, StructArgTy, + oldFunction, ReplIP, EntryFreq, + LifetimesStart.getArrayRef(), Reloads); + BasicBlock *codeReplacer = call->getParent(); + + insertReplacerCall(oldFunction, header, codeReplacer, outputs, Reloads, + ExitWeights); + + fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *call); + + // Mark the new function `noreturn` if applicable. Terminators which resume + // exception propagation are treated as returning instructions. This is to + // avoid inserting traps after calls to outlined functions which unwind. + bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) { + const Instruction *Term = BB.getTerminator(); + return isa(Term) || isa(Term); + }); + if (doesNotReturn) + newFunction->setDoesNotReturn(); + + LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { + newFunction->dump(); + report_fatal_error("verification of newFunction failed!"); + }); + LLVM_DEBUG(if (verifyFunction(*oldFunction)) + report_fatal_error("verification of oldFunction failed!")); + LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) + report_fatal_error("Stale Asumption cache for old Function!")); + return newFunction; +} + +void CodeExtractor::normalizeCFGForExtraction(BasicBlock *&Header) { + // If we have any return instructions in the region, split those blocks so + // that the return is not in the region. + splitReturnBlocks(); + + // If we have to split PHI nodes of the entry or exit blocks, do so now. + severSplitPHINodesOfEntry(Header); + + // If a PHI in an exit block has multiple invoming values from the outlined + // region, create a new PHI for those values within the region such that only + // PHI itself becomes an output value, not each of its incoming values + // individually. + recomputeExitBlocks(); + severSplitPHINodesOfExits(); +} + +void CodeExtractor::recomputeExitBlocks() { + SwitchCases.clear(); + + SmallPtrSet ExitBlocks; + for (BasicBlock *Block : Blocks) { + for (BasicBlock *Succ : successors(Block)) { + if (Blocks.count(Succ)) + continue; + + bool IsNew = ExitBlocks.insert(Succ).second; + if (IsNew) + SwitchCases.push_back(Succ); + } + } + NumExitBlocks = ExitBlocks.size(); +} + +void CodeExtractor::emitFunctionBody( + const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, + StructType *StructArgTy, BasicBlock *header, const ValueSet &SinkingCands) { + Function *oldFunction = header->getParent(); + LLVMContext &Context = oldFunction->getContext(); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. + BasicBlock *newFuncRoot = + BasicBlock::Create(Context, "newFuncRoot", newFunction); + + // Now sink all instructions which only have non-phi uses inside the region. + // Group the allocas at the start of the block, so that any bitcast uses of + // the allocas are well-defined. + for (auto *II : SinkingCands) { + if (!isa(II)) { + cast(II)->moveBefore(*newFuncRoot, + newFuncRoot->getFirstInsertionPt()); + } + } + for (auto *II : SinkingCands) { + if (auto *AI = dyn_cast(II)) { + AI->moveBefore(*newFuncRoot, newFuncRoot->getFirstInsertionPt()); + } + } + + // Create an iterator to name all of the arguments we inserted. + Function::arg_iterator AI = newFunction->arg_begin(); + + // Rewrite all users of the inputs in the extracted region to use the + // arguments (or appropriate addressing into struct) instead. + SmallVector NewValues; + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + Instruction *TI = newFunction->begin()->getTerminator(); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(StructArgTy->getElementType(i), GEP, + "loadgep_" + inputs[i]->getName(), TI); + } else + RewriteVal = &*AI++; + + NewValues.push_back(RewriteVal); + } moveCodeToFunction(newFunction); - // Replicate the effects of any lifetime start/end markers which referenced - // input objects in the extraction region by placing markers around the call. - insertLifetimeMarkersSurroundingCall( - oldFunction->getParent(), LifetimesStart.getArrayRef(), {}, TheCall); + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *RewriteVal = NewValues[i]; - // Propagate personality info to the new function if there is one. - if (oldFunction->hasPersonalityFn()) - newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); + std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); + for (User *use : Users) + if (Instruction *inst = dyn_cast(use)) + if (Blocks.count(inst->getParent())) + inst->replaceUsesOfWith(inputs[i], RewriteVal); + } - // Update the branch weights for the exit block. - if (BFI && NumExitBlocks > 1) - calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); + // Create stubs for the original exit blocks. + std::map ExitBlockMap; + for (auto P : enumerate(SwitchCases)) { + BasicBlock *OldTarget = P.value(); + size_t SuccNum = P.index(); + + BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; + assert(!NewTarget && "Switch cases muast be unique"); + + // If we don't already have an exit stub for this non-extracted + // destination, create one now! + NewTarget = BasicBlock::Create(Context, OldTarget->getName() + ".exitStub", + newFunction); + + Value *brVal = nullptr; + assert(NumExitBlocks < 0xffff && "too many exit blocks for switch"); + switch (NumExitBlocks) { + case 0: + case 1: + break; // No value needed. + case 2: // Conditional branch, return a bool + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); + break; + default: + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); + break; + } + + ReturnInst::Create(Context, brVal, NewTarget); + } + + for (BasicBlock *Block : Blocks) { + Instruction *TI = Block->getTerminator(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { + if (Blocks.count(TI->getSuccessor(i))) + continue; + BasicBlock *OldTarget = TI->getSuccessor(i); + // add a new basic block which returns the appropriate value + BasicBlock *NewTarget = ExitBlockMap[OldTarget]; + assert(NewTarget && "Unknown target block!"); + + // rewrite the original branch instruction with this new target + TI->setSuccessor(i, NewTarget); + } + } // Loop over all of the PHI nodes in the header and exit blocks, and change // any references to the old incoming edge to be the new incoming edge. @@ -1771,7 +1585,243 @@ PN->setIncomingBlock(i, newFuncRoot); } - for (BasicBlock *ExitBB : ExitBlocks) + // Connect newFunction entry block to new header. + BranchInst *BranchI2 = BranchInst::Create(header, newFuncRoot); + applyFirstDebugLoc(oldFunction, Blocks.getArrayRef(), BranchI2); + + // Store the arguments right after the definition of output value. + // This should be proceeded after creating exit stubs to be ensure that invoke + // result restore will be placed in the outlined function. + Function::arg_iterator OAI = newFunction->arg_begin() + inputs.size(); + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + auto *OutI = dyn_cast(outputs[i]); + if (!OutI) + continue; + + // Find proper insertion point. + BasicBlock::iterator InsertPt; + // In case OutI is an invoke, we insert the store at the beginning in the + // 'normal destination' BB. Otherwise we insert the store right after OutI. + if (auto *InvokeI = dyn_cast(OutI)) + InsertPt = InvokeI->getNormalDest()->getFirstInsertionPt(); + else if (auto *Phi = dyn_cast(OutI)) + InsertPt = Phi->getParent()->getFirstInsertionPt(); + else + InsertPt = std::next(OutI->getIterator()); + + Instruction *InsertBefore = &*InsertPt; + assert((InsertBefore->getFunction() == newFunction || + Blocks.count(InsertBefore->getParent())) && + "InsertPt should be in new function"); + assert(OAI != newFunction->arg_end() && + "Number of output arguments should match " + "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), + InsertBefore); + new StoreInst(OutI, GEP, InsertBefore); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. + } else { + new StoreInst(OutI, &*OAI, InsertBefore); + ++OAI; + } + } +} + +CallInst *CodeExtractor::emitReplacerCall( + const ValueSet &inputs, const ValueSet &outputs, Function *newFunction, + StructType *StructArgTy, Function *oldFunction, BasicBlock *ReplIP, + BlockFrequency EntryFreq, ArrayRef LifetimesStart, + std::vector &Reloads) { + LLVMContext &Context = oldFunction->getContext(); + Module *M = oldFunction->getParent(); + const DataLayout &DL = M->getDataLayout(); + + // This takes place of the original loop + BasicBlock *codeReplacer = + BasicBlock::Create(Context, "codeRepl", oldFunction, ReplIP); + BasicBlock *AllocaBlock = &oldFunction->front(); + + // Update the entry count of the function. + if (BFI) + BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); + + // Add inputs as params, or to be filled into the struct + std::vector params; + AllocaInst *Struct = nullptr; + if (AggregateArgs && StructArgTy) { + std::vector StructValues; + for (Value *input : inputs) { + StructValues.push_back(input); + } + + Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, + "structArg", &AllocaBlock->front()); + + params.push_back(Struct); + + for (unsigned i = 0, e = inputs.size(); i != e; ++i) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + new StoreInst(StructValues[i], GEP, codeReplacer); + } + } + + std::vector ReloadOutputs; + if (!AggregateArgs) { + for (Value *input : inputs) + params.push_back(input); + + // Create allocas for the outputs + for (Value *output : outputs) { + AllocaInst *alloca = + new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), nullptr, + output->getName() + ".loc", &AllocaBlock->front()); + ReloadOutputs.push_back(alloca); + params.push_back(alloca); + } + } + + // Emit the call to the function + CallInst *call = + CallInst::Create(newFunction, params, + NumExitBlocks > 1 ? "targetBlock" : "", codeReplacer); + + // Set swifterror parameter attributes. + if (!AggregateArgs) { + for (auto P : enumerate(inputs)) { + if (P.value()->isSwiftError()) + call->addParamAttr(P.index(), Attribute::SwiftError); + } + } + + // Add debug location to the new call, if the original function has debug + // info. In that case, the terminator of the entry block of the extracted + // function contains the first debug location of the extracted function, + // set in extractCodeRegion. + if (oldFunction->getSubprogram()) { + if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) + call->setDebugLoc(DL); + } + + // Reload the outputs passed in by reference. + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *Output = nullptr; + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), inputs.size() + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( + StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); + codeReplacer->getInstList().push_back(GEP); + Output = GEP; + } else { + Output = ReloadOutputs[i]; + } + LoadInst *load = + new LoadInst(outputs[i]->getType(), Output, + outputs[i]->getName() + ".reload", codeReplacer); + Reloads.push_back(load); + } + + // Now we can emit a switch statement using the call as a value. + SwitchInst *TheSwitch = + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), + codeReplacer, 0, codeReplacer); + for (auto P : enumerate(SwitchCases)) { + BasicBlock *OldTarget = P.value(); + size_t SuccNum = P.index(); + + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), + OldTarget); + } + + // Now that we've done the deed, simplify the switch instruction. + Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); + switch (NumExitBlocks) { + case 0: + // There are no successors (the block containing the switch itself), which + // means that previously this was the last part of the function, and hence + // this should be rewritten as a `ret' + + // Check if the function should return a value + if (OldFnRetTy->isVoidTy()) { + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void + } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { + // return what we have + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); + } else { + // Otherwise we must have code extracted an unwind or something, just + // return whatever we want. + ReturnInst::Create(Context, Constant::getNullValue(OldFnRetTy), + TheSwitch); + } + + TheSwitch->eraseFromParent(); + break; + case 1: + // Only a single destination, change the switch into an unconditional + // branch. + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); + TheSwitch->eraseFromParent(); + break; + case 2: + BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), + call, TheSwitch); + TheSwitch->eraseFromParent(); + break; + default: + // Otherwise, make the default destination of the switch instruction be one + // of the other successors. + TheSwitch->setCondition(call); + TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks - 1)); + break; + } + + // Insert lifetime markers around the reloads of any output values. The + // allocas output values are stored in are only in-use in the codeRepl block. + insertLifetimeMarkersSurroundingCall(M, ReloadOutputs, ReloadOutputs, call); + + // Replicate the effects of any lifetime start/end markers which referenced + // input objects in the extraction region by placing markers around the call. + insertLifetimeMarkersSurroundingCall(M, LifetimesStart, {}, call); + + return call; +} + +void CodeExtractor::insertReplacerCall( + Function *oldFunction, BasicBlock *header, BasicBlock *codeReplacer, + const ValueSet &outputs, ArrayRef Reloads, + const DenseMap &ExitWeights) { + + // Rewrite branches to basic blocks outside of the loop to new dummy blocks + // within the new function. This must be done before we lose track of which + // blocks were originally in the code region. + std::vector Users(header->user_begin(), header->user_end()); + for (auto &U : Users) + // The BasicBlock which contains the branch is not in the region + // modify the branch target to a new block + if (Instruction *I = dyn_cast(U)) + if (I->isTerminator() && I->getFunction() == oldFunction) + I->replaceUsesOfWith(header, codeReplacer); + + // When moving the code region it is sufficient to replace all uses to the + // extracted function values. Since the original definition's block + // dominated its use, it will also be dominated by codeReplacer's switch + // which joined multiple exit blocks. + for (BasicBlock *ExitBB : SwitchCases) for (PHINode &PN : ExitBB->phis()) { Value *IncomingCodeReplacerVal = nullptr; for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { @@ -1789,27 +1839,19 @@ } } - fixupDebugInfoPostExtraction(*oldFunction, *newFunction, *TheCall); - - // Mark the new function `noreturn` if applicable. Terminators which resume - // exception propagation are treated as returning instructions. This is to - // avoid inserting traps after calls to outlined functions which unwind. - bool doesNotReturn = none_of(*newFunction, [](const BasicBlock &BB) { - const Instruction *Term = BB.getTerminator(); - return isa(Term) || isa(Term); - }); - if (doesNotReturn) - newFunction->setDoesNotReturn(); + for (unsigned i = 0, e = outputs.size(); i != e; ++i) { + Value *load = Reloads[i]; + std::vector Users(outputs[i]->user_begin(), outputs[i]->user_end()); + for (unsigned u = 0, e = Users.size(); u != e; ++u) { + Instruction *inst = cast(Users[u]); + if (inst->getParent()->getParent() == oldFunction) + inst->replaceUsesOfWith(outputs[i], load); + } + } - LLVM_DEBUG(if (verifyFunction(*newFunction, &errs())) { - newFunction->dump(); - report_fatal_error("verification of newFunction failed!"); - }); - LLVM_DEBUG(if (verifyFunction(*oldFunction)) - report_fatal_error("verification of oldFunction failed!")); - LLVM_DEBUG(if (AC && verifyAssumptionCache(*oldFunction, *newFunction, AC)) - report_fatal_error("Stale Asumption cache for old Function!")); - return newFunction; + // Update the branch weights for the exit block. + if (BFI && NumExitBlocks > 1) + calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); } bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,