Index: lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- lib/Transforms/IPO/ArgumentPromotion.cpp +++ lib/Transforms/IPO/ArgumentPromotion.cpp @@ -61,337 +61,449 @@ STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted"); STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated"); -namespace { - /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. - /// - struct ArgPromotion : public CallGraphSCCPass { - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - getAAResultsAnalysisUsage(AU); - CallGraphSCCPass::getAnalysisUsage(AU); - } - - bool runOnSCC(CallGraphSCC &SCC) override; - static char ID; // Pass identification, replacement for typeid - explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) { - initializeArgPromotionPass(*PassRegistry::getPassRegistry()); - } - - private: - - using llvm::Pass::doInitialization; - bool doInitialization(CallGraph &CG) override; - /// The maximum number of elements to expand, or 0 for unlimited. - unsigned maxElements; - }; -} - /// A vector used to hold the indices of a single GEP instruction typedef std::vector IndicesVector; -static CallGraphNode * -PromoteArguments(CallGraphNode *CGN, CallGraph &CG, - function_ref AARGetter, - unsigned MaxElements); -static bool isDenselyPacked(Type *type, const DataLayout &DL); -static bool canPaddingBeAccessed(Argument *Arg); -static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR, - unsigned MaxElements); +/// DoPromotion - This method actually performs the promotion of the specified +/// arguments, and returns the new function. At this point, we know that it's +/// safe to do so. static CallGraphNode * DoPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, - SmallPtrSetImpl &ByValArgsToTransform, CallGraph &CG); - -char ArgPromotion::ID = 0; -INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(ArgPromotion, "argpromotion", - "Promote 'by reference' arguments to scalars", false, false) - -Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { - return new ArgPromotion(maxElements); -} - -static bool runImpl(CallGraphSCC &SCC, CallGraph &CG, - function_ref AARGetter, - unsigned MaxElements) { - bool Changed = false, LocalChange; - - do { // Iterate until we stop promoting from this SCC. - LocalChange = false; - // Attempt to promote arguments from all functions in this SCC. - for (CallGraphNode *OldNode : SCC) { - if (CallGraphNode *NewNode = - PromoteArguments(OldNode, CG, AARGetter, MaxElements)) { - LocalChange = true; - SCC.ReplaceNode(OldNode, NewNode); - } - } - Changed |= LocalChange; // Remember that we changed something. - } while (LocalChange); - - return Changed; -} + SmallPtrSetImpl &ByValArgsToTransform, CallGraph &CG) { -bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { - if (skipSCC(SCC)) - return false; + // Start by computing a new prototype for the function, which is the same as + // the old function, but has modified arguments. + FunctionType *FTy = F->getFunctionType(); + std::vector Params; - // Get the callgraph information that we need to update to reflect our - // changes. - CallGraph &CG = getAnalysis().getCallGraph(); + typedef std::set> ScalarizeTable; - // We compute dedicated AA results for each function in the SCC as needed. We - // use a lambda referencing external objects so that they live long enough to - // be queried, but we re-use them each time. - Optional BAR; - Optional AAR; - auto AARGetter = [&](Function &F) -> AAResults & { - BAR.emplace(createLegacyPMBasicAAResult(*this, F)); - AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); - return *AAR; - }; + // ScalarizedElements - If we are promoting a pointer that has elements + // accessed out of it, keep track of which elements are accessed so that we + // can add one argument for each. + // + // Arguments that are directly loaded will have a zero element value here, to + // handle cases where there are both a direct load and GEP accesses. + // + std::map ScalarizedElements; - return runImpl(SCC, CG, AARGetter, maxElements); -} + // OriginalLoads - Keep track of a representative load instruction from the + // original function so that we can tell the alias analysis implementation + // what the new GEP/Load instructions we are inserting look like. + // We need to keep the original loads for each argument and the elements + // of the argument that are accessed. + std::map, LoadInst*> OriginalLoads; -/// \brief Checks if a type could have padding bytes. -static bool isDenselyPacked(Type *type, const DataLayout &DL) { + // Attribute - Keep track of the parameter attributes for the arguments + // that we are *not* promoting. For the ones that we do promote, the parameter + // attributes are lost + SmallVector AttributesVec; + const AttributeSet &PAL = F->getAttributes(); - // There is no size information, so be conservative. - if (!type->isSized()) - return false; + // Add any return attributes. + if (PAL.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeSet::get(F->getContext(), + PAL.getRetAttributes())); - // If the alloc size is not equal to the storage size, then there are padding - // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. - if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) - return false; + // First, determine the new argument list + unsigned ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; + ++I, ++ArgIndex) { + if (ByValArgsToTransform.count(&*I)) { + // Simple byval argument? Just add all the struct element types. + Type *AgTy = cast(I->getType())->getElementType(); + StructType *STy = cast(AgTy); + Params.insert(Params.end(), STy->element_begin(), STy->element_end()); + ++NumByValArgsPromoted; + } else if (!ArgsToPromote.count(&*I)) { + // Unchanged argument + Params.push_back(I->getType()); + AttributeSet attrs = PAL.getParamAttributes(ArgIndex); + if (attrs.hasAttributes(ArgIndex)) { + AttrBuilder B(attrs, ArgIndex); + AttributesVec. + push_back(AttributeSet::get(F->getContext(), Params.size(), B)); + } + } else if (I->use_empty()) { + // Dead argument (which are always marked as promotable) + ++NumArgumentsDead; + } else { + // Okay, this is being promoted. This means that the only uses are loads + // or GEPs which are only used by loads - if (!isa(type)) - return true; + // In this table, we will track which indices are loaded from the argument + // (where direct loads are tracked as no indices). + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + for (User *U : I->users()) { + Instruction *UI = cast(U); + Type *SrcTy; + if (LoadInst *L = dyn_cast(UI)) + SrcTy = L->getType(); + else + SrcTy = cast(UI)->getSourceElementType(); + IndicesVector Indices; + Indices.reserve(UI->getNumOperands() - 1); + // Since loads will only have a single operand, and GEPs only a single + // non-index operand, this will record direct loads without any indices, + // and gep+loads with the GEP indices. + for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); + II != IE; ++II) + Indices.push_back(cast(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + ArgIndices.insert(std::make_pair(SrcTy, Indices)); + LoadInst *OrigLoad; + if (LoadInst *L = dyn_cast(UI)) + OrigLoad = L; + else + // Take any load, we will use it only to update Alias Analysis + OrigLoad = cast(UI->user_back()); + OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; + } - // For homogenous sequential types, check for padding within members. - if (SequentialType *seqTy = dyn_cast(type)) - return isDenselyPacked(seqTy->getElementType(), DL); + // Add a parameter to the function for each element passed in. + for (const auto &ArgIndex : ArgIndices) { + // not allowed to dereference ->begin() if size() is 0 + Params.push_back(GetElementPtrInst::getIndexedType( + cast(I->getType()->getScalarType())->getElementType(), + ArgIndex.second)); + assert(Params.back()); + } - // Check for padding within and between elements of a struct. - StructType *StructTy = cast(type); - const StructLayout *Layout = DL.getStructLayout(StructTy); - uint64_t StartPos = 0; - for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { - Type *ElTy = StructTy->getElementType(i); - if (!isDenselyPacked(ElTy, DL)) - return false; - if (StartPos != Layout->getElementOffsetInBits(i)) - return false; - StartPos += DL.getTypeAllocSizeInBits(ElTy); + if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) + ++NumArgumentsPromoted; + else + ++NumAggregatesPromoted; + } } - return true; -} + // Add any function attributes. + if (PAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeSet::get(FTy->getContext(), + PAL.getFnAttributes())); -/// \brief Checks if the padding bytes of an argument could be accessed. -static bool canPaddingBeAccessed(Argument *arg) { + Type *RetTy = FTy->getReturnType(); - assert(arg->hasByValAttr()); + // Construct the new function type using the new arguments. + FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); - // Track all the pointers to the argument to make sure they are not captured. - SmallPtrSet PtrValues; - PtrValues.insert(arg); + // Create the new function body and insert it into the module. + Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); + NF->copyAttributesFrom(F); - // Track all of the stores. - SmallVector Stores; + // Patch the pointer to LLVM function in debug info descriptor. + NF->setSubprogram(F->getSubprogram()); + F->setSubprogram(nullptr); - // Scan through the uses recursively to make sure the pointer is always used - // sanely. - SmallVector WorkList; - WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); - while (!WorkList.empty()) { - Value *V = WorkList.back(); - WorkList.pop_back(); - if (isa(V) || isa(V)) { - if (PtrValues.insert(V).second) - WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); - } else if (StoreInst *Store = dyn_cast(V)) { - Stores.push_back(Store); - } else if (!isa(V)) { - return true; - } - } + DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + << "From: " << *F); + + // Recompute the parameter attributes list based on the new arguments for + // the function. + NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); + AttributesVec.clear(); -// Check to make sure the pointers aren't captured - for (StoreInst *Store : Stores) - if (PtrValues.count(Store->getValueOperand())) - return true; + F->getParent()->getFunctionList().insert(F->getIterator(), NF); + NF->takeName(F); - return false; -} + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); -/// PromoteArguments - This method checks the specified function to see if there -/// are any promotable arguments and if it is safe to promote the function (for -/// example, all callers are direct). If safe to promote some arguments, it -/// calls the DoPromotion method. -/// -static CallGraphNode * -PromoteArguments(CallGraphNode *CGN, CallGraph &CG, - function_ref AARGetter, - unsigned MaxElements) { - Function *F = CGN->getFunction(); - - // Make sure that it is local to this module. - if (!F || !F->hasLocalLinkage()) return nullptr; - - // Don't promote arguments for variadic functions. Adding, removing, or - // changing non-pack parameters can change the classification of pack - // parameters. Frontends encode that classification at the call site in the - // IR, while in the callee the classification is determined dynamically based - // on the number of registers consumed so far. - if (F->isVarArg()) return nullptr; - - // First check: see if there are any pointer arguments! If not, quick exit. - SmallVector PointerArgs; - for (Argument &I : F->args()) - if (I.getType()->isPointerTy()) - PointerArgs.push_back(&I); - if (PointerArgs.empty()) return nullptr; - - // Second check: make sure that all callers are direct callers. We can't - // transform functions that have indirect callers. Also see if the function - // is self-recursive. - bool isSelfRecursive = false; - for (Use &U : F->uses()) { - CallSite CS(U.getUser()); - // Must be a direct call. - if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; - - if (CS.getInstruction()->getParent()->getParent() == F) - isSelfRecursive = true; - } - - const DataLayout &DL = F->getParent()->getDataLayout(); - - AAResults &AAR = AARGetter(*F); + // Loop over all of the callers of the function, transforming the call sites + // to pass in the loaded pointers. + // + SmallVector Args; + while (!F->use_empty()) { + CallSite CS(F->user_back()); + assert(CS.getCalledFunction() == F); + Instruction *Call = CS.getInstruction(); + const AttributeSet &CallPAL = CS.getAttributes(); - // Check to see which arguments are promotable. If an argument is promotable, - // add it to ArgsToPromote. - SmallPtrSet ArgsToPromote; - SmallPtrSet ByValArgsToTransform; - for (Argument *PtrArg : PointerArgs) { - Type *AgTy = cast(PtrArg->getType())->getElementType(); + // Add any return attributes. + if (CallPAL.hasAttributes(AttributeSet::ReturnIndex)) + AttributesVec.push_back(AttributeSet::get(F->getContext(), + CallPAL.getRetAttributes())); - // Replace sret attribute with noalias. This reduces register pressure by - // avoiding a register copy. - if (PtrArg->hasStructRetAttr()) { - unsigned ArgNo = PtrArg->getArgNo(); - F->setAttributes( - F->getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); - for (Use &U : F->uses()) { - CallSite CS(U.getUser()); - CS.setAttributes( - CS.getAttributes() - .removeAttribute(F->getContext(), ArgNo + 1, - Attribute::StructRet) - .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); - } - } + // Loop over the operands, inserting GEP and loads in the caller as + // appropriate. + CallSite::arg_iterator AI = CS.arg_begin(); + ArgIndex = 1; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I, ++AI, ++ArgIndex) + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + Args.push_back(*AI); // Unmodified argument - // If this is a byval argument, and if the aggregate type is small, just - // pass the elements, which is always safe, if the passed value is densely - // packed or if we can prove the padding bytes are never accessed. This does - // not apply to inalloca. - bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); - if (isSafeToPromote) { - if (StructType *STy = dyn_cast(AgTy)) { - if (MaxElements > 0 && STy->getNumElements() > MaxElements) { - DEBUG(dbgs() << "argpromotion disable promoting argument '" - << PtrArg->getName() << "' because it would require adding more" - << " than " << MaxElements << " arguments to the function.\n"); - continue; + if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); + AttributesVec. + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } - - // If all the elements are single-value types, we can promote it. - bool AllSimple = true; - for (const auto *EltTy : STy->elements()) { - if (!EltTy->isSingleValueType()) { - AllSimple = false; - break; - } + } else if (ByValArgsToTransform.count(&*I)) { + // Emit a GEP and load for each element of the struct. + Type *AgTy = cast(I->getType())->getElementType(); + StructType *STy = cast(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = GetElementPtrInst::Create( + STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); + // TODO: Tell AA about the new values? + Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } + } else if (!I->use_empty()) { + // Non-dead argument: insert GEPs and loads as appropriate. + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + // Store the Value* version of the indices in here, but declare it now + // for reuse. + std::vector Ops; + for (const auto &ArgIndex : ArgIndices) { + Value *V = *AI; + LoadInst *OrigLoad = + OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; + if (!ArgIndex.second.empty()) { + Ops.reserve(ArgIndex.second.size()); + Type *ElTy = V->getType(); + for (unsigned long II : ArgIndex.second) { + // Use i32 to index structs, and i64 for others (pointers/arrays). + // This satisfies GEP constraints. + Type *IdxTy = (ElTy->isStructTy() ? + Type::getInt32Ty(F->getContext()) : + Type::getInt64Ty(F->getContext())); + Ops.push_back(ConstantInt::get(IdxTy, II)); + // Keep track of the type we're currently indexing. + if (auto *ElPTy = dyn_cast(ElTy)) + ElTy = ElPTy->getElementType(); + else + ElTy = cast(ElTy)->getTypeAtIndex(II); + } + // And create a GEP to extract those indices. + V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, + V->getName() + ".idx", Call); + Ops.clear(); + } + // Since we're replacing a load make sure we take the alignment + // of the previous load. + LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); + newLoad->setAlignment(OrigLoad->getAlignment()); + // Transfer the AA info too. + AAMDNodes AAInfo; + OrigLoad->getAAMetadata(AAInfo); + newLoad->setAAMetadata(AAInfo); - // Safe to transform, don't even bother trying to "promote" it. - // Passing the elements as a scalar will allow sroa to hack on - // the new alloca we introduce. - if (AllSimple) { - ByValArgsToTransform.insert(PtrArg); - continue; + Args.push_back(newLoad); } } - } - // If the argument is a recursive type and we're in a recursive - // function, we could end up infinitely peeling the function argument. - if (isSelfRecursive) { - if (StructType *STy = dyn_cast(AgTy)) { - bool RecursiveType = false; - for (const auto *EltTy : STy->elements()) { - if (EltTy == PtrArg->getType()) { - RecursiveType = true; - break; - } - } - if (RecursiveType) - continue; + // Push any varargs arguments on the list. + for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { + Args.push_back(*AI); + if (CallPAL.hasAttributes(ArgIndex)) { + AttrBuilder B(CallPAL, ArgIndex); + AttributesVec. + push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } } - - // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR, - MaxElements)) - ArgsToPromote.insert(PtrArg); - } - // No promotable pointer arguments. - if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) - return nullptr; + // Add any function attributes. + if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) + AttributesVec.push_back(AttributeSet::get(Call->getContext(), + CallPAL.getFnAttributes())); - return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG); -} + SmallVector OpBundles; + CS.getOperandBundlesAsDefs(OpBundles); -/// AllCallersPassInValidPointerForArgument - Return true if we can prove that -/// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { - Function *Callee = Arg->getParent(); - const DataLayout &DL = Callee->getParent()->getDataLayout(); + Instruction *New; + if (InvokeInst *II = dyn_cast(Call)) { + New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles, "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(AttributeSet::get(II->getContext(), + AttributesVec)); + } else { + New = CallInst::Create(NF, Args, OpBundles, "", Call); + cast(New)->setCallingConv(CS.getCallingConv()); + cast(New)->setAttributes(AttributeSet::get(New->getContext(), + AttributesVec)); + cast(New)->setTailCallKind( + cast(Call)->getTailCallKind()); + } + New->setDebugLoc(Call->getDebugLoc()); + Args.clear(); + AttributesVec.clear(); - unsigned ArgNo = Arg->getArgNo(); + // Update the callgraph to know that the callsite has been transformed. + CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); - // Look at all call sites of the function. At this point we know we only have - // direct callees. - for (User *U : Callee->users()) { - CallSite CS(U); - assert(CS && "Should only have direct calls!"); + if (!Call->use_empty()) { + Call->replaceAllUsesWith(New); + New->takeName(Call); + } - if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL)) - return false; + // Finally, remove the old call from the program, reducing the use-count of + // F. + Call->eraseFromParent(); } - return true; -} -/// Returns true if Prefix is a prefix of longer. That means, Longer has a size -/// that is greater than or equal to the size of prefix, and each of the -/// elements in Prefix is the same as the corresponding elements in Longer. -/// -/// This means it also returns true when Prefix and Longer are equal! -static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { - if (Prefix.size() > Longer.size()) + // Since we have now created the new function, splice the body of the old + // function right into the new function, leaving the old rotting hulk of the + // function empty. + NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); + + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. + // + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), + I2 = NF->arg_begin(); I != E; ++I) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { + // If this is an unmodified argument, move the name and users over to the + // new version. + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); + ++I2; + continue; + } + + if (ByValArgsToTransform.count(&*I)) { + // In the callee, we create an alloca, and store each of the new incoming + // arguments into the alloca. + Instruction *InsertPt = &NF->begin()->front(); + + // Just add all the struct element types. + Type *AgTy = cast(I->getType())->getElementType(); + Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt); + StructType *STy = cast(AgTy); + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; + + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = GetElementPtrInst::Create( + AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), + InsertPt); + I2->setName(I->getName()+"."+Twine(i)); + new StoreInst(&*I2++, Idx, InsertPt); + } + + // Anything that used the arg should now use the alloca. + I->replaceAllUsesWith(TheAlloca); + TheAlloca->takeName(&*I); + + // If the alloca is used in a call, we must clear the tail flag since + // the callee now uses an alloca from the caller. + for (User *U : TheAlloca->users()) { + CallInst *Call = dyn_cast(U); + if (!Call) + continue; + Call->setTailCall(false); + } + continue; + } + + if (I->use_empty()) + continue; + + // Otherwise, if we promoted this argument, then all users are load + // instructions (or GEPs with only load users), and all loads should be + // using the new argument that we added. + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; + + while (!I->use_empty()) { + if (LoadInst *LI = dyn_cast(I->user_back())) { + assert(ArgIndices.begin()->second.empty() && + "Load element should sort to front!"); + I2->setName(I->getName()+".val"); + LI->replaceAllUsesWith(&*I2); + LI->eraseFromParent(); + DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"); + } else { + GetElementPtrInst *GEP = cast(I->user_back()); + IndicesVector Operands; + Operands.reserve(GEP->getNumIndices()); + for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); + II != IE; ++II) + Operands.push_back(cast(*II)->getSExtValue()); + + // GEPs with a single 0 index can be merged with direct loads + if (Operands.size() == 1 && Operands.front() == 0) + Operands.clear(); + + Function::arg_iterator TheArg = I2; + for (ScalarizeTable::iterator It = ArgIndices.begin(); + It->second != Operands; ++It, ++TheArg) { + assert(It != ArgIndices.end() && "GEP not handled??"); + } + + std::string NewName = I->getName(); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + NewName += "." + utostr(Operands[i]); + } + NewName += ".val"; + TheArg->setName(NewName); + + DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << NF->getName() << "'\n"); + + // All of the uses must be load instructions. Replace them all with + // the argument specified by ArgNo. + while (!GEP->use_empty()) { + LoadInst *L = cast(GEP->user_back()); + L->replaceAllUsesWith(&*TheArg); + L->eraseFromParent(); + } + GEP->eraseFromParent(); + } + } + + // Increment I2 past all of the arguments added for this promoted pointer. + std::advance(I2, ArgIndices.size()); + } + + NF_CGN->stealCalledFunctionsFrom(CG[F]); + + // Now that the old function is dead, delete it. If there is a dangling + // reference to the CallgraphNode, just leave the dead function around for + // someone else to nuke. + CallGraphNode *CGN = CG[F]; + if (CGN->getNumReferences() == 0) + delete CG.removeFunctionFromModule(CGN); + else + F->setLinkage(Function::ExternalLinkage); + + return NF_CGN; +} + + +/// AllCallersPassInValidPointerForArgument - Return true if we can prove that +/// all callees pass in a valid pointer for the specified function argument. +static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { + Function *Callee = Arg->getParent(); + const DataLayout &DL = Callee->getParent()->getDataLayout(); + + unsigned ArgNo = Arg->getArgNo(); + + // Look at all call sites of the function. At this point we know we only have + // direct callees. + for (User *U : Callee->users()) { + CallSite CS(U); + assert(CS && "Should only have direct calls!"); + + if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL)) + return false; + } + return true; +} + +/// Returns true if Prefix is a prefix of longer. That means, Longer has a size +/// that is greater than or equal to the size of prefix, and each of the +/// elements in Prefix is the same as the corresponding elements in Longer. +/// +/// This means it also returns true when Prefix and Longer are equal! +static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { + if (Prefix.size() > Longer.size()) return false; return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); } @@ -625,416 +737,290 @@ return true; } -/// DoPromotion - This method actually performs the promotion of the specified -/// arguments, and returns the new function. At this point, we know that it's -/// safe to do so. -static CallGraphNode * -DoPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, - SmallPtrSetImpl &ByValArgsToTransform, CallGraph &CG) { - // Start by computing a new prototype for the function, which is the same as - // the old function, but has modified arguments. - FunctionType *FTy = F->getFunctionType(); - std::vector Params; +/// \brief Checks if a type could have padding bytes. +static bool isDenselyPacked(Type *type, const DataLayout &DL) { - typedef std::set> ScalarizeTable; + // There is no size information, so be conservative. + if (!type->isSized()) + return false; - // ScalarizedElements - If we are promoting a pointer that has elements - // accessed out of it, keep track of which elements are accessed so that we - // can add one argument for each. - // - // Arguments that are directly loaded will have a zero element value here, to - // handle cases where there are both a direct load and GEP accesses. - // - std::map ScalarizedElements; + // If the alloc size is not equal to the storage size, then there are padding + // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. + if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)) + return false; - // OriginalLoads - Keep track of a representative load instruction from the - // original function so that we can tell the alias analysis implementation - // what the new GEP/Load instructions we are inserting look like. - // We need to keep the original loads for each argument and the elements - // of the argument that are accessed. - std::map, LoadInst*> OriginalLoads; + if (!isa(type)) + return true; - // Attribute - Keep track of the parameter attributes for the arguments - // that we are *not* promoting. For the ones that we do promote, the parameter - // attributes are lost - SmallVector AttributesVec; - const AttributeSet &PAL = F->getAttributes(); + // For homogenous sequential types, check for padding within members. + if (SequentialType *seqTy = dyn_cast(type)) + return isDenselyPacked(seqTy->getElementType(), DL); - // Add any return attributes. - if (PAL.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeSet::get(F->getContext(), - PAL.getRetAttributes())); + // Check for padding within and between elements of a struct. + StructType *StructTy = cast(type); + const StructLayout *Layout = DL.getStructLayout(StructTy); + uint64_t StartPos = 0; + for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) { + Type *ElTy = StructTy->getElementType(i); + if (!isDenselyPacked(ElTy, DL)) + return false; + if (StartPos != Layout->getElementOffsetInBits(i)) + return false; + StartPos += DL.getTypeAllocSizeInBits(ElTy); + } - // First, determine the new argument list - unsigned ArgIndex = 1; - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; - ++I, ++ArgIndex) { - if (ByValArgsToTransform.count(&*I)) { - // Simple byval argument? Just add all the struct element types. - Type *AgTy = cast(I->getType())->getElementType(); - StructType *STy = cast(AgTy); - Params.insert(Params.end(), STy->element_begin(), STy->element_end()); - ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(&*I)) { - // Unchanged argument - Params.push_back(I->getType()); - AttributeSet attrs = PAL.getParamAttributes(ArgIndex); - if (attrs.hasAttributes(ArgIndex)) { - AttrBuilder B(attrs, ArgIndex); - AttributesVec. - push_back(AttributeSet::get(F->getContext(), Params.size(), B)); - } - } else if (I->use_empty()) { - // Dead argument (which are always marked as promotable) - ++NumArgumentsDead; - } else { - // Okay, this is being promoted. This means that the only uses are loads - // or GEPs which are only used by loads + return true; +} - // In this table, we will track which indices are loaded from the argument - // (where direct loads are tracked as no indices). - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - for (User *U : I->users()) { - Instruction *UI = cast(U); - Type *SrcTy; - if (LoadInst *L = dyn_cast(UI)) - SrcTy = L->getType(); - else - SrcTy = cast(UI)->getSourceElementType(); - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); - II != IE; ++II) - Indices.push_back(cast(*II)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); - ArgIndices.insert(std::make_pair(SrcTy, Indices)); - LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast(UI)) - OrigLoad = L; - else - // Take any load, we will use it only to update Alias Analysis - OrigLoad = cast(UI->user_back()); - OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; - } +/// \brief Checks if the padding bytes of an argument could be accessed. +static bool canPaddingBeAccessed(Argument *arg) { - // Add a parameter to the function for each element passed in. - for (const auto &ArgIndex : ArgIndices) { - // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType( - cast(I->getType()->getScalarType())->getElementType(), - ArgIndex.second)); - assert(Params.back()); - } + assert(arg->hasByValAttr()); - if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) - ++NumArgumentsPromoted; - else - ++NumAggregatesPromoted; + // Track all the pointers to the argument to make sure they are not captured. + SmallPtrSet PtrValues; + PtrValues.insert(arg); + + // Track all of the stores. + SmallVector Stores; + + // Scan through the uses recursively to make sure the pointer is always used + // sanely. + SmallVector WorkList; + WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (isa(V) || isa(V)) { + if (PtrValues.insert(V).second) + WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); + } else if (StoreInst *Store = dyn_cast(V)) { + Stores.push_back(Store); + } else if (!isa(V)) { + return true; } } - // Add any function attributes. - if (PAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeSet::get(FTy->getContext(), - PAL.getFnAttributes())); - - Type *RetTy = FTy->getReturnType(); +// Check to make sure the pointers aren't captured + for (StoreInst *Store : Stores) + if (PtrValues.count(Store->getValueOperand())) + return true; - // Construct the new function type using the new arguments. - FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); + return false; +} - // Create the new function body and insert it into the module. - Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); - NF->copyAttributesFrom(F); +/// PromoteArguments - This method checks the specified function to see if there +/// are any promotable arguments and if it is safe to promote the function (for +/// example, all callers are direct). If safe to promote some arguments, it +/// calls the DoPromotion method. +/// +static CallGraphNode * +PromoteArguments(CallGraphNode *CGN, CallGraph &CG, + function_ref AARGetter, + unsigned MaxElements) { + Function *F = CGN->getFunction(); - // Patch the pointer to LLVM function in debug info descriptor. - NF->setSubprogram(F->getSubprogram()); - F->setSubprogram(nullptr); + // Make sure that it is local to this module. + if (!F || !F->hasLocalLinkage()) return nullptr; - DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" - << "From: " << *F); - - // Recompute the parameter attributes list based on the new arguments for - // the function. - NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); - AttributesVec.clear(); + // Don't promote arguments for variadic functions. Adding, removing, or + // changing non-pack parameters can change the classification of pack + // parameters. Frontends encode that classification at the call site in the + // IR, while in the callee the classification is determined dynamically based + // on the number of registers consumed so far. + if (F->isVarArg()) return nullptr; - F->getParent()->getFunctionList().insert(F->getIterator(), NF); - NF->takeName(F); + // First check: see if there are any pointer arguments! If not, quick exit. + SmallVector PointerArgs; + for (Argument &I : F->args()) + if (I.getType()->isPointerTy()) + PointerArgs.push_back(&I); + if (PointerArgs.empty()) return nullptr; - // Get a new callgraph node for NF. - CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + // Second check: make sure that all callers are direct callers. We can't + // transform functions that have indirect callers. Also see if the function + // is self-recursive. + bool isSelfRecursive = false; + for (Use &U : F->uses()) { + CallSite CS(U.getUser()); + // Must be a direct call. + if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; + + if (CS.getInstruction()->getParent()->getParent() == F) + isSelfRecursive = true; + } + + const DataLayout &DL = F->getParent()->getDataLayout(); - // Loop over all of the callers of the function, transforming the call sites - // to pass in the loaded pointers. - // - SmallVector Args; - while (!F->use_empty()) { - CallSite CS(F->user_back()); - assert(CS.getCalledFunction() == F); - Instruction *Call = CS.getInstruction(); - const AttributeSet &CallPAL = CS.getAttributes(); + AAResults &AAR = AARGetter(*F); - // Add any return attributes. - if (CallPAL.hasAttributes(AttributeSet::ReturnIndex)) - AttributesVec.push_back(AttributeSet::get(F->getContext(), - CallPAL.getRetAttributes())); + // Check to see which arguments are promotable. If an argument is promotable, + // add it to ArgsToPromote. + SmallPtrSet ArgsToPromote; + SmallPtrSet ByValArgsToTransform; + for (Argument *PtrArg : PointerArgs) { + Type *AgTy = cast(PtrArg->getType())->getElementType(); - // Loop over the operands, inserting GEP and loads in the caller as - // appropriate. - CallSite::arg_iterator AI = CS.arg_begin(); - ArgIndex = 1; - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++AI, ++ArgIndex) - if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { - Args.push_back(*AI); // Unmodified argument + // Replace sret attribute with noalias. This reduces register pressure by + // avoiding a register copy. + if (PtrArg->hasStructRetAttr()) { + unsigned ArgNo = PtrArg->getArgNo(); + F->setAttributes( + F->getAttributes() + .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) + .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + for (Use &U : F->uses()) { + CallSite CS(U.getUser()); + CS.setAttributes( + CS.getAttributes() + .removeAttribute(F->getContext(), ArgNo + 1, + Attribute::StructRet) + .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); + } + } - if (CallPAL.hasAttributes(ArgIndex)) { - AttrBuilder B(CallPAL, ArgIndex); - AttributesVec. - push_back(AttributeSet::get(F->getContext(), Args.size(), B)); - } - } else if (ByValArgsToTransform.count(&*I)) { - // Emit a GEP and load for each element of the struct. - Type *AgTy = cast(I->getType())->getElementType(); - StructType *STy = cast(AgTy); - Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create( - STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); - // TODO: Tell AA about the new values? - Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); + // If this is a byval argument, and if the aggregate type is small, just + // pass the elements, which is always safe, if the passed value is densely + // packed or if we can prove the padding bytes are never accessed. This does + // not apply to inalloca. + bool isSafeToPromote = + PtrArg->hasByValAttr() && + (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); + if (isSafeToPromote) { + if (StructType *STy = dyn_cast(AgTy)) { + if (MaxElements > 0 && STy->getNumElements() > MaxElements) { + DEBUG(dbgs() << "argpromotion disable promoting argument '" + << PtrArg->getName() << "' because it would require adding more" + << " than " << MaxElements << " arguments to the function.\n"); + continue; } - } else if (!I->use_empty()) { - // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - // Store the Value* version of the indices in here, but declare it now - // for reuse. - std::vector Ops; - for (const auto &ArgIndex : ArgIndices) { - Value *V = *AI; - LoadInst *OrigLoad = - OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; - if (!ArgIndex.second.empty()) { - Ops.reserve(ArgIndex.second.size()); - Type *ElTy = V->getType(); - for (unsigned long II : ArgIndex.second) { - // Use i32 to index structs, and i64 for others (pointers/arrays). - // This satisfies GEP constraints. - Type *IdxTy = (ElTy->isStructTy() ? - Type::getInt32Ty(F->getContext()) : - Type::getInt64Ty(F->getContext())); - Ops.push_back(ConstantInt::get(IdxTy, II)); - // Keep track of the type we're currently indexing. - if (auto *ElPTy = dyn_cast(ElTy)) - ElTy = ElPTy->getElementType(); - else - ElTy = cast(ElTy)->getTypeAtIndex(II); - } - // And create a GEP to extract those indices. - V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, - V->getName() + ".idx", Call); - Ops.clear(); + + // If all the elements are single-value types, we can promote it. + bool AllSimple = true; + for (const auto *EltTy : STy->elements()) { + if (!EltTy->isSingleValueType()) { + AllSimple = false; + break; } - // Since we're replacing a load make sure we take the alignment - // of the previous load. - LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); - newLoad->setAlignment(OrigLoad->getAlignment()); - // Transfer the AA info too. - AAMDNodes AAInfo; - OrigLoad->getAAMetadata(AAInfo); - newLoad->setAAMetadata(AAInfo); + } - Args.push_back(newLoad); + // Safe to transform, don't even bother trying to "promote" it. + // Passing the elements as a scalar will allow sroa to hack on + // the new alloca we introduce. + if (AllSimple) { + ByValArgsToTransform.insert(PtrArg); + continue; } } + } - // Push any varargs arguments on the list. - for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { - Args.push_back(*AI); - if (CallPAL.hasAttributes(ArgIndex)) { - AttrBuilder B(CallPAL, ArgIndex); - AttributesVec. - push_back(AttributeSet::get(F->getContext(), Args.size(), B)); + // If the argument is a recursive type and we're in a recursive + // function, we could end up infinitely peeling the function argument. + if (isSelfRecursive) { + if (StructType *STy = dyn_cast(AgTy)) { + bool RecursiveType = false; + for (const auto *EltTy : STy->elements()) { + if (EltTy == PtrArg->getType()) { + RecursiveType = true; + break; + } + } + if (RecursiveType) + continue; } } + + // Otherwise, see if we can promote the pointer to its value. + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR, + MaxElements)) + ArgsToPromote.insert(PtrArg); + } - // Add any function attributes. - if (CallPAL.hasAttributes(AttributeSet::FunctionIndex)) - AttributesVec.push_back(AttributeSet::get(Call->getContext(), - CallPAL.getFnAttributes())); - - SmallVector OpBundles; - CS.getOperandBundlesAsDefs(OpBundles); - - Instruction *New; - if (InvokeInst *II = dyn_cast(Call)) { - New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, OpBundles, "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(AttributeSet::get(II->getContext(), - AttributesVec)); - } else { - New = CallInst::Create(NF, Args, OpBundles, "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(AttributeSet::get(New->getContext(), - AttributesVec)); - cast(New)->setTailCallKind( - cast(Call)->getTailCallKind()); - } - New->setDebugLoc(Call->getDebugLoc()); - Args.clear(); - AttributesVec.clear(); + // No promotable pointer arguments. + if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + return nullptr; - // Update the callgraph to know that the callsite has been transformed. - CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); + return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG); +} - if (!Call->use_empty()) { - Call->replaceAllUsesWith(New); - New->takeName(Call); +namespace { + /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. + /// + struct ArgPromotion : public CallGraphSCCPass { + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + getAAResultsAnalysisUsage(AU); + CallGraphSCCPass::getAnalysisUsage(AU); } - // Finally, remove the old call from the program, reducing the use-count of - // F. - Call->eraseFromParent(); - } - - // Since we have now created the new function, splice the body of the old - // function right into the new function, leaving the old rotting hulk of the - // function empty. - NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - - // Loop over the argument list, transferring uses of the old arguments over to - // the new arguments, also transferring over the names as well. - // - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), - I2 = NF->arg_begin(); I != E; ++I) { - if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { - // If this is an unmodified argument, move the name and users over to the - // new version. - I->replaceAllUsesWith(&*I2); - I2->takeName(&*I); - ++I2; - continue; + bool runOnSCC(CallGraphSCC &SCC) override; + static char ID; // Pass identification, replacement for typeid + explicit ArgPromotion(unsigned MaxElements = 3) + : CallGraphSCCPass(ID), MaxElements(MaxElements) { + initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } - if (ByValArgsToTransform.count(&*I)) { - // In the callee, we create an alloca, and store each of the new incoming - // arguments into the alloca. - Instruction *InsertPt = &NF->begin()->front(); - - // Just add all the struct element types. - Type *AgTy = cast(I->getType())->getElementType(); - Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt); - StructType *STy = cast(AgTy); - Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; - - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); - Value *Idx = GetElementPtrInst::Create( - AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), - InsertPt); - I2->setName(I->getName()+"."+Twine(i)); - new StoreInst(&*I2++, Idx, InsertPt); - } - - // Anything that used the arg should now use the alloca. - I->replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(&*I); - - // If the alloca is used in a call, we must clear the tail flag since - // the callee now uses an alloca from the caller. - for (User *U : TheAlloca->users()) { - CallInst *Call = dyn_cast(U); - if (!Call) - continue; - Call->setTailCall(false); - } - continue; - } + private: - if (I->use_empty()) - continue; + using llvm::Pass::doInitialization; + bool doInitialization(CallGraph &CG) override; + /// The maximum number of elements to expand, or 0 for unlimited. + unsigned MaxElements; + }; +} - // Otherwise, if we promoted this argument, then all users are load - // instructions (or GEPs with only load users), and all loads should be - // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; +char ArgPromotion::ID = 0; +INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ArgPromotion, "argpromotion", + "Promote 'by reference' arguments to scalars", false, false) - while (!I->use_empty()) { - if (LoadInst *LI = dyn_cast(I->user_back())) { - assert(ArgIndices.begin()->second.empty() && - "Load element should sort to front!"); - I2->setName(I->getName()+".val"); - LI->replaceAllUsesWith(&*I2); - LI->eraseFromParent(); - DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() - << "' in function '" << F->getName() << "'\n"); - } else { - GetElementPtrInst *GEP = cast(I->user_back()); - IndicesVector Operands; - Operands.reserve(GEP->getNumIndices()); - for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); - II != IE; ++II) - Operands.push_back(cast(*II)->getSExtValue()); +Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) { + return new ArgPromotion(MaxElements); +} - // GEPs with a single 0 index can be merged with direct loads - if (Operands.size() == 1 && Operands.front() == 0) - Operands.clear(); +bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { + if (skipSCC(SCC)) + return false; - Function::arg_iterator TheArg = I2; - for (ScalarizeTable::iterator It = ArgIndices.begin(); - It->second != Operands; ++It, ++TheArg) { - assert(It != ArgIndices.end() && "GEP not handled??"); - } + // Get the callgraph information that we need to update to reflect our + // changes. + CallGraph &CG = getAnalysis().getCallGraph(); - std::string NewName = I->getName(); - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { - NewName += "." + utostr(Operands[i]); - } - NewName += ".val"; - TheArg->setName(NewName); + // We compute dedicated AA results for each function in the SCC as needed. We + // use a lambda referencing external objects so that they live long enough to + // be queried, but we re-use them each time. + Optional BAR; + Optional AAR; + auto AARGetter = [&](Function &F) -> AAResults & { + BAR.emplace(createLegacyPMBasicAAResult(*this, F)); + AAR.emplace(createLegacyPMAAResults(*this, F, *BAR)); + return *AAR; + }; - DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() - << "' of function '" << NF->getName() << "'\n"); + bool Changed = false, LocalChange; - // All of the uses must be load instructions. Replace them all with - // the argument specified by ArgNo. - while (!GEP->use_empty()) { - LoadInst *L = cast(GEP->user_back()); - L->replaceAllUsesWith(&*TheArg); - L->eraseFromParent(); - } - GEP->eraseFromParent(); + // Iterate until we stop promoting from this SCC. + do { + LocalChange = false; + // Attempt to promote arguments from all functions in this SCC. + for (CallGraphNode *OldNode : SCC) { + if (CallGraphNode *NewNode = + PromoteArguments(OldNode, CG, AARGetter, MaxElements)) { + LocalChange = true; + SCC.ReplaceNode(OldNode, NewNode); } } + // Remember that we changed something. + Changed |= LocalChange; + } while (LocalChange); - // Increment I2 past all of the arguments added for this promoted pointer. - std::advance(I2, ArgIndices.size()); - } - - NF_CGN->stealCalledFunctionsFrom(CG[F]); - - // Now that the old function is dead, delete it. If there is a dangling - // reference to the CallgraphNode, just leave the dead function around for - // someone else to nuke. - CallGraphNode *CGN = CG[F]; - if (CGN->getNumReferences() == 0) - delete CG.removeFunctionFromModule(CGN); - else - F->setLinkage(Function::ExternalLinkage); - - return NF_CGN; + return Changed; } bool ArgPromotion::doInitialization(CallGraph &CG) {