Skip to content

Commit cd836cd

Browse files
committedJan 29, 2017
[ArgPromote] Re-arrange the code in a more typical, logical way.
This arranges the static helpers in an order where they are defined prior to their use to avoid the need of forward declarations, and collect the core pass components at the bottom below their helpers. This also folds one trivial function into the pass itself. Factoring this 'runImpl' was an attempt to help porting to the new pass manager, however in my attempt to begin this port in earnest it turned out to not be a substantial help. I think it will be easier to factor things without it. This is an NFC change and does a minimal amount of edits over all. Subsequent NFC cleanups will normalize the formatting with clang-format and improve the basic doxygen commenting. Differential Revision: https://reviews.llvm.org/D29247 llvm-svn: 293424
1 parent 62a188d commit cd836cd

File tree

1 file changed

+634
-648
lines changed

1 file changed

+634
-648
lines changed
 

‎llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

Lines changed: 634 additions & 648 deletions
Original file line numberDiff line numberDiff line change
@@ -61,337 +61,449 @@ STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
6161
STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
6262
STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
6363

64-
namespace {
65-
/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
66-
///
67-
struct ArgPromotion : public CallGraphSCCPass {
68-
void getAnalysisUsage(AnalysisUsage &AU) const override {
69-
AU.addRequired<AssumptionCacheTracker>();
70-
AU.addRequired<TargetLibraryInfoWrapperPass>();
71-
getAAResultsAnalysisUsage(AU);
72-
CallGraphSCCPass::getAnalysisUsage(AU);
73-
}
74-
75-
bool runOnSCC(CallGraphSCC &SCC) override;
76-
static char ID; // Pass identification, replacement for typeid
77-
explicit ArgPromotion(unsigned maxElements = 3)
78-
: CallGraphSCCPass(ID), maxElements(maxElements) {
79-
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
80-
}
81-
82-
private:
83-
84-
using llvm::Pass::doInitialization;
85-
bool doInitialization(CallGraph &CG) override;
86-
/// The maximum number of elements to expand, or 0 for unlimited.
87-
unsigned maxElements;
88-
};
89-
}
90-
9164
/// A vector used to hold the indices of a single GEP instruction
9265
typedef std::vector<uint64_t> IndicesVector;
9366

94-
static CallGraphNode *
95-
PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
96-
function_ref<AAResults &(Function &F)> AARGetter,
97-
unsigned MaxElements);
98-
static bool isDenselyPacked(Type *type, const DataLayout &DL);
99-
static bool canPaddingBeAccessed(Argument *Arg);
100-
static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
101-
unsigned MaxElements);
67+
/// DoPromotion - This method actually performs the promotion of the specified
68+
/// arguments, and returns the new function. At this point, we know that it's
69+
/// safe to do so.
10270
static CallGraphNode *
10371
DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
104-
SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG);
105-
106-
char ArgPromotion::ID = 0;
107-
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
108-
"Promote 'by reference' arguments to scalars", false, false)
109-
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
110-
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
111-
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
112-
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
113-
"Promote 'by reference' arguments to scalars", false, false)
114-
115-
Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
116-
return new ArgPromotion(maxElements);
117-
}
118-
119-
static bool runImpl(CallGraphSCC &SCC, CallGraph &CG,
120-
function_ref<AAResults &(Function &F)> AARGetter,
121-
unsigned MaxElements) {
122-
bool Changed = false, LocalChange;
123-
124-
do { // Iterate until we stop promoting from this SCC.
125-
LocalChange = false;
126-
// Attempt to promote arguments from all functions in this SCC.
127-
for (CallGraphNode *OldNode : SCC) {
128-
if (CallGraphNode *NewNode =
129-
PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
130-
LocalChange = true;
131-
SCC.ReplaceNode(OldNode, NewNode);
132-
}
133-
}
134-
Changed |= LocalChange; // Remember that we changed something.
135-
} while (LocalChange);
136-
137-
return Changed;
138-
}
72+
SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
13973

140-
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
141-
if (skipSCC(SCC))
142-
return false;
74+
// Start by computing a new prototype for the function, which is the same as
75+
// the old function, but has modified arguments.
76+
FunctionType *FTy = F->getFunctionType();
77+
std::vector<Type*> Params;
14378

144-
// Get the callgraph information that we need to update to reflect our
145-
// changes.
146-
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
79+
typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
14780

148-
// We compute dedicated AA results for each function in the SCC as needed. We
149-
// use a lambda referencing external objects so that they live long enough to
150-
// be queried, but we re-use them each time.
151-
Optional<BasicAAResult> BAR;
152-
Optional<AAResults> AAR;
153-
auto AARGetter = [&](Function &F) -> AAResults & {
154-
BAR.emplace(createLegacyPMBasicAAResult(*this, F));
155-
AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
156-
return *AAR;
157-
};
81+
// ScalarizedElements - If we are promoting a pointer that has elements
82+
// accessed out of it, keep track of which elements are accessed so that we
83+
// can add one argument for each.
84+
//
85+
// Arguments that are directly loaded will have a zero element value here, to
86+
// handle cases where there are both a direct load and GEP accesses.
87+
//
88+
std::map<Argument*, ScalarizeTable> ScalarizedElements;
15889

159-
return runImpl(SCC, CG, AARGetter, maxElements);
160-
}
90+
// OriginalLoads - Keep track of a representative load instruction from the
91+
// original function so that we can tell the alias analysis implementation
92+
// what the new GEP/Load instructions we are inserting look like.
93+
// We need to keep the original loads for each argument and the elements
94+
// of the argument that are accessed.
95+
std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
16196

162-
/// \brief Checks if a type could have padding bytes.
163-
static bool isDenselyPacked(Type *type, const DataLayout &DL) {
97+
// Attribute - Keep track of the parameter attributes for the arguments
98+
// that we are *not* promoting. For the ones that we do promote, the parameter
99+
// attributes are lost
100+
SmallVector<AttributeSet, 8> AttributesVec;
101+
const AttributeSet &PAL = F->getAttributes();
164102

165-
// There is no size information, so be conservative.
166-
if (!type->isSized())
167-
return false;
103+
// Add any return attributes.
104+
if (PAL.hasAttributes(AttributeSet::ReturnIndex))
105+
AttributesVec.push_back(AttributeSet::get(F->getContext(),
106+
PAL.getRetAttributes()));
168107

169-
// If the alloc size is not equal to the storage size, then there are padding
170-
// bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
171-
if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
172-
return false;
108+
// First, determine the new argument list
109+
unsigned ArgIndex = 1;
110+
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
111+
++I, ++ArgIndex) {
112+
if (ByValArgsToTransform.count(&*I)) {
113+
// Simple byval argument? Just add all the struct element types.
114+
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
115+
StructType *STy = cast<StructType>(AgTy);
116+
Params.insert(Params.end(), STy->element_begin(), STy->element_end());
117+
++NumByValArgsPromoted;
118+
} else if (!ArgsToPromote.count(&*I)) {
119+
// Unchanged argument
120+
Params.push_back(I->getType());
121+
AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
122+
if (attrs.hasAttributes(ArgIndex)) {
123+
AttrBuilder B(attrs, ArgIndex);
124+
AttributesVec.
125+
push_back(AttributeSet::get(F->getContext(), Params.size(), B));
126+
}
127+
} else if (I->use_empty()) {
128+
// Dead argument (which are always marked as promotable)
129+
++NumArgumentsDead;
130+
} else {
131+
// Okay, this is being promoted. This means that the only uses are loads
132+
// or GEPs which are only used by loads
173133

174-
if (!isa<CompositeType>(type))
175-
return true;
134+
// In this table, we will track which indices are loaded from the argument
135+
// (where direct loads are tracked as no indices).
136+
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
137+
for (User *U : I->users()) {
138+
Instruction *UI = cast<Instruction>(U);
139+
Type *SrcTy;
140+
if (LoadInst *L = dyn_cast<LoadInst>(UI))
141+
SrcTy = L->getType();
142+
else
143+
SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
144+
IndicesVector Indices;
145+
Indices.reserve(UI->getNumOperands() - 1);
146+
// Since loads will only have a single operand, and GEPs only a single
147+
// non-index operand, this will record direct loads without any indices,
148+
// and gep+loads with the GEP indices.
149+
for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
150+
II != IE; ++II)
151+
Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
152+
// GEPs with a single 0 index can be merged with direct loads
153+
if (Indices.size() == 1 && Indices.front() == 0)
154+
Indices.clear();
155+
ArgIndices.insert(std::make_pair(SrcTy, Indices));
156+
LoadInst *OrigLoad;
157+
if (LoadInst *L = dyn_cast<LoadInst>(UI))
158+
OrigLoad = L;
159+
else
160+
// Take any load, we will use it only to update Alias Analysis
161+
OrigLoad = cast<LoadInst>(UI->user_back());
162+
OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
163+
}
176164

177-
// For homogenous sequential types, check for padding within members.
178-
if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
179-
return isDenselyPacked(seqTy->getElementType(), DL);
165+
// Add a parameter to the function for each element passed in.
166+
for (const auto &ArgIndex : ArgIndices) {
167+
// not allowed to dereference ->begin() if size() is 0
168+
Params.push_back(GetElementPtrInst::getIndexedType(
169+
cast<PointerType>(I->getType()->getScalarType())->getElementType(),
170+
ArgIndex.second));
171+
assert(Params.back());
172+
}
180173

181-
// Check for padding within and between elements of a struct.
182-
StructType *StructTy = cast<StructType>(type);
183-
const StructLayout *Layout = DL.getStructLayout(StructTy);
184-
uint64_t StartPos = 0;
185-
for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
186-
Type *ElTy = StructTy->getElementType(i);
187-
if (!isDenselyPacked(ElTy, DL))
188-
return false;
189-
if (StartPos != Layout->getElementOffsetInBits(i))
190-
return false;
191-
StartPos += DL.getTypeAllocSizeInBits(ElTy);
174+
if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
175+
++NumArgumentsPromoted;
176+
else
177+
++NumAggregatesPromoted;
178+
}
192179
}
193180

194-
return true;
195-
}
181+
// Add any function attributes.
182+
if (PAL.hasAttributes(AttributeSet::FunctionIndex))
183+
AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
184+
PAL.getFnAttributes()));
196185

197-
/// \brief Checks if the padding bytes of an argument could be accessed.
198-
static bool canPaddingBeAccessed(Argument *arg) {
186+
Type *RetTy = FTy->getReturnType();
199187

200-
assert(arg->hasByValAttr());
188+
// Construct the new function type using the new arguments.
189+
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
201190

202-
// Track all the pointers to the argument to make sure they are not captured.
203-
SmallPtrSet<Value *, 16> PtrValues;
204-
PtrValues.insert(arg);
191+
// Create the new function body and insert it into the module.
192+
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
193+
NF->copyAttributesFrom(F);
205194

206-
// Track all of the stores.
207-
SmallVector<StoreInst *, 16> Stores;
195+
// Patch the pointer to LLVM function in debug info descriptor.
196+
NF->setSubprogram(F->getSubprogram());
197+
F->setSubprogram(nullptr);
208198

209-
// Scan through the uses recursively to make sure the pointer is always used
210-
// sanely.
211-
SmallVector<Value *, 16> WorkList;
212-
WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
213-
while (!WorkList.empty()) {
214-
Value *V = WorkList.back();
215-
WorkList.pop_back();
216-
if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
217-
if (PtrValues.insert(V).second)
218-
WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
219-
} else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
220-
Stores.push_back(Store);
221-
} else if (!isa<LoadInst>(V)) {
222-
return true;
223-
}
224-
}
199+
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
200+
<< "From: " << *F);
201+
202+
// Recompute the parameter attributes list based on the new arguments for
203+
// the function.
204+
NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
205+
AttributesVec.clear();
225206

226-
// Check to make sure the pointers aren't captured
227-
for (StoreInst *Store : Stores)
228-
if (PtrValues.count(Store->getValueOperand()))
229-
return true;
207+
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
208+
NF->takeName(F);
230209

231-
return false;
232-
}
210+
// Get a new callgraph node for NF.
211+
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
233212

234-
/// PromoteArguments - This method checks the specified function to see if there
235-
/// are any promotable arguments and if it is safe to promote the function (for
236-
/// example, all callers are direct). If safe to promote some arguments, it
237-
/// calls the DoPromotion method.
238-
///
239-
static CallGraphNode *
240-
PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
241-
function_ref<AAResults &(Function &F)> AARGetter,
242-
unsigned MaxElements) {
243-
Function *F = CGN->getFunction();
244-
245-
// Make sure that it is local to this module.
246-
if (!F || !F->hasLocalLinkage()) return nullptr;
247-
248-
// Don't promote arguments for variadic functions. Adding, removing, or
249-
// changing non-pack parameters can change the classification of pack
250-
// parameters. Frontends encode that classification at the call site in the
251-
// IR, while in the callee the classification is determined dynamically based
252-
// on the number of registers consumed so far.
253-
if (F->isVarArg()) return nullptr;
254-
255-
// First check: see if there are any pointer arguments! If not, quick exit.
256-
SmallVector<Argument*, 16> PointerArgs;
257-
for (Argument &I : F->args())
258-
if (I.getType()->isPointerTy())
259-
PointerArgs.push_back(&I);
260-
if (PointerArgs.empty()) return nullptr;
261-
262-
// Second check: make sure that all callers are direct callers. We can't
263-
// transform functions that have indirect callers. Also see if the function
264-
// is self-recursive.
265-
bool isSelfRecursive = false;
266-
for (Use &U : F->uses()) {
267-
CallSite CS(U.getUser());
268-
// Must be a direct call.
269-
if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
270-
271-
if (CS.getInstruction()->getParent()->getParent() == F)
272-
isSelfRecursive = true;
273-
}
274-
275-
const DataLayout &DL = F->getParent()->getDataLayout();
276-
277-
AAResults &AAR = AARGetter(*F);
213+
// Loop over all of the callers of the function, transforming the call sites
214+
// to pass in the loaded pointers.
215+
//
216+
SmallVector<Value*, 16> Args;
217+
while (!F->use_empty()) {
218+
CallSite CS(F->user_back());
219+
assert(CS.getCalledFunction() == F);
220+
Instruction *Call = CS.getInstruction();
221+
const AttributeSet &CallPAL = CS.getAttributes();
278222

279-
// Check to see which arguments are promotable. If an argument is promotable,
280-
// add it to ArgsToPromote.
281-
SmallPtrSet<Argument*, 8> ArgsToPromote;
282-
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
283-
for (Argument *PtrArg : PointerArgs) {
284-
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
223+
// Add any return attributes.
224+
if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
225+
AttributesVec.push_back(AttributeSet::get(F->getContext(),
226+
CallPAL.getRetAttributes()));
285227

286-
// Replace sret attribute with noalias. This reduces register pressure by
287-
// avoiding a register copy.
288-
if (PtrArg->hasStructRetAttr()) {
289-
unsigned ArgNo = PtrArg->getArgNo();
290-
F->setAttributes(
291-
F->getAttributes()
292-
.removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
293-
.addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
294-
for (Use &U : F->uses()) {
295-
CallSite CS(U.getUser());
296-
CS.setAttributes(
297-
CS.getAttributes()
298-
.removeAttribute(F->getContext(), ArgNo + 1,
299-
Attribute::StructRet)
300-
.addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
301-
}
302-
}
228+
// Loop over the operands, inserting GEP and loads in the caller as
229+
// appropriate.
230+
CallSite::arg_iterator AI = CS.arg_begin();
231+
ArgIndex = 1;
232+
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
233+
I != E; ++I, ++AI, ++ArgIndex)
234+
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
235+
Args.push_back(*AI); // Unmodified argument
303236

304-
// If this is a byval argument, and if the aggregate type is small, just
305-
// pass the elements, which is always safe, if the passed value is densely
306-
// packed or if we can prove the padding bytes are never accessed. This does
307-
// not apply to inalloca.
308-
bool isSafeToPromote =
309-
PtrArg->hasByValAttr() &&
310-
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
311-
if (isSafeToPromote) {
312-
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
313-
if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
314-
DEBUG(dbgs() << "argpromotion disable promoting argument '"
315-
<< PtrArg->getName() << "' because it would require adding more"
316-
<< " than " << MaxElements << " arguments to the function.\n");
317-
continue;
237+
if (CallPAL.hasAttributes(ArgIndex)) {
238+
AttrBuilder B(CallPAL, ArgIndex);
239+
AttributesVec.
240+
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
318241
}
319-
320-
// If all the elements are single-value types, we can promote it.
321-
bool AllSimple = true;
322-
for (const auto *EltTy : STy->elements()) {
323-
if (!EltTy->isSingleValueType()) {
324-
AllSimple = false;
325-
break;
326-
}
242+
} else if (ByValArgsToTransform.count(&*I)) {
243+
// Emit a GEP and load for each element of the struct.
244+
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
245+
StructType *STy = cast<StructType>(AgTy);
246+
Value *Idxs[2] = {
247+
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
248+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
249+
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
250+
Value *Idx = GetElementPtrInst::Create(
251+
STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
252+
// TODO: Tell AA about the new values?
253+
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
327254
}
255+
} else if (!I->use_empty()) {
256+
// Non-dead argument: insert GEPs and loads as appropriate.
257+
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
258+
// Store the Value* version of the indices in here, but declare it now
259+
// for reuse.
260+
std::vector<Value*> Ops;
261+
for (const auto &ArgIndex : ArgIndices) {
262+
Value *V = *AI;
263+
LoadInst *OrigLoad =
264+
OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
265+
if (!ArgIndex.second.empty()) {
266+
Ops.reserve(ArgIndex.second.size());
267+
Type *ElTy = V->getType();
268+
for (unsigned long II : ArgIndex.second) {
269+
// Use i32 to index structs, and i64 for others (pointers/arrays).
270+
// This satisfies GEP constraints.
271+
Type *IdxTy = (ElTy->isStructTy() ?
272+
Type::getInt32Ty(F->getContext()) :
273+
Type::getInt64Ty(F->getContext()));
274+
Ops.push_back(ConstantInt::get(IdxTy, II));
275+
// Keep track of the type we're currently indexing.
276+
if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
277+
ElTy = ElPTy->getElementType();
278+
else
279+
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
280+
}
281+
// And create a GEP to extract those indices.
282+
V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
283+
V->getName() + ".idx", Call);
284+
Ops.clear();
285+
}
286+
// Since we're replacing a load make sure we take the alignment
287+
// of the previous load.
288+
LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
289+
newLoad->setAlignment(OrigLoad->getAlignment());
290+
// Transfer the AA info too.
291+
AAMDNodes AAInfo;
292+
OrigLoad->getAAMetadata(AAInfo);
293+
newLoad->setAAMetadata(AAInfo);
328294

329-
// Safe to transform, don't even bother trying to "promote" it.
330-
// Passing the elements as a scalar will allow sroa to hack on
331-
// the new alloca we introduce.
332-
if (AllSimple) {
333-
ByValArgsToTransform.insert(PtrArg);
334-
continue;
295+
Args.push_back(newLoad);
335296
}
336297
}
337-
}
338298

339-
// If the argument is a recursive type and we're in a recursive
340-
// function, we could end up infinitely peeling the function argument.
341-
if (isSelfRecursive) {
342-
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
343-
bool RecursiveType = false;
344-
for (const auto *EltTy : STy->elements()) {
345-
if (EltTy == PtrArg->getType()) {
346-
RecursiveType = true;
347-
break;
348-
}
349-
}
350-
if (RecursiveType)
351-
continue;
299+
// Push any varargs arguments on the list.
300+
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
301+
Args.push_back(*AI);
302+
if (CallPAL.hasAttributes(ArgIndex)) {
303+
AttrBuilder B(CallPAL, ArgIndex);
304+
AttributesVec.
305+
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
352306
}
353307
}
354-
355-
// Otherwise, see if we can promote the pointer to its value.
356-
if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
357-
MaxElements))
358-
ArgsToPromote.insert(PtrArg);
359-
}
360308

361-
// No promotable pointer arguments.
362-
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
363-
return nullptr;
309+
// Add any function attributes.
310+
if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
311+
AttributesVec.push_back(AttributeSet::get(Call->getContext(),
312+
CallPAL.getFnAttributes()));
364313

365-
return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
366-
}
314+
SmallVector<OperandBundleDef, 1> OpBundles;
315+
CS.getOperandBundlesAsDefs(OpBundles);
367316

368-
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
369-
/// all callees pass in a valid pointer for the specified function argument.
370-
static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
371-
Function *Callee = Arg->getParent();
372-
const DataLayout &DL = Callee->getParent()->getDataLayout();
317+
Instruction *New;
318+
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
319+
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
320+
Args, OpBundles, "", Call);
321+
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
322+
cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
323+
AttributesVec));
324+
} else {
325+
New = CallInst::Create(NF, Args, OpBundles, "", Call);
326+
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
327+
cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
328+
AttributesVec));
329+
cast<CallInst>(New)->setTailCallKind(
330+
cast<CallInst>(Call)->getTailCallKind());
331+
}
332+
New->setDebugLoc(Call->getDebugLoc());
333+
Args.clear();
334+
AttributesVec.clear();
373335

374-
unsigned ArgNo = Arg->getArgNo();
336+
// Update the callgraph to know that the callsite has been transformed.
337+
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
338+
CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
375339

376-
// Look at all call sites of the function. At this point we know we only have
377-
// direct callees.
378-
for (User *U : Callee->users()) {
379-
CallSite CS(U);
380-
assert(CS && "Should only have direct calls!");
340+
if (!Call->use_empty()) {
341+
Call->replaceAllUsesWith(New);
342+
New->takeName(Call);
343+
}
381344

382-
if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
383-
return false;
345+
// Finally, remove the old call from the program, reducing the use-count of
346+
// F.
347+
Call->eraseFromParent();
384348
}
385-
return true;
386-
}
387349

388-
/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
389-
/// that is greater than or equal to the size of prefix, and each of the
390-
/// elements in Prefix is the same as the corresponding elements in Longer.
391-
///
392-
/// This means it also returns true when Prefix and Longer are equal!
393-
static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
394-
if (Prefix.size() > Longer.size())
350+
// Since we have now created the new function, splice the body of the old
351+
// function right into the new function, leaving the old rotting hulk of the
352+
// function empty.
353+
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
354+
355+
// Loop over the argument list, transferring uses of the old arguments over to
356+
// the new arguments, also transferring over the names as well.
357+
//
358+
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
359+
I2 = NF->arg_begin(); I != E; ++I) {
360+
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
361+
// If this is an unmodified argument, move the name and users over to the
362+
// new version.
363+
I->replaceAllUsesWith(&*I2);
364+
I2->takeName(&*I);
365+
++I2;
366+
continue;
367+
}
368+
369+
if (ByValArgsToTransform.count(&*I)) {
370+
// In the callee, we create an alloca, and store each of the new incoming
371+
// arguments into the alloca.
372+
Instruction *InsertPt = &NF->begin()->front();
373+
374+
// Just add all the struct element types.
375+
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
376+
Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
377+
StructType *STy = cast<StructType>(AgTy);
378+
Value *Idxs[2] = {
379+
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
380+
381+
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
382+
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
383+
Value *Idx = GetElementPtrInst::Create(
384+
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
385+
InsertPt);
386+
I2->setName(I->getName()+"."+Twine(i));
387+
new StoreInst(&*I2++, Idx, InsertPt);
388+
}
389+
390+
// Anything that used the arg should now use the alloca.
391+
I->replaceAllUsesWith(TheAlloca);
392+
TheAlloca->takeName(&*I);
393+
394+
// If the alloca is used in a call, we must clear the tail flag since
395+
// the callee now uses an alloca from the caller.
396+
for (User *U : TheAlloca->users()) {
397+
CallInst *Call = dyn_cast<CallInst>(U);
398+
if (!Call)
399+
continue;
400+
Call->setTailCall(false);
401+
}
402+
continue;
403+
}
404+
405+
if (I->use_empty())
406+
continue;
407+
408+
// Otherwise, if we promoted this argument, then all users are load
409+
// instructions (or GEPs with only load users), and all loads should be
410+
// using the new argument that we added.
411+
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
412+
413+
while (!I->use_empty()) {
414+
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
415+
assert(ArgIndices.begin()->second.empty() &&
416+
"Load element should sort to front!");
417+
I2->setName(I->getName()+".val");
418+
LI->replaceAllUsesWith(&*I2);
419+
LI->eraseFromParent();
420+
DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
421+
<< "' in function '" << F->getName() << "'\n");
422+
} else {
423+
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
424+
IndicesVector Operands;
425+
Operands.reserve(GEP->getNumIndices());
426+
for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
427+
II != IE; ++II)
428+
Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
429+
430+
// GEPs with a single 0 index can be merged with direct loads
431+
if (Operands.size() == 1 && Operands.front() == 0)
432+
Operands.clear();
433+
434+
Function::arg_iterator TheArg = I2;
435+
for (ScalarizeTable::iterator It = ArgIndices.begin();
436+
It->second != Operands; ++It, ++TheArg) {
437+
assert(It != ArgIndices.end() && "GEP not handled??");
438+
}
439+
440+
std::string NewName = I->getName();
441+
for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
442+
NewName += "." + utostr(Operands[i]);
443+
}
444+
NewName += ".val";
445+
TheArg->setName(NewName);
446+
447+
DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
448+
<< "' of function '" << NF->getName() << "'\n");
449+
450+
// All of the uses must be load instructions. Replace them all with
451+
// the argument specified by ArgNo.
452+
while (!GEP->use_empty()) {
453+
LoadInst *L = cast<LoadInst>(GEP->user_back());
454+
L->replaceAllUsesWith(&*TheArg);
455+
L->eraseFromParent();
456+
}
457+
GEP->eraseFromParent();
458+
}
459+
}
460+
461+
// Increment I2 past all of the arguments added for this promoted pointer.
462+
std::advance(I2, ArgIndices.size());
463+
}
464+
465+
NF_CGN->stealCalledFunctionsFrom(CG[F]);
466+
467+
// Now that the old function is dead, delete it. If there is a dangling
468+
// reference to the CallgraphNode, just leave the dead function around for
469+
// someone else to nuke.
470+
CallGraphNode *CGN = CG[F];
471+
if (CGN->getNumReferences() == 0)
472+
delete CG.removeFunctionFromModule(CGN);
473+
else
474+
F->setLinkage(Function::ExternalLinkage);
475+
476+
return NF_CGN;
477+
}
478+
479+
480+
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
481+
/// all callees pass in a valid pointer for the specified function argument.
482+
static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
483+
Function *Callee = Arg->getParent();
484+
const DataLayout &DL = Callee->getParent()->getDataLayout();
485+
486+
unsigned ArgNo = Arg->getArgNo();
487+
488+
// Look at all call sites of the function. At this point we know we only have
489+
// direct callees.
490+
for (User *U : Callee->users()) {
491+
CallSite CS(U);
492+
assert(CS && "Should only have direct calls!");
493+
494+
if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL))
495+
return false;
496+
}
497+
return true;
498+
}
499+
500+
/// Returns true if Prefix is a prefix of longer. That means, Longer has a size
501+
/// that is greater than or equal to the size of prefix, and each of the
502+
/// elements in Prefix is the same as the corresponding elements in Longer.
503+
///
504+
/// This means it also returns true when Prefix and Longer are equal!
505+
static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
506+
if (Prefix.size() > Longer.size())
395507
return false;
396508
return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
397509
}
@@ -625,416 +737,290 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
625737
return true;
626738
}
627739

628-
/// DoPromotion - This method actually performs the promotion of the specified
629-
/// arguments, and returns the new function. At this point, we know that it's
630-
/// safe to do so.
631-
static CallGraphNode *
632-
DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
633-
SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
634740

635-
// Start by computing a new prototype for the function, which is the same as
636-
// the old function, but has modified arguments.
637-
FunctionType *FTy = F->getFunctionType();
638-
std::vector<Type*> Params;
741+
/// \brief Checks if a type could have padding bytes.
742+
static bool isDenselyPacked(Type *type, const DataLayout &DL) {
639743

640-
typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
744+
// There is no size information, so be conservative.
745+
if (!type->isSized())
746+
return false;
641747

642-
// ScalarizedElements - If we are promoting a pointer that has elements
643-
// accessed out of it, keep track of which elements are accessed so that we
644-
// can add one argument for each.
645-
//
646-
// Arguments that are directly loaded will have a zero element value here, to
647-
// handle cases where there are both a direct load and GEP accesses.
648-
//
649-
std::map<Argument*, ScalarizeTable> ScalarizedElements;
748+
// If the alloc size is not equal to the storage size, then there are padding
749+
// bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
750+
if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
751+
return false;
650752

651-
// OriginalLoads - Keep track of a representative load instruction from the
652-
// original function so that we can tell the alias analysis implementation
653-
// what the new GEP/Load instructions we are inserting look like.
654-
// We need to keep the original loads for each argument and the elements
655-
// of the argument that are accessed.
656-
std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
753+
if (!isa<CompositeType>(type))
754+
return true;
657755

658-
// Attribute - Keep track of the parameter attributes for the arguments
659-
// that we are *not* promoting. For the ones that we do promote, the parameter
660-
// attributes are lost
661-
SmallVector<AttributeSet, 8> AttributesVec;
662-
const AttributeSet &PAL = F->getAttributes();
756+
// For homogenous sequential types, check for padding within members.
757+
if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
758+
return isDenselyPacked(seqTy->getElementType(), DL);
663759

664-
// Add any return attributes.
665-
if (PAL.hasAttributes(AttributeSet::ReturnIndex))
666-
AttributesVec.push_back(AttributeSet::get(F->getContext(),
667-
PAL.getRetAttributes()));
760+
// Check for padding within and between elements of a struct.
761+
StructType *StructTy = cast<StructType>(type);
762+
const StructLayout *Layout = DL.getStructLayout(StructTy);
763+
uint64_t StartPos = 0;
764+
for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
765+
Type *ElTy = StructTy->getElementType(i);
766+
if (!isDenselyPacked(ElTy, DL))
767+
return false;
768+
if (StartPos != Layout->getElementOffsetInBits(i))
769+
return false;
770+
StartPos += DL.getTypeAllocSizeInBits(ElTy);
771+
}
668772

669-
// First, determine the new argument list
670-
unsigned ArgIndex = 1;
671-
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
672-
++I, ++ArgIndex) {
673-
if (ByValArgsToTransform.count(&*I)) {
674-
// Simple byval argument? Just add all the struct element types.
675-
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
676-
StructType *STy = cast<StructType>(AgTy);
677-
Params.insert(Params.end(), STy->element_begin(), STy->element_end());
678-
++NumByValArgsPromoted;
679-
} else if (!ArgsToPromote.count(&*I)) {
680-
// Unchanged argument
681-
Params.push_back(I->getType());
682-
AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
683-
if (attrs.hasAttributes(ArgIndex)) {
684-
AttrBuilder B(attrs, ArgIndex);
685-
AttributesVec.
686-
push_back(AttributeSet::get(F->getContext(), Params.size(), B));
687-
}
688-
} else if (I->use_empty()) {
689-
// Dead argument (which are always marked as promotable)
690-
++NumArgumentsDead;
691-
} else {
692-
// Okay, this is being promoted. This means that the only uses are loads
693-
// or GEPs which are only used by loads
773+
return true;
774+
}
694775

695-
// In this table, we will track which indices are loaded from the argument
696-
// (where direct loads are tracked as no indices).
697-
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
698-
for (User *U : I->users()) {
699-
Instruction *UI = cast<Instruction>(U);
700-
Type *SrcTy;
701-
if (LoadInst *L = dyn_cast<LoadInst>(UI))
702-
SrcTy = L->getType();
703-
else
704-
SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
705-
IndicesVector Indices;
706-
Indices.reserve(UI->getNumOperands() - 1);
707-
// Since loads will only have a single operand, and GEPs only a single
708-
// non-index operand, this will record direct loads without any indices,
709-
// and gep+loads with the GEP indices.
710-
for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
711-
II != IE; ++II)
712-
Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
713-
// GEPs with a single 0 index can be merged with direct loads
714-
if (Indices.size() == 1 && Indices.front() == 0)
715-
Indices.clear();
716-
ArgIndices.insert(std::make_pair(SrcTy, Indices));
717-
LoadInst *OrigLoad;
718-
if (LoadInst *L = dyn_cast<LoadInst>(UI))
719-
OrigLoad = L;
720-
else
721-
// Take any load, we will use it only to update Alias Analysis
722-
OrigLoad = cast<LoadInst>(UI->user_back());
723-
OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
724-
}
776+
/// \brief Checks if the padding bytes of an argument could be accessed.
777+
static bool canPaddingBeAccessed(Argument *arg) {
725778

726-
// Add a parameter to the function for each element passed in.
727-
for (const auto &ArgIndex : ArgIndices) {
728-
// not allowed to dereference ->begin() if size() is 0
729-
Params.push_back(GetElementPtrInst::getIndexedType(
730-
cast<PointerType>(I->getType()->getScalarType())->getElementType(),
731-
ArgIndex.second));
732-
assert(Params.back());
733-
}
779+
assert(arg->hasByValAttr());
734780

735-
if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
736-
++NumArgumentsPromoted;
737-
else
738-
++NumAggregatesPromoted;
781+
// Track all the pointers to the argument to make sure they are not captured.
782+
SmallPtrSet<Value *, 16> PtrValues;
783+
PtrValues.insert(arg);
784+
785+
// Track all of the stores.
786+
SmallVector<StoreInst *, 16> Stores;
787+
788+
// Scan through the uses recursively to make sure the pointer is always used
789+
// sanely.
790+
SmallVector<Value *, 16> WorkList;
791+
WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
792+
while (!WorkList.empty()) {
793+
Value *V = WorkList.back();
794+
WorkList.pop_back();
795+
if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
796+
if (PtrValues.insert(V).second)
797+
WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
798+
} else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
799+
Stores.push_back(Store);
800+
} else if (!isa<LoadInst>(V)) {
801+
return true;
739802
}
740803
}
741804

742-
// Add any function attributes.
743-
if (PAL.hasAttributes(AttributeSet::FunctionIndex))
744-
AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
745-
PAL.getFnAttributes()));
746-
747-
Type *RetTy = FTy->getReturnType();
805+
// Check to make sure the pointers aren't captured
806+
for (StoreInst *Store : Stores)
807+
if (PtrValues.count(Store->getValueOperand()))
808+
return true;
748809

749-
// Construct the new function type using the new arguments.
750-
FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
810+
return false;
811+
}
751812

752-
// Create the new function body and insert it into the module.
753-
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
754-
NF->copyAttributesFrom(F);
813+
/// PromoteArguments - This method checks the specified function to see if there
814+
/// are any promotable arguments and if it is safe to promote the function (for
815+
/// example, all callers are direct). If safe to promote some arguments, it
816+
/// calls the DoPromotion method.
817+
///
818+
static CallGraphNode *
819+
PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
820+
function_ref<AAResults &(Function &F)> AARGetter,
821+
unsigned MaxElements) {
822+
Function *F = CGN->getFunction();
755823

756-
// Patch the pointer to LLVM function in debug info descriptor.
757-
NF->setSubprogram(F->getSubprogram());
758-
F->setSubprogram(nullptr);
824+
// Make sure that it is local to this module.
825+
if (!F || !F->hasLocalLinkage()) return nullptr;
759826

760-
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
761-
<< "From: " << *F);
762-
763-
// Recompute the parameter attributes list based on the new arguments for
764-
// the function.
765-
NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
766-
AttributesVec.clear();
827+
// Don't promote arguments for variadic functions. Adding, removing, or
828+
// changing non-pack parameters can change the classification of pack
829+
// parameters. Frontends encode that classification at the call site in the
830+
// IR, while in the callee the classification is determined dynamically based
831+
// on the number of registers consumed so far.
832+
if (F->isVarArg()) return nullptr;
767833

768-
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
769-
NF->takeName(F);
834+
// First check: see if there are any pointer arguments! If not, quick exit.
835+
SmallVector<Argument*, 16> PointerArgs;
836+
for (Argument &I : F->args())
837+
if (I.getType()->isPointerTy())
838+
PointerArgs.push_back(&I);
839+
if (PointerArgs.empty()) return nullptr;
770840

771-
// Get a new callgraph node for NF.
772-
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
841+
// Second check: make sure that all callers are direct callers. We can't
842+
// transform functions that have indirect callers. Also see if the function
843+
// is self-recursive.
844+
bool isSelfRecursive = false;
845+
for (Use &U : F->uses()) {
846+
CallSite CS(U.getUser());
847+
// Must be a direct call.
848+
if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
849+
850+
if (CS.getInstruction()->getParent()->getParent() == F)
851+
isSelfRecursive = true;
852+
}
853+
854+
const DataLayout &DL = F->getParent()->getDataLayout();
773855

774-
// Loop over all of the callers of the function, transforming the call sites
775-
// to pass in the loaded pointers.
776-
//
777-
SmallVector<Value*, 16> Args;
778-
while (!F->use_empty()) {
779-
CallSite CS(F->user_back());
780-
assert(CS.getCalledFunction() == F);
781-
Instruction *Call = CS.getInstruction();
782-
const AttributeSet &CallPAL = CS.getAttributes();
856+
AAResults &AAR = AARGetter(*F);
783857

784-
// Add any return attributes.
785-
if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
786-
AttributesVec.push_back(AttributeSet::get(F->getContext(),
787-
CallPAL.getRetAttributes()));
858+
// Check to see which arguments are promotable. If an argument is promotable,
859+
// add it to ArgsToPromote.
860+
SmallPtrSet<Argument*, 8> ArgsToPromote;
861+
SmallPtrSet<Argument*, 8> ByValArgsToTransform;
862+
for (Argument *PtrArg : PointerArgs) {
863+
Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
788864

789-
// Loop over the operands, inserting GEP and loads in the caller as
790-
// appropriate.
791-
CallSite::arg_iterator AI = CS.arg_begin();
792-
ArgIndex = 1;
793-
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
794-
I != E; ++I, ++AI, ++ArgIndex)
795-
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
796-
Args.push_back(*AI); // Unmodified argument
865+
// Replace sret attribute with noalias. This reduces register pressure by
866+
// avoiding a register copy.
867+
if (PtrArg->hasStructRetAttr()) {
868+
unsigned ArgNo = PtrArg->getArgNo();
869+
F->setAttributes(
870+
F->getAttributes()
871+
.removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
872+
.addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
873+
for (Use &U : F->uses()) {
874+
CallSite CS(U.getUser());
875+
CS.setAttributes(
876+
CS.getAttributes()
877+
.removeAttribute(F->getContext(), ArgNo + 1,
878+
Attribute::StructRet)
879+
.addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
880+
}
881+
}
797882

798-
if (CallPAL.hasAttributes(ArgIndex)) {
799-
AttrBuilder B(CallPAL, ArgIndex);
800-
AttributesVec.
801-
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
802-
}
803-
} else if (ByValArgsToTransform.count(&*I)) {
804-
// Emit a GEP and load for each element of the struct.
805-
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
806-
StructType *STy = cast<StructType>(AgTy);
807-
Value *Idxs[2] = {
808-
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
809-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
810-
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
811-
Value *Idx = GetElementPtrInst::Create(
812-
STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
813-
// TODO: Tell AA about the new values?
814-
Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
883+
// If this is a byval argument, and if the aggregate type is small, just
884+
// pass the elements, which is always safe, if the passed value is densely
885+
// packed or if we can prove the padding bytes are never accessed. This does
886+
// not apply to inalloca.
887+
bool isSafeToPromote =
888+
PtrArg->hasByValAttr() &&
889+
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
890+
if (isSafeToPromote) {
891+
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
892+
if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
893+
DEBUG(dbgs() << "argpromotion disable promoting argument '"
894+
<< PtrArg->getName() << "' because it would require adding more"
895+
<< " than " << MaxElements << " arguments to the function.\n");
896+
continue;
815897
}
816-
} else if (!I->use_empty()) {
817-
// Non-dead argument: insert GEPs and loads as appropriate.
818-
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
819-
// Store the Value* version of the indices in here, but declare it now
820-
// for reuse.
821-
std::vector<Value*> Ops;
822-
for (const auto &ArgIndex : ArgIndices) {
823-
Value *V = *AI;
824-
LoadInst *OrigLoad =
825-
OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
826-
if (!ArgIndex.second.empty()) {
827-
Ops.reserve(ArgIndex.second.size());
828-
Type *ElTy = V->getType();
829-
for (unsigned long II : ArgIndex.second) {
830-
// Use i32 to index structs, and i64 for others (pointers/arrays).
831-
// This satisfies GEP constraints.
832-
Type *IdxTy = (ElTy->isStructTy() ?
833-
Type::getInt32Ty(F->getContext()) :
834-
Type::getInt64Ty(F->getContext()));
835-
Ops.push_back(ConstantInt::get(IdxTy, II));
836-
// Keep track of the type we're currently indexing.
837-
if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
838-
ElTy = ElPTy->getElementType();
839-
else
840-
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
841-
}
842-
// And create a GEP to extract those indices.
843-
V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
844-
V->getName() + ".idx", Call);
845-
Ops.clear();
898+
899+
// If all the elements are single-value types, we can promote it.
900+
bool AllSimple = true;
901+
for (const auto *EltTy : STy->elements()) {
902+
if (!EltTy->isSingleValueType()) {
903+
AllSimple = false;
904+
break;
846905
}
847-
// Since we're replacing a load make sure we take the alignment
848-
// of the previous load.
849-
LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
850-
newLoad->setAlignment(OrigLoad->getAlignment());
851-
// Transfer the AA info too.
852-
AAMDNodes AAInfo;
853-
OrigLoad->getAAMetadata(AAInfo);
854-
newLoad->setAAMetadata(AAInfo);
906+
}
855907

856-
Args.push_back(newLoad);
908+
// Safe to transform, don't even bother trying to "promote" it.
909+
// Passing the elements as a scalar will allow sroa to hack on
910+
// the new alloca we introduce.
911+
if (AllSimple) {
912+
ByValArgsToTransform.insert(PtrArg);
913+
continue;
857914
}
858915
}
916+
}
859917

860-
// Push any varargs arguments on the list.
861-
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
862-
Args.push_back(*AI);
863-
if (CallPAL.hasAttributes(ArgIndex)) {
864-
AttrBuilder B(CallPAL, ArgIndex);
865-
AttributesVec.
866-
push_back(AttributeSet::get(F->getContext(), Args.size(), B));
918+
// If the argument is a recursive type and we're in a recursive
919+
// function, we could end up infinitely peeling the function argument.
920+
if (isSelfRecursive) {
921+
if (StructType *STy = dyn_cast<StructType>(AgTy)) {
922+
bool RecursiveType = false;
923+
for (const auto *EltTy : STy->elements()) {
924+
if (EltTy == PtrArg->getType()) {
925+
RecursiveType = true;
926+
break;
927+
}
928+
}
929+
if (RecursiveType)
930+
continue;
867931
}
868932
}
933+
934+
// Otherwise, see if we can promote the pointer to its value.
935+
if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
936+
MaxElements))
937+
ArgsToPromote.insert(PtrArg);
938+
}
869939

870-
// Add any function attributes.
871-
if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
872-
AttributesVec.push_back(AttributeSet::get(Call->getContext(),
873-
CallPAL.getFnAttributes()));
874-
875-
SmallVector<OperandBundleDef, 1> OpBundles;
876-
CS.getOperandBundlesAsDefs(OpBundles);
877-
878-
Instruction *New;
879-
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
880-
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
881-
Args, OpBundles, "", Call);
882-
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
883-
cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
884-
AttributesVec));
885-
} else {
886-
New = CallInst::Create(NF, Args, OpBundles, "", Call);
887-
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
888-
cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
889-
AttributesVec));
890-
cast<CallInst>(New)->setTailCallKind(
891-
cast<CallInst>(Call)->getTailCallKind());
892-
}
893-
New->setDebugLoc(Call->getDebugLoc());
894-
Args.clear();
895-
AttributesVec.clear();
940+
// No promotable pointer arguments.
941+
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
942+
return nullptr;
896943

897-
// Update the callgraph to know that the callsite has been transformed.
898-
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
899-
CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
944+
return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
945+
}
900946

901-
if (!Call->use_empty()) {
902-
Call->replaceAllUsesWith(New);
903-
New->takeName(Call);
947+
namespace {
948+
/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
949+
///
950+
struct ArgPromotion : public CallGraphSCCPass {
951+
void getAnalysisUsage(AnalysisUsage &AU) const override {
952+
AU.addRequired<AssumptionCacheTracker>();
953+
AU.addRequired<TargetLibraryInfoWrapperPass>();
954+
getAAResultsAnalysisUsage(AU);
955+
CallGraphSCCPass::getAnalysisUsage(AU);
904956
}
905957

906-
// Finally, remove the old call from the program, reducing the use-count of
907-
// F.
908-
Call->eraseFromParent();
909-
}
910-
911-
// Since we have now created the new function, splice the body of the old
912-
// function right into the new function, leaving the old rotting hulk of the
913-
// function empty.
914-
NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
915-
916-
// Loop over the argument list, transferring uses of the old arguments over to
917-
// the new arguments, also transferring over the names as well.
918-
//
919-
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
920-
I2 = NF->arg_begin(); I != E; ++I) {
921-
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
922-
// If this is an unmodified argument, move the name and users over to the
923-
// new version.
924-
I->replaceAllUsesWith(&*I2);
925-
I2->takeName(&*I);
926-
++I2;
927-
continue;
958+
bool runOnSCC(CallGraphSCC &SCC) override;
959+
static char ID; // Pass identification, replacement for typeid
960+
explicit ArgPromotion(unsigned MaxElements = 3)
961+
: CallGraphSCCPass(ID), MaxElements(MaxElements) {
962+
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
928963
}
929964

930-
if (ByValArgsToTransform.count(&*I)) {
931-
// In the callee, we create an alloca, and store each of the new incoming
932-
// arguments into the alloca.
933-
Instruction *InsertPt = &NF->begin()->front();
934-
935-
// Just add all the struct element types.
936-
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
937-
Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
938-
StructType *STy = cast<StructType>(AgTy);
939-
Value *Idxs[2] = {
940-
ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
941-
942-
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
943-
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
944-
Value *Idx = GetElementPtrInst::Create(
945-
AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
946-
InsertPt);
947-
I2->setName(I->getName()+"."+Twine(i));
948-
new StoreInst(&*I2++, Idx, InsertPt);
949-
}
950-
951-
// Anything that used the arg should now use the alloca.
952-
I->replaceAllUsesWith(TheAlloca);
953-
TheAlloca->takeName(&*I);
954-
955-
// If the alloca is used in a call, we must clear the tail flag since
956-
// the callee now uses an alloca from the caller.
957-
for (User *U : TheAlloca->users()) {
958-
CallInst *Call = dyn_cast<CallInst>(U);
959-
if (!Call)
960-
continue;
961-
Call->setTailCall(false);
962-
}
963-
continue;
964-
}
965+
private:
965966

966-
if (I->use_empty())
967-
continue;
967+
using llvm::Pass::doInitialization;
968+
bool doInitialization(CallGraph &CG) override;
969+
/// The maximum number of elements to expand, or 0 for unlimited.
970+
unsigned MaxElements;
971+
};
972+
}
968973

969-
// Otherwise, if we promoted this argument, then all users are load
970-
// instructions (or GEPs with only load users), and all loads should be
971-
// using the new argument that we added.
972-
ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
974+
char ArgPromotion::ID = 0;
975+
INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
976+
"Promote 'by reference' arguments to scalars", false, false)
977+
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
978+
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
979+
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
980+
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
981+
"Promote 'by reference' arguments to scalars", false, false)
973982

974-
while (!I->use_empty()) {
975-
if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
976-
assert(ArgIndices.begin()->second.empty() &&
977-
"Load element should sort to front!");
978-
I2->setName(I->getName()+".val");
979-
LI->replaceAllUsesWith(&*I2);
980-
LI->eraseFromParent();
981-
DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
982-
<< "' in function '" << F->getName() << "'\n");
983-
} else {
984-
GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
985-
IndicesVector Operands;
986-
Operands.reserve(GEP->getNumIndices());
987-
for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
988-
II != IE; ++II)
989-
Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
983+
Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
984+
return new ArgPromotion(MaxElements);
985+
}
990986

991-
// GEPs with a single 0 index can be merged with direct loads
992-
if (Operands.size() == 1 && Operands.front() == 0)
993-
Operands.clear();
987+
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
988+
if (skipSCC(SCC))
989+
return false;
994990

995-
Function::arg_iterator TheArg = I2;
996-
for (ScalarizeTable::iterator It = ArgIndices.begin();
997-
It->second != Operands; ++It, ++TheArg) {
998-
assert(It != ArgIndices.end() && "GEP not handled??");
999-
}
991+
// Get the callgraph information that we need to update to reflect our
992+
// changes.
993+
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
1000994

1001-
std::string NewName = I->getName();
1002-
for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
1003-
NewName += "." + utostr(Operands[i]);
1004-
}
1005-
NewName += ".val";
1006-
TheArg->setName(NewName);
995+
// We compute dedicated AA results for each function in the SCC as needed. We
996+
// use a lambda referencing external objects so that they live long enough to
997+
// be queried, but we re-use them each time.
998+
Optional<BasicAAResult> BAR;
999+
Optional<AAResults> AAR;
1000+
auto AARGetter = [&](Function &F) -> AAResults & {
1001+
BAR.emplace(createLegacyPMBasicAAResult(*this, F));
1002+
AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
1003+
return *AAR;
1004+
};
10071005

1008-
DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
1009-
<< "' of function '" << NF->getName() << "'\n");
1006+
bool Changed = false, LocalChange;
10101007

1011-
// All of the uses must be load instructions. Replace them all with
1012-
// the argument specified by ArgNo.
1013-
while (!GEP->use_empty()) {
1014-
LoadInst *L = cast<LoadInst>(GEP->user_back());
1015-
L->replaceAllUsesWith(&*TheArg);
1016-
L->eraseFromParent();
1017-
}
1018-
GEP->eraseFromParent();
1008+
// Iterate until we stop promoting from this SCC.
1009+
do {
1010+
LocalChange = false;
1011+
// Attempt to promote arguments from all functions in this SCC.
1012+
for (CallGraphNode *OldNode : SCC) {
1013+
if (CallGraphNode *NewNode =
1014+
PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
1015+
LocalChange = true;
1016+
SCC.ReplaceNode(OldNode, NewNode);
10191017
}
10201018
}
1019+
// Remember that we changed something.
1020+
Changed |= LocalChange;
1021+
} while (LocalChange);
10211022

1022-
// Increment I2 past all of the arguments added for this promoted pointer.
1023-
std::advance(I2, ArgIndices.size());
1024-
}
1025-
1026-
NF_CGN->stealCalledFunctionsFrom(CG[F]);
1027-
1028-
// Now that the old function is dead, delete it. If there is a dangling
1029-
// reference to the CallgraphNode, just leave the dead function around for
1030-
// someone else to nuke.
1031-
CallGraphNode *CGN = CG[F];
1032-
if (CGN->getNumReferences() == 0)
1033-
delete CG.removeFunctionFromModule(CGN);
1034-
else
1035-
F->setLinkage(Function::ExternalLinkage);
1036-
1037-
return NF_CGN;
1023+
return Changed;
10381024
}
10391025

10401026
bool ArgPromotion::doInitialization(CallGraph &CG) {

0 commit comments

Comments
 (0)
Please sign in to comment.