@@ -61,337 +61,449 @@ STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
61
61
STATISTIC (NumByValArgsPromoted , " Number of byval arguments promoted" );
62
62
STATISTIC (NumArgumentsDead , " Number of dead pointer args eliminated" );
63
63
64
- namespace {
65
- // / ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
66
- // /
67
- struct ArgPromotion : public CallGraphSCCPass {
68
- void getAnalysisUsage (AnalysisUsage &AU) const override {
69
- AU.addRequired <AssumptionCacheTracker>();
70
- AU.addRequired <TargetLibraryInfoWrapperPass>();
71
- getAAResultsAnalysisUsage (AU);
72
- CallGraphSCCPass::getAnalysisUsage (AU);
73
- }
74
-
75
- bool runOnSCC (CallGraphSCC &SCC) override ;
76
- static char ID; // Pass identification, replacement for typeid
77
- explicit ArgPromotion (unsigned maxElements = 3 )
78
- : CallGraphSCCPass(ID), maxElements(maxElements) {
79
- initializeArgPromotionPass (*PassRegistry::getPassRegistry ());
80
- }
81
-
82
- private:
83
-
84
- using llvm::Pass::doInitialization;
85
- bool doInitialization (CallGraph &CG) override ;
86
- // / The maximum number of elements to expand, or 0 for unlimited.
87
- unsigned maxElements;
88
- };
89
- }
90
-
91
64
// / A vector used to hold the indices of a single GEP instruction
92
65
typedef std::vector<uint64_t > IndicesVector;
93
66
94
- static CallGraphNode *
95
- PromoteArguments (CallGraphNode *CGN, CallGraph &CG,
96
- function_ref<AAResults &(Function &F)> AARGetter,
97
- unsigned MaxElements);
98
- static bool isDenselyPacked (Type *type, const DataLayout &DL);
99
- static bool canPaddingBeAccessed (Argument *Arg);
100
- static bool isSafeToPromoteArgument (Argument *Arg, bool isByVal, AAResults &AAR,
101
- unsigned MaxElements);
67
+ // / DoPromotion - This method actually performs the promotion of the specified
68
+ // / arguments, and returns the new function. At this point, we know that it's
69
+ // / safe to do so.
102
70
static CallGraphNode *
103
71
DoPromotion (Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
104
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG);
105
-
106
- char ArgPromotion::ID = 0 ;
107
- INITIALIZE_PASS_BEGIN (ArgPromotion, " argpromotion" ,
108
- " Promote 'by reference' arguments to scalars" , false , false )
109
- INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
110
- INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
111
- INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
112
- INITIALIZE_PASS_END(ArgPromotion, " argpromotion" ,
113
- " Promote 'by reference' arguments to scalars" , false , false )
114
-
115
- Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
116
- return new ArgPromotion (maxElements);
117
- }
118
-
119
- static bool runImpl (CallGraphSCC &SCC, CallGraph &CG,
120
- function_ref<AAResults &(Function &F)> AARGetter,
121
- unsigned MaxElements) {
122
- bool Changed = false , LocalChange;
123
-
124
- do { // Iterate until we stop promoting from this SCC.
125
- LocalChange = false ;
126
- // Attempt to promote arguments from all functions in this SCC.
127
- for (CallGraphNode *OldNode : SCC) {
128
- if (CallGraphNode *NewNode =
129
- PromoteArguments (OldNode, CG, AARGetter, MaxElements)) {
130
- LocalChange = true ;
131
- SCC.ReplaceNode (OldNode, NewNode);
132
- }
133
- }
134
- Changed |= LocalChange; // Remember that we changed something.
135
- } while (LocalChange);
136
-
137
- return Changed;
138
- }
72
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
139
73
140
- bool ArgPromotion::runOnSCC (CallGraphSCC &SCC) {
141
- if (skipSCC (SCC))
142
- return false ;
74
+ // Start by computing a new prototype for the function, which is the same as
75
+ // the old function, but has modified arguments.
76
+ FunctionType *FTy = F->getFunctionType ();
77
+ std::vector<Type*> Params;
143
78
144
- // Get the callgraph information that we need to update to reflect our
145
- // changes.
146
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph ();
79
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
147
80
148
- // We compute dedicated AA results for each function in the SCC as needed. We
149
- // use a lambda referencing external objects so that they live long enough to
150
- // be queried, but we re-use them each time.
151
- Optional<BasicAAResult> BAR;
152
- Optional<AAResults> AAR;
153
- auto AARGetter = [&](Function &F) -> AAResults & {
154
- BAR.emplace (createLegacyPMBasicAAResult (*this , F));
155
- AAR.emplace (createLegacyPMAAResults (*this , F, *BAR));
156
- return *AAR;
157
- };
81
+ // ScalarizedElements - If we are promoting a pointer that has elements
82
+ // accessed out of it, keep track of which elements are accessed so that we
83
+ // can add one argument for each.
84
+ //
85
+ // Arguments that are directly loaded will have a zero element value here, to
86
+ // handle cases where there are both a direct load and GEP accesses.
87
+ //
88
+ std::map<Argument*, ScalarizeTable> ScalarizedElements;
158
89
159
- return runImpl (SCC, CG, AARGetter, maxElements);
160
- }
90
+ // OriginalLoads - Keep track of a representative load instruction from the
91
+ // original function so that we can tell the alias analysis implementation
92
+ // what the new GEP/Load instructions we are inserting look like.
93
+ // We need to keep the original loads for each argument and the elements
94
+ // of the argument that are accessed.
95
+ std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
161
96
162
- // / \brief Checks if a type could have padding bytes.
163
- static bool isDenselyPacked (Type *type, const DataLayout &DL) {
97
+ // Attribute - Keep track of the parameter attributes for the arguments
98
+ // that we are *not* promoting. For the ones that we do promote, the parameter
99
+ // attributes are lost
100
+ SmallVector<AttributeSet, 8 > AttributesVec;
101
+ const AttributeSet &PAL = F->getAttributes ();
164
102
165
- // There is no size information, so be conservative.
166
- if (!type->isSized ())
167
- return false ;
103
+ // Add any return attributes.
104
+ if (PAL.hasAttributes (AttributeSet::ReturnIndex))
105
+ AttributesVec.push_back (AttributeSet::get (F->getContext (),
106
+ PAL.getRetAttributes ()));
168
107
169
- // If the alloc size is not equal to the storage size, then there are padding
170
- // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
171
- if (DL.getTypeSizeInBits (type) != DL.getTypeAllocSizeInBits (type))
172
- return false ;
108
+ // First, determine the new argument list
109
+ unsigned ArgIndex = 1 ;
110
+ for (Function::arg_iterator I = F->arg_begin (), E = F->arg_end (); I != E;
111
+ ++I, ++ArgIndex) {
112
+ if (ByValArgsToTransform.count (&*I)) {
113
+ // Simple byval argument? Just add all the struct element types.
114
+ Type *AgTy = cast<PointerType>(I->getType ())->getElementType ();
115
+ StructType *STy = cast<StructType>(AgTy);
116
+ Params.insert (Params.end (), STy->element_begin (), STy->element_end ());
117
+ ++NumByValArgsPromoted;
118
+ } else if (!ArgsToPromote.count (&*I)) {
119
+ // Unchanged argument
120
+ Params.push_back (I->getType ());
121
+ AttributeSet attrs = PAL.getParamAttributes (ArgIndex);
122
+ if (attrs.hasAttributes (ArgIndex)) {
123
+ AttrBuilder B (attrs, ArgIndex);
124
+ AttributesVec.
125
+ push_back (AttributeSet::get (F->getContext (), Params.size (), B));
126
+ }
127
+ } else if (I->use_empty ()) {
128
+ // Dead argument (which are always marked as promotable)
129
+ ++NumArgumentsDead;
130
+ } else {
131
+ // Okay, this is being promoted. This means that the only uses are loads
132
+ // or GEPs which are only used by loads
173
133
174
- if (!isa<CompositeType>(type))
175
- return true ;
134
+ // In this table, we will track which indices are loaded from the argument
135
+ // (where direct loads are tracked as no indices).
136
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
137
+ for (User *U : I->users ()) {
138
+ Instruction *UI = cast<Instruction>(U);
139
+ Type *SrcTy;
140
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
141
+ SrcTy = L->getType ();
142
+ else
143
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType ();
144
+ IndicesVector Indices;
145
+ Indices.reserve (UI->getNumOperands () - 1 );
146
+ // Since loads will only have a single operand, and GEPs only a single
147
+ // non-index operand, this will record direct loads without any indices,
148
+ // and gep+loads with the GEP indices.
149
+ for (User::op_iterator II = UI->op_begin () + 1 , IE = UI->op_end ();
150
+ II != IE; ++II)
151
+ Indices.push_back (cast<ConstantInt>(*II)->getSExtValue ());
152
+ // GEPs with a single 0 index can be merged with direct loads
153
+ if (Indices.size () == 1 && Indices.front () == 0 )
154
+ Indices.clear ();
155
+ ArgIndices.insert (std::make_pair (SrcTy, Indices));
156
+ LoadInst *OrigLoad;
157
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
158
+ OrigLoad = L;
159
+ else
160
+ // Take any load, we will use it only to update Alias Analysis
161
+ OrigLoad = cast<LoadInst>(UI->user_back ());
162
+ OriginalLoads[std::make_pair (&*I, Indices)] = OrigLoad;
163
+ }
176
164
177
- // For homogenous sequential types, check for padding within members.
178
- if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
179
- return isDenselyPacked (seqTy->getElementType (), DL);
165
+ // Add a parameter to the function for each element passed in.
166
+ for (const auto &ArgIndex : ArgIndices) {
167
+ // not allowed to dereference ->begin() if size() is 0
168
+ Params.push_back (GetElementPtrInst::getIndexedType (
169
+ cast<PointerType>(I->getType ()->getScalarType ())->getElementType (),
170
+ ArgIndex.second ));
171
+ assert (Params.back ());
172
+ }
180
173
181
- // Check for padding within and between elements of a struct.
182
- StructType *StructTy = cast<StructType>(type);
183
- const StructLayout *Layout = DL.getStructLayout (StructTy);
184
- uint64_t StartPos = 0 ;
185
- for (unsigned i = 0 , E = StructTy->getNumElements (); i < E; ++i) {
186
- Type *ElTy = StructTy->getElementType (i);
187
- if (!isDenselyPacked (ElTy, DL))
188
- return false ;
189
- if (StartPos != Layout->getElementOffsetInBits (i))
190
- return false ;
191
- StartPos += DL.getTypeAllocSizeInBits (ElTy);
174
+ if (ArgIndices.size () == 1 && ArgIndices.begin ()->second .empty ())
175
+ ++NumArgumentsPromoted;
176
+ else
177
+ ++NumAggregatesPromoted;
178
+ }
192
179
}
193
180
194
- return true ;
195
- }
181
+ // Add any function attributes.
182
+ if (PAL.hasAttributes (AttributeSet::FunctionIndex))
183
+ AttributesVec.push_back (AttributeSet::get (FTy->getContext (),
184
+ PAL.getFnAttributes ()));
196
185
197
- // / \brief Checks if the padding bytes of an argument could be accessed.
198
- static bool canPaddingBeAccessed (Argument *arg) {
186
+ Type *RetTy = FTy->getReturnType ();
199
187
200
- assert (arg->hasByValAttr ());
188
+ // Construct the new function type using the new arguments.
189
+ FunctionType *NFTy = FunctionType::get (RetTy, Params, FTy->isVarArg ());
201
190
202
- // Track all the pointers to the argument to make sure they are not captured .
203
- SmallPtrSet<Value *, 16 > PtrValues ;
204
- PtrValues. insert (arg );
191
+ // Create the new function body and insert it into the module .
192
+ Function *NF = Function::Create (NFTy, F-> getLinkage (), F-> getName ()) ;
193
+ NF-> copyAttributesFrom (F );
205
194
206
- // Track all of the stores.
207
- SmallVector<StoreInst *, 16 > Stores;
195
+ // Patch the pointer to LLVM function in debug info descriptor.
196
+ NF->setSubprogram (F->getSubprogram ());
197
+ F->setSubprogram (nullptr );
208
198
209
- // Scan through the uses recursively to make sure the pointer is always used
210
- // sanely.
211
- SmallVector<Value *, 16 > WorkList;
212
- WorkList.insert (WorkList.end (), arg->user_begin (), arg->user_end ());
213
- while (!WorkList.empty ()) {
214
- Value *V = WorkList.back ();
215
- WorkList.pop_back ();
216
- if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
217
- if (PtrValues.insert (V).second )
218
- WorkList.insert (WorkList.end (), V->user_begin (), V->user_end ());
219
- } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
220
- Stores.push_back (Store);
221
- } else if (!isa<LoadInst>(V)) {
222
- return true ;
223
- }
224
- }
199
+ DEBUG (dbgs () << " ARG PROMOTION: Promoting to:" << *NF << " \n "
200
+ << " From: " << *F);
201
+
202
+ // Recompute the parameter attributes list based on the new arguments for
203
+ // the function.
204
+ NF->setAttributes (AttributeSet::get (F->getContext (), AttributesVec));
205
+ AttributesVec.clear ();
225
206
226
- // Check to make sure the pointers aren't captured
227
- for (StoreInst *Store : Stores)
228
- if (PtrValues.count (Store->getValueOperand ()))
229
- return true ;
207
+ F->getParent ()->getFunctionList ().insert (F->getIterator (), NF);
208
+ NF->takeName (F);
230
209
231
- return false ;
232
- }
210
+ // Get a new callgraph node for NF.
211
+ CallGraphNode *NF_CGN = CG. getOrInsertFunction (NF);
233
212
234
- // / PromoteArguments - This method checks the specified function to see if there
235
- // / are any promotable arguments and if it is safe to promote the function (for
236
- // / example, all callers are direct). If safe to promote some arguments, it
237
- // / calls the DoPromotion method.
238
- // /
239
- static CallGraphNode *
240
- PromoteArguments (CallGraphNode *CGN, CallGraph &CG,
241
- function_ref<AAResults &(Function &F)> AARGetter,
242
- unsigned MaxElements) {
243
- Function *F = CGN->getFunction ();
244
-
245
- // Make sure that it is local to this module.
246
- if (!F || !F->hasLocalLinkage ()) return nullptr ;
247
-
248
- // Don't promote arguments for variadic functions. Adding, removing, or
249
- // changing non-pack parameters can change the classification of pack
250
- // parameters. Frontends encode that classification at the call site in the
251
- // IR, while in the callee the classification is determined dynamically based
252
- // on the number of registers consumed so far.
253
- if (F->isVarArg ()) return nullptr ;
254
-
255
- // First check: see if there are any pointer arguments! If not, quick exit.
256
- SmallVector<Argument*, 16 > PointerArgs;
257
- for (Argument &I : F->args ())
258
- if (I.getType ()->isPointerTy ())
259
- PointerArgs.push_back (&I);
260
- if (PointerArgs.empty ()) return nullptr ;
261
-
262
- // Second check: make sure that all callers are direct callers. We can't
263
- // transform functions that have indirect callers. Also see if the function
264
- // is self-recursive.
265
- bool isSelfRecursive = false ;
266
- for (Use &U : F->uses ()) {
267
- CallSite CS (U.getUser ());
268
- // Must be a direct call.
269
- if (CS.getInstruction () == nullptr || !CS.isCallee (&U)) return nullptr ;
270
-
271
- if (CS.getInstruction ()->getParent ()->getParent () == F)
272
- isSelfRecursive = true ;
273
- }
274
-
275
- const DataLayout &DL = F->getParent ()->getDataLayout ();
276
-
277
- AAResults &AAR = AARGetter (*F);
213
+ // Loop over all of the callers of the function, transforming the call sites
214
+ // to pass in the loaded pointers.
215
+ //
216
+ SmallVector<Value*, 16 > Args;
217
+ while (!F->use_empty ()) {
218
+ CallSite CS (F->user_back ());
219
+ assert (CS.getCalledFunction () == F);
220
+ Instruction *Call = CS.getInstruction ();
221
+ const AttributeSet &CallPAL = CS.getAttributes ();
278
222
279
- // Check to see which arguments are promotable. If an argument is promotable,
280
- // add it to ArgsToPromote.
281
- SmallPtrSet<Argument*, 8 > ArgsToPromote;
282
- SmallPtrSet<Argument*, 8 > ByValArgsToTransform;
283
- for (Argument *PtrArg : PointerArgs) {
284
- Type *AgTy = cast<PointerType>(PtrArg->getType ())->getElementType ();
223
+ // Add any return attributes.
224
+ if (CallPAL.hasAttributes (AttributeSet::ReturnIndex))
225
+ AttributesVec.push_back (AttributeSet::get (F->getContext (),
226
+ CallPAL.getRetAttributes ()));
285
227
286
- // Replace sret attribute with noalias. This reduces register pressure by
287
- // avoiding a register copy.
288
- if (PtrArg->hasStructRetAttr ()) {
289
- unsigned ArgNo = PtrArg->getArgNo ();
290
- F->setAttributes (
291
- F->getAttributes ()
292
- .removeAttribute (F->getContext (), ArgNo + 1 , Attribute::StructRet)
293
- .addAttribute (F->getContext (), ArgNo + 1 , Attribute::NoAlias));
294
- for (Use &U : F->uses ()) {
295
- CallSite CS (U.getUser ());
296
- CS.setAttributes (
297
- CS.getAttributes ()
298
- .removeAttribute (F->getContext (), ArgNo + 1 ,
299
- Attribute::StructRet)
300
- .addAttribute (F->getContext (), ArgNo + 1 , Attribute::NoAlias));
301
- }
302
- }
228
+ // Loop over the operands, inserting GEP and loads in the caller as
229
+ // appropriate.
230
+ CallSite::arg_iterator AI = CS.arg_begin ();
231
+ ArgIndex = 1 ;
232
+ for (Function::arg_iterator I = F->arg_begin (), E = F->arg_end ();
233
+ I != E; ++I, ++AI, ++ArgIndex)
234
+ if (!ArgsToPromote.count (&*I) && !ByValArgsToTransform.count (&*I)) {
235
+ Args.push_back (*AI); // Unmodified argument
303
236
304
- // If this is a byval argument, and if the aggregate type is small, just
305
- // pass the elements, which is always safe, if the passed value is densely
306
- // packed or if we can prove the padding bytes are never accessed. This does
307
- // not apply to inalloca.
308
- bool isSafeToPromote =
309
- PtrArg->hasByValAttr () &&
310
- (isDenselyPacked (AgTy, DL) || !canPaddingBeAccessed (PtrArg));
311
- if (isSafeToPromote) {
312
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
313
- if (MaxElements > 0 && STy->getNumElements () > MaxElements) {
314
- DEBUG (dbgs () << " argpromotion disable promoting argument '"
315
- << PtrArg->getName () << " ' because it would require adding more"
316
- << " than " << MaxElements << " arguments to the function.\n " );
317
- continue ;
237
+ if (CallPAL.hasAttributes (ArgIndex)) {
238
+ AttrBuilder B (CallPAL, ArgIndex);
239
+ AttributesVec.
240
+ push_back (AttributeSet::get (F->getContext (), Args.size (), B));
318
241
}
319
-
320
- // If all the elements are single-value types, we can promote it.
321
- bool AllSimple = true ;
322
- for (const auto *EltTy : STy->elements ()) {
323
- if (!EltTy->isSingleValueType ()) {
324
- AllSimple = false ;
325
- break ;
326
- }
242
+ } else if (ByValArgsToTransform.count (&*I)) {
243
+ // Emit a GEP and load for each element of the struct.
244
+ Type *AgTy = cast<PointerType>(I->getType ())->getElementType ();
245
+ StructType *STy = cast<StructType>(AgTy);
246
+ Value *Idxs[2 ] = {
247
+ ConstantInt::get (Type::getInt32Ty (F->getContext ()), 0 ), nullptr };
248
+ for (unsigned i = 0 , e = STy->getNumElements (); i != e; ++i) {
249
+ Idxs[1 ] = ConstantInt::get (Type::getInt32Ty (F->getContext ()), i);
250
+ Value *Idx = GetElementPtrInst::Create (
251
+ STy, *AI, Idxs, (*AI)->getName () + " ." + Twine (i), Call);
252
+ // TODO: Tell AA about the new values?
253
+ Args.push_back (new LoadInst (Idx, Idx->getName ()+" .val" , Call));
327
254
}
255
+ } else if (!I->use_empty ()) {
256
+ // Non-dead argument: insert GEPs and loads as appropriate.
257
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
258
+ // Store the Value* version of the indices in here, but declare it now
259
+ // for reuse.
260
+ std::vector<Value*> Ops;
261
+ for (const auto &ArgIndex : ArgIndices) {
262
+ Value *V = *AI;
263
+ LoadInst *OrigLoad =
264
+ OriginalLoads[std::make_pair (&*I, ArgIndex.second )];
265
+ if (!ArgIndex.second .empty ()) {
266
+ Ops.reserve (ArgIndex.second .size ());
267
+ Type *ElTy = V->getType ();
268
+ for (unsigned long II : ArgIndex.second ) {
269
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
270
+ // This satisfies GEP constraints.
271
+ Type *IdxTy = (ElTy->isStructTy () ?
272
+ Type::getInt32Ty (F->getContext ()) :
273
+ Type::getInt64Ty (F->getContext ()));
274
+ Ops.push_back (ConstantInt::get (IdxTy, II));
275
+ // Keep track of the type we're currently indexing.
276
+ if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
277
+ ElTy = ElPTy->getElementType ();
278
+ else
279
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex (II);
280
+ }
281
+ // And create a GEP to extract those indices.
282
+ V = GetElementPtrInst::Create (ArgIndex.first , V, Ops,
283
+ V->getName () + " .idx" , Call);
284
+ Ops.clear ();
285
+ }
286
+ // Since we're replacing a load make sure we take the alignment
287
+ // of the previous load.
288
+ LoadInst *newLoad = new LoadInst (V, V->getName ()+" .val" , Call);
289
+ newLoad->setAlignment (OrigLoad->getAlignment ());
290
+ // Transfer the AA info too.
291
+ AAMDNodes AAInfo;
292
+ OrigLoad->getAAMetadata (AAInfo);
293
+ newLoad->setAAMetadata (AAInfo);
328
294
329
- // Safe to transform, don't even bother trying to "promote" it.
330
- // Passing the elements as a scalar will allow sroa to hack on
331
- // the new alloca we introduce.
332
- if (AllSimple) {
333
- ByValArgsToTransform.insert (PtrArg);
334
- continue ;
295
+ Args.push_back (newLoad);
335
296
}
336
297
}
337
- }
338
298
339
- // If the argument is a recursive type and we're in a recursive
340
- // function, we could end up infinitely peeling the function argument.
341
- if (isSelfRecursive) {
342
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
343
- bool RecursiveType = false ;
344
- for (const auto *EltTy : STy->elements ()) {
345
- if (EltTy == PtrArg->getType ()) {
346
- RecursiveType = true ;
347
- break ;
348
- }
349
- }
350
- if (RecursiveType)
351
- continue ;
299
+ // Push any varargs arguments on the list.
300
+ for (; AI != CS.arg_end (); ++AI, ++ArgIndex) {
301
+ Args.push_back (*AI);
302
+ if (CallPAL.hasAttributes (ArgIndex)) {
303
+ AttrBuilder B (CallPAL, ArgIndex);
304
+ AttributesVec.
305
+ push_back (AttributeSet::get (F->getContext (), Args.size (), B));
352
306
}
353
307
}
354
-
355
- // Otherwise, see if we can promote the pointer to its value.
356
- if (isSafeToPromoteArgument (PtrArg, PtrArg->hasByValOrInAllocaAttr (), AAR,
357
- MaxElements))
358
- ArgsToPromote.insert (PtrArg);
359
- }
360
308
361
- // No promotable pointer arguments.
362
- if (ArgsToPromote.empty () && ByValArgsToTransform.empty ())
363
- return nullptr ;
309
+ // Add any function attributes.
310
+ if (CallPAL.hasAttributes (AttributeSet::FunctionIndex))
311
+ AttributesVec.push_back (AttributeSet::get (Call->getContext (),
312
+ CallPAL.getFnAttributes ()));
364
313
365
- return DoPromotion (F, ArgsToPromote, ByValArgsToTransform, CG) ;
366
- }
314
+ SmallVector<OperandBundleDef, 1 > OpBundles ;
315
+ CS. getOperandBundlesAsDefs (OpBundles);
367
316
368
- // / AllCallersPassInValidPointerForArgument - Return true if we can prove that
369
- // / all callees pass in a valid pointer for the specified function argument.
370
- static bool AllCallersPassInValidPointerForArgument (Argument *Arg) {
371
- Function *Callee = Arg->getParent ();
372
- const DataLayout &DL = Callee->getParent ()->getDataLayout ();
317
+ Instruction *New;
318
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
319
+ New = InvokeInst::Create (NF, II->getNormalDest (), II->getUnwindDest (),
320
+ Args, OpBundles, " " , Call);
321
+ cast<InvokeInst>(New)->setCallingConv (CS.getCallingConv ());
322
+ cast<InvokeInst>(New)->setAttributes (AttributeSet::get (II->getContext (),
323
+ AttributesVec));
324
+ } else {
325
+ New = CallInst::Create (NF, Args, OpBundles, " " , Call);
326
+ cast<CallInst>(New)->setCallingConv (CS.getCallingConv ());
327
+ cast<CallInst>(New)->setAttributes (AttributeSet::get (New->getContext (),
328
+ AttributesVec));
329
+ cast<CallInst>(New)->setTailCallKind (
330
+ cast<CallInst>(Call)->getTailCallKind ());
331
+ }
332
+ New->setDebugLoc (Call->getDebugLoc ());
333
+ Args.clear ();
334
+ AttributesVec.clear ();
373
335
374
- unsigned ArgNo = Arg->getArgNo ();
336
+ // Update the callgraph to know that the callsite has been transformed.
337
+ CallGraphNode *CalleeNode = CG[Call->getParent ()->getParent ()];
338
+ CalleeNode->replaceCallEdge (CS, CallSite (New), NF_CGN);
375
339
376
- // Look at all call sites of the function. At this point we know we only have
377
- // direct callees.
378
- for (User *U : Callee->users ()) {
379
- CallSite CS (U);
380
- assert (CS && " Should only have direct calls!" );
340
+ if (!Call->use_empty ()) {
341
+ Call->replaceAllUsesWith (New);
342
+ New->takeName (Call);
343
+ }
381
344
382
- if (!isDereferenceablePointer (CS.getArgument (ArgNo), DL))
383
- return false ;
345
+ // Finally, remove the old call from the program, reducing the use-count of
346
+ // F.
347
+ Call->eraseFromParent ();
384
348
}
385
- return true ;
386
- }
387
349
388
- // / Returns true if Prefix is a prefix of longer. That means, Longer has a size
389
- // / that is greater than or equal to the size of prefix, and each of the
390
- // / elements in Prefix is the same as the corresponding elements in Longer.
391
- // /
392
- // / This means it also returns true when Prefix and Longer are equal!
393
- static bool IsPrefix (const IndicesVector &Prefix, const IndicesVector &Longer) {
394
- if (Prefix.size () > Longer.size ())
350
+ // Since we have now created the new function, splice the body of the old
351
+ // function right into the new function, leaving the old rotting hulk of the
352
+ // function empty.
353
+ NF->getBasicBlockList ().splice (NF->begin (), F->getBasicBlockList ());
354
+
355
+ // Loop over the argument list, transferring uses of the old arguments over to
356
+ // the new arguments, also transferring over the names as well.
357
+ //
358
+ for (Function::arg_iterator I = F->arg_begin (), E = F->arg_end (),
359
+ I2 = NF->arg_begin (); I != E; ++I) {
360
+ if (!ArgsToPromote.count (&*I) && !ByValArgsToTransform.count (&*I)) {
361
+ // If this is an unmodified argument, move the name and users over to the
362
+ // new version.
363
+ I->replaceAllUsesWith (&*I2);
364
+ I2->takeName (&*I);
365
+ ++I2;
366
+ continue ;
367
+ }
368
+
369
+ if (ByValArgsToTransform.count (&*I)) {
370
+ // In the callee, we create an alloca, and store each of the new incoming
371
+ // arguments into the alloca.
372
+ Instruction *InsertPt = &NF->begin ()->front ();
373
+
374
+ // Just add all the struct element types.
375
+ Type *AgTy = cast<PointerType>(I->getType ())->getElementType ();
376
+ Value *TheAlloca = new AllocaInst (AgTy, nullptr , " " , InsertPt);
377
+ StructType *STy = cast<StructType>(AgTy);
378
+ Value *Idxs[2 ] = {
379
+ ConstantInt::get (Type::getInt32Ty (F->getContext ()), 0 ), nullptr };
380
+
381
+ for (unsigned i = 0 , e = STy->getNumElements (); i != e; ++i) {
382
+ Idxs[1 ] = ConstantInt::get (Type::getInt32Ty (F->getContext ()), i);
383
+ Value *Idx = GetElementPtrInst::Create (
384
+ AgTy, TheAlloca, Idxs, TheAlloca->getName () + " ." + Twine (i),
385
+ InsertPt);
386
+ I2->setName (I->getName ()+" ." +Twine (i));
387
+ new StoreInst (&*I2++, Idx, InsertPt);
388
+ }
389
+
390
+ // Anything that used the arg should now use the alloca.
391
+ I->replaceAllUsesWith (TheAlloca);
392
+ TheAlloca->takeName (&*I);
393
+
394
+ // If the alloca is used in a call, we must clear the tail flag since
395
+ // the callee now uses an alloca from the caller.
396
+ for (User *U : TheAlloca->users ()) {
397
+ CallInst *Call = dyn_cast<CallInst>(U);
398
+ if (!Call)
399
+ continue ;
400
+ Call->setTailCall (false );
401
+ }
402
+ continue ;
403
+ }
404
+
405
+ if (I->use_empty ())
406
+ continue ;
407
+
408
+ // Otherwise, if we promoted this argument, then all users are load
409
+ // instructions (or GEPs with only load users), and all loads should be
410
+ // using the new argument that we added.
411
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
412
+
413
+ while (!I->use_empty ()) {
414
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back ())) {
415
+ assert (ArgIndices.begin ()->second .empty () &&
416
+ " Load element should sort to front!" );
417
+ I2->setName (I->getName ()+" .val" );
418
+ LI->replaceAllUsesWith (&*I2);
419
+ LI->eraseFromParent ();
420
+ DEBUG (dbgs () << " *** Promoted load of argument '" << I->getName ()
421
+ << " ' in function '" << F->getName () << " '\n " );
422
+ } else {
423
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back ());
424
+ IndicesVector Operands;
425
+ Operands.reserve (GEP->getNumIndices ());
426
+ for (User::op_iterator II = GEP->idx_begin (), IE = GEP->idx_end ();
427
+ II != IE; ++II)
428
+ Operands.push_back (cast<ConstantInt>(*II)->getSExtValue ());
429
+
430
+ // GEPs with a single 0 index can be merged with direct loads
431
+ if (Operands.size () == 1 && Operands.front () == 0 )
432
+ Operands.clear ();
433
+
434
+ Function::arg_iterator TheArg = I2;
435
+ for (ScalarizeTable::iterator It = ArgIndices.begin ();
436
+ It->second != Operands; ++It, ++TheArg) {
437
+ assert (It != ArgIndices.end () && " GEP not handled??" );
438
+ }
439
+
440
+ std::string NewName = I->getName ();
441
+ for (unsigned i = 0 , e = Operands.size (); i != e; ++i) {
442
+ NewName += " ." + utostr (Operands[i]);
443
+ }
444
+ NewName += " .val" ;
445
+ TheArg->setName (NewName);
446
+
447
+ DEBUG (dbgs () << " *** Promoted agg argument '" << TheArg->getName ()
448
+ << " ' of function '" << NF->getName () << " '\n " );
449
+
450
+ // All of the uses must be load instructions. Replace them all with
451
+ // the argument specified by ArgNo.
452
+ while (!GEP->use_empty ()) {
453
+ LoadInst *L = cast<LoadInst>(GEP->user_back ());
454
+ L->replaceAllUsesWith (&*TheArg);
455
+ L->eraseFromParent ();
456
+ }
457
+ GEP->eraseFromParent ();
458
+ }
459
+ }
460
+
461
+ // Increment I2 past all of the arguments added for this promoted pointer.
462
+ std::advance (I2, ArgIndices.size ());
463
+ }
464
+
465
+ NF_CGN->stealCalledFunctionsFrom (CG[F]);
466
+
467
+ // Now that the old function is dead, delete it. If there is a dangling
468
+ // reference to the CallgraphNode, just leave the dead function around for
469
+ // someone else to nuke.
470
+ CallGraphNode *CGN = CG[F];
471
+ if (CGN->getNumReferences () == 0 )
472
+ delete CG.removeFunctionFromModule (CGN);
473
+ else
474
+ F->setLinkage (Function::ExternalLinkage);
475
+
476
+ return NF_CGN;
477
+ }
478
+
479
+
480
+ // / AllCallersPassInValidPointerForArgument - Return true if we can prove that
481
+ // / all callees pass in a valid pointer for the specified function argument.
482
+ static bool AllCallersPassInValidPointerForArgument (Argument *Arg) {
483
+ Function *Callee = Arg->getParent ();
484
+ const DataLayout &DL = Callee->getParent ()->getDataLayout ();
485
+
486
+ unsigned ArgNo = Arg->getArgNo ();
487
+
488
+ // Look at all call sites of the function. At this point we know we only have
489
+ // direct callees.
490
+ for (User *U : Callee->users ()) {
491
+ CallSite CS (U);
492
+ assert (CS && " Should only have direct calls!" );
493
+
494
+ if (!isDereferenceablePointer (CS.getArgument (ArgNo), DL))
495
+ return false ;
496
+ }
497
+ return true ;
498
+ }
499
+
500
+ // / Returns true if Prefix is a prefix of longer. That means, Longer has a size
501
+ // / that is greater than or equal to the size of prefix, and each of the
502
+ // / elements in Prefix is the same as the corresponding elements in Longer.
503
+ // /
504
+ // / This means it also returns true when Prefix and Longer are equal!
505
+ static bool IsPrefix (const IndicesVector &Prefix, const IndicesVector &Longer) {
506
+ if (Prefix.size () > Longer.size ())
395
507
return false ;
396
508
return std::equal (Prefix.begin (), Prefix.end (), Longer.begin ());
397
509
}
@@ -625,416 +737,290 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
625
737
return true ;
626
738
}
627
739
628
- // / DoPromotion - This method actually performs the promotion of the specified
629
- // / arguments, and returns the new function. At this point, we know that it's
630
- // / safe to do so.
631
- static CallGraphNode *
632
- DoPromotion (Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
633
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
634
740
635
- // Start by computing a new prototype for the function, which is the same as
636
- // the old function, but has modified arguments.
637
- FunctionType *FTy = F->getFunctionType ();
638
- std::vector<Type*> Params;
741
+ // / \brief Checks if a type could have padding bytes.
742
+ static bool isDenselyPacked (Type *type, const DataLayout &DL) {
639
743
640
- typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
744
+ // There is no size information, so be conservative.
745
+ if (!type->isSized ())
746
+ return false ;
641
747
642
- // ScalarizedElements - If we are promoting a pointer that has elements
643
- // accessed out of it, keep track of which elements are accessed so that we
644
- // can add one argument for each.
645
- //
646
- // Arguments that are directly loaded will have a zero element value here, to
647
- // handle cases where there are both a direct load and GEP accesses.
648
- //
649
- std::map<Argument*, ScalarizeTable> ScalarizedElements;
748
+ // If the alloc size is not equal to the storage size, then there are padding
749
+ // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
750
+ if (DL.getTypeSizeInBits (type) != DL.getTypeAllocSizeInBits (type))
751
+ return false ;
650
752
651
- // OriginalLoads - Keep track of a representative load instruction from the
652
- // original function so that we can tell the alias analysis implementation
653
- // what the new GEP/Load instructions we are inserting look like.
654
- // We need to keep the original loads for each argument and the elements
655
- // of the argument that are accessed.
656
- std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
753
+ if (!isa<CompositeType>(type))
754
+ return true ;
657
755
658
- // Attribute - Keep track of the parameter attributes for the arguments
659
- // that we are *not* promoting. For the ones that we do promote, the parameter
660
- // attributes are lost
661
- SmallVector<AttributeSet, 8 > AttributesVec;
662
- const AttributeSet &PAL = F->getAttributes ();
756
+ // For homogenous sequential types, check for padding within members.
757
+ if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
758
+ return isDenselyPacked (seqTy->getElementType (), DL);
663
759
664
- // Add any return attributes.
665
- if (PAL.hasAttributes (AttributeSet::ReturnIndex))
666
- AttributesVec.push_back (AttributeSet::get (F->getContext (),
667
- PAL.getRetAttributes ()));
760
+ // Check for padding within and between elements of a struct.
761
+ StructType *StructTy = cast<StructType>(type);
762
+ const StructLayout *Layout = DL.getStructLayout (StructTy);
763
+ uint64_t StartPos = 0 ;
764
+ for (unsigned i = 0 , E = StructTy->getNumElements (); i < E; ++i) {
765
+ Type *ElTy = StructTy->getElementType (i);
766
+ if (!isDenselyPacked (ElTy, DL))
767
+ return false ;
768
+ if (StartPos != Layout->getElementOffsetInBits (i))
769
+ return false ;
770
+ StartPos += DL.getTypeAllocSizeInBits (ElTy);
771
+ }
668
772
669
- // First, determine the new argument list
670
- unsigned ArgIndex = 1 ;
671
- for (Function::arg_iterator I = F->arg_begin (), E = F->arg_end (); I != E;
672
- ++I, ++ArgIndex) {
673
- if (ByValArgsToTransform.count (&*I)) {
674
- // Simple byval argument? Just add all the struct element types.
675
- Type *AgTy = cast<PointerType>(I->getType ())->getElementType ();
676
- StructType *STy = cast<StructType>(AgTy);
677
- Params.insert (Params.end (), STy->element_begin (), STy->element_end ());
678
- ++NumByValArgsPromoted;
679
- } else if (!ArgsToPromote.count (&*I)) {
680
- // Unchanged argument
681
- Params.push_back (I->getType ());
682
- AttributeSet attrs = PAL.getParamAttributes (ArgIndex);
683
- if (attrs.hasAttributes (ArgIndex)) {
684
- AttrBuilder B (attrs, ArgIndex);
685
- AttributesVec.
686
- push_back (AttributeSet::get (F->getContext (), Params.size (), B));
687
- }
688
- } else if (I->use_empty ()) {
689
- // Dead argument (which are always marked as promotable)
690
- ++NumArgumentsDead;
691
- } else {
692
- // Okay, this is being promoted. This means that the only uses are loads
693
- // or GEPs which are only used by loads
773
+ return true ;
774
+ }
694
775
695
- // In this table, we will track which indices are loaded from the argument
696
- // (where direct loads are tracked as no indices).
697
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
698
- for (User *U : I->users ()) {
699
- Instruction *UI = cast<Instruction>(U);
700
- Type *SrcTy;
701
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
702
- SrcTy = L->getType ();
703
- else
704
- SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType ();
705
- IndicesVector Indices;
706
- Indices.reserve (UI->getNumOperands () - 1 );
707
- // Since loads will only have a single operand, and GEPs only a single
708
- // non-index operand, this will record direct loads without any indices,
709
- // and gep+loads with the GEP indices.
710
- for (User::op_iterator II = UI->op_begin () + 1 , IE = UI->op_end ();
711
- II != IE; ++II)
712
- Indices.push_back (cast<ConstantInt>(*II)->getSExtValue ());
713
- // GEPs with a single 0 index can be merged with direct loads
714
- if (Indices.size () == 1 && Indices.front () == 0 )
715
- Indices.clear ();
716
- ArgIndices.insert (std::make_pair (SrcTy, Indices));
717
- LoadInst *OrigLoad;
718
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
719
- OrigLoad = L;
720
- else
721
- // Take any load, we will use it only to update Alias Analysis
722
- OrigLoad = cast<LoadInst>(UI->user_back ());
723
- OriginalLoads[std::make_pair (&*I, Indices)] = OrigLoad;
724
- }
776
+ // / \brief Checks if the padding bytes of an argument could be accessed.
777
+ static bool canPaddingBeAccessed (Argument *arg) {
725
778
726
- // Add a parameter to the function for each element passed in.
727
- for (const auto &ArgIndex : ArgIndices) {
728
- // not allowed to dereference ->begin() if size() is 0
729
- Params.push_back (GetElementPtrInst::getIndexedType (
730
- cast<PointerType>(I->getType ()->getScalarType ())->getElementType (),
731
- ArgIndex.second ));
732
- assert (Params.back ());
733
- }
779
+ assert (arg->hasByValAttr ());
734
780
735
- if (ArgIndices.size () == 1 && ArgIndices.begin ()->second .empty ())
736
- ++NumArgumentsPromoted;
737
- else
738
- ++NumAggregatesPromoted;
781
+ // Track all the pointers to the argument to make sure they are not captured.
782
+ SmallPtrSet<Value *, 16 > PtrValues;
783
+ PtrValues.insert (arg);
784
+
785
+ // Track all of the stores.
786
+ SmallVector<StoreInst *, 16 > Stores;
787
+
788
+ // Scan through the uses recursively to make sure the pointer is always used
789
+ // sanely.
790
+ SmallVector<Value *, 16 > WorkList;
791
+ WorkList.insert (WorkList.end (), arg->user_begin (), arg->user_end ());
792
+ while (!WorkList.empty ()) {
793
+ Value *V = WorkList.back ();
794
+ WorkList.pop_back ();
795
+ if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
796
+ if (PtrValues.insert (V).second )
797
+ WorkList.insert (WorkList.end (), V->user_begin (), V->user_end ());
798
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
799
+ Stores.push_back (Store);
800
+ } else if (!isa<LoadInst>(V)) {
801
+ return true ;
739
802
}
740
803
}
741
804
742
- // Add any function attributes.
743
- if (PAL.hasAttributes (AttributeSet::FunctionIndex))
744
- AttributesVec.push_back (AttributeSet::get (FTy->getContext (),
745
- PAL.getFnAttributes ()));
746
-
747
- Type *RetTy = FTy->getReturnType ();
805
+ // Check to make sure the pointers aren't captured
806
+ for (StoreInst *Store : Stores)
807
+ if (PtrValues.count (Store->getValueOperand ()))
808
+ return true ;
748
809
749
- // Construct the new function type using the new arguments.
750
- FunctionType *NFTy = FunctionType::get (RetTy, Params, FTy-> isVarArg ());
810
+ return false ;
811
+ }
751
812
752
- // Create the new function body and insert it into the module.
753
- Function *NF = Function::Create (NFTy, F->getLinkage (), F->getName ());
754
- NF->copyAttributesFrom (F);
813
+ // / PromoteArguments - This method checks the specified function to see if there
814
+ // / are any promotable arguments and if it is safe to promote the function (for
815
+ // / example, all callers are direct). If safe to promote some arguments, it
816
+ // / calls the DoPromotion method.
817
+ // /
818
+ static CallGraphNode *
819
+ PromoteArguments (CallGraphNode *CGN, CallGraph &CG,
820
+ function_ref<AAResults &(Function &F)> AARGetter,
821
+ unsigned MaxElements) {
822
+ Function *F = CGN->getFunction ();
755
823
756
- // Patch the pointer to LLVM function in debug info descriptor.
757
- NF->setSubprogram (F->getSubprogram ());
758
- F->setSubprogram (nullptr );
824
+ // Make sure that it is local to this module.
825
+ if (!F || !F->hasLocalLinkage ()) return nullptr ;
759
826
760
- DEBUG (dbgs () << " ARG PROMOTION: Promoting to:" << *NF << " \n "
761
- << " From: " << *F);
762
-
763
- // Recompute the parameter attributes list based on the new arguments for
764
- // the function.
765
- NF->setAttributes (AttributeSet::get (F->getContext (), AttributesVec));
766
- AttributesVec.clear ();
827
+ // Don't promote arguments for variadic functions. Adding, removing, or
828
+ // changing non-pack parameters can change the classification of pack
829
+ // parameters. Frontends encode that classification at the call site in the
830
+ // IR, while in the callee the classification is determined dynamically based
831
+ // on the number of registers consumed so far.
832
+ if (F->isVarArg ()) return nullptr ;
767
833
768
- F->getParent ()->getFunctionList ().insert (F->getIterator (), NF);
769
- NF->takeName (F);
834
+ // First check: see if there are any pointer arguments! If not, quick exit.
835
+ SmallVector<Argument*, 16 > PointerArgs;
836
+ for (Argument &I : F->args ())
837
+ if (I.getType ()->isPointerTy ())
838
+ PointerArgs.push_back (&I);
839
+ if (PointerArgs.empty ()) return nullptr ;
770
840
771
- // Get a new callgraph node for NF.
772
- CallGraphNode *NF_CGN = CG.getOrInsertFunction (NF);
841
+ // Second check: make sure that all callers are direct callers. We can't
842
+ // transform functions that have indirect callers. Also see if the function
843
+ // is self-recursive.
844
+ bool isSelfRecursive = false ;
845
+ for (Use &U : F->uses ()) {
846
+ CallSite CS (U.getUser ());
847
+ // Must be a direct call.
848
+ if (CS.getInstruction () == nullptr || !CS.isCallee (&U)) return nullptr ;
849
+
850
+ if (CS.getInstruction ()->getParent ()->getParent () == F)
851
+ isSelfRecursive = true ;
852
+ }
853
+
854
+ const DataLayout &DL = F->getParent ()->getDataLayout ();
773
855
774
- // Loop over all of the callers of the function, transforming the call sites
775
- // to pass in the loaded pointers.
776
- //
777
- SmallVector<Value*, 16 > Args;
778
- while (!F->use_empty ()) {
779
- CallSite CS (F->user_back ());
780
- assert (CS.getCalledFunction () == F);
781
- Instruction *Call = CS.getInstruction ();
782
- const AttributeSet &CallPAL = CS.getAttributes ();
856
+ AAResults &AAR = AARGetter (*F);
783
857
784
- // Add any return attributes.
785
- if (CallPAL.hasAttributes (AttributeSet::ReturnIndex))
786
- AttributesVec.push_back (AttributeSet::get (F->getContext (),
787
- CallPAL.getRetAttributes ()));
858
+ // Check to see which arguments are promotable. If an argument is promotable,
859
+ // add it to ArgsToPromote.
860
+ SmallPtrSet<Argument*, 8 > ArgsToPromote;
861
+ SmallPtrSet<Argument*, 8 > ByValArgsToTransform;
862
+ for (Argument *PtrArg : PointerArgs) {
863
+ Type *AgTy = cast<PointerType>(PtrArg->getType ())->getElementType ();
788
864
789
- // Loop over the operands, inserting GEP and loads in the caller as
790
- // appropriate.
791
- CallSite::arg_iterator AI = CS.arg_begin ();
792
- ArgIndex = 1 ;
793
- for (Function::arg_iterator I = F->arg_begin (), E = F->arg_end ();
794
- I != E; ++I, ++AI, ++ArgIndex)
795
- if (!ArgsToPromote.count (&*I) && !ByValArgsToTransform.count (&*I)) {
796
- Args.push_back (*AI); // Unmodified argument
865
+ // Replace sret attribute with noalias. This reduces register pressure by
866
+ // avoiding a register copy.
867
+ if (PtrArg->hasStructRetAttr ()) {
868
+ unsigned ArgNo = PtrArg->getArgNo ();
869
+ F->setAttributes (
870
+ F->getAttributes ()
871
+ .removeAttribute (F->getContext (), ArgNo + 1 , Attribute::StructRet)
872
+ .addAttribute (F->getContext (), ArgNo + 1 , Attribute::NoAlias));
873
+ for (Use &U : F->uses ()) {
874
+ CallSite CS (U.getUser ());
875
+ CS.setAttributes (
876
+ CS.getAttributes ()
877
+ .removeAttribute (F->getContext (), ArgNo + 1 ,
878
+ Attribute::StructRet)
879
+ .addAttribute (F->getContext (), ArgNo + 1 , Attribute::NoAlias));
880
+ }
881
+ }
797
882
798
- if (CallPAL.hasAttributes (ArgIndex)) {
799
- AttrBuilder B (CallPAL, ArgIndex);
800
- AttributesVec.
801
- push_back (AttributeSet::get (F->getContext (), Args.size (), B));
802
- }
803
- } else if (ByValArgsToTransform.count (&*I)) {
804
- // Emit a GEP and load for each element of the struct.
805
- Type *AgTy = cast<PointerType>(I->getType ())->getElementType ();
806
- StructType *STy = cast<StructType>(AgTy);
807
- Value *Idxs[2 ] = {
808
- ConstantInt::get (Type::getInt32Ty (F->getContext ()), 0 ), nullptr };
809
- for (unsigned i = 0 , e = STy->getNumElements (); i != e; ++i) {
810
- Idxs[1 ] = ConstantInt::get (Type::getInt32Ty (F->getContext ()), i);
811
- Value *Idx = GetElementPtrInst::Create (
812
- STy, *AI, Idxs, (*AI)->getName () + " ." + Twine (i), Call);
813
- // TODO: Tell AA about the new values?
814
- Args.push_back (new LoadInst (Idx, Idx->getName ()+" .val" , Call));
883
+ // If this is a byval argument, and if the aggregate type is small, just
884
+ // pass the elements, which is always safe, if the passed value is densely
885
+ // packed or if we can prove the padding bytes are never accessed. This does
886
+ // not apply to inalloca.
887
+ bool isSafeToPromote =
888
+ PtrArg->hasByValAttr () &&
889
+ (isDenselyPacked (AgTy, DL) || !canPaddingBeAccessed (PtrArg));
890
+ if (isSafeToPromote) {
891
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
892
+ if (MaxElements > 0 && STy->getNumElements () > MaxElements) {
893
+ DEBUG (dbgs () << " argpromotion disable promoting argument '"
894
+ << PtrArg->getName () << " ' because it would require adding more"
895
+ << " than " << MaxElements << " arguments to the function.\n " );
896
+ continue ;
815
897
}
816
- } else if (!I->use_empty ()) {
817
- // Non-dead argument: insert GEPs and loads as appropriate.
818
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
819
- // Store the Value* version of the indices in here, but declare it now
820
- // for reuse.
821
- std::vector<Value*> Ops;
822
- for (const auto &ArgIndex : ArgIndices) {
823
- Value *V = *AI;
824
- LoadInst *OrigLoad =
825
- OriginalLoads[std::make_pair (&*I, ArgIndex.second )];
826
- if (!ArgIndex.second .empty ()) {
827
- Ops.reserve (ArgIndex.second .size ());
828
- Type *ElTy = V->getType ();
829
- for (unsigned long II : ArgIndex.second ) {
830
- // Use i32 to index structs, and i64 for others (pointers/arrays).
831
- // This satisfies GEP constraints.
832
- Type *IdxTy = (ElTy->isStructTy () ?
833
- Type::getInt32Ty (F->getContext ()) :
834
- Type::getInt64Ty (F->getContext ()));
835
- Ops.push_back (ConstantInt::get (IdxTy, II));
836
- // Keep track of the type we're currently indexing.
837
- if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
838
- ElTy = ElPTy->getElementType ();
839
- else
840
- ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex (II);
841
- }
842
- // And create a GEP to extract those indices.
843
- V = GetElementPtrInst::Create (ArgIndex.first , V, Ops,
844
- V->getName () + " .idx" , Call);
845
- Ops.clear ();
898
+
899
+ // If all the elements are single-value types, we can promote it.
900
+ bool AllSimple = true ;
901
+ for (const auto *EltTy : STy->elements ()) {
902
+ if (!EltTy->isSingleValueType ()) {
903
+ AllSimple = false ;
904
+ break ;
846
905
}
847
- // Since we're replacing a load make sure we take the alignment
848
- // of the previous load.
849
- LoadInst *newLoad = new LoadInst (V, V->getName ()+" .val" , Call);
850
- newLoad->setAlignment (OrigLoad->getAlignment ());
851
- // Transfer the AA info too.
852
- AAMDNodes AAInfo;
853
- OrigLoad->getAAMetadata (AAInfo);
854
- newLoad->setAAMetadata (AAInfo);
906
+ }
855
907
856
- Args.push_back (newLoad);
908
+ // Safe to transform, don't even bother trying to "promote" it.
909
+ // Passing the elements as a scalar will allow sroa to hack on
910
+ // the new alloca we introduce.
911
+ if (AllSimple) {
912
+ ByValArgsToTransform.insert (PtrArg);
913
+ continue ;
857
914
}
858
915
}
916
+ }
859
917
860
- // Push any varargs arguments on the list.
861
- for (; AI != CS.arg_end (); ++AI, ++ArgIndex) {
862
- Args.push_back (*AI);
863
- if (CallPAL.hasAttributes (ArgIndex)) {
864
- AttrBuilder B (CallPAL, ArgIndex);
865
- AttributesVec.
866
- push_back (AttributeSet::get (F->getContext (), Args.size (), B));
918
+ // If the argument is a recursive type and we're in a recursive
919
+ // function, we could end up infinitely peeling the function argument.
920
+ if (isSelfRecursive) {
921
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
922
+ bool RecursiveType = false ;
923
+ for (const auto *EltTy : STy->elements ()) {
924
+ if (EltTy == PtrArg->getType ()) {
925
+ RecursiveType = true ;
926
+ break ;
927
+ }
928
+ }
929
+ if (RecursiveType)
930
+ continue ;
867
931
}
868
932
}
933
+
934
+ // Otherwise, see if we can promote the pointer to its value.
935
+ if (isSafeToPromoteArgument (PtrArg, PtrArg->hasByValOrInAllocaAttr (), AAR,
936
+ MaxElements))
937
+ ArgsToPromote.insert (PtrArg);
938
+ }
869
939
870
- // Add any function attributes.
871
- if (CallPAL.hasAttributes (AttributeSet::FunctionIndex))
872
- AttributesVec.push_back (AttributeSet::get (Call->getContext (),
873
- CallPAL.getFnAttributes ()));
874
-
875
- SmallVector<OperandBundleDef, 1 > OpBundles;
876
- CS.getOperandBundlesAsDefs (OpBundles);
877
-
878
- Instruction *New;
879
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
880
- New = InvokeInst::Create (NF, II->getNormalDest (), II->getUnwindDest (),
881
- Args, OpBundles, " " , Call);
882
- cast<InvokeInst>(New)->setCallingConv (CS.getCallingConv ());
883
- cast<InvokeInst>(New)->setAttributes (AttributeSet::get (II->getContext (),
884
- AttributesVec));
885
- } else {
886
- New = CallInst::Create (NF, Args, OpBundles, " " , Call);
887
- cast<CallInst>(New)->setCallingConv (CS.getCallingConv ());
888
- cast<CallInst>(New)->setAttributes (AttributeSet::get (New->getContext (),
889
- AttributesVec));
890
- cast<CallInst>(New)->setTailCallKind (
891
- cast<CallInst>(Call)->getTailCallKind ());
892
- }
893
- New->setDebugLoc (Call->getDebugLoc ());
894
- Args.clear ();
895
- AttributesVec.clear ();
940
+ // No promotable pointer arguments.
941
+ if (ArgsToPromote.empty () && ByValArgsToTransform.empty ())
942
+ return nullptr ;
896
943
897
- // Update the callgraph to know that the callsite has been transformed.
898
- CallGraphNode *CalleeNode = CG[Call->getParent ()->getParent ()];
899
- CalleeNode->replaceCallEdge (CS, CallSite (New), NF_CGN);
944
+ return DoPromotion (F, ArgsToPromote, ByValArgsToTransform, CG);
945
+ }
900
946
901
- if (!Call->use_empty ()) {
902
- Call->replaceAllUsesWith (New);
903
- New->takeName (Call);
947
+ namespace {
948
+ // / ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
949
+ // /
950
+ struct ArgPromotion : public CallGraphSCCPass {
951
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
952
+ AU.addRequired <AssumptionCacheTracker>();
953
+ AU.addRequired <TargetLibraryInfoWrapperPass>();
954
+ getAAResultsAnalysisUsage (AU);
955
+ CallGraphSCCPass::getAnalysisUsage (AU);
904
956
}
905
957
906
- // Finally, remove the old call from the program, reducing the use-count of
907
- // F.
908
- Call->eraseFromParent ();
909
- }
910
-
911
- // Since we have now created the new function, splice the body of the old
912
- // function right into the new function, leaving the old rotting hulk of the
913
- // function empty.
914
- NF->getBasicBlockList ().splice (NF->begin (), F->getBasicBlockList ());
915
-
916
- // Loop over the argument list, transferring uses of the old arguments over to
917
- // the new arguments, also transferring over the names as well.
918
- //
919
- for (Function::arg_iterator I = F->arg_begin (), E = F->arg_end (),
920
- I2 = NF->arg_begin (); I != E; ++I) {
921
- if (!ArgsToPromote.count (&*I) && !ByValArgsToTransform.count (&*I)) {
922
- // If this is an unmodified argument, move the name and users over to the
923
- // new version.
924
- I->replaceAllUsesWith (&*I2);
925
- I2->takeName (&*I);
926
- ++I2;
927
- continue ;
958
+ bool runOnSCC (CallGraphSCC &SCC) override ;
959
+ static char ID; // Pass identification, replacement for typeid
960
+ explicit ArgPromotion (unsigned MaxElements = 3 )
961
+ : CallGraphSCCPass(ID), MaxElements(MaxElements) {
962
+ initializeArgPromotionPass (*PassRegistry::getPassRegistry ());
928
963
}
929
964
930
- if (ByValArgsToTransform.count (&*I)) {
931
- // In the callee, we create an alloca, and store each of the new incoming
932
- // arguments into the alloca.
933
- Instruction *InsertPt = &NF->begin ()->front ();
934
-
935
- // Just add all the struct element types.
936
- Type *AgTy = cast<PointerType>(I->getType ())->getElementType ();
937
- Value *TheAlloca = new AllocaInst (AgTy, nullptr , " " , InsertPt);
938
- StructType *STy = cast<StructType>(AgTy);
939
- Value *Idxs[2 ] = {
940
- ConstantInt::get (Type::getInt32Ty (F->getContext ()), 0 ), nullptr };
941
-
942
- for (unsigned i = 0 , e = STy->getNumElements (); i != e; ++i) {
943
- Idxs[1 ] = ConstantInt::get (Type::getInt32Ty (F->getContext ()), i);
944
- Value *Idx = GetElementPtrInst::Create (
945
- AgTy, TheAlloca, Idxs, TheAlloca->getName () + " ." + Twine (i),
946
- InsertPt);
947
- I2->setName (I->getName ()+" ." +Twine (i));
948
- new StoreInst (&*I2++, Idx, InsertPt);
949
- }
950
-
951
- // Anything that used the arg should now use the alloca.
952
- I->replaceAllUsesWith (TheAlloca);
953
- TheAlloca->takeName (&*I);
954
-
955
- // If the alloca is used in a call, we must clear the tail flag since
956
- // the callee now uses an alloca from the caller.
957
- for (User *U : TheAlloca->users ()) {
958
- CallInst *Call = dyn_cast<CallInst>(U);
959
- if (!Call)
960
- continue ;
961
- Call->setTailCall (false );
962
- }
963
- continue ;
964
- }
965
+ private:
965
966
966
- if (I->use_empty ())
967
- continue ;
967
+ using llvm::Pass::doInitialization;
968
+ bool doInitialization (CallGraph &CG) override ;
969
+ // / The maximum number of elements to expand, or 0 for unlimited.
970
+ unsigned MaxElements;
971
+ };
972
+ }
968
973
969
- // Otherwise, if we promoted this argument, then all users are load
970
- // instructions (or GEPs with only load users), and all loads should be
971
- // using the new argument that we added.
972
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
974
+ char ArgPromotion::ID = 0 ;
975
+ INITIALIZE_PASS_BEGIN (ArgPromotion, " argpromotion" ,
976
+ " Promote 'by reference' arguments to scalars" , false , false )
977
+ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
978
+ INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
979
+ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
980
+ INITIALIZE_PASS_END(ArgPromotion, " argpromotion" ,
981
+ " Promote 'by reference' arguments to scalars" , false , false )
973
982
974
- while (!I->use_empty ()) {
975
- if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back ())) {
976
- assert (ArgIndices.begin ()->second .empty () &&
977
- " Load element should sort to front!" );
978
- I2->setName (I->getName ()+" .val" );
979
- LI->replaceAllUsesWith (&*I2);
980
- LI->eraseFromParent ();
981
- DEBUG (dbgs () << " *** Promoted load of argument '" << I->getName ()
982
- << " ' in function '" << F->getName () << " '\n " );
983
- } else {
984
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back ());
985
- IndicesVector Operands;
986
- Operands.reserve (GEP->getNumIndices ());
987
- for (User::op_iterator II = GEP->idx_begin (), IE = GEP->idx_end ();
988
- II != IE; ++II)
989
- Operands.push_back (cast<ConstantInt>(*II)->getSExtValue ());
983
+ Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
984
+ return new ArgPromotion (MaxElements);
985
+ }
990
986
991
- // GEPs with a single 0 index can be merged with direct loads
992
- if (Operands. size () == 1 && Operands. front () == 0 )
993
- Operands. clear () ;
987
+ bool ArgPromotion::runOnSCC (CallGraphSCC &SCC) {
988
+ if (skipSCC (SCC) )
989
+ return false ;
994
990
995
- Function::arg_iterator TheArg = I2;
996
- for (ScalarizeTable::iterator It = ArgIndices.begin ();
997
- It->second != Operands; ++It, ++TheArg) {
998
- assert (It != ArgIndices.end () && " GEP not handled??" );
999
- }
991
+ // Get the callgraph information that we need to update to reflect our
992
+ // changes.
993
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph ();
1000
994
1001
- std::string NewName = I->getName ();
1002
- for (unsigned i = 0 , e = Operands.size (); i != e; ++i) {
1003
- NewName += " ." + utostr (Operands[i]);
1004
- }
1005
- NewName += " .val" ;
1006
- TheArg->setName (NewName);
995
+ // We compute dedicated AA results for each function in the SCC as needed. We
996
+ // use a lambda referencing external objects so that they live long enough to
997
+ // be queried, but we re-use them each time.
998
+ Optional<BasicAAResult> BAR;
999
+ Optional<AAResults> AAR;
1000
+ auto AARGetter = [&](Function &F) -> AAResults & {
1001
+ BAR.emplace (createLegacyPMBasicAAResult (*this , F));
1002
+ AAR.emplace (createLegacyPMAAResults (*this , F, *BAR));
1003
+ return *AAR;
1004
+ };
1007
1005
1008
- DEBUG (dbgs () << " *** Promoted agg argument '" << TheArg->getName ()
1009
- << " ' of function '" << NF->getName () << " '\n " );
1006
+ bool Changed = false , LocalChange;
1010
1007
1011
- // All of the uses must be load instructions. Replace them all with
1012
- // the argument specified by ArgNo.
1013
- while (!GEP->use_empty ()) {
1014
- LoadInst *L = cast<LoadInst>(GEP->user_back ());
1015
- L->replaceAllUsesWith (&*TheArg);
1016
- L->eraseFromParent ();
1017
- }
1018
- GEP->eraseFromParent ();
1008
+ // Iterate until we stop promoting from this SCC.
1009
+ do {
1010
+ LocalChange = false ;
1011
+ // Attempt to promote arguments from all functions in this SCC.
1012
+ for (CallGraphNode *OldNode : SCC) {
1013
+ if (CallGraphNode *NewNode =
1014
+ PromoteArguments (OldNode, CG, AARGetter, MaxElements)) {
1015
+ LocalChange = true ;
1016
+ SCC.ReplaceNode (OldNode, NewNode);
1019
1017
}
1020
1018
}
1019
+ // Remember that we changed something.
1020
+ Changed |= LocalChange;
1021
+ } while (LocalChange);
1021
1022
1022
- // Increment I2 past all of the arguments added for this promoted pointer.
1023
- std::advance (I2, ArgIndices.size ());
1024
- }
1025
-
1026
- NF_CGN->stealCalledFunctionsFrom (CG[F]);
1027
-
1028
- // Now that the old function is dead, delete it. If there is a dangling
1029
- // reference to the CallgraphNode, just leave the dead function around for
1030
- // someone else to nuke.
1031
- CallGraphNode *CGN = CG[F];
1032
- if (CGN->getNumReferences () == 0 )
1033
- delete CG.removeFunctionFromModule (CGN);
1034
- else
1035
- F->setLinkage (Function::ExternalLinkage);
1036
-
1037
- return NF_CGN;
1023
+ return Changed;
1038
1024
}
1039
1025
1040
1026
bool ArgPromotion::doInitialization (CallGraph &CG) {
0 commit comments