diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -28,6 +28,8 @@
   ArgumentPromotionPass(unsigned MaxElements = 2u) : MaxElements(MaxElements) {}
 
   /// Checks if a type could have padding bytes.
+  // TODO: this function isn't used in the ArgumentPromotionPass anymore and
+  // should be moved into AttributorAttributes.cpp, its single known user.
   static bool isDenselyPacked(Type *Ty, const DataLayout &DL);
 
   PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,6 +29,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
+
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
@@ -56,6 +57,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
@@ -75,6 +77,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -86,7 +89,6 @@
 #define DEBUG_TYPE "argpromotion"
 
 STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
-STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
 STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
 
 namespace {
@@ -94,9 +96,9 @@
 struct ArgPart {
   Type *Ty;
   Align Alignment;
-  /// A representative guaranteed-executed load instruction for use by
+  /// A representative guaranteed-executed load or store instruction for use by
   /// metadata transfer.
-  LoadInst *MustExecLoad;
+  Instruction *MustExecInstr;
 };
 
 using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
@@ -154,9 +156,9 @@
 /// arguments, and returns the new function. At this point, we know that it's
 /// safe to do so.
 static Function *doPromotion(
-    Function *F,
+    Function *F, function_ref<DominatorTree &(Function &F)> DTGetter,
+    function_ref<AssumptionCache *(Function &F)> ACGetter,
     const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> &ArgsToPromote,
-    SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
    Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
        ReplaceCallSite) {
 
   // Start by computing a new prototype for the function, which is the same as
@@ -174,15 +176,7 @@
   unsigned ArgNo = 0;
   for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
        ++I, ++ArgNo) {
-    if (ByValArgsToTransform.count(&*I)) {
-      // Simple byval argument? Just add all the struct element types.
-      Type *AgTy = I->getParamByValType();
-      StructType *STy = cast<StructType>(AgTy);
-      llvm::append_range(Params, STy->elements());
-      ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
-                        AttributeSet());
-      ++NumByValArgsPromoted;
-    } else if (!ArgsToPromote.count(&*I)) {
+    if (!ArgsToPromote.count(&*I)) {
       // Unchanged argument
       Params.push_back(I->getType());
       ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
@@ -250,29 +244,10 @@
     auto *AI = CB.arg_begin();
     ArgNo = 0;
     for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
-         ++I, ++AI, ++ArgNo)
-      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+         ++I, ++AI, ++ArgNo) {
+      if (!ArgsToPromote.count(&*I)) {
         Args.push_back(*AI); // Unmodified argument
         ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
-      } else if (ByValArgsToTransform.count(&*I)) {
-        // Emit a GEP and load for each element of the struct.
-        Type *AgTy = I->getParamByValType();
-        StructType *STy = cast<StructType>(AgTy);
-        Value *Idxs[2] = {
-            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
-        const StructLayout *SL = DL.getStructLayout(STy);
-        Align StructAlign = *I->getParamAlign();
-        for (unsigned J = 0, Elems = STy->getNumElements(); J != Elems; ++J) {
-          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), J);
-          auto *Idx =
-              IRB.CreateGEP(STy, *AI, Idxs, (*AI)->getName() + "." + Twine(J));
-          // TODO: Tell AA about the new values?
-          Align Alignment =
-              commonAlignment(StructAlign, SL->getElementOffset(J));
-          Args.push_back(IRB.CreateAlignedLoad(
-              STy->getElementType(J), Idx, Alignment, Idx->getName() + ".val"));
-          ArgAttrVec.push_back(AttributeSet());
-        }
       } else if (!I->use_empty()) {
         Value *V = *AI;
         const auto &ArgParts = ArgsToPromote.find(&*I)->second;
@@ -281,9 +256,9 @@
             Pair.second.Ty,
             createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
             Pair.second.Alignment, V->getName() + ".val");
-        if (Pair.second.MustExecLoad) {
-          LI->setAAMetadata(Pair.second.MustExecLoad->getAAMetadata());
-          LI->copyMetadata(*Pair.second.MustExecLoad,
+        if (Pair.second.MustExecInstr) {
+          LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
+          LI->copyMetadata(*Pair.second.MustExecInstr,
                            {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                             LLVMContext::MD_dereferenceable,
                             LLVMContext::MD_dereferenceable_or_null,
@@ -293,6 +268,7 @@
           ArgAttrVec.push_back(AttributeSet());
         }
       }
+    }
 
     // Push any varargs arguments on the list.
     for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
@@ -342,11 +318,15 @@
   // function empty.
   NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
 
+  // We will collect all the newly created allocas to promote them into
+  // registers after the following loop.
+  SmallVector<AllocaInst *, 4> Allocas;
+
   // Loop over the argument list, transferring uses of the old arguments over to
   // the new arguments, also transferring over the names as well.
   Function::arg_iterator I2 = NF->arg_begin();
   for (Argument &Arg : F->args()) {
-    if (!ArgsToPromote.count(&Arg) && !ByValArgsToTransform.count(&Arg)) {
+    if (!ArgsToPromote.count(&Arg)) {
       // If this is an unmodified argument, move the name and users over to the
       // new version.
       Arg.replaceAllUsesWith(&*I2);
@@ -355,37 +335,6 @@
       continue;
     }
 
-    if (ByValArgsToTransform.count(&Arg)) {
-      // In the callee, we create an alloca, and store each of the new incoming
-      // arguments into the alloca.
-      Instruction *InsertPt = &NF->begin()->front();
-
-      // Just add all the struct element types.
-      Type *AgTy = Arg.getParamByValType();
-      Align StructAlign = *Arg.getParamAlign();
-      Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
-                                        StructAlign, "", InsertPt);
-      StructType *STy = cast<StructType>(AgTy);
-      Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
-                        nullptr};
-      const StructLayout *SL = DL.getStructLayout(STy);
-
-      for (unsigned J = 0, Elems = STy->getNumElements(); J != Elems; ++J) {
-        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), J);
-        Value *Idx = GetElementPtrInst::Create(
-            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(J),
-            InsertPt);
-        I2->setName(Arg.getName() + "." + Twine(J));
-        Align Alignment = commonAlignment(StructAlign, SL->getElementOffset(J));
-        new StoreInst(&*I2++, Idx, false, Alignment, InsertPt);
-      }
-
-      // Anything that used the arg should now use the alloca.
-      Arg.replaceAllUsesWith(TheAlloca);
-      TheAlloca->takeName(&Arg);
-      continue;
-    }
-
     // There potentially are metadata uses for things like llvm.dbg.value.
     // Replace them with undef, after handling the other regular uses.
     auto RauwUndefMetadata = make_scope_exit(
@@ -394,16 +343,45 @@
     if (Arg.use_empty())
       continue;
 
-    SmallDenseMap<int64_t, Argument *> OffsetToArg;
+    // Otherwise, if we promoted this argument, we have to create an alloca in
+    // the callee for every promotable part and store each of the new incoming
+    // arguments into the corresponding alloca, which gives the old code (in
+    // particular the store instructions, if they are allowed) a chance to
+    // work as before.
+    assert(Arg.getType()->isPointerTy() &&
+           "Only arguments with a pointer type are promotable");
+
+    IRBuilder<NoFolder> IRB(&NF->begin()->front());
+
+    // Add only the promoted parts, i.e. the parts from ArgsToPromote.
+    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
     for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
-      Argument &NewArg = *I2++;
-      NewArg.setName(Arg.getName() + "." + Twine(Pair.first) + ".val");
-      OffsetToArg.insert({Pair.first, &NewArg});
+      int64_t Offset = Pair.first;
+      const ArgPart &Part = Pair.second;
+
+      Argument *NewArg = I2++;
+      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");
+
+      AllocaInst *NewAlloca = IRB.CreateAlloca(
+          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
+      NewAlloca->setAlignment(Pair.second.Alignment);
+      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);
+
+      // Collect the alloca to retarget the users to.
+      OffsetToAlloca.insert({Offset, NewAlloca});
     }
 
-    // Otherwise, if we promoted this argument, then all users are load
-    // instructions (with possible casts and GEPs in between).
+    auto GetAlloca = [&](Value *Ptr) {
+      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
+                                                   /* AllowNonInbounds */ true);
+      assert(Ptr == &Arg && "Not constant offset from arg?");
+      return OffsetToAlloca.lookup(Offset.getSExtValue());
+    };
 
+    // Clean up the dead instructions (GEPs and BitCasts between the original
+    // argument and its end users, i.e. loads and stores) and retarget every
+    // user to the newly created alloca.
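+    // For example, a "load i32, i32* %gep" where %gep is a constant offset of
+    // 8 from %arg is redirected to the alloca recorded for offset 8:
+    // GetAlloca strips the constant offsets from the pointer operand and
+    // looks the accumulated offset up in OffsetToAlloca.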
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
@@ -417,13 +395,14 @@
 
       if (auto *LI = dyn_cast<LoadInst>(V)) {
         Value *Ptr = LI->getPointerOperand();
-        APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
-        Ptr =
-            Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
-                                                   /* AllowNonInbounds */ true);
-        assert(Ptr == &Arg && "Not constant offset from arg?");
-        LI->replaceAllUsesWith(OffsetToArg[Offset.getSExtValue()]);
-        DeadInsts.push_back(LI);
+        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
+        continue;
+      }
+
+      if (auto *SI = dyn_cast<StoreInst>(V)) {
+        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
+        Value *Ptr = SI->getPointerOperand();
+        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
         continue;
       }
 
@@ -434,6 +413,23 @@
       I->replaceAllUsesWith(PoisonValue::get(I->getType()));
       I->eraseFromParent();
     }
+
+    // Collect the allocas for promotion.
+    for (const auto &Pair : OffsetToAlloca) {
+      assert(isAllocaPromotable(Pair.second) &&
+             "By design, only promotable allocas should be produced.");
+      Allocas.push_back(Pair.second);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
+                    << " alloca(s) are promotable by Mem2Reg\n");
+
+  if (!Allocas.empty()) {
+    // Promote the collected allocas into registers; our earlier checks have
+    // ensured that PromoteMemToReg() will succeed.
+    PromoteMemToReg(Allocas, DTGetter(*NF), ACGetter(*NF));
+  }
 
   return NF;
@@ -456,8 +452,8 @@
   // direct callees.
   return all_of(Callee->users(), [&](User *U) {
     CallBase &CB = cast<CallBase>(*U);
-    return isDereferenceableAndAlignedPointer(
-        CB.getArgOperand(Arg->getArgNo()), NeededAlign, Bytes, DL);
+    return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
+                                              NeededAlign, Bytes, DL);
   });
 }
 
@@ -470,7 +466,7 @@
   if (Arg->use_empty())
     return true;
 
-  // We can only promote this argument if all of the uses are loads at known
+  // We can only promote this argument if all the uses are loads at known
   // offsets.
   //
   // Promoting the argument causes it to be loaded in the caller
@@ -487,15 +483,22 @@
   Align NeededAlign(1);
   uint64_t NeededDerefBytes = 0;
 
-  // Returns None if this load is not based on the argument. Return true if
-  // we can promote the load, false otherwise.
-  auto HandleLoad = [&](LoadInst *LI,
-                        bool GuaranteedToExecute) -> Optional<bool> {
-    // Don't promote volatile or atomic loads.
-    if (!LI->isSimple())
+  // And if this is a byval argument, we also allow store instructions.
+  // Only handle arguments with a specified alignment in this way; if the
+  // alignment is unspecified, the actual alignment of the argument is
+  // target-specific.
+  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();
+
+  // An end user of a pointer argument is a load or store instruction.
+  // Returns None if this load or store is not based on the argument. Returns
+  // true if we can promote the instruction, false otherwise.
+  auto HandleEndUser = [&](auto *I, Type *Ty,
+                           bool GuaranteedToExecute) -> Optional<bool> {
+    // Don't promote volatile or atomic instructions.
+    if (!I->isSimple())
       return false;
 
-    Value *Ptr = LI->getPointerOperand();
+    Value *Ptr = I->getPointerOperand();
     APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
     Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                  /* AllowNonInbounds */ true);
@@ -505,7 +508,6 @@
     if (Offset.getSignificantBits() >= 64)
       return false;
 
-    Type *Ty = LI->getType();
     TypeSize Size = DL.getTypeStoreSize(Ty);
 
     // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;
@@ -518,7 +520,7 @@
     int64_t Off = Offset.getSExtValue();
     auto Pair = ArgParts.try_emplace(
-        Off, ArgPart{Ty, LI->getAlign(), GuaranteedToExecute ? LI : nullptr});
+        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
     ArgPart &Part = Pair.first->second;
     bool OffsetNotSeenBefore = Pair.second;
@@ -530,44 +532,49 @@
       return false;
     }
 
-    // For now, we only support loading one specific type at a given offset.
+    // For now, we only support loading/storing one specific type at a given
+    // offset.
     if (Part.Ty != Ty) {
       LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
-                        << "loaded via both " << *Part.Ty << " and " << *Ty
+                        << "accessed as both " << *Part.Ty << " and " << *Ty
                         << " at offset " << Off << "\n");
       return false;
     }
 
-    // If this load is not guaranteed to execute, and we haven't seen a load at
-    // this offset before (or it had lower alignment), then we need to remember
-    // that requirement.
-    // Note that skipping loads of previously seen offsets is only correct
-    // because we only allow a single type for a given offset, which also means
-    // that the number of accessed bytes will be the same.
+    // If this instruction is not guaranteed to execute, and we haven't seen a
+    // load or store at this offset before (or it had lower alignment), then we
+    // need to remember that requirement.
+    // Note that skipping instructions of previously seen offsets is only
+    // correct because we only allow a single type for a given offset, which
+    // also means that the number of accessed bytes will be the same.
     if (!GuaranteedToExecute &&
-        (OffsetNotSeenBefore || Part.Alignment < LI->getAlign())) {
+        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
       // We won't be able to prove dereferenceability for negative offsets.
       if (Off < 0)
         return false;
 
       // If the offset is not aligned, an aligned base pointer won't help.
-      if (!isAligned(LI->getAlign(), Off))
+      if (!isAligned(I->getAlign(), Off))
        return false;
 
       NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
-      NeededAlign = std::max(NeededAlign, LI->getAlign());
+      NeededAlign = std::max(NeededAlign, I->getAlign());
     }
 
-    Part.Alignment = std::max(Part.Alignment, LI->getAlign());
+    Part.Alignment = std::max(Part.Alignment, I->getAlign());
     return true;
   };
 
-  // Look for loads that are guaranteed to execute on entry.
+  // Look for loads and stores that are guaranteed to execute on entry.
   for (Instruction &I : Arg->getParent()->getEntryBlock()) {
+    Optional<bool> Res{};
     if (LoadInst *LI = dyn_cast<LoadInst>(&I))
-      if (Optional<bool> Res = HandleLoad(LI, /* GuaranteedToExecute */ true))
-        if (!*Res)
-          return false;
+      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
+    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
+                          /* GuaranteedToExecute */ true);
+    if (Res && !*Res)
+      return false;
 
     if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
   }
@@ -575,36 +582,49 @@
   // Now look at all loads of the argument. Remember the load instructions
   // for the aliasing check below.
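+  // The traversal below walks uses (Use *) rather than users (Value *) so
+  // that, when a store is reached, we can tell whether the argument is the
+  // pointer operand or the stored value: only stores *to* the argument are
+  // promotable.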
-  SmallVector<Value *, 16> Worklist;
-  SmallPtrSet<Value *, 16> Visited;
+  SmallVector<const Use *, 16> Worklist;
+  SmallPtrSet<const Use *, 16> Visited;
   SmallVector<LoadInst *, 16> Loads;
-  auto AppendUsers = [&](Value *V) {
-    for (User *U : V->users())
-      if (Visited.insert(U).second)
-        Worklist.push_back(U);
+  auto AppendUses = [&](const Value *V) {
+    for (const Use &U : V->uses())
+      if (Visited.insert(&U).second)
+        Worklist.push_back(&U);
   };
-  AppendUsers(Arg);
+  AppendUses(Arg);
   while (!Worklist.empty()) {
-    Value *V = Worklist.pop_back_val();
+    const Use *U = Worklist.pop_back_val();
+    Value *V = U->getUser();
     if (isa<BitCastInst>(V)) {
-      AppendUsers(V);
+      AppendUses(V);
       continue;
     }
 
     if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
       if (!GEP->hasAllConstantIndices())
         return false;
-      AppendUsers(V);
+      AppendUses(V);
       continue;
     }
 
     if (auto *LI = dyn_cast<LoadInst>(V)) {
-      if (!*HandleLoad(LI, /* GuaranteedToExecute */ false))
+      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
       Loads.push_back(LI);
       continue;
     }
 
+    // Stores are allowed for byval arguments.
+    auto *SI = dyn_cast<StoreInst>(V);
+    if (AreStoresAllowed && SI &&
+        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
+      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
+                          /* GuaranteedToExecute */ false))
+        return false;
+      continue;
+      // Only stores *to* the argument are allowed; all other stores are
+      // unknown users.
+    }
+
     // Unknown user.
     LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                       << "unknown user " << *V << "\n");
@@ -630,8 +650,6 @@
       [](const auto &A, const auto &B) { return A.first < B.first; });
 
   // Make sure the parts are non-overlapping.
-  // TODO: As we're doing pure load promotion here, overlap should be fine from
-  // a correctness perspective. Profitability is less obvious though.
   int64_t Offset = ArgPartsVec[0].first;
   for (const auto &Pair : ArgPartsVec) {
     if (Pair.first < Offset)
@@ -640,6 +658,12 @@
     Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
   }
 
+  // If store instructions are allowed, the path from the entry of the function
+  // to each load may not be free of instructions that potentially invalidate
+  // the load, and this is an admissible situation.
+  if (AreStoresAllowed)
+    return true;
+
   // Okay, now we know that the argument is only used by load instructions, and
   // it is safe to unconditionally perform all of them. Use alias analysis to
   // check to see if the pointer is guaranteed to not be modified from entry of
@@ -712,40 +736,6 @@
   return true;
 }
 
-/// Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *Arg) {
-  assert(Arg->hasByValAttr());
-
-  // Track all the pointers to the argument to make sure they are not captured.
-  SmallPtrSet<Value *, 16> PtrValues;
-  PtrValues.insert(Arg);
-
-  // Track all of the stores.
-  SmallVector<StoreInst *, 16> Stores;
-
-  // Scan through the uses recursively to make sure the pointer is always used
-  // sanely.
-  SmallVector<Value *, 16> WorkList(Arg->users());
-  while (!WorkList.empty()) {
-    Value *V = WorkList.pop_back_val();
-    if (isa<GetElementPtrInst>(V) || isa<BitCastInst>(V)) {
-      if (PtrValues.insert(V).second)
-        append_range(WorkList, V->users());
-    } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
-      Stores.push_back(Store);
-    } else if (!isa<LoadInst>(V)) {
-      return true;
-    }
-  }
-
-  // Check to make sure the pointers aren't captured
-  for (StoreInst *Store : Stores)
-    if (PtrValues.count(Store->getValueOperand()))
-      return true;
-
-  return false;
-}
-
 /// Check if callers and callee agree on how promoted arguments would be
 /// passed.
 static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
@@ -767,6 +757,8 @@
 /// calls the DoPromotion method.
 static Function *
 promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
+                 function_ref<DominatorTree &(Function &F)> DTGetter,
+                 function_ref<AssumptionCache *(Function &F)> ACGetter,
                  unsigned MaxElements,
                  Optional<function_ref<void(CallBase &OldCS, CallBase &NewCS)>>
                      ReplaceCallSite,
@@ -774,7 +766,7 @@
   // Don't perform argument promotion for naked functions; otherwise we can end
   // up removing parameters that are seemingly 'not used' as they are referred
   // to in the assembly.
-  if(F->hasFnAttribute(Attribute::Naked))
+  if (F->hasFnAttribute(Attribute::Naked))
     return nullptr;
 
   // Make sure that it is local to this module.
@@ -833,7 +825,6 @@
   // Check to see which arguments are promotable. If an argument is promotable,
   // add it to ArgsToPromote.
   DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
-  SmallPtrSet<Argument *, 8> ByValArgsToTransform;
   for (Argument *PtrArg : PointerArgs) {
     // Replace sret attribute with noalias. This reduces register pressure by
     // avoiding a register copy.
@@ -850,6 +841,7 @@
 
     // If we can promote the pointer to its value.
     SmallVector<OffsetAndArgPart, 4> ArgParts;
+
     if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
       SmallVector<Type *, 4> Types;
       for (const auto &Pair : ArgParts)
@@ -857,56 +849,15 @@
 
       if (areTypesABICompatible(Types, *F, TTI)) {
         ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
-        continue;
       }
     }
-
-    // Otherwise, if this is a byval argument, and if the aggregate type is
-    // small, just pass the elements, which is always safe, if the passed value
-    // is densely packed or if we can prove the padding bytes are never
-    // accessed.
-    //
-    // Only handle arguments with specified alignment; if it's unspecified, the
-    // actual alignment of the argument is target-specific.
-    Type *ByValTy = PtrArg->getParamByValType();
-    bool IsSafeToPromote =
-        ByValTy && PtrArg->getParamAlign() &&
-        (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) ||
-         !canPaddingBeAccessed(PtrArg));
-    if (!IsSafeToPromote) {
-      LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
-                        << " the argument '" << PtrArg->getName()
-                        << "' because it is not safe.\n");
-      continue;
-    }
-    if (StructType *STy = dyn_cast<StructType>(ByValTy)) {
-      if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
-        LLVM_DEBUG(dbgs() << "ArgPromotion disables passing the elements of"
-                          << " the argument '" << PtrArg->getName()
-                          << "' because it would require adding more"
-                          << " than " << MaxElements
-                          << " arguments to the function.\n");
-        continue;
-      }
-      SmallVector<Type *, 4> Types;
-      append_range(Types, STy->elements());
-
-      // If all the elements are single-value types, we can promote it.
-      bool AllSimple =
-          all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); });
-
-      // Safe to transform. Passing the elements as a scalar will allow sroa to
-      // hack on the new alloca we introduce.
-      if (AllSimple && areTypesABICompatible(Types, *F, TTI))
-        ByValArgsToTransform.insert(PtrArg);
-    }
   }
 
   // No promotable pointer arguments.
-  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+  if (ArgsToPromote.empty())
     return nullptr;
 
-  return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
+  return doPromotion(F, DTGetter, ACGetter, ArgsToPromote, ReplaceCallSite);
 }
 
 PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
@@ -933,9 +884,19 @@
       return FAM.getResult<AAManager>(F);
     };
 
-    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
-    Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None,
-                                      TTI, IsRecursive);
+    auto DTGetter = [&](Function &F) -> DominatorTree & {
+      assert(&F != &OldF && "Called with the obsolete function!");
+      return FAM.getResult<DominatorTreeAnalysis>(F);
+    };
+
+    auto ACGetter = [&](Function &F) -> AssumptionCache * {
+      assert(&F != &OldF && "Called with the obsolete function!");
+      return &FAM.getResult<AssumptionAnalysis>(F);
+    };
+
+    const auto &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
+    Function *NewF = promoteArguments(&OldF, AARGetter, DTGetter, ACGetter,
+                                      MaxElements, None, TTI, IsRecursive);
     if (!NewF)
      continue;
     LocalChange = true;
diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
--- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
@@ -3,25 +3,14 @@
 
 %struct.ss = type { i32, i64 }
 
-; Don't drop 'byval' on %X here.
 define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b, i32* byval(i32) align 4 %X, i32 %i) nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (i32 [[B_0:%.*]], i32 [[X:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4
-; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4
-; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4
-; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1
-; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4
-; CHECK-NEXT: store i32 0, i32* [[X]], align 4
+; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
 ; CHECK-NEXT: ret void
 ;
 entry:
-
   %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
   %temp1 = load i32, i32* %temp, align 4
   %temp2 = add i32 %temp1, 1
@@ -41,11 +30,10 @@
 ; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8
 ; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4
-; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
+; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0
 ; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
-; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
-; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]], i32 zeroext 0)
+; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i32 [[X_VAL]], i32 zeroext 0)
 ; CHECK-NEXT: ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll @@ -2,24 +2,14 @@ ; RUN: opt < %s -passes=argpromotion -S | FileCheck %s ; Arg promotion eliminates the struct argument. -; FIXME: We should eliminate the i32* argument. %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval(%struct.ss) align 8 %b, i32* byval(i32) align 4 %X) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval(i32) align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (i32 [[B_0:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 8 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 -; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 -; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 -; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 -; CHECK-NEXT: store i32 0, i32* [[X]], align 4 +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 ; CHECK-NEXT: ret void ; entry: @@ -41,11 +31,10 @@ ; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8 ; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 8 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 -; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval(i32) align 4 [[X]]) +; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 +; CHECK-NEXT: [[X_VAL:%.*]] = load i32, i32* [[X]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i32 [[X_VAL]]) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll b/llvm/test/Transforms/ArgumentPromotion/byval-with-padding.ll rename from llvm/test/Transforms/ArgumentPromotion/byval-through-pointer-promotion.ll rename to llvm/test/Transforms/ArgumentPromotion/byval-with-padding.ll diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll @@ -7,17 +7,9 @@ define internal void @f(%struct.ss* byval(%struct.ss) align 4 %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@f -; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 4 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 -; CHECK-NEXT: 
[[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 -; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 -; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 ; CHECK-NEXT: ret void ; entry: @@ -28,20 +20,11 @@ ret void } - define internal void @g(%struct.ss* byval(%struct.ss) align 32 %b) nounwind { ; CHECK-LABEL: define {{[^@]+}}@g -; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: store i32 [[B_0]], i32* [[DOT0]], align 32 -; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; CHECK-NEXT: store i64 [[B_1]], i64* [[DOT1]], align 4 -; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; CHECK-NEXT: [[TEMP1:%.*]] = load i32, i32* [[TEMP]], align 4 -; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[TEMP1]], 1 -; CHECK-NEXT: store i32 [[TEMP2]], i32* [[TEMP]], align 4 +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 ; CHECK-NEXT: ret void ; entry: @@ -75,6 +58,63 @@ ret void } +; Transform even if an argument is written to and then is loaded from. +define internal void @k(%struct.ss* byval(%struct.ss) align 4 %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@k +; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1 +; CHECK-NEXT: ret void +; +entry: + %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %temp1 = load i32, i32* %temp, align 4 + %temp2 = add i32 %temp1, 1 + store i32 %temp2, i32* %temp, align 4 + %temp3 = load i32, i32* %temp, align 4 + ret void +} + +; Transform even if a store instruction is the single user. +define internal void @l(%struct.ss* byval(%struct.ss) align 4 %b) nounwind { +; CHECK-LABEL: define {{[^@]+}}@l +; CHECK-SAME: (i32 [[B_0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + store i32 1, i32* %temp, align 4 + ret void +} + +; Transform all the arguments creating the required number of 'alloca's and +; then optimize them out. 
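+; Every promoted part gets its own alloca in the callee (one for %b, two for
+; %c below); Mem2Reg then deletes them all, which is why no allocas appear in
+; the CHECK lines.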
+define internal void @m(%struct.ss* byval(%struct.ss) align 4 %b, %struct.ss* byval(%struct.ss) align 4 %c) nounwind { +; CHECK-LABEL: define {{[^@]+}}@m +; CHECK-SAME: (i32 [[B_0:%.*]], i32 [[C_0:%.*]], i64 [[C_1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP2:%.*]] = add i32 [[B_0]], 1 +; CHECK-NEXT: [[TEMP6:%.*]] = add i64 [[C_1]], 1 +; CHECK-NEXT: ret void +; +entry: + %temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 + %temp1 = load i32, i32* %temp, align 4 + %temp2 = add i32 %temp1, 1 + store i32 %temp2, i32* %temp, align 4 + + %temp3 = getelementptr %struct.ss, %struct.ss* %c, i32 0, i32 0 + store i32 %temp2, i32* %temp3, align 4 + + %temp4 = getelementptr %struct.ss, %struct.ss* %c, i32 0, i32 1 + %temp5 = load i64, i64* %temp4, align 8 + %temp6 = add i64 %temp5, 1 + store i64 %temp6, i64* %temp4, align 8 + + ret void +} + define i32 @main() nounwind { ; CHECK-LABEL: define {{[^@]+}}@main ; CHECK-SAME: () #[[ATTR0]] { @@ -84,17 +124,26 @@ ; CHECK-NEXT: store i32 1, i32* [[TEMP1]], align 8 ; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; CHECK-NEXT: store i64 2, i64* [[TEMP4]], align 4 -; CHECK-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; CHECK-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4 -; CHECK-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; CHECK-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4 -; CHECK-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) -; CHECK-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; CHECK-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 32 -; CHECK-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; CHECK-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4 -; CHECK-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) +; CHECK-NEXT: [[S_0_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_0_0_0_VAL:%.*]] = load i32, i32* [[S_0_0_0]], align 4 +; CHECK-NEXT: call void @f(i32 [[S_0_0_0_VAL]]) +; CHECK-NEXT: [[S_1_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_1_0_0_VAL:%.*]] = load i32, i32* [[S_1_0_0]], align 4 +; CHECK-NEXT: call void @g(i32 [[S_1_0_0_VAL]]) ; CHECK-NEXT: call void @h(%struct.ss* byval([[STRUCT_SS]]) [[S]]) +; CHECK-NEXT: [[S_2_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_2_0_0_VAL:%.*]] = load i32, i32* [[S_2_0_0]], align 4 +; CHECK-NEXT: call void @k(i32 [[S_2_0_0_VAL]]) +; CHECK-NEXT: [[S_3_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_3_0_0_VAL:%.*]] = load i32, i32* [[S_3_0_0]], align 4 +; CHECK-NEXT: call void @l(i32 [[S_3_0_0_VAL]]) +; CHECK-NEXT: [[S_4_0_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_4_0_0_VAL:%.*]] = load i32, i32* [[S_4_0_0]], align 4 +; CHECK-NEXT: [[S_4_1_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 0 +; CHECK-NEXT: [[S_4_1_0_VAL:%.*]] = load i32, i32* [[S_4_1_0]], align 4 +; CHECK-NEXT: [[S_4_1_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1 +; CHECK-NEXT: [[S_4_1_1_VAL:%.*]] = load i64, i64* [[S_4_1_1]], align 8 +; CHECK-NEXT: call void @m(i32 [[S_4_0_0_VAL]], i32 [[S_4_1_0_VAL]], i64 [[S_4_1_1_VAL]]) ; CHECK-NEXT: ret i32 0 ; entry: @@ -106,7 +155,8 @@ call void @f(%struct.ss* 
byval(%struct.ss) align 4 %S) nounwind
   call void @g(%struct.ss* byval(%struct.ss) align 32 %S) nounwind
   call void @h(%struct.ss* byval(%struct.ss) %S) nounwind
+  call void @k(%struct.ss* byval(%struct.ss) align 4 %S) nounwind
+  call void @l(%struct.ss* byval(%struct.ss) align 4 %S) nounwind
+  call void @m(%struct.ss* byval(%struct.ss) align 4 %S, %struct.ss* byval(%struct.ss) align 4 %S) nounwind
   ret i32 0
 }
-
-
diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
--- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
@@ -17,22 +17,20 @@
 
 %struct.pair = type { i32, i32 }
 
+; Do not promote because there is a store of the pointer %P itself. Even if %P
+; had been promoted as a byval argument, the result would not have been
+; optimizable by SROA.
 define internal void @test_byval(%struct.pair* byval(%struct.pair) align 4 %P) {
 ; CHECK-LABEL: define {{[^@]+}}@test_byval
-; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) {
-; CHECK-NEXT: [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 4
-; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
-; CHECK-NEXT: store i32 [[P_0]], i32* [[DOT0]], align 4
-; CHECK-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 1
-; CHECK-NEXT: store i32 [[P_1]], i32* [[DOT1]], align 4
+; CHECK-SAME: ([[STRUCT_PAIR:%.*]]* byval([[STRUCT_PAIR]]) align 4 [[P:%.*]]) {
 ; CHECK-NEXT: [[SINK:%.*]] = alloca i32*, align 8
-; CHECK-NEXT: [[DOT2:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
-; CHECK-NEXT: store i32* [[DOT2]], i32** [[SINK]], align 8
+; CHECK-NEXT: [[TEMP:%.*]] = getelementptr [[STRUCT_PAIR]], [[STRUCT_PAIR]]* [[P]], i32 0, i32 0
+; CHECK-NEXT: store i32* [[TEMP]], i32** [[SINK]], align 8
 ; CHECK-NEXT: ret void
 ;
   %1 = alloca i32*, align 8
   %2 = getelementptr %struct.pair, %struct.pair* %P, i32 0, i32 0
-  store i32* %2, i32** %1, align 8 ; to protect from "usual" promotion
+  store i32* %2, i32** %1, align 8 ; to protect from promotion
   ret void
 }
 
@@ -42,11 +40,7 @@
 ; CHECK-NEXT: [[Y_VAL:%.*]] = load i32*, i32** [[Y]], align 8, !dbg [[DBG4:![0-9]+]]
 ; CHECK-NEXT: [[Y_VAL_VAL:%.*]] = load i32, i32* [[Y_VAL]], align 8, !dbg [[DBG4]]
 ; CHECK-NEXT: call void @test(i32 [[Y_VAL_VAL]]), !dbg [[DBG4]]
-; CHECK-NEXT: [[P_0:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 0, !dbg [[DBG5:![0-9]+]]
-; CHECK-NEXT: [[P_0_VAL:%.*]] = load i32, i32* [[P_0]], align 4, !dbg [[DBG5]]
-; CHECK-NEXT: [[P_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1, !dbg [[DBG5]]
-; CHECK-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], align 4, !dbg [[DBG5]]
-; CHECK-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg [[DBG5]]
+; CHECK-NEXT: call void @test_byval([[STRUCT_PAIR]]* byval([[STRUCT_PAIR]]) align 4 [[P]]), !dbg [[DBG5:![0-9]+]]
 ; CHECK-NEXT: ret void
 ;
   call void @test(i32** %Y), !dbg !1
diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll
--- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll
@@ -14,23 +14,23 @@
 
 define void @run() {
 ; CHECK-LABEL: define {{[^@]+}}@run() {
-; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast %union.u* bitcast (%struct.s* @b to %union.u*) to i8*
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 10
 ; CHECK-NEXT: [[DOTVAL:%.*]] = load i8, i8* [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = tail call i8 @UseLongDoubleUnsafely(i8 [[DOTVAL]]) -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i64 0, i32 0 ; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]], align 16 ; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) -; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.Foo* @a to i64* -; CHECK-NEXT: [[A_VAL:%.*]] = load i64, i64* [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @AccessPaddingOfStruct(i64 [[A_VAL]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a) +; CHECK-NEXT: [[TMP4:%.*]] = tail call x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast %struct.Foo* @a to i64* +; CHECK-NEXT: [[A_VAL:%.*]] = load i64, i64* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @AccessPaddingOfStruct(i64 [[A_VAL]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a) ; CHECK-NEXT: ret void ; -entry: tail call i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) + tail call x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 bitcast (%struct.s* @b to %union.u*)) call i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) @a) call i64 @CaptureAStruct(%struct.Foo* byval(%struct.Foo) @a) ret void @@ -38,11 +38,9 @@ define internal i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 %arg) { ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely -; CHECK-SAME: (i8 [[ARG_10_VAL:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i8 [[ARG_10_VAL]] +; CHECK-SAME: (i8 [[ARG_0_VAL:%.*]]) { +; CHECK-NEXT: ret i8 [[ARG_0_VAL]] ; -entry: %bitcast = bitcast %union.u* %arg to %struct.s* %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 %result = load i8, i8* %gep @@ -51,23 +49,30 @@ define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 %arg) { ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafely -; CHECK-SAME: (x86_fp80 [[ARG_0:%.*]]) { -; CHECK-NEXT: [[ARG:%.*]] = alloca [[UNION_U:%.*]], align 16 -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], [[UNION_U]]* [[ARG]], i32 0, i32 0 -; CHECK-NEXT: store x86_fp80 [[ARG_0]], x86_fp80* [[DOT0]], align 16 +; CHECK-SAME: (x86_fp80 [[ARG_0_VAL:%.*]]) { +; CHECK-NEXT: ret x86_fp80 [[ARG_0_VAL]] +; + %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 + %fp80 = load x86_fp80, x86_fp80* %gep + ret x86_fp80 %fp80 +} + +define internal x86_fp80 @UseLongDoubleSafelyNoPromotion(%union.u* byval(%union.u) align 16 %arg) { +; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleSafelyNoPromotion +; CHECK-SAME: ([[UNION_U]]* byval([[UNION_U]]) align 16 [[ARG:%.*]]) { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 0, i32 0 -; CHECK-NEXT: [[IDX_P:%.*]] = alloca i64, align 8 -; CHECK-NEXT: store i64 0, i64* [[IDX_P]], align 8 -; CHECK-NEXT: [[IDX:%.*]] = load i64, i64* [[IDX_P]], align 8 +; CHECK-NEXT: [[TMP_IDX:%.*]] = alloca i64, align 8 +; CHECK-NEXT: store i64 0, i64* [[TMP_IDX]], align 8 +; CHECK-NEXT: [[IDX:%.*]] 
= load i64, i64* [[TMP_IDX]], align 8 ; CHECK-NEXT: [[GEP_IDX:%.*]] = getelementptr inbounds [[UNION_U]], [[UNION_U]]* [[ARG]], i64 [[IDX]], i32 0 -; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16 +; CHECK-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]] ; CHECK-NEXT: ret x86_fp80 [[FP80]] ; %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 %idx_slot = alloca i64, align 8 store i64 0, i64* %idx_slot, align 8 %idx = load i64, i64* %idx_slot, align 8 - %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from "usual" promotion + %gep_idx = getelementptr inbounds %union.u, %union.u* %arg, i64 %idx, i32 0 ; to protect from promotion %fp80 = load x86_fp80, x86_fp80* %gep ret x86_fp80 %fp80 } diff --git a/llvm/test/Transforms/ArgumentPromotion/metadata.ll b/llvm/test/Transforms/ArgumentPromotion/metadata.ll --- a/llvm/test/Transforms/ArgumentPromotion/metadata.ll +++ b/llvm/test/Transforms/ArgumentPromotion/metadata.ll @@ -7,6 +7,8 @@ define internal void @callee(i32* %p1, i32** %p2, i32** %p3, i32** %p4, i32** %p5, i32** %p6) { ; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-SAME: (i32 [[P1_0_VAL:%.*]], i32* [[P2_0_VAL:%.*]], i32* [[P3_0_VAL:%.*]], i32* [[P4_0_VAL:%.*]], i32* [[P5_0_VAL:%.*]], i32* [[P6_0_VAL:%.*]]) { +; CHECK-NEXT: [[IS_NOT_NULL:%.*]] = icmp ne i32* [[P2_0_VAL]], null +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NOT_NULL]]) ; CHECK-NEXT: call void @use.i32(i32 [[P1_0_VAL]]) ; CHECK-NEXT: call void @use.p32(i32* [[P2_0_VAL]]) ; CHECK-NEXT: call void @use.p32(i32* [[P3_0_VAL]]) @@ -51,6 +53,8 @@ ; CHECK-SAME: (i1 [[C:%.*]], i32* [[P_0_VAL:%.*]]) { ; CHECK-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: +; CHECK-NEXT: [[IS_NOT_NULL:%.*]] = icmp ne i32* [[P_0_VAL]], null +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NOT_NULL]]) ; CHECK-NEXT: ret i32* [[P_0_VAL]] ; CHECK: else: ; CHECK-NEXT: ret i32* null diff --git a/llvm/test/Transforms/ArgumentPromotion/store-after-load.ll b/llvm/test/Transforms/ArgumentPromotion/store-after-load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/store-after-load.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +; Store instructions are allowed users for byval arguments only. 
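+; In @callee below %arg is a plain pointer, not a byval argument, so the
+; store is an unknown user and the function must stay unpromoted.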
+define internal void @callee(i32* %arg) nounwind { +; CHECK-LABEL: define {{[^@]+}}@callee +; CHECK-SAME: (i32* [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TEMP]], 1 +; CHECK-NEXT: store i32 [[SUM]], i32* [[ARG]], align 4 +; CHECK-NEXT: ret void +; +entry: + %temp = load i32, i32* %arg, align 4 + %sum = add i32 %temp, 1 + store i32 %sum, i32* %arg, align 4 + ret void +} + +define i32 @caller(i32* %arg) nounwind { +; CHECK-LABEL: define {{[^@]+}}@caller +; CHECK-SAME: (i32* [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @callee(i32* [[ARG]]) #[[ATTR0]] +; CHECK-NEXT: ret i32 0 +; +entry: + call void @callee(i32* %arg) nounwind + ret i32 0 +} diff --git a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll @@ -0,0 +1,102 @@ +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +%struct.ss = type { i32, i64 } + +define internal void @f(ptr byval(ptr) align 4 %p) nounwind { +; CHECK-LABEL: define {{[^@]+}}@f +; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: store ptr [[P]], ptr [[P]] +; CHECK-NEXT: ret void +; +entry: + store ptr %p, ptr %p + ret void +} + +define internal void @g(ptr byval(ptr) align 4 %p) nounwind { +; CHECK-LABEL: define {{[^@]+}}@g +; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: store ptr [[P]], ptr [[P1]] +; CHECK-NEXT: ret void +; +entry: + %p1 = getelementptr i8, ptr %p, i64 4 + store ptr %p, ptr %p1 + ret void +} + +define internal void @h(ptr byval(ptr) align 4 %p) nounwind { +; CHECK-LABEL: define {{[^@]+}}@h +; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: store ptr [[P1]], ptr [[P]] +; CHECK-NEXT: ret void +; +entry: + %p1 = getelementptr i8, ptr %p, i64 4 + store ptr %p1, ptr %p + ret void +} + +define internal void @k(ptr byval(ptr) align 4 %p) nounwind { +; CHECK-LABEL: define {{[^@]+}}@k +; CHECK-SAME: (ptr byval(ptr) align 4 [[P:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load ptr, ptr [[P]] +; CHECK-NEXT: store ptr [[P]], ptr [[X]] +; CHECK-NEXT: ret void +; +entry: + %x = load ptr, ptr %p + store ptr %p, ptr %x + ret void +} + +define internal void @l(ptr byval(ptr) align 4 %p) nounwind { +; CHECK-LABEL: define {{[^@]+}}@l +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + %x = load ptr, ptr %p + store ptr %x, ptr %p + ret void +} + +define i32 @main() nounwind { +; CHECK-LABEL: define {{[^@]+}}@main +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 +; CHECK-NEXT: [[TEMP1:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 0 +; CHECK-NEXT: store i32 1, ptr [[TEMP1]], align 4 +; CHECK-NEXT: [[TEMP4:%.*]] = getelementptr [[STRUCT_SS]], ptr [[S]], i32 0, i32 1 +; CHECK-NEXT: store i64 2, ptr [[TEMP4]], align 8 +; CHECK-NEXT: call void @f(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] +; CHECK-NEXT: call void @g(ptr 
byval(ptr) align 4 [[S]]) #[[ATTR0]] +; CHECK-NEXT: call void @h(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] +; CHECK-NEXT: call void @k(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]] +; CHECK-NEXT: [[S_VAL:%.*]] = load ptr, ptr [[S]], align 8 +; CHECK-NEXT: call void @l() #[[ATTR0]] +; CHECK-NEXT: ret i32 0 +; +entry: + %S = alloca %struct.ss, align 32 + %temp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 + store i32 1, i32* %temp1, align 4 + %temp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 + store i64 2, i64* %temp4, align 8 + call void @f(ptr byval(ptr) align 4 %S) nounwind + call void @g(ptr byval(ptr) align 4 %S) nounwind + call void @h(ptr byval(ptr) align 4 %S) nounwind + call void @k(ptr byval(ptr) align 4 %S) nounwind + call void @l(ptr byval(ptr) align 4 %S) nounwind + ret i32 0 +}
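Note: as a minimal end-to-end sketch of the new behavior (function and value
names here are illustrative, not taken from the patch), running
`opt -passes=argpromotion -S` over the module below should rewrite @callee to
take `i32 %b.0.val` directly: the pass materializes an alloca for the single
promoted part, stores the incoming argument into it, retargets the load and
the store to that alloca, and finally PromoteMemToReg folds the alloca away.

%struct.ss = type { i32, i64 }

define internal void @callee(%struct.ss* byval(%struct.ss) align 4 %b) nounwind {
entry:
  %f0 = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0   ; part at offset 0
  %v = load i32, i32* %f0, align 4
  %inc = add i32 %v, 1
  store i32 %inc, i32* %f0, align 4                              ; allowed: byval + align
  ret void
}

define i32 @caller() nounwind {
entry:
  %S = alloca %struct.ss, align 4
  %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
  store i32 1, i32* %f0, align 4
  call void @callee(%struct.ss* byval(%struct.ss) align 4 %S) nounwind
  ret i32 0
}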