Index: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -91,43 +91,75 @@ #define DEBUG_TYPE "argpromotion" STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); -STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted"); STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); -/// A vector used to hold the indices of a single GEP instruction -using IndicesVector = std::vector; +struct ArgPart { + Type *Ty; + Align Alignment; + /// A representative guaranteed-executed load instruction for use by + /// metadata transfer. + LoadInst *MustExecLoad; +}; +using OffsetAndArgPart = std::pair; + +static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL, + Value *Ptr, Type *ResElemTy, int64_t Offset) { + // For non-opaque pointers, try create a "nice" GEP if possible, otherwise + // fall back to an i8 GEP to a specific offset. + unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); + APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset); + if (!Ptr->getType()->isOpaquePointerTy()) { + Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType(); + if (OrigOffset == 0 && OrigElemTy == ResElemTy) + return Ptr; + + APInt TmpOffset = OrigOffset; + Type *TmpTy = OrigElemTy; + SmallVector IntIndices = + DL.getGEPIndicesForOffset(TmpTy, TmpOffset); + if (TmpOffset == 0) { + // Try to add trailing zero indices to reach the right type. + while (TmpTy != ResElemTy) { + Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0); + if (!NextTy) + break; + + IntIndices.push_back(APInt::getZero( + isa(TmpTy) ? 32 : OrigOffset.getBitWidth())); + TmpTy = NextTy; + } + + SmallVector Indices; + for (const APInt &Index : IntIndices) + Indices.push_back(IRB.getInt(Index)); + + Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices); + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); + } + } + + if (OrigOffset != 0) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace)); + Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset)); + } + return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace)); +} /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. -static Function * -doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, - SmallPtrSetImpl &ByValArgsToTransform, - Optional> - ReplaceCallSite) { +static Function *doPromotion( + Function *F, + const DenseMap> &ArgsToPromote, + SmallPtrSetImpl &ByValArgsToTransform, + Optional> + ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector Params; - using ScalarizeTable = std::set>; - - // ScalarizedElements - If we are promoting a pointer that has elements - // accessed out of it, keep track of which elements are accessed so that we - // can add one argument for each. - // - // Arguments that are directly loaded will have a zero element value here, to - // handle cases where there are both a direct load and GEP accesses. 
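// Aside: createByteGEP above reduces every promoted access to "base pointer
// plus byte offset". A minimal sketch of just its i8-GEP fallback path (the
// only path needed once opaque pointers are in use), assuming an in-scope
// IRBuilderBase and the usual llvm/IR/IRBuilder.h include; the helper name
// is hypothetical:
static Value *emitByteOffsetAddr(IRBuilderBase &IRB, Value *Ptr,
                                 Type *ResElemTy, int64_t Offset) {
  unsigned AS = Ptr->getType()->getPointerAddressSpace();
  if (Offset != 0) {
    // Address the byte at Ptr + Offset, then cast to the loaded type.
    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AS));
    Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt64(Offset));
  }
  return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AS));
}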
- std::map ScalarizedElements; - - // OriginalLoads - Keep track of a representative load instruction from the - // original function so that we can tell the alias analysis implementation - // what the new GEP/Load instructions we are inserting look like. - // We need to keep the original loads for each argument and the elements - // of the argument that are accessed. - std::map, LoadInst *> OriginalLoads; - // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost @@ -154,58 +186,12 @@ // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { - // Okay, this is being promoted. This means that the only uses are loads - // or GEPs which are only used by loads - - // In this table, we will track which indices are loaded from the argument - // (where direct loads are tracked as no indices). - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - for (User *U : make_early_inc_range(I->users())) { - Instruction *UI = cast(U); - Type *SrcTy; - if (LoadInst *L = dyn_cast(UI)) - SrcTy = L->getType(); - else - SrcTy = cast(UI)->getSourceElementType(); - // Skip dead GEPs and remove them. - if (isa(UI) && UI->use_empty()) { - UI->eraseFromParent(); - continue; - } - - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (const Use &I : llvm::drop_begin(UI->operands())) - Indices.push_back(cast(I)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); - ArgIndices.insert(std::make_pair(SrcTy, Indices)); - LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast(UI)) - OrigLoad = L; - else - // Take any load, we will use it only to update Alias Analysis - OrigLoad = cast(UI->user_back()); - OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; - } - - // Add a parameter to the function for each element passed in. - for (const auto &ArgIndex : ArgIndices) { - // not allowed to dereference ->begin() if size() is 0 - Params.push_back(GetElementPtrInst::getIndexedType( - I->getType()->getPointerElementType(), ArgIndex.second)); + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + Params.push_back(Pair.second.Ty); ArgAttrVec.push_back(AttributeSet()); - assert(Params.back()); } - - if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) - ++NumArgumentsPromoted; - else - ++NumAggregatesPromoted; + ++NumArgumentsPromoted; } } @@ -277,44 +263,20 @@ ArgAttrVec.push_back(AttributeSet()); } } else if (!I->use_empty()) { - // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - // Store the Value* version of the indices in here, but declare it now - // for reuse. - std::vector Ops; - for (const auto &ArgIndex : ArgIndices) { - Value *V = *AI; - LoadInst *OrigLoad = - OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; - if (!ArgIndex.second.empty()) { - Ops.reserve(ArgIndex.second.size()); - Type *ElTy = V->getType(); - for (auto II : ArgIndex.second) { - // Use i32 to index structs, and i64 for others (pointers/arrays). - // This satisfies GEP constraints. - Type *IdxTy = - (ElTy->isStructTy() ? 
Type::getInt32Ty(F->getContext()) - : Type::getInt64Ty(F->getContext())); - Ops.push_back(ConstantInt::get(IdxTy, II)); - // Keep track of the type we're currently indexing. - if (auto *ElPTy = dyn_cast(ElTy)) - ElTy = ElPTy->getPointerElementType(); - else - ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II); - } - // And create a GEP to extract those indices. - V = IRB.CreateGEP(ArgIndex.first, V, Ops, V->getName() + ".idx"); - Ops.clear(); + Value *V = *AI; + const auto &ArgParts = ArgsToPromote.find(&*I)->second; + for (const auto &Pair : ArgParts) { + LoadInst *LI = IRB.CreateAlignedLoad( + Pair.second.Ty, + createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first), + Pair.second.Alignment, V->getName() + ".val"); + if (Pair.second.MustExecLoad) { + // TODO: Transfer other metadata like !nonnull here. + // TODO: Blindly transferring AA metadata in this way is incorrect. + // E.g. TBAA may require offset adjustment. + LI->setAAMetadata(Pair.second.MustExecLoad->getAAMetadata()); } - // Since we're replacing a load make sure we take the alignment - // of the previous load. - LoadInst *newLoad = - IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val"); - newLoad->setAlignment(OrigLoad->getAlign()); - // Transfer the AA info too. - newLoad->setAAMetadata(OrigLoad->getAAMetadata()); - - Args.push_back(newLoad); + Args.push_back(LI); ArgAttrVec.push_back(AttributeSet()); } } @@ -416,57 +378,46 @@ if (Arg.use_empty()) continue; + SmallDenseMap OffsetToArg; + for (const auto &Pair : ArgsToPromote.find(&Arg)->second) { + Argument &NewArg = *I2++; + NewArg.setName(Arg.getName() + "." + Twine(Pair.first) + ".val"); + OffsetToArg.insert({Pair.first, &NewArg}); + } + // Otherwise, if we promoted this argument, then all users are load - // instructions (or GEPs with only load users), and all loads should be - // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[&Arg]; - - while (!Arg.use_empty()) { - if (LoadInst *LI = dyn_cast(Arg.user_back())) { - assert(ArgIndices.begin()->second.empty() && - "Load element should sort to front!"); - I2->setName(Arg.getName() + ".val"); - LI->replaceAllUsesWith(&*I2); - LI->eraseFromParent(); - LLVM_DEBUG(dbgs() << "*** Promoted load of argument '" << Arg.getName() - << "' in function '" << F->getName() << "'\n"); - } else { - GetElementPtrInst *GEP = cast(Arg.user_back()); - assert(!GEP->use_empty() && - "GEPs without uses should be cleaned up already"); - IndicesVector Operands; - Operands.reserve(GEP->getNumIndices()); - for (const Use &Idx : GEP->indices()) - Operands.push_back(cast(Idx)->getSExtValue()); - - // GEPs with a single 0 index can be merged with direct loads - if (Operands.size() == 1 && Operands.front() == 0) - Operands.clear(); - - Function::arg_iterator TheArg = I2; - for (ScalarizeTable::iterator It = ArgIndices.begin(); - It->second != Operands; ++It, ++TheArg) { - assert(It != ArgIndices.end() && "GEP not handled??"); - } + // instructions (with possible casts and GEPs in between). 
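// Aside: at each call site, every promoted part of the pointer argument V
// becomes an explicit address computation plus load in the caller. A
// simplified restatement of the loop above, using the same names as the
// patch and omitting the AA-metadata transfer:
for (const auto &Pair : ArgParts) {
  Value *Addr = createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first);
  LoadInst *LI = IRB.CreateAlignedLoad(Pair.second.Ty, Addr,
                                       Pair.second.Alignment,
                                       V->getName() + ".val");
  Args.push_back(LI);           // Pass the loaded value instead of the pointer.
  ArgAttrVec.push_back(AttributeSet());
}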
+ + SmallVector Worklist; + SmallVector DeadInsts; + append_range(Worklist, Arg.users()); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (isa(V) || isa(V)) { + DeadInsts.push_back(cast(V)); + append_range(Worklist, V->users()); + continue; + } - TheArg->setName(formatv("{0}.{1:$[.]}.val", Arg.getName(), - make_range(Operands.begin(), Operands.end()))); + if (auto *LI = dyn_cast(V)) { + Value *Ptr = LI->getPointerOperand(); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = + Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + assert(Ptr == &Arg && "Not constant offset from arg?"); + LI->replaceAllUsesWith(OffsetToArg[Offset.getSExtValue()]); + DeadInsts.push_back(LI); + continue; + } - LLVM_DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() - << "' of function '" << NF->getName() << "'\n"); + llvm_unreachable("Unexpected user"); + } - // All of the uses must be load instructions. Replace them all with - // the argument specified by ArgNo. - while (!GEP->use_empty()) { - LoadInst *L = cast(GEP->user_back()); - L->replaceAllUsesWith(&*TheArg); - L->eraseFromParent(); - } - GEP->eraseFromParent(); - } + for (Instruction *I : DeadInsts) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); } - // Increment I2 past all of the arguments added for this promoted pointer. - std::advance(I2, ArgIndices.size()); } return NF; @@ -474,100 +425,39 @@ /// Return true if we can prove that all callees pass in a valid pointer for the /// specified function argument. -static bool allCallersPassValidPointerForArgument(Argument *Arg, Type *Ty) { - Function *Callee = Arg->getParent(); - const DataLayout &DL = Callee->getParent()->getDataLayout(); - - unsigned ArgNo = Arg->getArgNo(); +static bool allCallersPassValidPointerForArgument(Argument *Arg, + Align NeededAlign, + uint64_t NeededDerefBytes, + const DataLayout &DL) { + // Check if the argument itself is marked dereferenceable and aligned. + APInt Bytes(64, NeededDerefBytes); + if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL)) + return true; // Look at all call sites of the function. At this point we know we only have // direct callees. + Function *Callee = Arg->getParent(); + unsigned ArgNo = Arg->getArgNo(); for (User *U : Callee->users()) { CallBase &CB = cast(*U); - - if (!isDereferenceablePointer(CB.getArgOperand(ArgNo), Ty, DL)) + if (!isDereferenceableAndAlignedPointer(CB.getArgOperand(ArgNo), + NeededAlign, Bytes, DL)) return false; } return true; } -/// Returns true if Prefix is a prefix of longer. That means, Longer has a size -/// that is greater than or equal to the size of prefix, and each of the -/// elements in Prefix is the same as the corresponding elements in Longer. -/// -/// This means it also returns true when Prefix and Longer are equal! -static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { - if (Prefix.size() > Longer.size()) - return false; - return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); -} - -/// Checks if Indices, or a prefix of Indices, is in Set. -static bool prefixIn(const IndicesVector &Indices, - std::set &Set) { - std::set::iterator Low; - Low = Set.upper_bound(Indices); - if (Low != Set.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This means - // it points to a prefix of Indices (possibly Indices itself), if such - // prefix exists. 
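// Aside: a worked example for the reworked allCallersPassValidPointerForArgument
// above (made-up numbers). If findArgParts later records two conditionally
// executed loads, an i32 at offset 4 with align 4 and an i64 at offset 8 with
// align 8, it asks for
//   NeededDerefBytes = max(4 + 4, 8 + 8) = 16
//   NeededAlign      = max(4, 8)         = 8
// so promotion is only allowed if the argument, or the actual pointer at every
// direct call site, passes a query of the form:
Align NeededAlign(8);
APInt NeededBytes(64, 16);
bool Speculatable =
    isDereferenceableAndAlignedPointer(Arg, NeededAlign, NeededBytes, DL);
// A caller passing e.g. "ptr align 8 dereferenceable(16) %p" satisfies this;
// a plain "ptr %p" does not, and promotion is rejected.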
- // - // This load is safe if any prefix of its operands is safe to load. - return Low != Set.end() && isPrefix(*Low, Indices); -} - -/// Mark the given indices (ToMark) as safe in the given set of indices -/// (Safe). Marking safe usually means adding ToMark to Safe. However, if there -/// is already a prefix of Indices in Safe, Indices are implicitely marked safe -/// already. Furthermore, any indices that Indices is itself a prefix of, are -/// removed from Safe (since they are implicitely safe because of Indices now). -static void markIndicesSafe(const IndicesVector &ToMark, - std::set &Safe) { - std::set::iterator Low; - Low = Safe.upper_bound(ToMark); - // Guard against the case where Safe is empty - if (Low != Safe.begin()) - Low--; - // Low is now the last element smaller than or equal to Indices. This - // means it points to a prefix of Indices (possibly Indices itself), if - // such prefix exists. - if (Low != Safe.end()) { - if (isPrefix(*Low, ToMark)) - // If there is already a prefix of these indices (or exactly these - // indices) marked a safe, don't bother adding these indices - return; - - // Increment Low, so we can use it as a "insert before" hint - ++Low; - } - // Insert - Low = Safe.insert(Low, ToMark); - ++Low; - // If there we're a prefix of longer index list(s), remove those - std::set::iterator End = Safe.end(); - while (Low != End && isPrefix(ToMark, *Low)) { - std::set::iterator Remove = Low; - ++Low; - Safe.erase(Remove); - } -} - -/// isSafeToPromoteArgument - As you might guess from the name of this method, -/// it checks to see if it is both safe and useful to promote the argument. -/// This method limits promotion of aggregates to only promote up to three -/// elements of the aggregate in order to avoid exploding the number of -/// arguments passed in. -static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR, - unsigned MaxElements) { - using GEPIndicesSet = std::set; - +/// Determine that this argument is safe to promote, and find the argument +/// parts it can be promoted into. +static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR, + unsigned MaxElements, bool IsSelfRecursive, + SmallVectorImpl &ArgPartsVec) { // Quick exit for unused arguments if (Arg->use_empty()) return true; - // We can only promote this argument if all of the uses are loads, or are GEP - // instructions (with constant indices) that are subsequently loaded. + // We can only promote this argument if all of the uses are loads at known + // offsets. // // Promoting the argument causes it to be loaded in the caller // unconditionally. This is only safe if we can prove that either the load @@ -578,156 +468,156 @@ // anyway, in the latter case, invalid loads won't happen. This prevents us // from introducing an invalid load that wouldn't have happened in the // original code. - // - // This set will contain all sets of indices that are loaded in the entry - // block, and thus are safe to unconditionally load in the caller. - GEPIndicesSet SafeToUnconditionallyLoad; - - // This set contains all the sets of indices that we are planning to promote. - // This makes it possible to limit the number of arguments added. - GEPIndicesSet ToPromote; - - // If the pointer is always valid, any load with first index 0 is valid. 
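// Aside: "loads at known offsets" above means a load's pointer operand must
// strip back to the argument itself with a compile-time-constant byte offset.
// A minimal sketch of that test (essentially the entry check of the
// HandleLoad helper that follows), assuming the module DataLayout; the helper
// name is hypothetical:
static Optional<int64_t> constantOffsetFromArg(Argument *Arg, LoadInst *LI,
                                               const DataLayout &DL) {
  Value *Ptr = LI->getPointerOperand();
  APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                               /* AllowNonInbounds */ true);
  if (Ptr != Arg)
    return None;  // Based on something other than the argument.
  if (Offset.getSignificantBits() >= 64)
    return None;  // Offset too large to track as an int64_t.
  return Offset.getSExtValue();
}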
- - if (ByValTy) - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); - - // Whenever a new underlying type for the operand is found, make sure it's - // consistent with the GEPs and loads we've already seen and, if necessary, - // use it to see if all incoming pointers are valid (which implies the 0-index - // is safe). - Type *BaseTy = ByValTy; - auto UpdateBaseTy = [&](Type *NewBaseTy) { - if (BaseTy) - return BaseTy == NewBaseTy; - - BaseTy = NewBaseTy; - if (allCallersPassValidPointerForArgument(Arg, BaseTy)) { - assert(SafeToUnconditionallyLoad.empty()); - SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); + + SmallDenseMap ArgParts; + Align NeededAlign(1); + uint64_t NeededDerefBytes = 0; + + // Returns None if this load is not based on the argument. Return true if + // we can promote the load, false otherwise. + auto HandleLoad = [&](LoadInst *LI, + bool GuaranteedToExecute) -> Optional { + Value *Ptr = LI->getPointerOperand(); + APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset, + /* AllowNonInbounds */ true); + if (Ptr != Arg) + return None; + + if (Offset.getSignificantBits() >= 64) + return false; + + Type *Ty = LI->getType(); + TypeSize Size = DL.getTypeStoreSize(Ty); + // Don't try to promote scalable types. + if (Size.isScalable()) + return false; + + // If this is a self-recursive function and one of the types is a pointer, + // then promoting it might lead to recursive promotion. + if (IsSelfRecursive && Ty->isPointerTy()) + return false; + + int64_t Off = Offset.getSExtValue(); + auto Pair = ArgParts.try_emplace( + Off, ArgPart{Ty, LI->getAlign(), GuaranteedToExecute ? LI : nullptr}); + ArgPart &Part = Pair.first->second; + + // We limit promotion to only promoting up to a fixed number of elements of + // the aggregate. + if (MaxElements > 0 && ArgParts.size() >= MaxElements) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "more than " << MaxElements << " parts\n"); + return false; } - return true; - }; + // For now, we only support loading one specific type at a given offset. + if (Part.Ty != Ty) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "loaded via both " << *Part.Ty << " and " << *Ty + << " at offset " << Off << "\n"); + return false; + } - // First, iterate functions that are guaranteed to execution on function - // entry and mark loads of (geps of) arguments as safe. - BasicBlock &EntryBlock = Arg->getParent()->front(); - // Declare this here so we can reuse it - IndicesVector Indices; - for (Instruction &I : EntryBlock) { - if (LoadInst *LI = dyn_cast(&I)) { - Value *V = LI->getPointerOperand(); - if (GetElementPtrInst *GEP = dyn_cast(V)) { - V = GEP->getPointerOperand(); - if (V == Arg) { - // This load actually loads (part of) Arg? Check the indices then. - Indices.reserve(GEP->getNumIndices()); - for (Use &Idx : GEP->indices()) - if (ConstantInt *CI = dyn_cast(Idx)) - Indices.push_back(CI->getSExtValue()); - else - // We found a non-constant GEP index for this argument? Bail out - // right away, can't promote this argument at all. - return false; - - if (!UpdateBaseTy(GEP->getSourceElementType())) - return false; - - // Indices checked out, mark them as safe - markIndicesSafe(Indices, SafeToUnconditionallyLoad); - Indices.clear(); - } - } else if (V == Arg) { - // Direct loads are equivalent to a GEP with a single 0 index. 
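// Aside: a toy model of the per-offset ArgParts bookkeeping above, with LLVM
// types replaced by plain strings (assumes <map> and <string>). Loads of i32
// at offsets 0 and 4 create two parts; a later load of a different type at
// offset 0 fails the "one type per offset" rule, which is exactly the
// situation exercised by callee_different_types in the new opaque-ptr.ll test:
static bool tryAddPart(std::map<int64_t, std::string> &Parts, int64_t Off,
                       const std::string &Ty) {
  // Record the first type seen at this offset; reject a different one later.
  auto It = Parts.try_emplace(Off, Ty).first;
  return It->second == Ty;
}
// tryAddPart(P, 0, "i32"); tryAddPart(P, 4, "i32");   // both succeed
// tryAddPart(P, 0, "float");                          // fails, not promoted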
- markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); + // If this load is not guaranteed to execute and we haven't seen a load at + // this offset before (or it had lower alignment), then we need to remember + // that requirement. + if (!GuaranteedToExecute && + (Pair.second || Part.Alignment < LI->getAlign())) { + // We won't be able to prove dereferenceability for negative offsets. + if (Off < 0) + return false; - if (BaseTy && LI->getType() != BaseTy) - return false; + // If the offset is not aligned, an aligned base pointer won't help. + if (!isAligned(LI->getAlign(), Off)) + return false; - BaseTy = LI->getType(); - } + NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue()); + NeededAlign = std::max(NeededAlign, LI->getAlign()); } + Part.Alignment = std::max(Part.Alignment, LI->getAlign()); + return true; + }; + + // Look for loads that are guaranteed to execute on entry. + for (Instruction &I : Arg->getParent()->getEntryBlock()) { + if (LoadInst *LI = dyn_cast(&I)) + if (Optional Res = HandleLoad(LI, /* GuaranteedToExecute */ true)) + if (!Res) + return false; + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) break; } - // Now, iterate all uses of the argument to see if there are any uses that are - // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. + // Now look at all loads of the argument. Remember the load instructions + // for the aliasing check below. + SmallVector Worklist; + SmallPtrSet Visited; SmallVector Loads; - IndicesVector Operands; - for (Use &U : Arg->uses()) { - User *UR = U.getUser(); - Operands.clear(); - if (LoadInst *LI = dyn_cast(UR)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - // Direct loads are equivalent to a GEP with a zero index and then a load. - Operands.push_back(0); + auto AppendUsers = [&](Value *V) { + for (User *U : V->users()) + if (Visited.insert(U).second) + Worklist.push_back(U); + }; + AppendUsers(Arg); + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (isa(V)) { + AppendUsers(V); + continue; + } - if (!UpdateBaseTy(LI->getType())) + if (auto *GEP = dyn_cast(V)) { + if (!GEP->hasAllConstantIndices()) return false; - } else if (GetElementPtrInst *GEP = dyn_cast(UR)) { - if (GEP->use_empty()) { - // Dead GEP's cause trouble later. Just remove them if we run into - // them. - continue; - } + AppendUsers(V); + continue; + } - if (!UpdateBaseTy(GEP->getSourceElementType())) + if (auto *LI = dyn_cast(V)) { + if (!*HandleLoad(LI, /* GuaranteedToExecute */ false)) return false; - - // Ensure that all of the indices are constants. - for (Use &Idx : GEP->indices()) - if (ConstantInt *C = dyn_cast(Idx)) - Operands.push_back(C->getSExtValue()); - else - return false; // Not a constant operand GEP! - - // Ensure that the only users of the GEP are load instructions. - for (User *GEPU : GEP->users()) - if (LoadInst *LI = dyn_cast(GEPU)) { - // Don't hack volatile/atomic loads - if (!LI->isSimple()) - return false; - Loads.push_back(LI); - } else { - // Other uses than load? - return false; - } - } else { - return false; // Not a load or a GEP. + Loads.push_back(LI); + continue; } - // Now, see if it is safe to promote this load / loads of this GEP. Loading - // is safe if Operands, or a prefix of Operands, is marked as safe. - if (!prefixIn(Operands, SafeToUnconditionallyLoad)) - return false; + // Unknown user. 
+ LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "unknown user " << *V << "\n"); + return false; + } - // See if we are already promoting a load with these indices. If not, check - // to make sure that we aren't promoting too many elements. If so, nothing - // to do. - if (ToPromote.find(Operands) == ToPromote.end()) { - if (MaxElements > 0 && ToPromote.size() == MaxElements) { - LLVM_DEBUG(dbgs() << "argpromotion not promoting argument '" - << Arg->getName() - << "' because it would require adding more " - << "than " << MaxElements - << " arguments to the function.\n"); - // We limit aggregate promotion to only promoting up to a fixed number - // of elements of the aggregate. - return false; - } - ToPromote.insert(std::move(Operands)); + if (NeededDerefBytes || NeededAlign > 1) { + // Try to prove a required deref / aligned requirement. + if (!allCallersPassValidPointerForArgument(Arg, NeededAlign, + NeededDerefBytes, DL)) { + LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: " + << "not dereferenceable or aligned\n"); + return false; } } - if (Loads.empty()) + if (ArgParts.empty()) return true; // No users, this is a dead argument. + // Sort parts by offset. + append_range(ArgPartsVec, ArgParts); + sort(ArgPartsVec, + [](const auto &A, const auto &B) { return A.first < B.first; }); + + // Make sure the parts are non-overlapping. + // TODO: As we're doing pure load promotion here, overlap should be fine from + // a correctness perspective. Profitability is less obvious though. + int64_t Offset = ArgPartsVec[0].first; + for (const auto &Pair : ArgPartsVec) { + if (Pair.first < Offset) + return false; // Overlap with previous part. + + Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty); + } + // Okay, now we know that the argument is only used by load instructions and // it is safe to unconditionally perform all of them. Use alias analysis to // check to see if the pointer is guaranteed to not be modified from entry of @@ -834,30 +724,19 @@ return false; } -/// Check if callers and the callee \p F agree how promoted arguments would be -/// passed. The ones that they do not agree on are eliminated from the sets but -/// the return value has to be observed as well. -static bool areFunctionArgsABICompatible( - const Function &F, const TargetTransformInfo &TTI, - SmallPtrSetImpl &ArgsToPromote, - SmallPtrSetImpl &ByValArgsToTransform) { - // TODO: Check individual arguments so we can promote a subset? - SmallVector Types; - for (Argument *Arg : ArgsToPromote) - Types.push_back(Arg->getType()->getPointerElementType()); - for (Argument *Arg : ByValArgsToTransform) - Types.push_back(Arg->getParamByValType()); - - for (const Use &U : F.uses()) { +/// Check if callers and callee agree on how promoted arguments would be +/// passed. +static bool areTypesABICompatible(ArrayRef Types, const Function &F, + const TargetTransformInfo &TTI) { + return all_of(F.uses(), [&](const Use &U) { CallBase *CB = dyn_cast(U.getUser()); if (!CB) return false; + const Function *Caller = CB->getCaller(); const Function *Callee = CB->getCalledFunction(); - if (!TTI.areTypesABICompatible(Caller, Callee, Types)) - return false; - } - return true; + return TTI.areTypesABICompatible(Caller, Callee, Types); + }); } /// PromoteArguments - This method checks the specified function to see if there @@ -931,11 +810,9 @@ // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. 
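// Aside: the non-overlap check above, restated over the (offset, part) list
// that findArgParts has already sorted by offset; each part must start at or
// after the end of the previous one. The function name is hypothetical:
static bool partsAreDisjoint(ArrayRef<OffsetAndArgPart> Parts,
                             const DataLayout &DL) {
  int64_t End = Parts.empty() ? 0 : Parts.front().first;
  for (const auto &Pair : Parts) {
    if (Pair.first < End)
      return false; // This part overlaps the previous one.
    End = Pair.first + DL.getTypeStoreSize(Pair.second.Ty).getFixedValue();
  }
  return true;
}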
- SmallPtrSet ArgsToPromote; + DenseMap> ArgsToPromote; SmallPtrSet ByValArgsToTransform; for (Argument *PtrArg : PointerArgs) { - Type *AgTy = PtrArg->getType()->getPointerElementType(); - // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. if (PtrArg->hasStructRetAttr()) { @@ -955,11 +832,13 @@ // // Only handle arguments with specified alignment; if it's unspecified, the // actual alignment of the argument is target-specific. - bool isSafeToPromote = PtrArg->hasByValAttr() && PtrArg->getParamAlign() && - (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || - !canPaddingBeAccessed(PtrArg)); + Type *ByValTy = PtrArg->getParamByValType(); + bool isSafeToPromote = + ByValTy && PtrArg->getParamAlign() && + (ArgumentPromotionPass::isDenselyPacked(ByValTy, DL) || + !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { - if (StructType *STy = dyn_cast(AgTy)) { + if (StructType *STy = dyn_cast(ByValTy)) { if (MaxElements > 0 && STy->getNumElements() > MaxElements) { LLVM_DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() @@ -969,51 +848,39 @@ continue; } + SmallVector Types; + append_range(Types, STy->elements()); + // If all the elements are single-value types, we can promote it. - bool AllSimple = true; - for (const auto *EltTy : STy->elements()) { - if (!EltTy->isSingleValueType()) { - AllSimple = false; - break; - } - } + bool AllSimple = + all_of(Types, [](Type *Ty) { return Ty->isSingleValueType(); }); // Safe to transform, don't even bother trying to "promote" it. // Passing the elements as a scalar will allow sroa to hack on // the new alloca we introduce. - if (AllSimple) { + if (AllSimple && areTypesABICompatible(Types, *F, TTI)) { ByValArgsToTransform.insert(PtrArg); continue; } } } - // If the argument is a recursive type and we're in a recursive - // function, we could end up infinitely peeling the function argument. - if (isSelfRecursive) { - if (StructType *STy = dyn_cast(AgTy)) { - bool RecursiveType = - llvm::is_contained(STy->elements(), PtrArg->getType()); - if (RecursiveType) - continue; - } - } - // Otherwise, see if we can promote the pointer to its value. - Type *ByValTy = - PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr; - if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements)) - ArgsToPromote.insert(PtrArg); + SmallVector ArgParts; + if (findArgParts(PtrArg, DL, AAR, MaxElements, isSelfRecursive, ArgParts)) { + SmallVector Types; + for (const auto &Pair : ArgParts) + Types.push_back(Pair.second.Ty); + + if (areTypesABICompatible(Types, *F, TTI)) + ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); + } } // No promotable pointer arguments. 
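// Aside: the byval pre-check used below, restated as a standalone predicate:
// a byval struct is only expanded when it has few enough elements and every
// element is a single-value type (integer, FP, pointer or vector), so SROA
// can clean up the replacement alloca. The function name is hypothetical:
static bool isByValExpandable(StructType *STy, unsigned MaxElements) {
  if (MaxElements > 0 && STy->getNumElements() > MaxElements)
    return false;
  return all_of(STy->elements(),
                [](Type *EltTy) { return EltTy->isSingleValueType(); });
}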
if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - if (!areFunctionArgsABICompatible( - *F, TTI, ArgsToPromote, ByValArgsToTransform)) - return nullptr; - return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); } Index: llvm/test/Transforms/ArgumentPromotion/align.ll =================================================================== --- llvm/test/Transforms/ArgumentPromotion/align.ll +++ llvm/test/Transforms/ArgumentPromotion/align.ll @@ -3,8 +3,8 @@ define internal i32 @callee_must_exec(i32* %p) { ; CHECK-LABEL: define {{[^@]+}}@callee_must_exec -; CHECK-SAME: (i32 [[P_VAL:%.*]]) { -; CHECK-NEXT: ret i32 [[P_VAL]] +; CHECK-SAME: (i32 [[P_0_VAL:%.*]]) { +; CHECK-NEXT: ret i32 [[P_0_VAL]] ; %x = load i32, i32* %p, align 16 ret i32 %x @@ -23,10 +23,10 @@ define internal i32 @callee_guaranteed_aligned(i1 %c, i32* %p) { ; CHECK-LABEL: define {{[^@]+}}@callee_guaranteed_aligned -; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]]) { +; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_0_VAL:%.*]]) { ; CHECK-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: -; CHECK-NEXT: ret i32 [[P_VAL]] +; CHECK-NEXT: ret i32 [[P_0_VAL]] ; CHECK: else: ; CHECK-NEXT: ret i32 -1 ; @@ -51,14 +51,15 @@ ret void } -; TODO: This should not be promoted, as the caller only guarantees that the +; This should not be promoted, as the caller only guarantees that the ; pointer is dereferenceable, not that it is aligned. define internal i32 @callee_not_guaranteed_aligned(i1 %c, i32* %p) { ; CHECK-LABEL: define {{[^@]+}}@callee_not_guaranteed_aligned -; CHECK-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]]) { +; CHECK-SAME: (i1 [[C:%.*]], i32* [[P:%.*]]) { ; CHECK-NEXT: br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: -; CHECK-NEXT: ret i32 [[P_VAL]] +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P]], align 16 +; CHECK-NEXT: ret i32 [[X]] ; CHECK: else: ; CHECK-NEXT: ret i32 -1 ; @@ -75,8 +76,7 @@ define void @caller_not_guaranteed_aligned(i1 %c, i32* dereferenceable(4) %p) { ; CHECK-LABEL: define {{[^@]+}}@caller_not_guaranteed_aligned ; CHECK-SAME: (i1 [[C:%.*]], i32* dereferenceable(4) [[P:%.*]]) { -; CHECK-NEXT: [[P_VAL:%.*]] = load i32, i32* [[P]], align 16 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @callee_not_guaranteed_aligned(i1 [[C]], i32 [[P_VAL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @callee_not_guaranteed_aligned(i1 [[C]], i32* [[P]]) ; CHECK-NEXT: ret void ; call i32 @callee_not_guaranteed_aligned(i1 %c, i32* %p) Index: llvm/test/Transforms/ArgumentPromotion/fp80.ll =================================================================== --- llvm/test/Transforms/ArgumentPromotion/fp80.ll +++ llvm/test/Transforms/ArgumentPromotion/fp80.ll @@ -15,12 +15,18 @@ define void @run() { ; CHECK-LABEL: define {{[^@]+}}@run() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval([[UNION_U:%.*]]) align 16 bitcast (%struct.s* @b to %union.u*)) -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %union.u* bitcast (%struct.s* @b to %union.u*) to i8* +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 10 +; CHECK-NEXT: [[DOTVAL:%.*]] = load i8, i8* [[TMP1]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i8 @UseLongDoubleUnsafely(i8 [[DOTVAL]]) +; CHECK-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 ; CHECK-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, 
x86_fp80* [[DOT0]], align 16 -; CHECK-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* byval([[STRUCT_FOO:%.*]]) @a) -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO]]) @a) +; CHECK-NEXT: [[TMP3:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* @a, i64 0, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to i64* +; CHECK-NEXT: [[A_VAL:%.*]] = load i64, i64* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @AccessPaddingOfStruct(i64 [[A_VAL]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @CaptureAStruct(%struct.Foo* byval([[STRUCT_FOO]]) @a) ; CHECK-NEXT: ret void ; entry: @@ -33,12 +39,9 @@ define internal i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 %arg) { ; CHECK-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely -; CHECK-SAME: (%union.u* byval([[UNION_U:%.*]]) align 16 [[ARG:%.*]]) { +; CHECK-SAME: (i8 [[ARG_10_VAL:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s* -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2 -; CHECK-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]], align 1 -; CHECK-NEXT: ret i8 [[RESULT]] +; CHECK-NEXT: ret i8 [[ARG_10_VAL]] ; entry: %bitcast = bitcast %union.u* %arg to %struct.s* @@ -64,10 +67,8 @@ define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) %a) { ; CHECK-LABEL: define {{[^@]+}}@AccessPaddingOfStruct -; CHECK-SAME: (%struct.Foo* byval([[STRUCT_FOO:%.*]]) [[A:%.*]]) { -; CHECK-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* -; CHECK-NEXT: [[V:%.*]] = load i64, i64* [[P]], align 8 -; CHECK-NEXT: ret i64 [[V]] +; CHECK-SAME: (i64 [[A_0_VAL:%.*]]) { +; CHECK-NEXT: ret i64 [[A_0_VAL]] ; %p = bitcast %struct.Foo* %a to i64* %v = load i64, i64* %p Index: llvm/test/Transforms/ArgumentPromotion/opaque-ptr.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/ArgumentPromotion/opaque-ptr.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -argpromotion -opaque-pointers < %s | FileCheck %s + +define internal i32 @callee_basic(ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@callee_basic +; CHECK-SAME: (i32 [[P_0_VAL:%.*]], i32 [[P_4_VAL:%.*]]) { +; CHECK-NEXT: [[Z:%.*]] = add i32 [[P_0_VAL]], [[P_4_VAL]] +; CHECK-NEXT: ret i32 [[Z]] +; + %x = load i32, ptr %p + %p1 = getelementptr i8, ptr %p, i64 4 + %y = load i32, ptr %p1 + %z = add i32 %x, %y + ret i32 %z +} + +define void @caller_basic(ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@caller_basic +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: [[P_VAL1:%.*]] = load i32, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @callee_basic(i32 [[P_VAL]], i32 [[P_VAL1]]) +; CHECK-NEXT: ret void +; + call i32 @callee_basic(ptr %p) + ret void +} + +; Same offset is loaded with two differen types: Don't promote. 
+define internal i32 @callee_different_types(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@callee_different_types
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[Y_F:%.*]] = load float, ptr [[P]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = fptoui float [[Y_F]] to i32
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    ret i32 [[Z]]
+;
+  %x = load i32, ptr %p
+  %y.f = load float, ptr %p
+  %y = fptoui float %y.f to i32
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+define void @caller_different_types(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@caller_different_types
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @callee_different_types(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call i32 @callee_different_types(ptr %p)
+  ret void
+}
+
+; The two loads overlap: Don't promote.
+define internal i32 @callee_overlap(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@callee_overlap
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = load i32, ptr [[P]], align 4
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P]], i64 2
+; CHECK-NEXT:    [[Y:%.*]] = load i32, ptr [[P1]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    ret i32 [[Z]]
+;
+  %x = load i32, ptr %p
+  %p1 = getelementptr i8, ptr %p, i64 2
+  %y = load i32, ptr %p1
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+define void @caller_overlap(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@caller_overlap
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @callee_overlap(ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+  call i32 @callee_overlap(ptr %p)
+  ret void
+}
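For intuition, the callee_basic/caller_basic pair above corresponds roughly to the following source-level rewrite. This is only a loose C++ analogy of what the IR transformation does, not code produced by the pass, and all names are made up:

// Before promotion: the callee performs the two loads itself.
static int calleeBasic(const int *P) { return P[0] + P[1]; }
int callerBefore(const int *P) { return calleeBasic(P); }

// After promotion: the caller loads the two i32 parts (byte offsets 0 and 4)
// and passes the values, so the callee no longer touches memory.
static int calleeBasicPromoted(int P0Val, int P4Val) { return P0Val + P4Val; }
int callerAfter(const int *P) { return calleeBasicPromoted(P[0], P[1]); }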