diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -14,6 +14,7 @@
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
+class TargetTransformInfo;
 
 /// Argument promotion pass.
 ///
@@ -26,6 +27,17 @@ public:
   ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
 
+  /// Check if callers and the callee \p F agree on how promoted arguments
+  /// would be passed. The ones they do not agree on are eliminated from the
+  /// sets, but the return value has to be observed as well.
+  static bool areFunctionArgsABICompatible(
+      const Function &F, const TargetTransformInfo &TTI,
+      SmallPtrSetImpl<Argument *> &ArgsToPromote,
+      SmallPtrSetImpl<Argument *> &ByValArgsToTransform);
+
+  /// Checks if a type could have padding bytes.
+  static bool isDenselyPacked(Type *type, const DataLayout &DL);
+
   PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                         LazyCallGraph &CG, CGSCCUpdateResult &UR);
 };
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -104,6 +104,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/PassManager.h"
 
@@ -275,17 +276,7 @@
   }
 
   /// Return the associated argument, if any.
-  Argument *getAssociatedArgument() const {
-    if (auto *Arg = dyn_cast<Argument>(&getAnchorValue()))
-      return Arg;
-    int ArgNo = getArgNo();
-    if (ArgNo < 0)
-      return nullptr;
-    Function *AssociatedFn = getAssociatedFunction();
-    if (!AssociatedFn || AssociatedFn->arg_size() <= unsigned(ArgNo))
-      return nullptr;
-    return AssociatedFn->arg_begin() + ArgNo;
-  }
+  Argument *getAssociatedArgument() const;
 
   /// Return true if the position refers to a function interface, that is the
   /// function scope, the function return, or an argumnt.
@@ -602,6 +593,10 @@
   TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) {
     return AG.getAnalysis<TargetLibraryAnalysis>(F);
   }
+  const TargetTransformInfo *
+  getTargetTransformInfoForFunction(const Function &F) {
+    return AG.getAnalysis<TargetIRAnalysis>(F);
+  }
 
   /// Return AliasAnalysis Result for function \p F.
   AAResults *getAAResultsForFunction(const Function &F) {
@@ -2085,6 +2080,40 @@
   static const char ID;
 };
 
+/// An abstract interface for privatizability.
+///
+/// A pointer is privatizable if it can be replaced by a new, private one.
+/// Privatizing a pointer reduces the use count and the interaction between
+/// unrelated code parts.
+struct AAPrivatizablePtr : public StateWrapper<BooleanState, AbstractAttribute>,
+                           public IRPosition {
+  AAPrivatizablePtr(const IRPosition &IRP) : IRPosition(IRP) {}
+
+  /// Returns true if pointer privatization is assumed to be possible.
+  bool isAssumedPrivatizablePtr() const { return getAssumed(); }
+
+  /// Returns true if pointer privatization is known to be possible.
+  bool isKnownPrivatizablePtr() const { return getKnown(); }
+
+  /// Return the type we can choose for a private copy of the underlying
+  /// value. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> getPrivatizableType() const = 0;
+
+  /// Return an IR position, see struct IRPosition.
+  ///
+  ///{
+  IRPosition &getIRPosition() { return *this; }
+  const IRPosition &getIRPosition() const { return *this; }
+  ///}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
+                                              Attributor &A);
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 /// An abstract interface for all memory related attributes.
 struct AAMemoryBehavior
     : public IRAttribute<
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -773,8 +773,7 @@
   return true;
 }
 
-/// Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
   // There is no size information, so be conservative.
   if (!type->isSized())
     return false;
@@ -843,12 +842,14 @@
   return false;
 }
 
-static bool areFunctionArgsABICompatible(
+bool ArgumentPromotionPass::areFunctionArgsABICompatible(
     const Function &F, const TargetTransformInfo &TTI,
     SmallPtrSetImpl<Argument *> &ArgsToPromote,
     SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
   for (const Use &U : F.uses()) {
     CallSite CS(U.getUser());
+    if (!CS)
+      return false;
     const Function *Caller = CS.getCaller();
     const Function *Callee = CS.getCalledFunction();
     if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
@@ -950,9 +951,9 @@
     // If this is a byval argument, and if the aggregate type is small, just
     // pass the elements, which is always safe, if the passed value is densely
     // packed or if we can prove the padding bytes are never accessed.
-    bool isSafeToPromote =
-        PtrArg->hasByValAttr() &&
-        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+    bool isSafeToPromote = PtrArg->hasByValAttr() &&
+                           (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
+                            !canPaddingBeAccessed(PtrArg));
     if (isSafeToPromote) {
       if (StructType *STy = dyn_cast<StructType>(AgTy)) {
         if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
@@ -1010,8 +1011,8 @@
   if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
     return nullptr;
 
-  if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
-                                    ByValArgsToTransform))
+  if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+          *F, TTI, ArgsToPromote, ByValArgsToTransform))
     return nullptr;
 
   return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -29,11 +29,14 @@
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/NoFolder.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 
@@ -147,6 +150,92 @@
 }
 ///}
 
+Argument *IRPosition::getAssociatedArgument() const {
+  if (getPositionKind() == IRP_ARGUMENT)
+    return cast<Argument>(&getAnchorValue());
+
+  // Not an Argument and no argument number means this is not a call site
+  // argument, thus we cannot find a callback argument to return.
+  int ArgNo = getArgNo();
+  if (ArgNo < 0)
+    return nullptr;
+
+  const Function *Callee = getAssociatedFunction();
+  if (Callee && Callee->arg_size() > unsigned(ArgNo))
+    return Callee->getArg(ArgNo);
+
+  return nullptr;
+}
+
+/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and
+/// advanced by \p Offset bytes. To aid later analysis the method tries to build
+/// getelementptr instructions that traverse the natural type of \p Ptr if
+/// possible. If that fails, the remaining offset is adjusted byte-wise, hence
+/// through a cast to i8*.
+///
+/// TODO: This could probably live somewhere more prominently if it doesn't
+///       already exist.
+static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset,
+                               IRBuilder<NoFolder> &IRB, const DataLayout &DL) {
+  assert(Offset >= 0 && "Negative offset not supported yet!");
+  LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
+                    << "-bytes as " << *ResTy << "\n");
+
+  // The initial type we are trying to traverse to get nice GEPs.
+  Type *Ty = Ptr->getType();
+
+  SmallVector<Value *, 4> Indices;
+  std::string GEPName = Ptr->getName();
+  while (Offset) {
+    uint64_t Idx, Rem;
+
+    if (auto *STy = dyn_cast<StructType>(Ty)) {
+      const StructLayout *SL = DL.getStructLayout(STy);
+      if (int64_t(SL->getSizeInBytes()) < Offset)
+        break;
+      Idx = SL->getElementContainingOffset(Offset);
+      assert(Idx < STy->getNumElements() && "Offset calculation error!");
+      Rem = Offset - SL->getElementOffset(Idx);
+      Ty = STy->getElementType(Idx);
+    } else if (auto *PTy = dyn_cast<PointerType>(Ty)) {
+      Ty = PTy->getElementType();
+      if (!Ty->isSized())
+        break;
+      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      assert(ElementSize && "Expected type with size!");
+      Idx = Offset / ElementSize;
+      Rem = Offset % ElementSize;
+    } else {
+      // Non-aggregate type, we cast and make byte-wise progress now.
+      break;
+    }
+
+    LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset
+                      << " Idx: " << Idx << " Rem: " << Rem << "\n");
+
+    GEPName += "." + std::to_string(Idx);
+    Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
+    Offset = Rem;
+  }
+
+  // Create a GEP if we collected indices above.
+  if (Indices.size())
+    Ptr = IRB.CreateGEP(Ptr, Indices, GEPName);
+
+  // If an offset is left we use byte-wise adjustment.
+  if (Offset) {
+    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
+    Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset),
+                        GEPName + ".b" + Twine(Offset));
+  }
+
+  // Ensure the result has the requested type.
+  Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");
+
+  LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
+  return Ptr;
+}
+
 /// Recursively visit all values that might become \p IRP at some point. This
 /// will be done by looking through cast instructions, selects, phis, and calls
 /// with the "returned" attribute. Once we cannot look through the value any
@@ -4045,6 +4134,432 @@
   }
 };
 
+/// ----------------------- Privatizable Pointers ------------------------------
+struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
+  AAPrivatizablePtrImpl(const IRPosition &IRP)
+      : AAPrivatizablePtr(IRP), PrivatizableType(llvm::None) {}
+
+  ChangeStatus indicatePessimisticFixpoint() override {
+    AAPrivatizablePtr::indicatePessimisticFixpoint();
+    PrivatizableType = nullptr;
+    return ChangeStatus::CHANGED;
+  }
+
+  /// Identify the type we can choose for a private copy of the underlying
+  /// argument. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> identifyPrivatizableType(Attributor &A) = 0;
+
+  /// Return a privatizable type that encloses both T0 and T1.
+  /// TODO: This is merely a stub for now as we should manage a mapping as well.
+  Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
+    if (!T0.hasValue())
+      return T1;
+    if (!T1.hasValue())
+      return T0;
+    if (T0 == T1)
+      return T0;
+    return nullptr;
+  }
+
+  Optional<Type *> getPrivatizableType() const override {
+    return PrivatizableType;
+  }
+
+  const std::string getAsStr() const override {
+    return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]";
+  }
+
+protected:
+  Optional<Type *> PrivatizableType;
+};
+
+// TODO: Do this for call site arguments (probably also other values) as well.
+
+struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
+  AAPrivatizablePtrArgument(const IRPosition &IRP)
+      : AAPrivatizablePtrImpl(IRP) {}
+
+  /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+  Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+    // If this is a byval argument and we know all the call sites (so we can
+    // rewrite them), there is no need to check them explicitly.
+    if (getIRPosition().hasAttr(Attribute::ByVal) &&
+        A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
+                               true))
+      return getAssociatedValue().getType()->getPointerElementType();
+
+    Optional<Type *> Ty;
+    unsigned ArgNo = getIRPosition().getArgNo();
+
+    // Make sure the associated call site argument has the same type at all call
+    // sites and it is an allocation we know is safe to privatize, for now that
+    // means we only allow alloca instructions.
+    // TODO: We can additionally analyze the accesses in the callee to create
+    //       the type from that information instead. That is a little more
+    //       involved and will be done in a follow up patch.
+    auto CallSiteCheck = [&](AbstractCallSite ACS) {
+      IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+      // Check if a corresponding argument was found or if it is one not
+      // associated (which can happen for callback calls).
+      if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+        return false;
+
+      // Check that all call sites agree on a type.
+      auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
+      Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
+
+      LLVM_DEBUG({
+        dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
+        if (CSTy.hasValue() && CSTy.getValue())
+          CSTy.getValue()->print(dbgs());
+        else if (CSTy.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+      });
+
+      Ty = combineTypes(Ty, CSTy);
+
+      LLVM_DEBUG({
+        dbgs() << " : New Type: ";
+        if (Ty.hasValue() && Ty.getValue())
+          Ty.getValue()->print(dbgs());
+        else if (Ty.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+        dbgs() << "\n";
+      });
+
+      return !Ty.hasValue() || Ty.getValue();
+    };
+
+    if (!A.checkForAllCallSites(CallSiteCheck, *this, true))
+      return nullptr;
+    return Ty;
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    // Avoid arguments with padding for now.
+    if (!ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
+                                                A.getInfoCache().getDL())) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    // Verify callee and caller agree on how the promoted argument would be
+    // passed.
+    // TODO: We should re-implement this and not reuse the ArgumentPromotion
+    //       version of this.
+    Function &Fn = *getIRPosition().getAnchorScope();
+    SmallPtrSet<Argument *, 1> ArgsToPromote, Dummy;
+    ArgsToPromote.insert(getAssociatedArgument());
+    const auto *TTI = A.getInfoCache().getTargetTransformInfoForFunction(Fn);
+    if (!TTI ||
+        !ArgumentPromotionPass::areFunctionArgsABICompatible(
+            Fn, *TTI, ArgsToPromote, Dummy) ||
+        ArgsToPromote.empty()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// Given a type to privatize \p PrivType, collect the constituents (which are
+  /// used) in \p ReplacementTypes.
+  static void
+  identifyReplacementTypes(Type *PrivType,
+                           SmallVectorImpl<Type *> &ReplacementTypes) {
+    // TODO: For now we expand the privatization type to the fullest which can
+    //       lead to dead arguments that need to be removed later.
+    assert(PrivType && "Expected privatizable type!");
+
+    // Traverse the type, extract constituent types on the outermost level.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++)
+        ReplacementTypes.push_back(PrivStructType->getElementType(u));
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      ReplacementTypes.append(PrivArrayType->getNumElements(),
+                              PrivArrayType->getElementType());
+    } else {
+      ReplacementTypes.push_back(PrivType);
+    }
+  }
+
+  /// Initialize \p Base according to the type \p PrivType at position \p IP.
+  /// The values needed are taken from the arguments of \p F starting at
+  /// position \p ArgNo.
+  static void createInitialization(Type *PrivType, Value &Base, Function &F,
+                                   unsigned ArgNo, Instruction &IP) {
+    assert(PrivType && "Expected privatizable type!");
+
+    IRBuilder<NoFolder> IRB(&IP);
+    const DataLayout &DL = F.getParent()->getDataLayout();
+
+    // Traverse the type, build GEPs and stores.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo();
+        Value *Ptr = constructPointer(
+            PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy);
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr =
+            constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else {
+      new StoreInst(F.getArg(ArgNo), &Base, &IP);
+    }
+  }
+
+  /// Extract values from \p Base according to the type \p PrivType at the
+  /// call position \p ACS. The values are appended to \p ReplacementValues.
+  void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
+                               Value *Base,
+                               SmallVectorImpl<Value *> &ReplacementValues) {
+    assert(Base && "Expected base value!");
+    assert(PrivType && "Expected privatizable type!");
+    Instruction *IP = ACS.getInstruction();
+
+    IRBuilder<NoFolder> IRB(IP);
+    const DataLayout &DL = IP->getModule()->getDataLayout();
+
+    if (Base->getType()->getPointerElementType() != PrivType)
+      Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
+                                                 "", ACS.getInstruction());
+
+    // Traverse the type, build GEPs and loads.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        Type *PointeeTy = PrivStructType->getElementType(u);
+        Value *Ptr =
+            constructPointer(PointeeTy->getPointerTo(), Base,
+                             PrivStructLayout->getElementOffset(u), IRB, DL);
+        ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP));
+      }
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      Type *PointeeTy = PrivArrayType->getElementType();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+      Type *PointeePtrTy = PointeeTy->getPointerTo();
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr =
+            constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
+        ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP));
+      }
+    } else {
+      ReplacementValues.push_back(new LoadInst(PrivType, Base, "", IP));
+    }
+  }
+
+  /// See AbstractAttribute::manifest(...)
+  ChangeStatus manifest(Attributor &A) override {
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    assert(PrivatizableType.getValue() && "Expected privatizable type!");
+
+    // Collect all tail calls in the function as we cannot allow new allocas to
+    // escape into tail recursion.
+    // TODO: Be smarter about new allocas escaping into tail calls.
+    SmallVector<CallInst *, 16> TailCalls;
+    if (!A.checkForAllInstructions(
+            [&](Instruction &I) {
+              CallInst &CI = cast<CallInst>(I);
+              if (CI.isTailCall())
+                TailCalls.push_back(&CI);
+              return true;
+            },
+            *this, {Instruction::Call}))
+      return ChangeStatus::UNCHANGED;
+
+    Argument *Arg = getAssociatedArgument();
+
+    // Callback to repair the associated function. A new alloca is placed at the
+    // beginning and initialized with the values passed through arguments. The
+    // new alloca replaces the use of the old pointer argument.
+    Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB =
+        [=](const Attributor::ArgumentReplacementInfo &ARI,
+            Function &ReplacementFn, Function::arg_iterator ArgIt) {
+          BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
+          Instruction *IP = &*EntryBB.getFirstInsertionPt();
+          auto *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+                                    Arg->getName() + ".priv", IP);
+          createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
+                               ArgIt->getArgNo(), *IP);
+          Arg->replaceAllUsesWith(AI);
+
+          for (CallInst *CI : TailCalls)
+            CI->setTailCall(false);
+        };
+
+    // Callback to repair a call site of the associated function. The elements
+    // of the privatizable type are loaded prior to the call and passed to the
+    // new function version.
+    Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
+        [=](const Attributor::ArgumentReplacementInfo &ARI,
+            AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
+          createReplacementValues(
+              PrivatizableType.getValue(), ACS,
+              ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
+              NewArgOperands);
+        };
+
+    // Collect the types that will replace the privatizable type in the function
+    // signature.
+    SmallVector<Type *, 16> ReplacementTypes;
+    identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+
+    // Register a rewrite of the argument.
+    if (A.registerFunctionSignatureRewrite(
+            *Arg, ReplacementTypes, std::move(FnRepairCB), std::move(ACSRepairCB)))
+      return ChangeStatus::CHANGED;
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
+  AAPrivatizablePtrFloating(const IRPosition &IRP)
+      : AAPrivatizablePtrImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  virtual void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::"
+                     "updateImpl will not be called");
+  }
+
+  /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+  Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+    Value *Obj =
+        GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL());
+    if (!Obj) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
+      return nullptr;
+    }
+
+    if (auto *AI = dyn_cast<AllocaInst>(Obj))
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+        if (CI->isOne())
+          return Obj->getType()->getPointerElementType();
+    if (auto *Arg = dyn_cast<Argument>(Obj)) {
+      auto &PrivArgAA =
+          A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(*Arg));
+      if (PrivArgAA.isAssumedPrivatizablePtr())
+        return Obj->getType()->getPointerElementType();
+    }
+
+    LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
+                         "alloca nor privatizable argument: "
+                      << *Obj << "!\n");
+    return nullptr;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrCallSiteArgument final
+    : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    const IRPosition &IRP = getIRPosition();
+    auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+    if (!NoCaptureAA.isAssumedNoCapture()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+    if (!NoAliasAA.isAssumedNoAlias()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, IRP);
+    if (!MemBehaviorAA.isAssumedReadOnly()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrCallSiteReturned final
+    : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrReturned(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
+  }
+};
+
 /// -------------------- Memory Behavior Attributes ----------------------------
 /// Includes read-none, read-only, and write-only.
 /// ----------------------------------------------------------------------------
@@ -5445,6 +5960,9 @@
       // Every argument with pointer type might be marked
       // "readnone/readonly/writeonly/..."
getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be privatizable (or promotable) + getOrCreateAAFor(ArgPos); } } @@ -5666,6 +6184,7 @@ const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; +const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; // Macro magic to create the static generator function for attributes that @@ -5770,6 +6289,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR define internal i32 @deref(i32* %x) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@deref -; ARGPROMOTION-SAME: (i32 [[X_VAL:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: ret i32 [[X_VAL]] +; ALL-LABEL: define {{[^@]+}}@deref +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: ret i32 [[TMP0]] ; entry: %tmp2 = load i32, i32* %x, align 4 @@ -13,14 +14,11 @@ } define i32 @f(i32 %x) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[X:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[X_ADDR:%.*]] = alloca i32 -; ARGPROMOTION-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; ARGPROMOTION-NEXT: [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]]) -; ARGPROMOTION-NEXT: ret i32 [[TMP1]] +; ALL-LABEL: define {{[^@]+}}@f +; ALL-SAME: (i32 [[X:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X]]) +; ALL-NEXT: ret i32 [[TMP1]] ; entry: %x_addr = alloca i32 diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR2498 ; This test tries to convince argpromotion about promoting the load from %A + 2, @@ -17,6 +18,18 @@ ; ARGPROMOTION-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] ; ARGPROMOTION-NEXT: ret i32 [[R]] ; +; 
ATTRIBUTOR-LABEL: define {{[^@]+}}@callee +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[A:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_0:%.*]] = load i32, i32* [[A]] +; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; ATTRIBUTOR: T: +; ATTRIBUTOR-NEXT: ret i32 [[A_0]] +; ATTRIBUTOR: F: +; ATTRIBUTOR-NEXT: [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2 +; ATTRIBUTOR-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] +; ATTRIBUTOR-NEXT: ret i32 [[R]] +; entry: ; Unconditonally load the element at %A %A.0 = load i32, i32* %A @@ -32,12 +45,18 @@ ret i32 %R } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* null) +define i32 @foo(i1 %c, i32* %A) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@foo +; ARGPROMOTION-SAME: (i1 [[C:%.*]], i32* [[A:%.*]]) +; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* [[A]]) ; ARGPROMOTION-NEXT: ret i32 [[X]] ; - %X = call i32 @callee(i1 false, i32* null) ; [#uses=1] +; ATTRIBUTOR-LABEL: define {{[^@]+}}@foo +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* nocapture readonly [[A]]) +; ATTRIBUTOR-NEXT: ret i32 [[X]] +; + %X = call i32 @callee(i1 %c, i32* %A) ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -inline -argpromotion -disable-output +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -argpromotion -disable-output +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -S -argpromotion -mem2reg < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='argpromotion,function(mem2reg)' < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false 
-attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Test that we only promote arguments when the caller/callee have compatible ; function attrubtes. @@ -14,6 +15,13 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@no_promote_avx2 +; ATTRIBUTOR-SAME: (<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <4 x i64>, <4 x i64>* %arg1 store <4 x i64> %tmp, <4 x i64>* %arg @@ -33,6 +41,18 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@no_promote +; ATTRIBUTOR-SAME: (<4 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <4 x i64>, align 32 %tmp2 = alloca <4 x i64>, align 32 @@ -51,6 +71,12 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[ARG1_VAL]], <4 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@promote_avx2 +; ATTRIBUTOR-SAME: (<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <4 x i64>, <4 x i64>* %arg1 store <4 x i64> %tmp, <4 x i64>* %arg @@ -71,6 +97,19 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@promote +; ATTRIBUTOR-SAME: (<4 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <4 x i64>, align 32 %tmp2 = alloca <4 x i64>, align 32 diff --git 
a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Test that we only promote arguments when the caller/callee have compatible ; function attrubtes. @@ -14,6 +15,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -34,6 +44,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -53,6 +76,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* 
[[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -73,6 +105,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -92,6 +137,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -112,6 +166,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -131,6 +198,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -151,6 +227,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -171,6 +260,13 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -190,6 +286,18 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* 
[[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -210,6 +318,13 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -229,6 +344,18 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -248,6 +375,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -268,6 +404,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void 
@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 @@ -287,6 +436,15 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL]], <8 x i64>* [[ARG]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 store <8 x i64> %tmp, <8 x i64>* %arg @@ -307,6 +465,19 @@ ; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <8 x i64>, align 32 %tmp2 = alloca <8 x i64>, align 32 diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll @@ -4,8 +4,9 @@ ; we don't do that anymore. It also verifies that the combination of ; globalopt and argpromotion is able to optimize the call safely. 
; -; RUN: opt -S -argpromotion %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -globalopt -argpromotion %s | FileCheck %s --check-prefixes=GLOBALOPT_ARGPROMOTION,ALL +; RUN: opt -S -argpromotion %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -globalopt -argpromotion %s | FileCheck %s --check-prefixes=ALL,GLOBALOPT_ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i386-pc-windows-msvc19.11.0" @@ -33,6 +34,16 @@ ; GLOBALOPT_ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) ; GLOBALOPT_ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@internalfun +; ATTRIBUTOR-SAME: (%struct.a* nocapture readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca nonnull align 4 dereferenceable(1) [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* nonnull align 4 dereferenceable(1) [[A]]) +; ATTRIBUTOR-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; ATTRIBUTOR-NEXT: ret void +; entry: %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 %argmem = alloca inalloca <{ %struct.a }>, align 4 @@ -59,6 +70,14 @@ ; GLOBALOPT_ARGPROMOTION-NEXT: call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]]) ; GLOBALOPT_ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) ; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@exportedfun +; ATTRIBUTOR-SAME: (%struct.a* nocapture readnone [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; ATTRIBUTOR-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; ATTRIBUTOR-NEXT: call x86_thiscallcc void @internalfun(%struct.a* nocapture readnone undef, <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; ATTRIBUTOR-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; ATTRIBUTOR-NEXT: ret void ; %inalloca.save = tail call i8* @llvm.stacksave() %argmem = alloca inalloca <{ %struct.a }>, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll --- a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll +++ b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify 
-attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR %T = type { i32, i32, i32, i32 } @G = constant %T { i32 0, i32 0, i32 17, i32 25 } @@ -12,6 +13,16 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL]], [[P_0_2_VAL]] ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (%T* nocapture nonnull readonly align 8 dereferenceable(16) [[P:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 3 +; ATTRIBUTOR-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 2 +; ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; ATTRIBUTOR-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 @@ -31,6 +42,11 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = call i32 @test(%T* nonnull align 8 dereferenceable(16) @G) +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = call i32 @test(%T* @G) ret i32 %v diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll @@ -23,13 +23,20 @@ ; ARGPROMOTION-NEXT: ret void ; ; ATTRIBUTOR-LABEL: define {{[^@]+}}@f -; ATTRIBUTOR-SAME: (%struct.ss* noalias nocapture nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* nocapture nonnull writeonly byval dereferenceable(4) [[X:%.*]]) +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) ; ATTRIBUTOR-NEXT: entry: -; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[X_PRIV:%.*]] = alloca i32 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 ; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 ; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 ; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 -; ATTRIBUTOR-NEXT: store i32 0, i32* [[X]] +; ATTRIBUTOR-NEXT: store i32 0, i32* [[X_PRIV]] ; ATTRIBUTOR-NEXT: ret void ; entry: @@ -68,7 +75,12 @@ ; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 8 ; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; ATTRIBUTOR-NEXT: store i64 2, i64* [[TMP4]], align 4 -; ATTRIBUTOR-NEXT: call void @f(%struct.ss* noalias nocapture nonnull byval align 8 dereferenceable(12) [[S]], i32* nocapture writeonly byval [[X]]) +; ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; 
ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]] +; ATTRIBUTOR-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) ; ATTRIBUTOR-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/basictest.ll --- a/llvm/test/Transforms/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/ArgumentPromotion/basictest.ll @@ -1,12 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=7 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @test(i32* %X, i32* %Y) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@test -; ARGPROMOTION-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) -; ARGPROMOTION-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]] -; ARGPROMOTION-NEXT: ret i32 [[C]] +; ALL-LABEL: define {{[^@]+}}@test +; ALL-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; ALL-NEXT: [[C:%.*]] = add i32 [[TMP0]], [[TMP1]] +; ALL-NEXT: ret i32 [[C]] ; %A = load i32, i32* %X %B = load i32, i32* %Y @@ -15,10 +16,10 @@ } define internal i32 @caller(i32* %B) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@caller -; ARGPROMOTION-SAME: (i32 [[B_VAL1:%.*]]) -; ARGPROMOTION-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1]]) -; ARGPROMOTION-NEXT: ret i32 [[C]] +; ALL-LABEL: define {{[^@]+}}@caller +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[TMP0]]) +; ALL-NEXT: ret i32 [[C]] ; %A = alloca i32 store i32 1, i32* %A @@ -27,9 +28,9 @@ } define i32 @callercaller() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callercaller() -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @caller(i32 2) -; ARGPROMOTION-NEXT: ret i32 [[X]] +; ALL-LABEL: define {{[^@]+}}@callercaller() +; ALL-NEXT: [[X:%.*]] = call i32 @caller(i32 2) +; ALL-NEXT: ret i32 [[X]] ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll @@ -1,9 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Arg promotion eliminates the struct argument. ; FIXME: Should it eliminate the i32* argument? +; The attributor eliminates the i32*. 
%struct.ss = type { i32, i64 } @@ -23,6 +25,23 @@ ; ARGPROMOTION-NEXT: store i32 0, i32* [[X]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@f +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[X_PRIV:%.*]] = alloca i32 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[B_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 [[TMP1]], i64* [[B_PRIV_0_1]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: store i32 0, i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: ret void +; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 %tmp1 = load i32, i32* %tmp, align 4 @@ -49,6 +68,22 @@ ; ARGPROMOTION-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]]) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (i32* nocapture writeonly [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 8 +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 2, i64* [[TMP4]], align 4 +; ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]] +; ATTRIBUTOR-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %S = alloca %struct.ss %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll @@ -1,25 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes='argpromotion,function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval %b) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; 
ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]] -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 [[B_0]], i32* [[DOT0]] -; ARGPROMOTION-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 [[B_1]], i64* [[DOT1]] -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; ARGPROMOTION-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@f +; ALL-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 +; ALL-NEXT: ret void ; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -31,19 +24,11 @@ define internal void @g(%struct.ss* byval align 32 %b) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@g -; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 [[B_0]], i32* [[DOT0]] -; ARGPROMOTION-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 [[B_1]], i64* [[DOT1]] -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; ARGPROMOTION-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@g +; ALL-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 +; ALL-NEXT: ret void ; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -55,24 +40,11 @@ define i32 @main() nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@main() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 1, i32* [[TMP1]], align 8 -; ARGPROMOTION-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 2, i64* [[TMP4]], align 4 -; ARGPROMOTION-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]] -; ARGPROMOTION-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]] -; ARGPROMOTION-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) -; ARGPROMOTION-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]] -; ARGPROMOTION-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]] -; ARGPROMOTION-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) -; ARGPROMOTION-NEXT: ret i32 0 +; ALL-LABEL: define {{[^@]+}}@main() +; ALL-NEXT: entry: +; ALL-NEXT: call 
void @f(i32 1, i64 2) +; ALL-NEXT: call void @g(i32 1, i64 2) +; ALL-NEXT: ret i32 0 ; entry: %S = alloca %struct.ss diff --git a/llvm/test/Transforms/ArgumentPromotion/chained.ll b/llvm/test/Transforms/ArgumentPromotion/chained.ll --- a/llvm/test/Transforms/ArgumentPromotion/chained.ll +++ b/llvm/test/Transforms/ArgumentPromotion/chained.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR @G1 = constant i32 0 @G2 = constant i32* @G1 @@ -11,6 +12,13 @@ ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: ret i32 [[X_VAL_VAL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (i32** nocapture nonnull readonly align 8 dereferenceable(8) [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[Y:%.*]] = load i32*, i32** @G2, align 8 +; ATTRIBUTOR-NEXT: [[Z:%.*]] = load i32, i32* [[Y]] +; ATTRIBUTOR-NEXT: ret i32 [[Z]] +; entry: %y = load i32*, i32** %x %z = load i32, i32* %y @@ -25,6 +33,11 @@ ; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[X]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[X:%.*]] = call i32 @test(i32** nonnull align 8 dereferenceable(8) @G2) +; ATTRIBUTOR-NEXT: ret i32 [[X]] +; entry: %x = call i32 @test(i32** @G2) ret i32 %x diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll --- a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll +++ b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Don't promote around control flow. 
-define internal i32 @callee(i1 %C, i32* %P) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee -; ARGPROMOTION-SAME: (i1 [[C:%.*]], i32* [[P:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; ARGPROMOTION: T: -; ARGPROMOTION-NEXT: ret i32 17 -; ARGPROMOTION: F: -; ARGPROMOTION-NEXT: [[X:%.*]] = load i32, i32* [[P]] -; ARGPROMOTION-NEXT: ret i32 [[X]] +define internal i32 @callee(i1 %C, i32* nocapture readonly %P) { +; ALL-LABEL: define {{[^@]+}}@callee +; ALL-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[P:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; ALL: T: +; ALL-NEXT: ret i32 17 +; ALL: F: +; ALL-NEXT: [[X:%.*]] = load i32, i32* [[P]] +; ALL-NEXT: ret i32 [[X]] ; entry: br i1 %C, label %T, label %F @@ -25,14 +26,15 @@ ret i32 %X } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 true, i32* null) -; ARGPROMOTION-NEXT: ret i32 [[X]] +define i32 @foo(i1 %C, i32* nocapture readonly %P) { +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[P:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* nocapture readonly [[P]]) +; ALL-NEXT: ret i32 [[X]] ; entry: - %X = call i32 @callee(i1 true, i32* null) + %X = call i32 @callee(i1 %C, i32* nocapture readonly %P) ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll --- a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes='argpromotion,function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=5 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @@ -12,6 +13,14 @@ ; ARGPROMOTION-NEXT: ret i32 17 ; ARGPROMOTION: F: ; ARGPROMOTION-NEXT: ret i32 [[P_VAL]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee +; ATTRIBUTOR-SAME: (i1 [[C:%.*]]) +; ATTRIBUTOR-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; ATTRIBUTOR: T: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: F: +; ATTRIBUTOR-NEXT: unreachable ; br i1 %C, label %T, label %F @@ -23,17 +32,20 @@ ret i32 %X } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: [[A:%.*]] = alloca i32 -; ARGPROMOTION-NEXT: store i32 17, i32* [[A]] -; ARGPROMOTION-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]]) +define i32 @foo(i1 %C) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@foo +; ARGPROMOTION-SAME: (i1 [[C:%.*]]) +; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32 17) ; ARGPROMOTION-NEXT: ret i32 [[X]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@foo +; ATTRIBUTOR-SAME: (i1 [[C:%.*]]) +; ATTRIBUTOR-NEXT: [[X:%.*]] = call 
i32 @callee(i1 [[C]]) +; ATTRIBUTOR-NEXT: ret i32 [[X]] ; %A = alloca i32 ; [#uses=2] store i32 17, i32* %A - %X = call i32 @callee( i1 false, i32* %A ) ; [#uses=1] + %X = call i32 @callee( i1 %C, i32* %A ) ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/crash.ll b/llvm/test/Transforms/ArgumentPromotion/crash.ll --- a/llvm/test/Transforms/ArgumentPromotion/crash.ll +++ b/llvm/test/Transforms/ArgumentPromotion/crash.ll @@ -1,35 +1,49 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -inline -argpromotion | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_OLDPM -; RUN: opt -S < %s -passes=inline,argpromotion | FileCheck %s --check-prefixes=ARGPROMOTION,ALL_NEWPM +; RUN: opt -S < %s -inline -argpromotion | FileCheck %s --check-prefixes=ALL,ARGPROMOTION_OLDPM +; RUN: opt -S < %s -passes=inline,argpromotion | FileCheck %s --check-prefixes=ALL,ARGPROMOTION_NEWPM +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR %S = type { %S* } ; Inlining should nuke the invoke (and any inlined calls) here even with ; argument promotion running along with it. define void @zot() personality i32 (...)* @wibble { -; ALL-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble -; ALL-NEXT: bb: -; ALL-NEXT: unreachable -; ALL: hoge.exit: -; ALL-NEXT: br label [[BB1:%.*]] -; ALL: bb1: -; ALL-NEXT: unreachable -; ALL: bb2: -; ALL-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } -; ALL-NEXT: cleanup -; ALL-NEXT: unreachable +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: unreachable +; ARGPROMOTION_OLDPM: hoge.exit: +; ARGPROMOTION_OLDPM-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION_OLDPM: bb1: +; ARGPROMOTION_OLDPM-NEXT: unreachable +; ARGPROMOTION_OLDPM: bb2: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION_OLDPM-NEXT: cleanup +; ARGPROMOTION_OLDPM-NEXT: unreachable ; -; ARGPROMOTION-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble -; ARGPROMOTION-NEXT: bb: -; ARGPROMOTION-NEXT: unreachable -; ARGPROMOTION: hoge.exit: -; ARGPROMOTION-NEXT: br label [[BB1:%.*]] -; ARGPROMOTION: bb1: -; ARGPROMOTION-NEXT: unreachable -; ARGPROMOTION: bb2: -; ARGPROMOTION-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } -; ARGPROMOTION-NEXT: cleanup -; ARGPROMOTION-NEXT: unreachable +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: unreachable +; ARGPROMOTION_NEWPM: hoge.exit: +; ARGPROMOTION_NEWPM-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION_NEWPM: bb1: +; ARGPROMOTION_NEWPM-NEXT: unreachable +; ARGPROMOTION_NEWPM: bb2: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION_NEWPM-NEXT: cleanup +; ARGPROMOTION_NEWPM-NEXT: unreachable +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@zot() #0 personality i32 (...)* @wibble +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: call void @hoge() +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb.split: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb1.i2c: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb1: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb2: +; ATTRIBUTOR-NEXT: unreachable ; bb: invoke void @hoge() @@ -45,6 +59,10 @@ } define internal void @hoge() { +; ATTRIBUTOR-LABEL: define {{[^@]+}}@hoge() +; ATTRIBUTOR-NEXT: bb: +; 
ATTRIBUTOR-NEXT: unreachable +; bb: %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney) @@ -61,10 +79,6 @@ ; ARGPROMOTION_NEWPM-NEXT: bb: ; ARGPROMOTION_NEWPM-NEXT: unreachable ; -; ALL_NEWPM-LABEL: define {{[^@]+}}@eggs() -; ALL_NEWPM-NEXT: bb: -; ALL_NEWPM-NEXT: unreachable -; bb: %tmp = call zeroext i1 @barney(i8* %arg) unreachable @@ -76,21 +90,28 @@ } define i32 @test_inf_promote_caller(i32 %arg) { -; ALL-LABEL: define {{[^@]+}}@test_inf_promote_caller -; ALL-SAME: (i32 [[ARG:%.*]]) -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] -; ALL-NEXT: [[TMP1:%.*]] = alloca [[S]] -; ALL-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) -; ALL-NEXT: ret i32 0 +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION_OLDPM-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION_OLDPM-NEXT: ret i32 0 ; -; ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_caller -; ARGPROMOTION-SAME: (i32 [[ARG:%.*]]) -; ARGPROMOTION-NEXT: bb: -; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = alloca [[S]] -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) -; ARGPROMOTION-NEXT: ret i32 0 +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION_NEWPM-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION_NEWPM-NEXT: ret i32 0 +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ATTRIBUTOR-SAME: (i32 [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: bb.split: +; ATTRIBUTOR-NEXT: unreachable ; bb: %tmp = alloca %S @@ -100,26 +121,26 @@ ret i32 0 } -define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { -; ALL-LABEL: define {{[^@]+}}@test_inf_promote_callee -; ALL-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 -; ALL-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] -; ALL-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 -; ALL-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] -; ALL-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) -; ALL-NEXT: ret i32 0 +define internal i32 @test_inf_promote_callee(%S* nocapture readonly %arg, %S* nocapture readonly %arg1) { +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@test_inf_promote_callee +; ARGPROMOTION_OLDPM-SAME: (%S* nocapture readonly [[ARG:%.*]], %S* nocapture readonly [[ARG1:%.*]]) +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 +; ARGPROMOTION_OLDPM-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 +; ARGPROMOTION_OLDPM-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) +; ARGPROMOTION_OLDPM-NEXT: unreachable ; -; 
ARGPROMOTION-LABEL: define {{[^@]+}}@test_inf_promote_callee -; ARGPROMOTION-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) -; ARGPROMOTION-NEXT: bb: -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] -; ARGPROMOTION-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] -; ARGPROMOTION-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) -; ARGPROMOTION-NEXT: ret i32 0 +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@test_inf_promote_callee +; ARGPROMOTION_NEWPM-SAME: (%S* nocapture readonly [[ARG:%.*]], %S* nocapture readonly [[ARG1:%.*]]) +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1]], i32 0, i32 0 +; ARGPROMOTION_NEWPM-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG]], i32 0, i32 0 +; ARGPROMOTION_NEWPM-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) +; ARGPROMOTION_NEWPM-NEXT: unreachable ; bb: %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0 @@ -127,8 +148,7 @@ %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0 %tmp4 = load %S*, %S** %tmp3 %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) - - ret i32 0 + unreachable } declare i32 @wibble(...) diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll --- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll @@ -1,35 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=3 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %union.u = type { x86_fp80 } %struct.s = type { double, i16, i8, [5 x i8] } - -@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16 - %struct.Foo = type { i32, i64 } -@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 -define void @run() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@run() +define x86_fp80 @run(%struct.Foo* %a, %struct.s* %b, i8* %i8, i64* %i64a, i64* %i64b) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@run +; ARGPROMOTION-SAME: (%struct.Foo* [[A:%.*]], %struct.s* [[B:%.*]], i8* [[I8:%.*]], i64* [[I64A:%.*]], i64* [[I64B:%.*]]) ; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 -; ARGPROMOTION-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = 
call i64 @AccessPaddingOfStruct(%struct.Foo* @a) -; ARGPROMOTION-NEXT: [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a) -; ARGPROMOTION-NEXT: ret void +; ARGPROMOTION-NEXT: [[BC:%.*]] = bitcast %struct.s* [[B]] to %union.u* +; ARGPROMOTION-NEXT: [[V0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 [[BC]]) +; ARGPROMOTION-NEXT: store i8 [[V0]], i8* [[I8]] +; ARGPROMOTION-NEXT: [[BC_0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* [[BC]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[BC_0_VAL:%.*]] = load x86_fp80, x86_fp80* [[BC_0]] +; ARGPROMOTION-NEXT: [[V1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[BC_0_VAL]]) +; ARGPROMOTION-NEXT: [[V2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* [[A]]) +; ARGPROMOTION-NEXT: store i64 [[V2]], i64* [[I64A]] +; ARGPROMOTION-NEXT: [[V3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* [[A]]) +; ARGPROMOTION-NEXT: store i64 [[V3]], i64* [[I64B]] +; ARGPROMOTION-NEXT: ret x86_fp80 [[V1]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@run +; ATTRIBUTOR-SAME: (%struct.Foo* [[A:%.*]], %struct.s* nocapture readonly [[B:%.*]], i8* nocapture writeonly [[I8:%.*]], i64* nocapture writeonly [[I64A:%.*]], i64* nocapture readnone [[I64B:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[BC:%.*]] = bitcast %struct.s* [[B]] to %union.u* +; ATTRIBUTOR-NEXT: [[V0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* nocapture readonly byval align 16 [[BC]]) +; ATTRIBUTOR-NEXT: store i8 [[V0]], i8* [[I8]] +; ATTRIBUTOR-NEXT: [[V2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* nocapture [[A]]) +; ATTRIBUTOR-NEXT: store i64 [[V2]], i64* [[I64A]] +; ATTRIBUTOR-NEXT: [[V3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* [[A]]) +; ATTRIBUTOR-NEXT: unreachable +; ATTRIBUTOR: entry.split: +; ATTRIBUTOR-NEXT: unreachable ; entry: - tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) - tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) - call i64 @AccessPaddingOfStruct(%struct.Foo* @a) - call i64 @CaptureAStruct(%struct.Foo* @a) - ret void + %bc = bitcast %struct.s* %b to %union.u* + %v0 = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %bc) + store i8 %v0, i8* %i8 + %v1 = tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %bc) + %v2 = call i64 @AccessPaddingOfStruct(%struct.Foo* %a) + store i64 %v2, i64* %i64a + %v3 = call i64 @CaptureAStruct(%struct.Foo* %a) + store i64 %v3, i64* %i64b + ret x86_fp80 %v1 } define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { @@ -41,6 +60,14 @@ ; ARGPROMOTION-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] ; ARGPROMOTION-NEXT: ret i8 [[RESULT]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely +; ATTRIBUTOR-SAME: (%union.u* nocapture readonly byval align 16 [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[BITCAST:%.*]] = bitcast %union.u* [[ARG]] to %struct.s* +; ATTRIBUTOR-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2 +; ATTRIBUTOR-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] +; ATTRIBUTOR-NEXT: ret i8 [[RESULT]] +; entry: %bitcast = bitcast %union.u* %arg to %struct.s* %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 @@ -69,6 +96,12 @@ ; ARGPROMOTION-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* ; ARGPROMOTION-NEXT: [[V:%.*]] = load i64, i64* [[P]] ; ARGPROMOTION-NEXT: ret i64 [[V]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@AccessPaddingOfStruct +; 
ATTRIBUTOR-SAME: (%struct.Foo* nocapture readonly byval [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A]] to i64* +; ATTRIBUTOR-NEXT: [[V:%.*]] = load i64, i64* [[P]] +; ATTRIBUTOR-NEXT: ret i64 [[V]] ; %p = bitcast %struct.Foo* %a to i64* %v = load i64, i64* %p @@ -88,6 +121,18 @@ ; ARGPROMOTION-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 ; ARGPROMOTION-NEXT: br label [[LOOP]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@CaptureAStruct +; ATTRIBUTOR-SAME: (%struct.Foo* writeonly byval [[A:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* +; ATTRIBUTOR-NEXT: br label [[LOOP:%.*]] +; ATTRIBUTOR: loop: +; ATTRIBUTOR-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ] +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ] +; ATTRIBUTOR-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8 +; ATTRIBUTOR-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 +; ATTRIBUTOR-NEXT: br label [[LOOP]] +; entry: %a_ptr = alloca %struct.Foo* br label %loop diff --git a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll @@ -1,44 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.ss = type { i32, i32 } -; Argpromote + sroa should change this to passing the two integers by value. -define internal i32 @f(%struct.ss* inalloca %s) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[R:%.*]] = add i32 [[S_0_0_VAL]], [[S_0_1_VAL]] -; ARGPROMOTION-NEXT: ret i32 [[R]] -; -entry: - %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 - %a = load i32, i32* %f0, align 4 - %b = load i32, i32* %f1, align 4 - %r = add i32 %a, %b - ret i32 %r -} - -define i32 @main() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@main() local_unnamed_addr -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2) -; ARGPROMOTION-NEXT: ret i32 [[R]] -; -entry: - %S = alloca inalloca %struct.ss - %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 - store i32 1, i32* %f0, align 4 - store i32 2, i32* %f1, align 4 - %r = call i32 @f(%struct.ss* inalloca %S) - ret i32 %r -} - ; Argpromote can't promote %a because of the icmp use. 
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { ; ARGPROMOTION-LABEL: define {{[^@]+}}@g @@ -59,6 +27,10 @@ ; ARGPROMOTION-NEXT: [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]]) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %S = alloca inalloca %struct.ss %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) diff --git a/llvm/test/Transforms/ArgumentPromotion/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/invalidation.ll --- a/llvm/test/Transforms/ArgumentPromotion/invalidation.ll +++ b/llvm/test/Transforms/ArgumentPromotion/invalidation.ll @@ -7,7 +7,8 @@ ; invalidation this will crash in the second printer as it tries to reuse ; now-invalid demanded bits. ; -; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=6 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR @G = constant i32 0 @@ -17,6 +18,12 @@ ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: ret i32 [[X_VAL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@a +; ATTRIBUTOR-SAME: (i32* nocapture nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = load i32, i32* @G, align 4 +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = load i32, i32* %x ret i32 %v @@ -29,6 +36,11 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = call i32 @a(i32 [[G_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@b() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = call i32 @a(i32* nonnull align 4 dereferenceable(4) @G) +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = call i32 @a(i32* @G) ret i32 %v @@ -43,6 +55,13 @@ ; ARGPROMOTION-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] ; ARGPROMOTION-NEXT: ret i32 [[RESULT]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@c() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V1:%.*]] = call i32 @a(i32* nonnull align 4 dereferenceable(4) @G) +; ATTRIBUTOR-NEXT: [[V2:%.*]] = call i32 @b() +; ATTRIBUTOR-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] +; ATTRIBUTOR-NEXT: ret i32 [[RESULT]] +; entry: %v1 = call i32 @a(i32* @G) %v2 = call i32 @b() diff --git a/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll --- a/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll +++ b/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Don't promote paramaters of/arguments to naked functions @@ -11,18 +12,23 @@ ; ARGPROMOTION-NEXT: [[CALL:%.*]] = call i32 @foo(i32* @g) ; ARGPROMOTION-NEXT: ret i32 [[CALL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call i32 @foo(i32* nonnull 
align 4 dereferenceable(4) @g) +; ATTRIBUTOR-NEXT: ret i32 [[CALL]] +; entry: %call = call i32 @foo(i32* @g) ret i32 %call } define internal i32 @foo(i32*) #0 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo -; ARGPROMOTION-SAME: (i32* [[TMP0:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; ARGPROMOTION-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() -; ARGPROMOTION-NEXT: unreachable +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i32* [[TMP0:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; ALL-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; ALL-NEXT: unreachable ; entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll --- a/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll +++ b/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; ArgumentPromotion should preserve the default function address space ; from the data layout. @@ -11,21 +12,27 @@ define i32 @bar() { ; ARGPROMOTION-LABEL: define {{[^@]+}}@bar() addrspace(1) ; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo() +; ARGPROMOTION-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo(i32* @g) ; ARGPROMOTION-NEXT: ret i32 [[CALL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar() addrspace(1) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo(i32* nonnull align 4 dereferenceable(4) @g) +; ATTRIBUTOR-NEXT: ret i32 [[CALL]] +; entry: %call = call i32 @foo(i32* @g) ret i32 %call } -define internal i32 @foo(i32*) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() addrspace(1) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; ARGPROMOTION-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() -; ARGPROMOTION-NEXT: unreachable +define internal i32 @foo(i32*) naked { +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i32* [[TMP0:%.*]]) addrspace(1) +; ALL-NEXT: entry: +; ALL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; ALL-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; ALL-NEXT: unreachable ; entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/pr27568.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr27568.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr27568.ll @@ -1,14 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false 
-attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; RUN: opt -S -debugify -o /dev/null < %s target triple = "x86_64-pc-windows-msvc" define internal void @callee(i8*) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: call void @thunk() -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@callee() +; ALL-NEXT: entry: +; ALL-NEXT: call void @thunk() +; ALL-NEXT: ret void ; entry: call void @thunk() @@ -16,16 +17,16 @@ } define void @test1() personality i32 (...)* @__CxxFrameHandler3 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: invoke void @thunk() -; ARGPROMOTION-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] -; ARGPROMOTION: out: -; ARGPROMOTION-NEXT: ret void -; ARGPROMOTION: cpad: -; ARGPROMOTION-NEXT: [[PAD:%.*]] = cleanuppad within none [] -; ARGPROMOTION-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] -; ARGPROMOTION-NEXT: cleanupret from [[PAD]] unwind to caller +; ALL-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 +; ALL-NEXT: entry: +; ALL-NEXT: invoke void @thunk() +; ALL-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] +; ALL: out: +; ALL-NEXT: ret void +; ALL: cpad: +; ALL-NEXT: [[PAD:%.*]] = cleanuppad within none [] +; ALL-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] +; ALL-NEXT: cleanupret from [[PAD]] unwind to caller ; entry: invoke void @thunk() diff --git a/llvm/test/Transforms/ArgumentPromotion/pr3085.ll b/llvm/test/Transforms/ArgumentPromotion/pr3085.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr3085.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr3085.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -disable-output -loop-extract-single -loop-rotate -loop-reduce -argpromotion +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s ; PR 3085 %struct.Lit = type { i8 } diff --git a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR 32917 @b = common local_unnamed_addr global i32 0, align 4 @@ -14,6 +15,13 @@ ; ARGPROMOTION-NEXT: [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4 ; ARGPROMOTION-NEXT: call fastcc void @fn1(i32 [[DOTIDX_VAL]]) ; ARGPROMOTION-NEXT: ret i32 undef +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; ATTRIBUTOR-NEXT: call fastcc void @fn1(i32* [[TMP3]]) +; ATTRIBUTOR-NEXT: ret i32 undef ; %1 = load i32, i32* @b, align 4 %2 = sext i32 %1 to i64 @@ -27,6 +35,13 @@ ; ARGPROMOTION-SAME: 
(i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr ; ARGPROMOTION-NEXT: store i32 [[DOT18446744073709551615_VAL]], i32* @a, align 4 ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn1 +; ATTRIBUTOR-SAME: (i32* nocapture readonly [[TMP0:%.*]]) unnamed_addr +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 -1 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; ATTRIBUTOR-NEXT: store i32 [[TMP3]], i32* @a, align 4 +; ATTRIBUTOR-NEXT: ret void ; %2 = getelementptr inbounds i32, i32* %0, i64 -1 %3 = load i32, i32* %2, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='function(dse),attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Fix for PR33641. ArgumentPromotion removed the argument to bar but left the call to ; dbg.value which still used the removed argument. @@ -7,12 +8,13 @@ ; The %p argument should be removed, and the use of it in dbg.value should be ; changed to undef. + %p_t = type i16* %fun_t = type void (%p_t)* define void @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@foo() +; ALL-NEXT: ret void ; %tmp = alloca %fun_t store %fun_t @bar, %fun_t* %tmp diff --git a/llvm/test/Transforms/ArgumentPromotion/profile.ll b/llvm/test/Transforms/ArgumentPromotion/profile.ll --- a/llvm/test/Transforms/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/ArgumentPromotion/profile.ll @@ -1,13 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -argpromotion -mem2reg -S < %s | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt -argpromotion -mem2reg -S < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Checks if !prof metadata is corret in deadargelim. 
define void @caller() #0 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@caller() -; ARGPROMOTION-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@caller() +; ALL-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 +; ALL-NEXT: ret void ; %x = alloca i32 store i32 42, i32* %x @@ -16,10 +17,10 @@ } define internal void @promote_i32_ptr(i32* %xp) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@promote_i32_ptr -; ARGPROMOTION-SAME: (i32 [[XP_VAL:%.*]]) -; ARGPROMOTION-NEXT: call void @use_i32(i32 [[XP_VAL]]) -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@promote_i32_ptr +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: call void @use_i32(i32 [[TMP0]]) +; ALL-NEXT: ret void ; %x = load i32, i32* %xp call void @use_i32(i32 %x) diff --git a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll --- a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll +++ b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll @@ -1,12 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR17906 ; When we promote two arguments in a single function with different types, ; before the fix, we used the same tag for the newly-created two loads. ; This testing case makes sure that we correctly transfer the tbaa tags from the ; original loads to the newly-created loads when promoting pointer arguments. +; +; TODO: This test doesn't work with the ATTRIBUTOR as it will not promote the arguments but propagate them. 
 @a = global i32* null, align 8
 @e = global i32** @a, align 8
@@ -23,6 +26,16 @@
 ; ARGPROMOTION-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !0
 ; ARGPROMOTION-NEXT: ret void
 ;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn
+; ATTRIBUTOR-SAME: (i32* nocapture nonnull readonly align 4 dereferenceable(4) [[P1:%.*]])
+; ATTRIBUTOR-NEXT: entry:
+; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i64, i64* @c, align 8, !tbaa !0
+; ATTRIBUTOR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
+; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* @g, align 4, !tbaa !4
+; ATTRIBUTOR-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP1]] to i8
+; ATTRIBUTOR-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !6
+; ATTRIBUTOR-NEXT: ret void
+;
 entry:
 %0 = load i64, i64* %p2, align 8, !tbaa !1
 %conv = trunc i64 %0 to i32
@@ -44,6 +57,15 @@
 ; ARGPROMOTION-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]])
 ; ARGPROMOTION-NEXT: ret i32 0
 ;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@main()
+; ATTRIBUTOR-NEXT: entry:
+; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !7
+; ATTRIBUTOR-NEXT: store i32* @g, i32** [[TMP0]], align 8, !tbaa !7
+; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !7
+; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 4, !tbaa !4
+; ATTRIBUTOR-NEXT: call fastcc void @fn(i32* nonnull align 4 dereferenceable(4) @g)
+; ATTRIBUTOR-NEXT: ret i32 0
+;
 entry:
 %0 = load i32**, i32*** @e, align 8, !tbaa !8
 store i32* @g, i32** %0, align 8, !tbaa !8
diff --git a/llvm/test/Transforms/ArgumentPromotion/sret.ll b/llvm/test/Transforms/ArgumentPromotion/sret.ll
--- a/llvm/test/Transforms/ArgumentPromotion/sret.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/sret.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
-; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
+; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR

 target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc"
@@ -11,6 +12,16 @@
 ; ARGPROMOTION-NEXT: [[AB:%.*]] = add i32 [[THIS_0_0_VAL]], [[THIS_0_1_VAL]]
 ; ARGPROMOTION-NEXT: store i32 [[AB]], i32* [[R]]
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@add
+; ATTRIBUTOR-SAME: ({ i32, i32 }* noalias nocapture nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
+; ATTRIBUTOR-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0
+; ATTRIBUTOR-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1
+; ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8
+; ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[BP]]
+; ATTRIBUTOR-NEXT: [[AB:%.*]] = add i32 [[A]], [[B]]
+; ATTRIBUTOR-NEXT: store i32 [[AB]], i32* [[R]], align 4
+; ATTRIBUTOR-NEXT: ret void
 ;
 %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0
 %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1
@@ -31,6 +42,12 @@
 ; ARGPROMOTION-NEXT: [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]]
 ; ARGPROMOTION-NEXT: call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]])
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@f()
+; ATTRIBUTOR-NEXT: [[R:%.*]] = alloca i32
+; ATTRIBUTOR-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }
+; ATTRIBUTOR-NEXT: call void @add({ i32, i32 }* noalias nocapture nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nonnull sret writeonly align 4 dereferenceable(4) [[R]])
+; ATTRIBUTOR-NEXT: ret void
 ;
 %r = alloca i32
 %pair = alloca {i32, i32}
diff --git a/llvm/test/Transforms/ArgumentPromotion/tail.ll b/llvm/test/Transforms/ArgumentPromotion/tail.ll
--- a/llvm/test/Transforms/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/tail.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -argpromotion -S -o - | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
-; RUN: opt %s -passes=argpromotion -S -o - | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
+; RUN: opt %s -argpromotion -sroa -S -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt %s -passes='argpromotion,function(sroa)' -S -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR
 ; PR14710

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -19,6 +20,16 @@
 ; ARGPROMOTION-NEXT: store i32 [[DATA_1]], i32* [[DOT1]]
 ; ARGPROMOTION-NEXT: [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]])
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar
+; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; ATTRIBUTOR-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
+; ATTRIBUTOR-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
+; ATTRIBUTOR-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]]
+; ATTRIBUTOR-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
+; ATTRIBUTOR-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]]
+; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* nonnull [[DATA_PRIV]])
+; ATTRIBUTOR-NEXT: ret void
 ;
 tail call i8* @foo(%pair* %Data)
 ret void
@@ -33,6 +44,15 @@
 ; ARGPROMOTION-NEXT: [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]]
 ; ARGPROMOTION-NEXT: call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]])
 ; ARGPROMOTION-NEXT: ret void
+;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@zed
+; ATTRIBUTOR-SAME: (%pair* nonnull byval [[DATA:%.*]])
+; ATTRIBUTOR-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32*
+; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]]
+; ATTRIBUTOR-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 1
+; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]]
+; ATTRIBUTOR-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]])
+; ATTRIBUTOR-NEXT: ret void
 ;
 call void @bar(%pair* byval %Data)
 ret void
diff --git a/llvm/test/Transforms/ArgumentPromotion/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/variadic.ll
--- a/llvm/test/Transforms/ArgumentPromotion/variadic.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/variadic.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
-; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ARGPROMOTION,ALL
+; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION
+; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR

 ; Unused arguments from variadic functions cannot be eliminated as that changes
 ; their classification according to the SysV amd64 ABI. Clang and other frontends
@@ -16,11 +17,11 @@
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
-; ARGPROMOTION-LABEL: define {{[^@]+}}@main
-; ARGPROMOTION-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]])
-; ARGPROMOTION-NEXT: entry:
-; ARGPROMOTION-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
-; ARGPROMOTION-NEXT: ret i32 0
+; ALL-LABEL: define {{[^@]+}}@main
+; ALL-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]])
+; ALL-NEXT: entry:
+; ALL-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+; ALL-NEXT: ret i32 0
 ;
 entry:
 tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
@@ -34,6 +35,11 @@
 ; ARGPROMOTION-NEXT: entry:
 ; ARGPROMOTION-NEXT: ret void
 ;
+; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_t0f
+; ATTRIBUTOR-SAME: (i8* noalias nocapture nonnull readnone [[TP13:%.*]], i8* noalias nocapture nonnull readnone [[TP14:%.*]], i8* noalias nocapture nonnull readnone [[TP15:%.*]], i8* noalias nocapture nonnull readnone [[TP16:%.*]], i8* noalias nocapture nonnull readnone [[TP17:%.*]], ...)
+; ATTRIBUTOR-NEXT: entry:
+; ATTRIBUTOR-NEXT: ret void
+;
 entry:
 ret void
 }
diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/FunctionAttrs/callbacks.ll
--- a/llvm/test/Transforms/FunctionAttrs/callbacks.ll
+++ b/llvm/test/Transforms/FunctionAttrs/callbacks.ll
@@ -24,7 +24,7 @@
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK-NEXT: store i32 42, i32* [[B]], align 32
 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
-; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
 ; CHECK-NEXT: ret void
 ;
 entry:
@@ -61,5 +61,112 @@
 declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...)

+; Test 1
+;
+; Similar to test 0 but with some additional annotations (noalias/nocapture) to make sure
+; we deduce and propagate noalias and others properly.
+
+define void @t1_caller(i32* noalias %a) {
+; CHECK-LABEL: define {{[^@]+}}@t1_caller
+; CHECK-SAME: (i32* noalias nocapture [[A:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32
+; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: store i32 42, i32* [[B]], align 32
+; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %b = alloca i32, align 32
+  %c = alloca i32*, align 64
+  %ptr = alloca i32, align 128
+  %0 = bitcast i32* %b to i8*
+  store i32 42, i32* %b, align 4
+  store i32* %b, i32** %c, align 8
+  call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+  ret void
+}
+
+; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
+; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
+define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
+; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee
+; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
+; CHECK-NEXT: tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %ptr_val = load i32, i32* %ptr, align 8
+  store i32 %ptr_val, i32* %is_not_null
+  %0 = load i32*, i32** %c, align 8
+  tail call void @t1_check(i32* %a, i64 %b, i32* %0)
+  ret void
+}
+
+declare void @t1_check(i32* nocapture align 256, i64, i32* nocapture) nosync
+
+declare !callback !0 void @t1_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+
+; Test 2
+;
+; Similar to test 1 but checking that the noalias is only placed if potential synchronization through @t2_check is preserved.
+
+define void @t2_caller(i32* noalias %a) {
+; CHECK-LABEL: define {{[^@]+}}@t2_caller
+; CHECK-SAME: (i32* noalias nocapture [[A:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = alloca i32, align 32
+; CHECK-NEXT: [[C:%.*]] = alloca i32*, align 64
+; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 128
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT: store i32 42, i32* [[B]], align 32
+; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64
+; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture [[A:%.*]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %b = alloca i32, align 32
+  %c = alloca i32*, align 64
+  %ptr = alloca i32, align 128
+  %0 = bitcast i32* %b to i8*
+  store i32 42, i32* %b, align 4
+  store i32* %b, i32** %c, align 8
+  call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+  ret void
+}
+
+; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
+; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
+;
+; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls.
+define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
+; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee
+; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly dereferenceable(4) [[PTR:%.*]], i32* nocapture [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
+; CHECK-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
+; CHECK-NEXT: tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+  %ptr_val = load i32, i32* %ptr, align 8
+  store i32 %ptr_val, i32* %is_not_null
+  %0 = load i32*, i32** %c, align 8
+  tail call void @t2_check(i32* %a, i64 %b, i32* %0)
+  ret void
+}
+
+declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture)
+
+declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+
 !0 = !{!1}
 !1 = !{i64 2, i64 -1, i64 -1, i1 true}
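
; Reviewer note (not part of the patch): both callback tests rely on the
; !callback encoding !{i64 2, i64 -1, i64 -1, i1 true} at the end of the file.
; It states that argument 2 of a broker call site is the callback callee, that
; the callee's first two parameters are supplied by the broker itself (-1 marks
; them as unknown at the call site), and that the broker's trailing variadic
; arguments are forwarded as the remaining callee parameters. A minimal,
; self-contained sketch of that shape, using hypothetical @example_* names:
;
;   declare !callback !10 void @example_broker(i32*, i32*, void (i32*, i32*, ...)*, ...)
;
;   define internal void @example_callee(i32* %from_broker0, i32* %from_broker1, i32* %payload) {
;   entry:
;     ret void
;   }
;
;   define void @example_caller(i32* %payload) {
;   entry:
;     ; %payload is the only forwarded vararg; the broker provides the callee's
;     ; first two parameters itself.
;     call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @example_broker(i32* null, i32* null, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @example_callee to void (i32*, i32*, ...)*), i32* %payload)
;     ret void
;   }
;
;   !10 = !{!11}
;   !11 = !{i64 2, i64 -1, i64 -1, i1 true}
;
; Under this encoding the Attributor can propagate attributes from the forwarded
; call-site operands (here %a, 99, and %c) to the corresponding parameters of the
; callback callee, which is what the CHECK lines for @t1_callback_callee and
; @t2_callback_callee verify.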