diff --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h --- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h +++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h @@ -14,6 +14,7 @@ #include "llvm/IR/PassManager.h" namespace llvm { +class TargetTransformInfo; /// Argument promotion pass. /// @@ -26,6 +27,17 @@ public: ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {} + /// Check if callers and the callee \p F agree how promoted arguments would be + /// passed. The ones that they do not agree on are eliminated from the sets but + /// the return value has to be observed as well. + static bool areFunctionArgsABICompatible( + const Function &F, const TargetTransformInfo &TTI, + SmallPtrSetImpl &ArgsToPromote, + SmallPtrSetImpl &ByValArgsToTransform); + + /// Checks if a type could have padding bytes. + static bool isDenselyPacked(Type *type, const DataLayout &DL); + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); }; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -102,6 +102,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" @@ -620,6 +621,10 @@ TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) { return AG.getAnalysis(F); } + const TargetTransformInfo * + getTargetTransformInfoForFunction(const Function &F) { + return AG.getAnalysis(F); + } /// Return AliasAnalysis Result for function \p F. 
AAResults *getAAResultsForFunction(const Function &F) { @@ -1965,6 +1970,40 @@ static const char ID; }; +/// An abstract interface for privatizability. +/// +/// A pointer is privatizable if it can be replaced by a new, private one. +/// Privatizing pointer reduces the use count, interaction between unrelated +/// code parts. +struct AAPrivatizablePtr : public StateWrapper, + public IRPosition { + AAPrivatizablePtr(const IRPosition &IRP) : IRPosition(IRP) {} + + /// Returns true if pointer privatization is assumed to be possible. + bool isAssumedPrivatizablePtr() const { return getAssumed(); } + + /// Returns true if pointer privatization is known to be possible. + bool isKnownPrivatizablePtr() const { return getKnown(); } + + /// Return the type we can chose for a private copy of the underlying + /// value. None means it is not clear yet, nullptr means there is none. + virtual Optional getPrivatizableType() const = 0; + + /// Return an IR position, see struct IRPosition. + /// + ///{ + IRPosition &getIRPosition() { return *this; } + const IRPosition &getIRPosition() const { return *this; } + ///} + + /// Create an abstract attribute view for the position \p IRP. + static AAPrivatizablePtr &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Unique ID (due to the unique address) + static const char ID; +}; + /// An abstract interface for all memory related attributes. 
struct AAMemoryBehavior : public IRAttributeisSized()) return false; @@ -842,12 +841,14 @@ return false; } -static bool areFunctionArgsABICompatible( +bool ArgumentPromotionPass::areFunctionArgsABICompatible( const Function &F, const TargetTransformInfo &TTI, SmallPtrSetImpl &ArgsToPromote, SmallPtrSetImpl &ByValArgsToTransform) { for (const Use &U : F.uses()) { CallSite CS(U.getUser()); + if (!CS) + return false; const Function *Caller = CS.getCaller(); const Function *Callee = CS.getCalledFunction(); if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) || @@ -949,9 +950,9 @@ // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe, if the passed value is densely // packed or if we can prove the padding bytes are never accessed. - bool isSafeToPromote = - PtrArg->hasByValAttr() && - (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); + bool isSafeToPromote = PtrArg->hasByValAttr() && + (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) || + !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast(AgTy)) { if (MaxElements > 0 && STy->getNumElements() > MaxElements) { @@ -1009,8 +1010,8 @@ if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; - if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote, - ByValArgsToTransform)) + if (!ArgumentPromotionPass::areFunctionArgsABICompatible( + *F, TTI, ArgsToPromote, ByValArgsToTransform)) return nullptr; return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -29,11 +29,14 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include 
"llvm/IR/NoFolder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -192,12 +195,81 @@ // If no callbacks were found, or none used the underlying call site operand // exclusively, use the direct callee argument if available. const Function *Callee = ICS.getCalledFunction(); - if (Callee && Callee->arg_size() > ArgNo) + if (Callee && Callee->arg_size() > unsigned(ArgNo)) return Callee->getArg(ArgNo); return nullptr; } +/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and +/// advanced by \p Offset bytes. To aid later analysis the method tries to build +/// getelement pointer instructions that traverse the natural type of \p Ptr if +/// possible. If that fails, the remaining offset is adjusted byte-wise, hence +/// through a cast to i8*. +/// +/// TODO: This could probably live somewhere more prominantly if it doesn't +/// already exist. +static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset, + IRBuilder &IRB, const DataLayout &DL) { + assert(Offset >= 0 && "Negative offset not supported yet!"); + LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset + << "-bytes as " << *ResTy << "\n"); + + // The initial type we are trying to traverse to get nice GEPs. 
+ Type *Ty = Ptr->getType(); + + SmallVector Indices; + std::string GEPName = Ptr->getName(); + while (Offset) { + uint64_t Idx, Rem; + + if (auto *STy = dyn_cast(Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + if (int64_t(SL->getSizeInBytes()) < Offset) + break; + Idx = SL->getElementContainingOffset(Offset); + assert(Idx < STy->getNumElements() && "Offset calculation error!"); + Rem = Offset - SL->getElementOffset(Idx); + Ty = STy->getElementType(Idx); + } else if (auto *PTy = dyn_cast(Ty)) { + Ty = PTy->getElementType(); + if (!Ty->isSized()) + break; + uint64_t ElementSize = DL.getTypeAllocSize(Ty); + assert(ElementSize && "Expected type with size!"); + Idx = Offset / ElementSize; + Rem = Offset % ElementSize; + } else { + // Non-aggregate type, we cast and make byte-wise progress now. + break; + } + + LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset + << " Idx: " << Idx << " Rem: " << Rem << "\n"); + + GEPName += "." + std::to_string(Idx); + Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx)); + Offset = Rem; + } + + // Create a GEP if we collected indices above. + if (Indices.size()) + Ptr = IRB.CreateGEP(Ptr, Indices, GEPName); + + // If an offset is left we use byte-wise adjustment. + if (Offset) { + Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy()); + Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset), + GEPName + ".b" + Twine(Offset)); + } + + // Ensure the result has the requested type. + Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast"); + + LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n"); + return Ptr; +} + /// Recursively visit all values that might become \p IRP at some point. This /// will be done by looking through cast instructions, selects, phis, and calls /// with the "returned" attribute. 
Once we cannot look through the value any @@ -3602,6 +3674,433 @@ }; } // namespace +/// ----------------------- Privatizable Pointers ------------------------------ +struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { + AAPrivatizablePtrImpl(const IRPosition &IRP) + : AAPrivatizablePtr(IRP), PrivatizableType(llvm::None) {} + + ChangeStatus indicatePessimisticFixpoint() override { + AAPrivatizablePtr::indicatePessimisticFixpoint(); + PrivatizableType = nullptr; + return ChangeStatus::CHANGED; + } + + /// Identify the type we can chose for a private copy of the underlying + /// argument. None means it is not clear yet, nullptr means there is none. + virtual Optional identifyPrivatizableType(Attributor &A) = 0; + + /// Return a privatizable type that encloses both T0 and T1. + /// TODO: This is merely a stub for now as we should manage a mapping as well. + Optional combineTypes(Optional T0, Optional T1) { + if (!T0.hasValue()) + return T1; + if (!T1.hasValue()) + return T0; + if (T0 == T1) + return T0; + return nullptr; + } + + Optional getPrivatizableType() const override { + return PrivatizableType; + } + + const std::string getAsStr() const override { + return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]"; + } + +protected: + Optional PrivatizableType; +}; + +// TODO: Do this for call site arguments (probably also other values) as well. + +struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { + AAPrivatizablePtrArgument(const IRPosition &IRP) + : AAPrivatizablePtrImpl(IRP) {} + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) + Optional identifyPrivatizableType(Attributor &A) override { + // If this is a byval argument and we know all the call sites (so we can + // rewrite them), there is no need to check them explicitly. 
+ if (getIRPosition().hasAttr(Attribute::ByVal) && + A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this, + true)) + return getAssociatedValue().getType()->getPointerElementType(); + + Optional Ty; + unsigned ArgNo = getIRPosition().getArgNo(); + + // Make sure the associated call site argument has the same type at all call + // sites and it is an allocation we know is safe to privatize, for now that + // means we only allow alloca instructions. + // TODO: We can additionally analyze the accesses in the callee to create + // the type from that information instead. That is a little more + // involved and will be done in a follow up patch. + auto CallSiteCheck = [&](AbstractCallSite ACS) { + IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); + // Check if a coresponding argument was found or if it is one not + // associated (which can happen for callback calls). + if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) + return false; + + // Check that all call sites agree on a type. + auto &PrivCSArgAA = A.getAAFor(*this, ACSArgPos); + Optional CSTy = PrivCSArgAA.getPrivatizableType(); + + LLVM_DEBUG({ + dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: "; + if (CSTy.hasValue() && CSTy.getValue()) + CSTy.getValue()->print(dbgs()); + else if (CSTy.hasValue()) + dbgs() << ""; + else + dbgs() << ""; + }); + + Ty = combineTypes(Ty, CSTy); + + LLVM_DEBUG({ + dbgs() << " : New Type: "; + if (Ty.hasValue() && Ty.getValue()) + Ty.getValue()->print(dbgs()); + else if (Ty.hasValue()) + dbgs() << ""; + else + dbgs() << ""; + dbgs() << "\n"; + }); + + return !Ty.hasValue() || Ty.getValue(); + }; + + if (!A.checkForAllCallSites(CallSiteCheck, *this, true)) + return nullptr; + return Ty; + } + + /// See AbstractAttribute::updateImpl(...). 
+ ChangeStatus updateImpl(Attributor &A) override { + PrivatizableType = identifyPrivatizableType(A); + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + if (!PrivatizableType.getValue()) + return indicatePessimisticFixpoint(); + + // Avoid arguments with padding for now. + if (!ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(), + A.getInfoCache().getDL())) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n"); + return indicatePessimisticFixpoint(); + } + + // Verify callee and caller agree on how the promoted argument would be + // passed. + // TODO: We should re-implement this and not reuse the ArgumentPromotion + // version of this. + Function &Fn = *getIRPosition().getAnchorScope(); + SmallPtrSet ArgsToPromote, Dummy; + ArgsToPromote.insert(getAssociatedArgument()); + const auto *TTI = A.getInfoCache().getTargetTransformInfoForFunction(Fn); + if (!TTI || + !ArgumentPromotionPass::areFunctionArgsABICompatible( + Fn, *TTI, ArgsToPromote, Dummy) || + ArgsToPromote.empty()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected\n"); + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// Given a type to private \p PrivType, collect the constituates (which are + /// used) in \p ReplacementTypes. + static void + identifyReplacementTypes(Type *PrivType, + SmallVectorImpl &ReplacementTypes) { + // TODO: For now we expand the privatization type to the fullest which can + // lead to dead arguments that need to be removed later. + assert(PrivType && "Expected privatizable type!"); + + // Traverse the type, extract constituate types on the outermost level. 
+    if (auto *PrivStructType = dyn_cast(PrivType)) {
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++)
+        ReplacementTypes.push_back(PrivStructType->getElementType(u));
+    } else if (auto *PrivArrayType = dyn_cast(PrivType)) {
+      ReplacementTypes.append(PrivArrayType->getNumElements(),
+                              PrivArrayType->getElementType());
+    } else {
+      ReplacementTypes.push_back(PrivType);
+    }
+  }
+
+  /// Initialize \p Base according to the type \p PrivType at position \p IP.
+  /// The values needed are taken from the arguments of \p F starting at
+  /// position \p ArgNo.
+  ///
+  /// A struct type consumes one argument per member, an array type one
+  /// argument per element, and any other type exactly one argument. Each
+  /// argument is stored to the corresponding location inside \p Base.
+  static void createInitialization(Type *PrivType, Value &Base, Function &F,
+                                   unsigned ArgNo, Instruction &IP) {
+    assert(PrivType && "Expected privatizable type!");
+
+    IRBuilder IRB(&IP);
+    const DataLayout &DL = F.getParent()->getDataLayout();
+
+    // Traverse the type, build GEPs and stores.
+    if (auto *PrivStructType = dyn_cast(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        // Member u lives at its struct-layout offset; the store needs a
+        // pointer to the member type.
+        Type *PointeePtrTy = PrivStructType->getElementType(u)->getPointerTo();
+        Value *Ptr =
+            constructPointer(PointeePtrTy, &Base,
+                             PrivStructLayout->getElementOffset(u), IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else if (auto *PrivArrayType = dyn_cast(PrivType)) {
+      Type *PointeeTy = PrivArrayType->getElementType();
+      // constructPointer takes the type of the resulting *pointer*, so hand it
+      // the pointer-to-element type rather than the element type itself.
+      Type *PointeePtrTy = PointeeTy->getPointerTo();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr =
+            constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else {
+      // Neither struct nor array: the single argument is the whole value.
+      new StoreInst(F.getArg(ArgNo), &Base, &IP);
+    }
+  }
+
+  /// Extract values from \p Base according to the type \p PrivType at the
+  /// call position \p ACS. The values are appended to \p ReplacementValues.
+  /// Struct types yield one load per member, array types one load per
+  /// element, and every other type a single load of the whole value. The
+  /// number and order of appended values therefore matches the types
+  /// collected by identifyReplacementTypes for the same \p PrivType.
+  void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
+                               Value *Base,
+                               SmallVectorImpl &ReplacementValues) {
+    assert(Base && "Expected base value!");
+    assert(PrivType && "Expected privatizable type!");
+    Instruction *IP = ACS.getInstruction();
+
+    IRBuilder IRB(IP);
+    const DataLayout &DL = IP->getModule()->getDataLayout();
+
+    // View the base pointer as a pointer to the privatizable type if it is
+    // not one already.
+    if (Base->getType()->getPointerElementType() != PrivType)
+      Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
+                                                 "", ACS.getInstruction());
+
+    // Traverse the type, build GEPs and loads.
+    if (auto *PrivStructType = dyn_cast(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        Type *PointeeTy = PrivStructType->getElementType(u);
+        Value *Ptr =
+            constructPointer(PointeeTy->getPointerTo(), Base,
+                             PrivStructLayout->getElementOffset(u), IRB, DL);
+        ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP));
+      }
+    } else if (auto *PrivArrayType = dyn_cast(PrivType)) {
+      // Element type and size are loop invariant. A single loop emits exactly
+      // one load per array element.
+      Type *PointeeTy = PrivArrayType->getElementType();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr = constructPointer(PointeeTy->getPointerTo(), Base,
+                                      u * PointeeTySize, IRB, DL);
+        ReplacementValues.push_back(new LoadInst(PointeeTy, Ptr, "", IP));
+      }
+    } else {
+      // Neither struct nor array: load the whole value through the base.
+      ReplacementValues.push_back(new LoadInst(PrivType, Base, "", IP));
+    }
+  }
+
+  /// See AbstractAttribute::manifest(...)
+  ChangeStatus manifest(Attributor &A) override {
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    assert(PrivatizableType.getValue() && "Expected privatizable type!");
+
+    // Collect all tail calls in the function as we cannot allow new allocas to
+    // escape into tail recursion.
+ // TODO: Be smarter about new allocas escaping into tail calls. + SmallVector TailCalls; + if (!A.checkForAllInstructions( + [&](Instruction &I) { + CallInst &CI = cast(I); + if (CI.isTailCall()) + TailCalls.push_back(&CI); + return true; + }, + *this, {Instruction::Call})) + return ChangeStatus::UNCHANGED; + + Argument *Arg = getAssociatedArgument(); + + // Callback to repair the associated function. A new alloca is placed at the + // beginning and initialized with the values passed through arguments. The + // new alloca replaces the use of the old pointer argument. + Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB = + [=](const Attributor::ArgumentReplacementInfo &ARI, + Function &ReplacementFn, Function::arg_iterator ArgIt) { + BasicBlock &EntryBB = ReplacementFn.getEntryBlock(); + Instruction *IP = &*EntryBB.getFirstInsertionPt(); + auto *AI = new AllocaInst(PrivatizableType.getValue(), 0, + Arg->getName() + ".priv", IP); + createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn, + ArgIt->getArgNo(), *IP); + Arg->replaceAllUsesWith(AI); + + for (CallInst *CI : TailCalls) + CI->setTailCall(false); + }; + + // Callback to repair a call site of the associated function. The elements + // of the privatizable type are loaded prior to the call and passed to the + // new function version. + Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB = + [=](const Attributor::ArgumentReplacementInfo &ARI, + AbstractCallSite ACS, SmallVectorImpl &NewArgOperands) { + createReplacementValues( + PrivatizableType.getValue(), ACS, + ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()), + NewArgOperands); + }; + + // Collect the types that will replace the privatizable type in the function + // signature. + SmallVector ReplacementTypes; + identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + + // Register a rewrite of the argument. 
+ A.registerFunctionSignatureRewrite( + *Arg, ReplacementTypes, std::move(FnRepairCB), std::move(ACSRepairCB)); + + return ChangeStatus::CHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_ARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl { + AAPrivatizablePtrFloating(const IRPosition &IRP) + : AAPrivatizablePtrImpl(IRP) {} + + /// See AbstractAttribute::initialize(...). + virtual void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. + indicatePessimisticFixpoint(); + } + + ChangeStatus updateImpl(Attributor &A) override { + llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::" + "updateImpl will not be called"); + } + + /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...) + Optional identifyPrivatizableType(Attributor &A) override { + Value *Obj = + GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL()); + if (!Obj) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n"); + return nullptr; + } + + if (auto *AI = dyn_cast(Obj)) + if (auto *CI = dyn_cast(AI->getArraySize())) + if (CI->isOne()) + return Obj->getType()->getPointerElementType(); + if (auto *Arg = dyn_cast(Obj)) { + auto &PrivArgAA = + A.getAAFor(*this, IRPosition::argument(*Arg)); + if (PrivArgAA.isAssumedPrivatizablePtr()) + return Obj->getType()->getPointerElementType(); + } + + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid " + "alloca nor privatizable argument: " + << *Obj << "!\n"); + return nullptr; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteArgument final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See 
AbstractAttribute::initialize(...). + void initialize(Attributor &A) override {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + PrivatizableType = identifyPrivatizableType(A); + if (!PrivatizableType.hasValue()) + return ChangeStatus::UNCHANGED; + if (!PrivatizableType.getValue()) + return indicatePessimisticFixpoint(); + + const IRPosition &IRP = getIRPosition(); + auto &NoCaptureAA = A.getAAFor(*this, IRP); + if (!NoCaptureAA.isAssumedNoCapture()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n"); + return indicatePessimisticFixpoint(); + } + + auto &NoAliasAA = A.getAAFor(*this, IRP); + if (!NoAliasAA.isAssumedNoAlias()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n"); + return indicatePessimisticFixpoint(); + } + + const auto &MemBehaviorAA = A.getAAFor(*this, IRP); + if (!MemBehaviorAA.isAssumedReadOnly()) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n"); + return indicatePessimisticFixpoint(); + } + + return ChangeStatus::UNCHANGED; + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrCallSiteReturned final + : public AAPrivatizablePtrFloating { + AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr); + } +}; + +struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating { + AAPrivatizablePtrReturned(const IRPosition &IRP) + : AAPrivatizablePtrFloating(IRP) {} + + /// See AbstractAttribute::initialize(...). 
+ void initialize(Attributor &A) override { + // TODO: We can privatize more than arguments. + indicatePessimisticFixpoint(); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { + STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr); + } +}; + /// -------------------- Memory Behavior Attributes ---------------------------- /// Includes read-none, read-only, and write-only. /// ---------------------------------------------------------------------------- @@ -4901,6 +5400,9 @@ // Every argument with pointer type might be marked // "readnone/readonly/writeonly/..." getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be privatizable (or promotable) + getOrCreateAAFor(ArgPos); } } @@ -5109,6 +5611,7 @@ const char AANoCapture::ID = 0; const char AAValueSimplify::ID = 0; const char AAHeapToStack::ID = 0; +const char AAPrivatizablePtr::ID = 0; const char AAMemoryBehavior::ID = 0; // Macro magic to create the static generator function for attributes that @@ -5214,6 +5717,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' 
-aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR define internal i32 @deref(i32* %x) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@deref -; ARGPROMOTION-SAME: (i32 [[X_VAL:%.*]]) #0 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: ret i32 [[X_VAL:%.*]] +; ALL-LABEL: define {{[^@]+}}@deref +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: ret i32 [[TMP0:%.*]] ; entry: %tmp2 = load i32, i32* %x, align 4 @@ -13,14 +14,11 @@ } define i32 @f(i32 %x) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[X:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[X_ADDR:%.*]] = alloca i32 -; ARGPROMOTION-NEXT: store i32 [[X:%.*]], i32* [[X_ADDR]], align 4 -; ARGPROMOTION-NEXT: [[X_ADDR_VAL:%.*]] = load i32, i32* [[X_ADDR]], align 4 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]]) #0 -; ARGPROMOTION-NEXT: ret i32 [[TMP1]] +; ALL-LABEL: define {{[^@]+}}@f +; ALL-SAME: (i32 [[X:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP1:%.*]] = call i32 @deref(i32 [[X:%.*]]) +; ALL-NEXT: ret i32 [[TMP1]] ; entry: %x_addr = alloca i32 diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR2498 ; This test tries to convince 
argpromotion about promoting the load from %A + 2, @@ -17,6 +18,18 @@ ; ARGPROMOTION-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] ; ARGPROMOTION-NEXT: ret i32 [[R]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[A:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_0:%.*]] = load i32, i32* [[A:%.*]] +; ATTRIBUTOR-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; ATTRIBUTOR: T: +; ATTRIBUTOR-NEXT: ret i32 [[A_0]] +; ATTRIBUTOR: F: +; ATTRIBUTOR-NEXT: [[A_2:%.*]] = getelementptr i32, i32* [[A]], i32 2 +; ATTRIBUTOR-NEXT: [[R:%.*]] = load i32, i32* [[A_2]] +; ATTRIBUTOR-NEXT: ret i32 [[R]] +; entry: ; Unconditonally load the element at %A %A.0 = load i32, i32* %A @@ -32,12 +45,18 @@ ret i32 %R } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* null) +define i32 @foo(i1 %c, i32* %A) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@foo +; ARGPROMOTION-SAME: (i1 [[C:%.*]], i32* [[A:%.*]]) +; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C:%.*]], i32* [[A:%.*]]) ; ARGPROMOTION-NEXT: ret i32 [[X]] ; - %X = call i32 @callee(i1 false, i32* null) ; [#uses=1] +; ATTRIBUTOR-LABEL: define {{[^@]+}}@foo +; ATTRIBUTOR-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C:%.*]], i32* nocapture readonly [[A:%.*]]) +; ATTRIBUTOR-NEXT: ret i32 [[X]] +; + %X = call i32 @callee(i1 %c, i32* %A) ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -inline -argpromotion -disable-output +; RUN: opt -disable-output -passes=attributor 
-aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll --- a/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll +++ b/llvm/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -argpromotion -disable-output +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { entry: diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt -S -argpromotion -mem2reg < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='argpromotion,function(mem2reg)' < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Test that we only promote arguments when the caller/callee have compatible ; function attrubtes. 
@@ -8,12 +9,19 @@ define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; ARGPROMOTION-LABEL: define {{[^@]+}}@no_promote_avx2 -; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]]) #0 +; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64>* readonly [[ARG1:%.*]]) ; ARGPROMOTION-NEXT: bb: ; ARGPROMOTION-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1:%.*]] ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG:%.*]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@no_promote_avx2 +; ATTRIBUTOR-SAME: (<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1:%.*]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <4 x i64>, <4 x i64>* %arg1 store <4 x i64> %tmp, <4 x i64>* %arg @@ -22,7 +30,7 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; ARGPROMOTION-LABEL: define {{[^@]+}}@no_promote -; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]]) #1 +; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]]) ; ARGPROMOTION-NEXT: bb: ; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 @@ -33,6 +41,18 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG:%.*]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@no_promote +; ATTRIBUTOR-SAME: (<4 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) 
[[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <4 x i64>, align 32 %tmp2 = alloca <4 x i64>, align 32 @@ -46,11 +66,17 @@ define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; ARGPROMOTION-LABEL: define {{[^@]+}}@promote_avx2 -; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]]) #0 +; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]]) ; ARGPROMOTION-NEXT: bb: ; ARGPROMOTION-NEXT: store <4 x i64> [[ARG1_VAL:%.*]], <4 x i64>* [[ARG:%.*]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@promote_avx2 +; ATTRIBUTOR-SAME: (<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP0:%.*]], <4 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = load <4 x i64>, <4 x i64>* %arg1 store <4 x i64> %tmp, <4 x i64>* %arg @@ -59,7 +85,7 @@ define void @promote(<4 x i64>* %arg) #0 { ; ARGPROMOTION-LABEL: define {{[^@]+}}@promote -; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]]) #0 +; ARGPROMOTION-SAME: (<4 x i64>* [[ARG:%.*]]) ; ARGPROMOTION-NEXT: bb: ; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 @@ -71,6 +97,19 @@ ; ARGPROMOTION-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG:%.*]], align 2 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@promote +; ATTRIBUTOR-SAME: (<4 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: 
+; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void +; bb: %tmp = alloca <4 x i64>, align 32 %tmp2 = alloca <4 x i64>, align 32 diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefix=ALL -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefix=ALL +; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Test that we only promote arguments when the caller/callee have compatible ; function attrubtes. 
@@ -8,11 +9,20 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { -; ALL-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) #0 -; ALL-NEXT: bb: -; ALL-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0:%.*]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -21,18 +31,31 @@ } define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 { -; ALL-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #0 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] -; 
ALL-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ARGPROMOTION-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: 
[[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 @@ -47,11 +70,20 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { -; ALL-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) #1 -; ALL-NEXT: bb: -; ALL-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0:%.*]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -60,18 +92,31 @@ } define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 { -; ALL-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #1 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: 
[[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] -; ALL-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ARGPROMOTION-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call 
fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 @@ -86,11 +131,20 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { -; ALL-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) #1 -; ALL-NEXT: bb: -; ALL-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0:%.*]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -99,18 +153,31 @@ } define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 { -; ALL-LABEL: define 
{{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #0 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] -; ALL-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ARGPROMOTION-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast 
<8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 @@ -125,11 +192,20 @@ ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { -; ALL-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) #0 -; ALL-NEXT: bb: -; ALL-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0:%.*]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; 
ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -138,18 +214,31 @@ } define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 { -; ALL-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #1 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] -; ALL-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ARGPROMOTION-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture 
writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 @@ -164,12 +253,19 @@ ; This should not promote define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { -; ALL-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) #1 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1:%.*]] -; ALL-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1:%.*]] +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) 
[[ARG:%.*]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1:%.*]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -178,17 +274,29 @@ } define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 { -; ALL-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #2 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void 
+; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 @@ -203,12 +311,19 @@ ; This should not promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { -; ALL-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) #2 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1:%.*]] -; ALL-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64>* readonly [[ARG1:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1:%.*]] +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define 
{{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1:%.*]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -217,17 +332,29 @@ } define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 { -; ALL-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #1 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64>* [[TMP]]) +; ARGPROMOTION-NEXT: 
[[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nonnull readonly align 32 dereferenceable(64) [[TMP]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 @@ -242,11 +369,20 @@ ; This should promote define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { -; ALL-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) #3 -; ALL-NEXT: bb: -; ALL-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define 
{{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0:%.*]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -255,18 +391,31 @@ } define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 { -; ALL-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #4 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] -; ALL-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: [[TMP_VAL:%.*]] = load <8 x 
i64>, <8 x i64>* [[TMP]] +; ARGPROMOTION-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 @@ -281,11 +430,20 @@ ; This should promote define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { -; ALL-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) #4 -; ALL-NEXT: bb: -; ALL-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]]) +; 
ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: store <8 x i64> [[ARG1_VAL:%.*]], <8 x i64>* [[ARG:%.*]] +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64> +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP0:%.*]], <8 x i64>* [[ARG1_PRIV]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG:%.*]], align 32 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -294,18 +452,31 @@ } define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 { -; ALL-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; ALL-SAME: (<8 x i64>* [[ARG:%.*]]) #3 -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 -; ALL-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; ALL-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) -; ALL-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] -; ALL-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) -; ALL-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 -; ALL-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 -; ALL-NEXT: ret void +; ARGPROMOTION-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ARGPROMOTION-SAME: (<8 x i64>* [[ARG:%.*]]) +; ARGPROMOTION-NEXT: bb: +; ARGPROMOTION-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ARGPROMOTION-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* 
[[TMP]] to i8* +; ARGPROMOTION-NEXT: call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP3]], i8 0, i64 32, i1 false) +; ARGPROMOTION-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ARGPROMOTION-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* [[TMP2]], <8 x i64> [[TMP_VAL]]) +; ARGPROMOTION-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ARGPROMOTION-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; ATTRIBUTOR-SAME: (<8 x i64>* nocapture writeonly [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* +; ATTRIBUTOR-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false) +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]] +; ATTRIBUTOR-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32 +; ATTRIBUTOR-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG:%.*]], align 2 +; ATTRIBUTOR-NEXT: ret void ; bb: %tmp = alloca <8 x i64>, align 32 diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll --- a/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/ArgumentPromotion/X86/thiscall.ll @@ -6,6 +6,7 @@ ; ; RUN: opt -S -argpromotion %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION ; RUN: opt -S -globalopt -argpromotion %s | FileCheck %s --check-prefixes=ALL,GLOBALOPT_ARGPROMOTION +; RUN: opt -S 
-passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i386-pc-windows-msvc19.11.0" @@ -33,6 +34,16 @@ ; GLOBALOPT_ARGPROMOTION-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca [[ARGMEM]]) ; GLOBALOPT_ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@internalfun +; ATTRIBUTOR-SAME: (%struct.a* nocapture readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca nonnull align 4 dereferenceable(1) [[TMP0:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A:%.*]] }>, <{ [[STRUCT_A]] }>* [[TMP0:%.*]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* nonnull align 4 dereferenceable(1) [[A]]) +; ATTRIBUTOR-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; ATTRIBUTOR-NEXT: ret void +; entry: %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0 %argmem = alloca inalloca <{ %struct.a }>, align 4 @@ -59,6 +70,14 @@ ; GLOBALOPT_ARGPROMOTION-NEXT: call fastcc void @internalfun(<{ [[STRUCT_A]] }>* [[ARGMEM]]) ; GLOBALOPT_ARGPROMOTION-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) ; GLOBALOPT_ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@exportedfun +; ATTRIBUTOR-SAME: (%struct.a* nocapture readnone [[A:%.*]]) +; ATTRIBUTOR-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave() +; ATTRIBUTOR-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4 +; ATTRIBUTOR-NEXT: 
call x86_thiscallcc void @internalfun(%struct.a* nocapture readnone undef, <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]]) +; ATTRIBUTOR-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]]) +; ATTRIBUTOR-NEXT: ret void ; %inalloca.save = tail call i8* @llvm.stacksave() %argmem = alloca inalloca <{ %struct.a }>, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll --- a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll +++ b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR %T = type { i32, i32, i32, i32 } @G = constant %T { i32 0, i32 0, i32 17, i32 25 } @@ -12,6 +13,15 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = add i32 [[P_0_3_VAL:%.*]], [[P_0_2_VAL:%.*]] ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* @G, i64 0, i32 3 +; ATTRIBUTOR-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* @G, i64 0, i32 2 +; ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; ATTRIBUTOR-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 
@@ -31,6 +41,11 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = call i32 @test(i32 [[G_IDX_VAL]], i32 [[G_IDX1_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = call i32 @test() +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = call i32 @test(%T* @G) ret i32 %v diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll --- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll @@ -1,13 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR %struct.ss = type { i32, i64 } +; It is unclear why we should not promote the byval but argument promotion doesn't: ; Don't drop 'byval' on %X here. 
define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind { ; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]]) #0 +; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]]) ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]] ; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 @@ -21,6 +23,23 @@ ; ARGPROMOTION-NEXT: store i32 0, i32* [[X:%.*]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@f +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[X_PRIV:%.*]] = alloca i32 +; ATTRIBUTOR-NEXT: store i32 [[TMP2:%.*]], i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0:%.*]], i32* [[B_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 [[TMP1:%.*]], i64* [[B_PRIV_0_1]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: store i32 0, i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: ret void +; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -49,6 +68,22 @@ ; ARGPROMOTION-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X:%.*]], i32 zeroext 0) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (i32* nocapture writeonly [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] 
+; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 8 +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 2, i64* [[TMP4]], align 4 +; ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[X:%.*]] +; ATTRIBUTOR-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %S = alloca %struct.ss %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 diff --git a/llvm/test/Transforms/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/basictest.ll --- a/llvm/test/Transforms/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/ArgumentPromotion/basictest.ll @@ -1,12 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -basicaa -argpromotion -mem2reg -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=4 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @test(i32* %X, i32* %Y) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@test -; ARGPROMOTION-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]]) -; ARGPROMOTION-NEXT: [[C:%.*]] = add i32 [[X_VAL:%.*]], [[Y_VAL:%.*]] -; ARGPROMOTION-NEXT: ret i32 [[C]] +; ALL-LABEL: 
define {{[^@]+}}@test +; ALL-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; ALL-NEXT: [[C:%.*]] = add i32 [[TMP0:%.*]], [[TMP1:%.*]] +; ALL-NEXT: ret i32 [[C]] ; %A = load i32, i32* %X %B = load i32, i32* %Y @@ -15,10 +16,10 @@ } define internal i32 @caller(i32* %B) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@caller -; ARGPROMOTION-SAME: (i32 [[B_VAL1:%.*]]) -; ARGPROMOTION-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[B_VAL1:%.*]]) -; ARGPROMOTION-NEXT: ret i32 [[C]] +; ALL-LABEL: define {{[^@]+}}@caller +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: [[C:%.*]] = call i32 @test(i32 1, i32 [[TMP0:%.*]]) +; ALL-NEXT: ret i32 [[C]] ; %A = alloca i32 store i32 1, i32* %A @@ -27,9 +28,9 @@ } define i32 @callercaller() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callercaller() -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @caller(i32 2) -; ARGPROMOTION-NEXT: ret i32 [[X]] +; ALL-LABEL: define {{[^@]+}}@callercaller() +; ALL-NEXT: [[X:%.*]] = call i32 @caller(i32 2) +; ALL-NEXT: ret i32 [[X]] ; %B = alloca i32 store i32 2, i32* %B diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll @@ -1,15 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Arg promotion eliminates the struct argument. ; FIXME: Should it eliminate the i32* argument? 
+; The attributor eliminates the i32*. %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { ; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]]) #0 +; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]]) ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]] ; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 @@ -23,6 +25,23 @@ ; ARGPROMOTION-NEXT: store i32 0, i32* [[X:%.*]] ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@f +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[X_PRIV:%.*]] = alloca i32 +; ATTRIBUTOR-NEXT: store i32 [[TMP2:%.*]], i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0:%.*]], i32* [[B_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 [[TMP1:%.*]], i64* [[B_PRIV_0_1]] +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 +; ATTRIBUTOR-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 8 +; ATTRIBUTOR-NEXT: store i32 0, i32* [[X_PRIV]] +; ATTRIBUTOR-NEXT: ret void +; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 %tmp1 = load i32, i32* %tmp, align 4 @@ -49,6 +68,22 @@ ; ARGPROMOTION-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X:%.*]]) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (i32* nocapture writeonly [[X:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; 
ATTRIBUTOR-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 8 +; ATTRIBUTOR-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i64 2, i64* [[TMP4]], align 4 +; ATTRIBUTOR-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]] +; ATTRIBUTOR-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]] +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[X:%.*]] +; ATTRIBUTOR-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %S = alloca %struct.ss %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll @@ -1,25 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes='argpromotion,function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval %b) 
nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #0 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]] -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 [[B_0:%.*]], i32* [[DOT0]] -; ARGPROMOTION-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 [[B_1:%.*]], i64* [[DOT1]] -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; ARGPROMOTION-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@f +; ALL-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP2:%.*]] = add i32 [[TMP0:%.*]], 1 +; ALL-NEXT: ret void ; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -31,19 +24,11 @@ define internal void @g(%struct.ss* byval align 32 %b) nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@g -; ARGPROMOTION-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]]) #0 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32 -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 [[B_0:%.*]], i32* [[DOT0]] -; ARGPROMOTION-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 [[B_1:%.*]], i64* [[DOT1]] -; ARGPROMOTION-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 1 -; ARGPROMOTION-NEXT: store i32 [[TMP2]], i32* [[TMP]], align 4 -; ARGPROMOTION-NEXT: ret void 
+; ALL-LABEL: define {{[^@]+}}@g +; ALL-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[TMP2:%.*]] = add i32 [[TMP0:%.*]], 1 +; ALL-NEXT: ret void ; entry: %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 @@ -55,24 +40,11 @@ define i32 @main() nounwind { -; ARGPROMOTION-LABEL: define {{[^@]+}}@main() #0 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: store i32 1, i32* [[TMP1]], align 8 -; ARGPROMOTION-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: store i64 2, i64* [[TMP4]], align 4 -; ARGPROMOTION-NEXT: [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[S_0_VAL:%.*]] = load i32, i32* [[S_0]] -; ARGPROMOTION-NEXT: [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: [[S_1_VAL:%.*]] = load i64, i64* [[S_1]] -; ARGPROMOTION-NEXT: call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]]) #0 -; ARGPROMOTION-NEXT: [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 -; ARGPROMOTION-NEXT: [[S_01_VAL:%.*]] = load i32, i32* [[S_01]] -; ARGPROMOTION-NEXT: [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; ARGPROMOTION-NEXT: [[S_12_VAL:%.*]] = load i64, i64* [[S_12]] -; ARGPROMOTION-NEXT: call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]]) #0 -; ARGPROMOTION-NEXT: ret i32 0 +; ALL-LABEL: define {{[^@]+}}@main() +; ALL-NEXT: entry: +; ALL-NEXT: call void @f(i32 1, i64 2) +; ALL-NEXT: call void @g(i32 1, i64 2) +; ALL-NEXT: ret i32 0 ; entry: %S = alloca %struct.ss diff --git a/llvm/test/Transforms/ArgumentPromotion/chained.ll b/llvm/test/Transforms/ArgumentPromotion/chained.ll --- a/llvm/test/Transforms/ArgumentPromotion/chained.ll +++ b/llvm/test/Transforms/ArgumentPromotion/chained.ll @@ -1,6 
+1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR @G1 = constant i32 0 @G2 = constant i32* @G1 @@ -11,6 +12,12 @@ ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: ret i32 [[X_VAL_VAL:%.*]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[Y:%.*]] = load i32*, i32** @G2, align 8 +; ATTRIBUTOR-NEXT: [[Z:%.*]] = load i32, i32* [[Y]] +; ATTRIBUTOR-NEXT: ret i32 [[Z]] +; entry: %y = load i32*, i32** %x %z = load i32, i32* %y @@ -25,6 +32,11 @@ ; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @test(i32 [[G2_VAL_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[X]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[X:%.*]] = call i32 @test() +; ATTRIBUTOR-NEXT: ret i32 [[X]] +; entry: %x = call i32 @test(i32** @G2) ret i32 %x diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll --- a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll +++ b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll @@ -1,18 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | 
FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Don't promote around control flow. -define internal i32 @callee(i1 %C, i32* %P) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee -; ARGPROMOTION-SAME: (i1 [[C:%.*]], i32* [[P:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] -; ARGPROMOTION: T: -; ARGPROMOTION-NEXT: ret i32 17 -; ARGPROMOTION: F: -; ARGPROMOTION-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]] -; ARGPROMOTION-NEXT: ret i32 [[X]] +define internal i32 @callee(i1 %C, i32* nocapture readonly %P) { +; ALL-LABEL: define {{[^@]+}}@callee +; ALL-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[P:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; ALL: T: +; ALL-NEXT: ret i32 17 +; ALL: F: +; ALL-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]] +; ALL-NEXT: ret i32 [[X]] ; entry: br i1 %C, label %T, label %F @@ -25,14 +26,15 @@ ret i32 %X } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 true, i32* null) -; ARGPROMOTION-NEXT: ret i32 [[X]] +define i32 @foo(i1 %C, i32* nocapture readonly %P) { +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i1 [[C:%.*]], i32* nocapture readonly [[P:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C:%.*]], i32* nocapture readonly [[P:%.*]]) +; ALL-NEXT: ret i32 [[X]] ; entry: - %X = call i32 @callee(i1 true, i32* null) + %X = call i32 @callee(i1 %C, i32* nocapture readonly %P) ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll --- a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll +++ 
b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll @@ -1,17 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes='argpromotion,function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @callee(i1 %C, i32* %P) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee -; ARGPROMOTION-SAME: (i1 [[C:%.*]], i32 [[P_VAL:%.*]]) -; ARGPROMOTION-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] -; ARGPROMOTION: T: -; ARGPROMOTION-NEXT: ret i32 17 -; ARGPROMOTION: F: -; ARGPROMOTION-NEXT: ret i32 [[P_VAL:%.*]] +; ALL-LABEL: define {{[^@]+}}@callee +; ALL-SAME: (i1 [[C:%.*]], i32 [[TMP0:%.*]]) +; ALL-NEXT: br i1 [[C:%.*]], label [[T:%.*]], label [[F:%.*]] +; ALL: T: +; ALL-NEXT: ret i32 17 +; ALL: F: +; ALL-NEXT: ret i32 [[TMP0:%.*]] ; br i1 %C, label %T, label %F @@ -23,17 +24,15 @@ ret i32 %X } -define i32 @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: [[A:%.*]] = alloca i32 -; ARGPROMOTION-NEXT: store i32 17, i32* [[A]] -; ARGPROMOTION-NEXT: [[A_VAL:%.*]] = load i32, i32* [[A]] -; ARGPROMOTION-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]]) -; ARGPROMOTION-NEXT: ret i32 [[X]] +define i32 @foo(i1 %C) { +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i1 [[C:%.*]]) +; ALL-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C:%.*]], i32 17) +; ALL-NEXT: ret i32 [[X]] ; %A = 
alloca i32 ; [#uses=2] store i32 17, i32* %A - %X = call i32 @callee( i1 false, i32* %A ) ; [#uses=1] + %X = call i32 @callee( i1 %C, i32* %A ) ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/ArgumentPromotion/crash.ll b/llvm/test/Transforms/ArgumentPromotion/crash.ll --- a/llvm/test/Transforms/ArgumentPromotion/crash.ll +++ b/llvm/test/Transforms/ArgumentPromotion/crash.ll @@ -1,23 +1,41 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S < %s -inline -argpromotion | FileCheck %s --check-prefixes=ALL,ARGPROMOTION_OLDPM ; RUN: opt -S < %s -passes=inline,argpromotion | FileCheck %s --check-prefixes=ALL,ARGPROMOTION_NEWPM +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR %S = type { %S* } ; Inlining should nuke the invoke (and any inlined calls) here even with ; argument promotion running along with it. 
define void @zot() personality i32 (...)* @wibble { -; ALL-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble -; ALL-NEXT: bb: -; ALL-NEXT: unreachable -; ALL: hoge.exit: -; ALL-NEXT: br label [[BB1:%.*]] -; ALL: bb1: -; ALL-NEXT: unreachable -; ALL: bb2: -; ALL-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } -; ALL-NEXT: cleanup -; ALL-NEXT: unreachable +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: unreachable +; ARGPROMOTION_OLDPM: hoge.exit: +; ARGPROMOTION_OLDPM-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION_OLDPM: bb1: +; ARGPROMOTION_OLDPM-NEXT: unreachable +; ARGPROMOTION_OLDPM: bb2: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION_OLDPM-NEXT: cleanup +; ARGPROMOTION_OLDPM-NEXT: unreachable +; +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@zot() personality i32 (...)* @wibble +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: unreachable +; ARGPROMOTION_NEWPM: hoge.exit: +; ARGPROMOTION_NEWPM-NEXT: br label [[BB1:%.*]] +; ARGPROMOTION_NEWPM: bb1: +; ARGPROMOTION_NEWPM-NEXT: unreachable +; ARGPROMOTION_NEWPM: bb2: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = landingpad { i8*, i32 } +; ARGPROMOTION_NEWPM-NEXT: cleanup +; ARGPROMOTION_NEWPM-NEXT: unreachable +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@zot() #0 personality i32 (...)* @wibble +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: call void @hoge() +; ATTRIBUTOR-NEXT: unreachable ; bb: invoke void @hoge() @@ -33,6 +51,11 @@ } define internal void @hoge() { +; ATTRIBUTOR-LABEL: define {{[^@]+}}@hoge() +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = call fastcc i8* @spam() +; ATTRIBUTOR-NEXT: unreachable +; bb: %tmp = call fastcc i8* @spam(i1 (i8*)* @eggs) %tmp1 = call fastcc i8* @spam(i1 (i8*)* @barney) @@ -40,6 +63,10 @@ } define internal fastcc i8* @spam(i1 (i8*)* %arg) { +; ATTRIBUTOR-LABEL: define {{[^@]+}}@spam() +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: 
unreachable +; bb: unreachable } @@ -49,24 +76,51 @@ ; ARGPROMOTION_NEWPM-NEXT: bb: ; ARGPROMOTION_NEWPM-NEXT: unreachable ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@eggs +; ATTRIBUTOR-SAME: (i8* nocapture readnone [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = call zeroext i1 @barney(i8* nocapture undef) +; ATTRIBUTOR-NEXT: unreachable +; bb: %tmp = call zeroext i1 @barney(i8* %arg) unreachable } define internal i1 @barney(i8* %arg) { +; ATTRIBUTOR-LABEL: define {{[^@]+}}@barney +; ATTRIBUTOR-SAME: (i8* nocapture readnone [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: ret i1 undef +; bb: ret i1 undef } define i32 @test_inf_promote_caller(i32 %arg) { -; ALL-LABEL: define {{[^@]+}}@test_inf_promote_caller -; ALL-SAME: (i32 [[ARG:%.*]]) -; ALL-NEXT: bb: -; ALL-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] -; ALL-NEXT: [[TMP1:%.*]] = alloca [[S]] -; ALL-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) -; ALL-NEXT: ret i32 0 +; ARGPROMOTION_OLDPM-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION_OLDPM-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION_OLDPM-NEXT: bb: +; ARGPROMOTION_OLDPM-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION_OLDPM-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION_OLDPM-NEXT: ret i32 0 +; +; ARGPROMOTION_NEWPM-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ARGPROMOTION_NEWPM-SAME: (i32 [[ARG:%.*]]) +; ARGPROMOTION_NEWPM-NEXT: bb: +; ARGPROMOTION_NEWPM-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ARGPROMOTION_NEWPM-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP]], %S* [[TMP1]]) +; ARGPROMOTION_NEWPM-NEXT: ret i32 0 +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test_inf_promote_caller +; ATTRIBUTOR-SAME: (i32 [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: bb: +; ATTRIBUTOR-NEXT: [[TMP:%.*]] = alloca [[S:%.*]] +; 
ATTRIBUTOR-NEXT: [[TMP1:%.*]] = alloca [[S]] +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = call i32 @test_inf_promote_callee(%S* noalias nonnull align 8 dereferenceable(8) [[TMP]], %S* noalias nonnull align 8 dereferenceable(8) [[TMP1]]) +; ATTRIBUTOR-NEXT: unreachable ; bb: %tmp = alloca %S @@ -76,16 +130,16 @@ ret i32 0 } -define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { +define internal i32 @test_inf_promote_callee(%S* nocapture readonly %arg, %S* nocapture readonly %arg1) { ; ALL-LABEL: define {{[^@]+}}@test_inf_promote_callee -; ALL-SAME: (%S* [[ARG:%.*]], %S* [[ARG1:%.*]]) +; ALL-SAME: (%S* nocapture readonly [[ARG:%.*]], %S* nocapture readonly [[ARG1:%.*]]) ; ALL-NEXT: bb: ; ALL-NEXT: [[TMP:%.*]] = getelementptr [[S:%.*]], %S* [[ARG1:%.*]], i32 0, i32 0 ; ALL-NEXT: [[TMP2:%.*]] = load %S*, %S** [[TMP]] ; ALL-NEXT: [[TMP3:%.*]] = getelementptr [[S]], %S* [[ARG:%.*]], i32 0, i32 0 ; ALL-NEXT: [[TMP4:%.*]] = load %S*, %S** [[TMP3]] ; ALL-NEXT: [[TMP5:%.*]] = call i32 @test_inf_promote_callee(%S* [[TMP4]], %S* [[TMP2]]) -; ALL-NEXT: ret i32 0 +; ALL-NEXT: unreachable ; bb: %tmp = getelementptr %S, %S* %arg1, i32 0, i32 0 @@ -93,8 +147,7 @@ %tmp3 = getelementptr %S, %S* %arg, i32 0, i32 0 %tmp4 = load %S*, %S** %tmp3 %tmp5 = call i32 @test_inf_promote_callee(%S* %tmp4, %S* %tmp2) - - ret i32 0 + unreachable } declare i32 @wibble(...) 
diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll --- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR declare void @sink(i32) @@ -9,6 +10,13 @@ ; ARGPROMOTION-SAME: (i32 [[X_VAL_VAL:%.*]]) !dbg !3 ; ARGPROMOTION-NEXT: call void @sink(i32 [[X_VAL_VAL:%.*]]) ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test +; ATTRIBUTOR-SAME: (i32** nocapture readonly [[X:%.*]]) !dbg !3 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32*, i32** [[X:%.*]], align 8 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 8 +; ATTRIBUTOR-NEXT: call void @sink(i32 [[TMP2]]) +; ATTRIBUTOR-NEXT: ret void ; %1 = load i32*, i32** %X, align 8 %2 = load i32, i32* %1, align 8 @@ -18,6 +26,8 @@ %struct.pair = type { i32, i32 } +declare void @use(%struct.pair*) + define internal void @test_byval(%struct.pair* byval %P) { ; ARGPROMOTION-LABEL: define {{[^@]+}}@test_byval ; ARGPROMOTION-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]]) @@ -26,8 +36,20 @@ ; ARGPROMOTION-NEXT: store i32 [[P_0:%.*]], i32* [[DOT0]] ; ARGPROMOTION-NEXT: [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1 ; ARGPROMOTION-NEXT: store i32 [[P_1:%.*]], i32* [[DOT1]] +; ARGPROMOTION-NEXT: call void @use(%struct.pair* [[P]]) ; ARGPROMOTION-NEXT: ret void ; +; 
ATTRIBUTOR-LABEL: define {{[^@]+}}@test_byval +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; ATTRIBUTOR-NEXT: [[P_PRIV:%.*]] = alloca [[STRUCT_PAIR:%.*]] +; ATTRIBUTOR-NEXT: [[P_PRIV_CAST:%.*]] = bitcast %struct.pair* [[P_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0:%.*]], i32* [[P_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[P_PRIV_0_1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i32 [[TMP1:%.*]], i32* [[P_PRIV_0_1]] +; ATTRIBUTOR-NEXT: call void @use(%struct.pair* nonnull [[P_PRIV]]) +; ATTRIBUTOR-NEXT: ret void +; + call void @use(%struct.pair* %P) ret void } @@ -43,6 +65,16 @@ ; ARGPROMOTION-NEXT: [[P_1_VAL:%.*]] = load i32, i32* [[P_1]], !dbg !5 ; ARGPROMOTION-NEXT: call void @test_byval(i32 [[P_0_VAL]], i32 [[P_1_VAL]]), !dbg !5 ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller +; ATTRIBUTOR-SAME: (i32** nocapture readonly [[Y:%.*]], %struct.pair* [[P:%.*]]) +; ATTRIBUTOR-NEXT: call void @test(i32** nocapture readonly [[Y:%.*]]), !dbg !4 +; ATTRIBUTOR-NEXT: [[P_CAST:%.*]] = bitcast %struct.pair* [[P:%.*]] to i32*, !dbg !5 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[P_CAST]] +; ATTRIBUTOR-NEXT: [[P_0_1:%.*]] = getelementptr [[STRUCT_PAIR:%.*]], %struct.pair* [[P]], i32 0, i32 1, !dbg !5 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[P_0_1]] +; ATTRIBUTOR-NEXT: call void @test_byval(i32 [[TMP1]], i32 [[TMP2]]), !dbg !5 +; ATTRIBUTOR-NEXT: ret void ; call void @test(i32** %Y), !dbg !1 diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll --- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll @@ -1,35 +1,53 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt 
< %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %union.u = type { x86_fp80 } %struct.s = type { double, i16, i8, [5 x i8] } - -@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16 - %struct.Foo = type { i32, i64 } -@a = internal global %struct.Foo { i32 1, i64 2 }, align 8 -define void @run() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@run() +define x86_fp80 @run(%struct.Foo* %a, %struct.s* %b, i8* %i8, i64* %i64a, i64* %i64b) { +; ARGPROMOTION-LABEL: define {{[^@]+}}@run +; ARGPROMOTION-SAME: (%struct.Foo* [[A:%.*]], %struct.s* [[B:%.*]], i8* [[I8:%.*]], i64* [[I64A:%.*]], i64* [[I64B:%.*]]) ; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[TMP0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) -; ARGPROMOTION-NEXT: [[DOT0:%.*]] = getelementptr [[UNION_U:%.*]], %union.u* bitcast (%struct.s* @b to %union.u*), i32 0, i32 0 -; ARGPROMOTION-NEXT: [[DOT0_VAL:%.*]] = load x86_fp80, x86_fp80* [[DOT0]] -; ARGPROMOTION-NEXT: [[TMP1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[DOT0_VAL]]) -; ARGPROMOTION-NEXT: [[TMP2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* @a) -; ARGPROMOTION-NEXT: [[TMP3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* @a) -; ARGPROMOTION-NEXT: ret void +; ARGPROMOTION-NEXT: [[BC:%.*]] = bitcast %struct.s* [[B:%.*]] to %union.u* +; ARGPROMOTION-NEXT: [[V0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 [[BC]]) +; ARGPROMOTION-NEXT: store i8 [[V0]], i8* [[I8:%.*]] +; ARGPROMOTION-NEXT: [[BC_0:%.*]] = getelementptr 
[[UNION_U:%.*]], %union.u* [[BC]], i32 0, i32 0 +; ARGPROMOTION-NEXT: [[BC_0_VAL:%.*]] = load x86_fp80, x86_fp80* [[BC_0]] +; ARGPROMOTION-NEXT: [[V1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(x86_fp80 [[BC_0_VAL]]) +; ARGPROMOTION-NEXT: [[V2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* [[A:%.*]]) +; ARGPROMOTION-NEXT: store i64 [[V2]], i64* [[I64A:%.*]] +; ARGPROMOTION-NEXT: [[V3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* [[A]]) +; ARGPROMOTION-NEXT: store i64 [[V3]], i64* [[I64B:%.*]] +; ARGPROMOTION-NEXT: ret x86_fp80 [[V1]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@run +; ATTRIBUTOR-SAME: (%struct.Foo* [[A:%.*]], %struct.s* nocapture readonly [[B:%.*]], i8* nocapture writeonly [[I8:%.*]], i64* nocapture writeonly [[I64A:%.*]], i64* nocapture readnone [[I64B:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[BC:%.*]] = bitcast %struct.s* [[B:%.*]] to %union.u* +; ATTRIBUTOR-NEXT: [[V0:%.*]] = tail call i8 @UseLongDoubleUnsafely(%union.u* nocapture readonly byval align 16 [[BC]]) +; ATTRIBUTOR-NEXT: store i8 [[V0]], i8* [[I8:%.*]] +; ATTRIBUTOR-NEXT: [[V1:%.*]] = tail call x86_fp80 @UseLongDoubleSafely(%union.u* nocapture readonly byval align 16 [[BC]]) +; ATTRIBUTOR-NEXT: [[V2:%.*]] = call i64 @AccessPaddingOfStruct(%struct.Foo* nocapture [[A:%.*]]) +; ATTRIBUTOR-NEXT: store i64 [[V2]], i64* [[I64A:%.*]] +; ATTRIBUTOR-NEXT: [[V3:%.*]] = call i64 @CaptureAStruct(%struct.Foo* [[A]]) +; ATTRIBUTOR-NEXT: unreachable ; entry: - tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) - tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*)) - call i64 @AccessPaddingOfStruct(%struct.Foo* @a) - call i64 @CaptureAStruct(%struct.Foo* @a) - ret void + %bc = bitcast %struct.s* %b to %union.u* + %v0 = tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %bc) + store i8 %v0, i8* %i8 + %v1 = tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %bc) 
+ %v2 = call i64 @AccessPaddingOfStruct(%struct.Foo* %a) + store i64 %v2, i64* %i64a + %v3 = call i64 @CaptureAStruct(%struct.Foo* %a) + store i64 %v3, i64* %i64b + ret x86_fp80 %v1 } define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) { @@ -41,6 +59,14 @@ ; ARGPROMOTION-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] ; ARGPROMOTION-NEXT: ret i8 [[RESULT]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely +; ATTRIBUTOR-SAME: (%union.u* nocapture readonly byval align 16 [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[BITCAST:%.*]] = bitcast %union.u* [[ARG:%.*]] to %struct.s* +; ATTRIBUTOR-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.s* [[BITCAST]], i64 0, i32 2 +; ATTRIBUTOR-NEXT: [[RESULT:%.*]] = load i8, i8* [[GEP]] +; ATTRIBUTOR-NEXT: ret i8 [[RESULT]] +; entry: %bitcast = bitcast %union.u* %arg to %struct.s* %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2 @@ -57,6 +83,12 @@ ; ARGPROMOTION-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U]], %union.u* [[ARG]], i64 0, i32 0 ; ARGPROMOTION-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]] ; ARGPROMOTION-NEXT: ret x86_fp80 [[FP80]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@UseLongDoubleSafely +; ATTRIBUTOR-SAME: (%union.u* nocapture readonly byval align 16 [[ARG:%.*]]) +; ATTRIBUTOR-NEXT: [[GEP:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.u* [[ARG:%.*]], i64 0, i32 0 +; ATTRIBUTOR-NEXT: [[FP80:%.*]] = load x86_fp80, x86_fp80* [[GEP]], align 16 +; ATTRIBUTOR-NEXT: ret x86_fp80 [[FP80]] ; %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0 %fp80 = load x86_fp80, x86_fp80* %gep @@ -69,6 +101,12 @@ ; ARGPROMOTION-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A:%.*]] to i64* ; ARGPROMOTION-NEXT: [[V:%.*]] = load i64, i64* [[P]] ; ARGPROMOTION-NEXT: ret i64 [[V]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@AccessPaddingOfStruct +; ATTRIBUTOR-SAME: (%struct.Foo* nocapture readonly byval [[A:%.*]]) +; 
ATTRIBUTOR-NEXT: [[P:%.*]] = bitcast %struct.Foo* [[A:%.*]] to i64* +; ATTRIBUTOR-NEXT: [[V:%.*]] = load i64, i64* [[P]] +; ATTRIBUTOR-NEXT: ret i64 [[V]] ; %p = bitcast %struct.Foo* %a to i64* %v = load i64, i64* %p @@ -88,6 +126,18 @@ ; ARGPROMOTION-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 ; ARGPROMOTION-NEXT: br label [[LOOP]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@CaptureAStruct +; ATTRIBUTOR-SAME: (%struct.Foo* byval [[A:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[A_PTR:%.*]] = alloca %struct.Foo* +; ATTRIBUTOR-NEXT: br label [[LOOP:%.*]] +; ATTRIBUTOR: loop: +; ATTRIBUTOR-NEXT: [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ] +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = phi %struct.Foo* [ [[A:%.*]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ] +; ATTRIBUTOR-NEXT: store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8 +; ATTRIBUTOR-NEXT: [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0 +; ATTRIBUTOR-NEXT: br label [[LOOP]] +; entry: %a_ptr = alloca %struct.Foo* br label %loop diff --git a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll @@ -1,52 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify 
-attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" %struct.ss = type { i32, i32 } -; Argpromote + sroa should change this to passing the two integers by value. -define internal i32 @f(%struct.ss* inalloca %s) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@f -; ARGPROMOTION-SAME: (i32 [[S_0_0_VAL:%.*]], i32 [[S_0_1_VAL:%.*]]) unnamed_addr -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[R:%.*]] = add i32 [[S_0_0_VAL:%.*]], [[S_0_1_VAL:%.*]] -; ARGPROMOTION-NEXT: ret i32 [[R]] -; -entry: - %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1 - %a = load i32, i32* %f0, align 4 - %b = load i32, i32* %f1, align 4 - %r = add i32 %a, %b - ret i32 %r -} - -define i32 @main() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@main() local_unnamed_addr -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[R:%.*]] = call fastcc i32 @f(i32 1, i32 2) -; ARGPROMOTION-NEXT: ret i32 [[R]] -; -entry: - %S = alloca inalloca %struct.ss - %f0 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 - %f1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 - store i32 1, i32* %f0, align 4 - store i32 2, i32* %f1, align 4 - %r = call i32 @f(%struct.ss* inalloca %S) - ret i32 %r -} - ; Argpromote can't promote %a because of the icmp use. 
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { ; ARGPROMOTION-LABEL: define {{[^@]+}}@g -; ARGPROMOTION-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr #0 +; ARGPROMOTION-SAME: (%struct.ss* [[A:%.*]], %struct.ss* [[B:%.*]]) unnamed_addr ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: [[C:%.*]] = icmp eq %struct.ss* [[A:%.*]], [[B:%.*]] ; ARGPROMOTION-NEXT: ret i1 [[C]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@g +; ATTRIBUTOR-SAME: (%struct.ss* nonnull readnone align 4 dereferenceable(8) [[A:%.*]], %struct.ss* inalloca nonnull writeonly align 4 dereferenceable(8) [[B:%.*]]) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[C:%.*]] = icmp eq %struct.ss* [[A:%.*]], [[B:%.*]] +; ATTRIBUTOR-NEXT: ret i1 [[C]] +; entry: %c = icmp eq %struct.ss* %a, %b ret i1 %c @@ -59,6 +33,12 @@ ; ARGPROMOTION-NEXT: [[C:%.*]] = call fastcc i1 @g(%struct.ss* [[S]], %struct.ss* [[S]]) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]] +; ATTRIBUTOR-NEXT: [[C:%.*]] = call i1 @g(%struct.ss* nonnull align 4 dereferenceable(8) [[S]], %struct.ss* inalloca nonnull align 4 dereferenceable(8) [[S]]) +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %S = alloca inalloca %struct.ss %c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S) diff --git a/llvm/test/Transforms/ArgumentPromotion/invalidation.ll b/llvm/test/Transforms/ArgumentPromotion/invalidation.ll --- a/llvm/test/Transforms/ArgumentPromotion/invalidation.ll +++ b/llvm/test/Transforms/ArgumentPromotion/invalidation.ll @@ -7,7 +7,8 @@ ; invalidation this will crash in the second printer as it tries to reuse ; now-invalid demanded bits. 
; -; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -passes='function(print),cgscc(argpromotion,function(print))' -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR @G = constant i32 0 @@ -17,6 +18,11 @@ ; ARGPROMOTION-NEXT: entry: ; ARGPROMOTION-NEXT: ret i32 [[X_VAL:%.*]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@a() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = load i32, i32* @G, align 4 +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = load i32, i32* %x ret i32 %v @@ -29,6 +35,11 @@ ; ARGPROMOTION-NEXT: [[V:%.*]] = call i32 @a(i32 [[G_VAL]]) ; ARGPROMOTION-NEXT: ret i32 [[V]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@b() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V:%.*]] = call i32 @a() +; ATTRIBUTOR-NEXT: ret i32 [[V]] +; entry: %v = call i32 @a(i32* @G) ret i32 %v @@ -43,6 +54,13 @@ ; ARGPROMOTION-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] ; ARGPROMOTION-NEXT: ret i32 [[RESULT]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@c() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[V1:%.*]] = call i32 @a() +; ATTRIBUTOR-NEXT: [[V2:%.*]] = call i32 @b() +; ATTRIBUTOR-NEXT: [[RESULT:%.*]] = add i32 [[V1]], [[V2]] +; ATTRIBUTOR-NEXT: ret i32 [[RESULT]] +; entry: %v1 = call i32 @a(i32* @G) %v2 = call i32 @b() diff --git a/llvm/test/Transforms/ArgumentPromotion/musttail.ll b/llvm/test/Transforms/ArgumentPromotion/musttail.ll --- a/llvm/test/Transforms/ArgumentPromotion/musttail.ll +++ b/llvm/test/Transforms/ArgumentPromotion/musttail.ll @@ -1,20 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s 
--check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR36543 ; Don't promote arguments of musttail callee %T = type { i32, i32, i32, i32 } -define internal i32 @test(%T* %p) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@test -; ARGPROMOTION-SAME: (%T* [[P:%.*]]) -; ARGPROMOTION-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P:%.*]], i64 0, i32 3 -; ARGPROMOTION-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 -; ARGPROMOTION-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] -; ARGPROMOTION-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] -; ARGPROMOTION-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] -; ARGPROMOTION-NEXT: ret i32 [[V]] +define internal i32 @test(%T* nocapture readonly %p) { +; ALL-LABEL: define {{[^@]+}}@test +; ALL-SAME: (%T* nocapture readonly [[P:%.*]]) +; ALL-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P:%.*]], i64 0, i32 3 +; ALL-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; ALL-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; ALL-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; ALL-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; ALL-NEXT: ret i32 [[V]] ; %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 @@ -29,6 +30,11 @@ ; ARGPROMOTION-SAME: (%T* [[P:%.*]]) ; ARGPROMOTION-NEXT: [[V:%.*]] = musttail call i32 @test(%T* [[P:%.*]]) ; ARGPROMOTION-NEXT: ret i32 [[V]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller +; ATTRIBUTOR-SAME: (%T* nocapture readonly [[P:%.*]]) +; ATTRIBUTOR-NEXT: [[V:%.*]] = musttail call i32 @test(%T* nocapture readonly [[P:%.*]]) +; ATTRIBUTOR-NEXT: ret i32 [[V]] ; %v = musttail call i32 @test(%T* %p) ret i32 %v @@ -40,23 +46,37 @@ ; ARGPROMOTION-LABEL: define {{[^@]+}}@foo ; ARGPROMOTION-SAME: (%T* [[P:%.*]], i32 [[V:%.*]]) ; ARGPROMOTION-NEXT: ret i32 0 +; +; 
ATTRIBUTOR-LABEL: define {{[^@]+}}@foo +; ATTRIBUTOR-SAME: (%T* nocapture readnone [[P:%.*]], i32 [[V:%.*]]) +; ATTRIBUTOR-NEXT: ret i32 0 ; ret i32 0 } -define internal i32 @test2(%T* %p, i32 %p2) { +define internal i32 @test2(%T* %QQQQ, i32 %p2) { ; ARGPROMOTION-LABEL: define {{[^@]+}}@test2 -; ARGPROMOTION-SAME: (%T* [[P:%.*]], i32 [[P2:%.*]]) -; ARGPROMOTION-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P:%.*]], i64 0, i32 3 -; ARGPROMOTION-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[P]], i64 0, i32 2 +; ARGPROMOTION-SAME: (%T* [[QQQQ:%.*]], i32 [[P2:%.*]]) +; ARGPROMOTION-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[QQQQ:%.*]], i64 0, i32 3 +; ARGPROMOTION-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[QQQQ]], i64 0, i32 2 ; ARGPROMOTION-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] ; ARGPROMOTION-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] ; ARGPROMOTION-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] ; ARGPROMOTION-NEXT: [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 [[V]]) ; ARGPROMOTION-NEXT: ret i32 [[CA]] ; - %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 - %b.gep = getelementptr %T, %T* %p, i64 0, i32 2 +; ATTRIBUTOR-LABEL: define {{[^@]+}}@test2 +; ATTRIBUTOR-SAME: (%T* nocapture readonly [[QQQQ:%.*]], i32 [[P2:%.*]]) +; ATTRIBUTOR-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[QQQQ:%.*]], i64 0, i32 3 +; ATTRIBUTOR-NEXT: [[B_GEP:%.*]] = getelementptr [[T]], %T* [[QQQQ]], i64 0, i32 2 +; ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]] +; ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]] +; ATTRIBUTOR-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] +; ATTRIBUTOR-NEXT: [[CA:%.*]] = musttail call i32 @foo(%T* undef, i32 undef) +; ATTRIBUTOR-NEXT: ret i32 [[CA]] +; + %a.gep = getelementptr %T, %T* %QQQQ, i64 0, i32 3 + %b.gep = getelementptr %T, %T* %QQQQ, i64 0, i32 2 %a = load i32, i32* %a.gep %b = load i32, i32* %b.gep %v = add i32 %a, %b @@ -69,6 +89,11 @@ ; ARGPROMOTION-SAME: (%T* [[G:%.*]]) ; ARGPROMOTION-NEXT: [[V:%.*]] = call 
i32 @test2(%T* [[G:%.*]], i32 0) ; ARGPROMOTION-NEXT: ret i32 [[V]] +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@caller2 +; ATTRIBUTOR-SAME: (%T* nocapture readonly [[G:%.*]]) +; ATTRIBUTOR-NEXT: [[V:%.*]] = call i32 @test2(%T* nocapture readonly [[G:%.*]], i32 undef) +; ATTRIBUTOR-NEXT: ret i32 0 ; %v = call i32 @test2(%T* %g, i32 0) ret i32 %v diff --git a/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll b/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll --- a/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll +++ b/llvm/test/Transforms/ArgumentPromotion/naked_functions.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Don't promote paramaters of/arguments to naked functions @@ -11,18 +12,23 @@ ; ARGPROMOTION-NEXT: [[CALL:%.*]] = call i32 @foo(i32* @g) ; ARGPROMOTION-NEXT: ret i32 [[CALL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call i32 @foo(i32* nonnull align 4 dereferenceable(4) @g) +; ATTRIBUTOR-NEXT: ret i32 [[CALL]] +; entry: %call = call i32 @foo(i32* @g) ret i32 %call } define internal i32 @foo(i32*) #0 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo -; ARGPROMOTION-SAME: (i32* [[TMP0:%.*]]) #0 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; ARGPROMOTION-NEXT: call void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() -; ARGPROMOTION-NEXT: unreachable +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i32* [[TMP0:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +; ALL-NEXT: call void asm sideeffect "ldr 
r0, [r0] \0Abx lr \0A", ""() +; ALL-NEXT: unreachable ; entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll --- a/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll +++ b/llvm/test/Transforms/ArgumentPromotion/nonzero-address-spaces.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; ArgumentPromotion should preserve the default function address space ; from the data layout. @@ -11,21 +12,27 @@ define i32 @bar() { ; ARGPROMOTION-LABEL: define {{[^@]+}}@bar() addrspace(1) ; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo() +; ARGPROMOTION-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo(i32* @g) ; ARGPROMOTION-NEXT: ret i32 [[CALL]] ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar() addrspace(1) +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[CALL:%.*]] = call addrspace(1) i32 @foo(i32* nonnull align 4 dereferenceable(4) @g) +; ATTRIBUTOR-NEXT: ret i32 [[CALL]] +; entry: %call = call i32 @foo(i32* @g) ret i32 %call } -define internal i32 @foo(i32*) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() addrspace(1) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; ARGPROMOTION-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() -; ARGPROMOTION-NEXT: unreachable +define internal i32 @foo(i32*) naked { +; ALL-LABEL: define {{[^@]+}}@foo +; ALL-SAME: (i32* [[TMP0:%.*]]) addrspace(1) +; ALL-NEXT: entry: +; ALL-NEXT: [[RETVAL:%.*]] = alloca 
i32, align 4 +; ALL-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""() +; ALL-NEXT: unreachable ; entry: %retval = alloca i32, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/pr27568.ll b/llvm/test/Transforms/ArgumentPromotion/pr27568.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr27568.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr27568.ll @@ -1,14 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt -S -argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; RUN: opt -S -debugify -o /dev/null < %s target triple = "x86_64-pc-windows-msvc" define internal void @callee(i8*) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee() -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: call void @thunk() -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@callee() +; ALL-NEXT: entry: +; ALL-NEXT: call void @thunk() +; ALL-NEXT: ret void ; entry: call void @thunk() @@ -16,16 +17,16 @@ } define void @test1() personality i32 (...)* @__CxxFrameHandler3 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: invoke void @thunk() -; ARGPROMOTION-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] -; ARGPROMOTION: out: -; ARGPROMOTION-NEXT: ret void -; ARGPROMOTION: cpad: -; ARGPROMOTION-NEXT: [[PAD:%.*]] = cleanuppad within none [] -; ARGPROMOTION-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] -; ARGPROMOTION-NEXT: cleanupret from [[PAD]] 
unwind to caller +; ALL-LABEL: define {{[^@]+}}@test1() personality i32 (...)* @__CxxFrameHandler3 +; ALL-NEXT: entry: +; ALL-NEXT: invoke void @thunk() +; ALL-NEXT: to label [[OUT:%.*]] unwind label [[CPAD:%.*]] +; ALL: out: +; ALL-NEXT: ret void +; ALL: cpad: +; ALL-NEXT: [[PAD:%.*]] = cleanuppad within none [] +; ALL-NEXT: call void @callee() [ "funclet"(token [[PAD]]) ] +; ALL-NEXT: cleanupret from [[PAD]] unwind to caller ; entry: invoke void @thunk() diff --git a/llvm/test/Transforms/ArgumentPromotion/pr3085.ll b/llvm/test/Transforms/ArgumentPromotion/pr3085.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr3085.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr3085.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -disable-output -loop-extract-single -loop-rotate -loop-reduce -argpromotion +; RUN: opt -disable-output -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s ; PR 3085 %struct.Lit = type { i8 } diff --git a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr32917.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr32917.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR 32917 @b = common local_unnamed_addr global i32 0, align 4 @@ -14,6 +15,13 @@ ; ARGPROMOTION-NEXT: [[DOTIDX_VAL:%.*]] = load i32, i32* [[DOTIDX]], align 4 ; ARGPROMOTION-NEXT: call fastcc void @fn1(i32 [[DOTIDX_VAL]]) ; ARGPROMOTION-NEXT: ret i32 
undef +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn2() local_unnamed_addr +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* @b, align 4 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32* +; ATTRIBUTOR-NEXT: call fastcc void @fn1(i32* [[TMP3]]) +; ATTRIBUTOR-NEXT: ret i32 undef ; %1 = load i32, i32* @b, align 4 %2 = sext i32 %1 to i64 @@ -27,6 +35,13 @@ ; ARGPROMOTION-SAME: (i32 [[DOT18446744073709551615_VAL:%.*]]) unnamed_addr ; ARGPROMOTION-NEXT: store i32 [[DOT18446744073709551615_VAL:%.*]], i32* @a, align 4 ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn1 +; ATTRIBUTOR-SAME: (i32* nocapture readonly [[TMP0:%.*]]) unnamed_addr +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0:%.*]], i64 -1 +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; ATTRIBUTOR-NEXT: store i32 [[TMP3]], i32* @a, align 4 +; ATTRIBUTOR-NEXT: ret void ; %2 = getelementptr inbounds i32, i32* %0, i64 -1 %3 = load i32, i32* %2, align 4 diff --git a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll --- a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt -argpromotion -verify -dse -S %s -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='function(dse),attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Fix for PR33641. 
ArgumentPromotion removed the argument to bar but left the call to ; dbg.value which still used the removed argument. @@ -7,12 +8,13 @@ ; The %p argument should be removed, and the use of it in dbg.value should be ; changed to undef. + %p_t = type i16* %fun_t = type void (%p_t)* define void @foo() { -; ARGPROMOTION-LABEL: define {{[^@]+}}@foo() -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@foo() +; ALL-NEXT: ret void ; %tmp = alloca %fun_t store %fun_t @bar, %fun_t* %tmp diff --git a/llvm/test/Transforms/ArgumentPromotion/profile.ll b/llvm/test/Transforms/ArgumentPromotion/profile.ll --- a/llvm/test/Transforms/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/ArgumentPromotion/profile.ll @@ -1,13 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -argpromotion -mem2reg -S < %s | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt -argpromotion -mem2reg -S < %s | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(mem2reg)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" ; Checks if !prof metadata is corret in deadargelim. 
define void @caller() #0 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@caller() -; ARGPROMOTION-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@caller() +; ALL-NEXT: call void @promote_i32_ptr(i32 42), !prof !0 +; ALL-NEXT: ret void ; %x = alloca i32 store i32 42, i32* %x @@ -16,10 +17,10 @@ } define internal void @promote_i32_ptr(i32* %xp) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@promote_i32_ptr -; ARGPROMOTION-SAME: (i32 [[XP_VAL:%.*]]) -; ARGPROMOTION-NEXT: call void @use_i32(i32 [[XP_VAL:%.*]]) -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@promote_i32_ptr +; ALL-SAME: (i32 [[TMP0:%.*]]) +; ALL-NEXT: call void @use_i32(i32 [[TMP0:%.*]]) +; ALL-NEXT: ret void ; %x = load i32, i32* %xp call void @use_i32(i32 %x) diff --git a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll --- a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll +++ b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll @@ -1,12 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR17906 ; When we promote two arguments in a single function with different types, ; before the fix, we used the same tag for the newly-created two loads. ; This testing case makes sure that we correctly transfer the tbaa tags from the ; original loads to the newly-created loads when promoting pointer arguments. 
+; +; TODO: This test doesn't work with the ATTRIBUTOR as it will not promote the arguments but propagate them. @a = global i32* null, align 8 @e = global i32** @a, align 8 @@ -23,6 +26,15 @@ ; ARGPROMOTION-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !0 ; ARGPROMOTION-NEXT: ret void ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@fn() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i64, i64* @c, align 8, !tbaa !0 +; ATTRIBUTOR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* @g, align 4, !tbaa !4 +; ATTRIBUTOR-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP1]] to i8 +; ATTRIBUTOR-NEXT: store i8 [[CONV1]], i8* @d, align 1, !tbaa !6 +; ATTRIBUTOR-NEXT: ret void +; entry: %0 = load i64, i64* %p2, align 8, !tbaa !1 %conv = trunc i64 %0 to i32 @@ -44,6 +56,15 @@ ; ARGPROMOTION-NEXT: call fastcc void @fn(i32 [[G_VAL]], i64 [[C_VAL]]) ; ARGPROMOTION-NEXT: ret i32 0 ; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@main() +; ATTRIBUTOR-NEXT: entry: +; ATTRIBUTOR-NEXT: [[TMP0:%.*]] = load i32**, i32*** @e, align 8, !tbaa !7 +; ATTRIBUTOR-NEXT: store i32* @g, i32** [[TMP0]], align 8, !tbaa !7 +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa !7 +; ATTRIBUTOR-NEXT: store i32 1, i32* [[TMP1]], align 4, !tbaa !4 +; ATTRIBUTOR-NEXT: call fastcc void @fn() +; ATTRIBUTOR-NEXT: ret i32 0 +; entry: %0 = load i32**, i32*** @e, align 8, !tbaa !8 store i32* @g, i32** %0, align 8, !tbaa !8 diff --git a/llvm/test/Transforms/ArgumentPromotion/sret.ll b/llvm/test/Transforms/ArgumentPromotion/sret.ll --- a/llvm/test/Transforms/ArgumentPromotion/sret.ll +++ b/llvm/test/Transforms/ArgumentPromotion/sret.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s 
--check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc" @@ -11,6 +12,16 @@ ; ARGPROMOTION-NEXT: [[AB:%.*]] = add i32 [[THIS_0_0_VAL:%.*]], [[THIS_0_1_VAL:%.*]] ; ARGPROMOTION-NEXT: store i32 [[AB]], i32* [[R:%.*]] ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@add +; ATTRIBUTOR-SAME: ({ i32, i32 }* noalias nocapture nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]]) +; ATTRIBUTOR-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS:%.*]], i32 0, i32 0 +; ATTRIBUTOR-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8 +; ATTRIBUTOR-NEXT: [[B:%.*]] = load i32, i32* [[BP]] +; ATTRIBUTOR-NEXT: [[AB:%.*]] = add i32 [[A]], [[B]] +; ATTRIBUTOR-NEXT: store i32 [[AB]], i32* [[R:%.*]], align 4 +; ATTRIBUTOR-NEXT: ret void ; %ap = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 0 %bp = getelementptr {i32, i32}, {i32, i32}* %this, i32 0, i32 1 @@ -31,6 +42,12 @@ ; ARGPROMOTION-NEXT: [[PAIR_IDX1_VAL:%.*]] = load i32, i32* [[PAIR_IDX1]] ; ARGPROMOTION-NEXT: call void @add(i32 [[PAIR_IDX_VAL]], i32 [[PAIR_IDX1_VAL]], i32* noalias [[R]]) ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@f() +; ATTRIBUTOR-NEXT: [[R:%.*]] = alloca i32 +; ATTRIBUTOR-NEXT: [[PAIR:%.*]] = alloca { i32, i32 } +; ATTRIBUTOR-NEXT: call void @add({ i32, i32 }* noalias nocapture nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nonnull sret writeonly align 4 
dereferenceable(4) [[R]]) +; ATTRIBUTOR-NEXT: ret void ; %r = alloca i32 %pair = alloca {i32, i32} diff --git a/llvm/test/Transforms/ArgumentPromotion/tail.ll b/llvm/test/Transforms/ArgumentPromotion/tail.ll --- a/llvm/test/Transforms/ArgumentPromotion/tail.ll +++ b/llvm/test/Transforms/ArgumentPromotion/tail.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt %s -argpromotion -S -o - | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt %s -passes=argpromotion -S -o - | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt %s -argpromotion -sroa -S -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt %s -passes='argpromotion,function(sroa)' -S -o - | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes='attributor,function(sroa)' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; PR14710 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -19,6 +20,16 @@ ; ARGPROMOTION-NEXT: store i32 [[DATA_1:%.*]], i32* [[DOT1]] ; ARGPROMOTION-NEXT: [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]]) ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@bar +; ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) +; ATTRIBUTOR-NEXT: [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]] +; ATTRIBUTOR-NEXT: [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32* +; ATTRIBUTOR-NEXT: store i32 [[TMP0:%.*]], i32* [[DATA_PRIV_CAST]] +; ATTRIBUTOR-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: store i32 [[TMP1:%.*]], i32* [[DATA_PRIV_0_1]] +; ATTRIBUTOR-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* nonnull [[DATA_PRIV]]) +; ATTRIBUTOR-NEXT: ret void ; tail call i8* @foo(%pair* %Data) ret void @@ -33,6 +44,15 @@ ; ARGPROMOTION-NEXT: [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]] ; ARGPROMOTION-NEXT: call void @bar(i32 
[[DATA_0_VAL]], i32 [[DATA_1_VAL]]) ; ARGPROMOTION-NEXT: ret void +; +; ATTRIBUTOR-LABEL: define {{[^@]+}}@zed +; ATTRIBUTOR-SAME: (%pair* byval [[DATA:%.*]]) +; ATTRIBUTOR-NEXT: [[DATA_CAST:%.*]] = bitcast %pair* [[DATA:%.*]] to i32* +; ATTRIBUTOR-NEXT: [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]] +; ATTRIBUTOR-NEXT: [[DATA_0_1:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 1 +; ATTRIBUTOR-NEXT: [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]] +; ATTRIBUTOR-NEXT: call void @bar(i32 [[TMP1]], i32 [[TMP2]]) +; ATTRIBUTOR-NEXT: ret void ; call void @bar(%pair* byval %Data) ret void diff --git a/llvm/test/Transforms/ArgumentPromotion/variadic.ll b/llvm/test/Transforms/ArgumentPromotion/variadic.ll --- a/llvm/test/Transforms/ArgumentPromotion/variadic.ll +++ b/llvm/test/Transforms/ArgumentPromotion/variadic.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION -; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefix=ARGPROMOTION +; RUN: opt < %s -argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s --check-prefixes=ALL,ARGPROMOTION +; RUN: opt -S -passes=attributor -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=1 < %s | FileCheck %s --check-prefixes=ALL,ATTRIBUTOR ; Unused arguments from variadic functions cannot be eliminated as that changes ; their classiciation according to the SysV amd64 ABI. Clang and other frontends @@ -16,11 +17,11 @@ ; Function Attrs: nounwind uwtable define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 { -; ARGPROMOTION-LABEL: define {{[^@]+}}@main -; ARGPROMOTION-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]]) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) 
@callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) -; ARGPROMOTION-NEXT: ret i32 0 +; ALL-LABEL: define {{[^@]+}}@main +; ALL-SAME: (i32 [[ARGC:%.*]], i8** nocapture readnone [[ARGV:%.*]]) +; ALL-NEXT: entry: +; ALL-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) +; ALL-NEXT: ret i32 0 ; entry: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45) @@ -29,10 +30,10 @@ ; Function Attrs: nounwind uwtable define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) { -; ARGPROMOTION-LABEL: define {{[^@]+}}@callee_t0f -; ARGPROMOTION-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...) -; ARGPROMOTION-NEXT: entry: -; ARGPROMOTION-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@callee_t0f +; ALL-SAME: (i8* nocapture readnone [[TP13:%.*]], i8* nocapture readnone [[TP14:%.*]], i8* nocapture readnone [[TP15:%.*]], i8* nocapture readnone [[TP16:%.*]], i8* nocapture readnone [[TP17:%.*]], ...) +; ALL-NEXT: entry: +; ALL-NEXT: ret void ; entry: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/callbacks.ll b/llvm/test/Transforms/FunctionAttrs/callbacks.ll --- a/llvm/test/Transforms/FunctionAttrs/callbacks.ll +++ b/llvm/test/Transforms/FunctionAttrs/callbacks.ll @@ -22,7 +22,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 undef, i32** noalias nocapture nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A:%.*]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -73,7 +73,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture [[A:%.*]], i64 undef, i32** noalias nocapture nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture [[A:%.*]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -123,7 +123,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: store i32 42, i32* [[B]], align 32 ; CHECK-NEXT: store i32* [[B]], i32** [[C]], align 64 -; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture [[A:%.*]], i64 undef, i32** noalias nocapture nonnull align 64 dereferenceable(8) [[C]]) +; CHECK-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture [[A:%.*]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; CHECK-NEXT: ret void ; entry: